Repository: ClownsharkBatwing/RES4LYF
Branch: main
Commit: 0dc91c00c4c3
Files: 134
Total size: 3.9 MB
Directory structure:
gitextract__f8xgulp/
├── .gitignore
├── LICENSE
├── README.md
├── __init__.py
├── attention_masks.py
├── aura/
│ └── mmdit.py
├── beta/
│ ├── __init__.py
│ ├── constants.py
│ ├── deis_coefficients.py
│ ├── noise_classes.py
│ ├── phi_functions.py
│ ├── rk_coefficients_beta.py
│ ├── rk_guide_func_beta.py
│ ├── rk_method_beta.py
│ ├── rk_noise_sampler_beta.py
│ ├── rk_sampler_beta.py
│ ├── samplers.py
│ └── samplers_extensions.py
├── chroma/
│ ├── layers.py
│ ├── math.py
│ └── model.py
├── conditioning.py
├── example_workflows/
│ ├── chroma regional antiblur.json
│ ├── chroma txt2img.json
│ ├── comparison ksampler vs csksampler chain workflows.json
│ ├── flux faceswap sync pulid.json
│ ├── flux faceswap sync.json
│ ├── flux faceswap.json
│ ├── flux inpaint area.json
│ ├── flux inpaint bongmath.json
│ ├── flux inpainting.json
│ ├── flux regional antiblur.json
│ ├── flux regional redux (2 zone).json
│ ├── flux regional redux (3 zone, nested).json
│ ├── flux regional redux (3 zone, overlapping).json
│ ├── flux regional redux (3 zones).json
│ ├── flux style antiblur.json
│ ├── flux style transfer gguf.json
│ ├── flux upscale thumbnail large multistage.json
│ ├── flux upscale thumbnail large.json
│ ├── flux upscale thumbnail widescreen.json
│ ├── hidream guide data projection.json
│ ├── hidream guide epsilon projection.json
│ ├── hidream guide flow.json
│ ├── hidream guide fully_pseudoimplicit.json
│ ├── hidream guide lure.json
│ ├── hidream guide pseudoimplicit.json
│ ├── hidream hires fix.json
│ ├── hidream regional 3 zones.json
│ ├── hidream regional antiblur.json
│ ├── hidream style antiblur.json
│ ├── hidream style transfer txt2img.json
│ ├── hidream style transfer v2.json
│ ├── hidream style transfer.json
│ ├── hidream txt2img.json
│ ├── hidream unsampling data WF.json
│ ├── hidream unsampling data.json
│ ├── hidream unsampling pseudoimplicit.json
│ ├── hidream unsampling.json
│ ├── intro to clownsampling.json
│ ├── sd35 medium unsampling data.json
│ ├── sd35 medium unsampling.json
│ ├── sdxl regional antiblur.json
│ ├── sdxl style transfer.json
│ ├── style transfer.json
│ ├── ultracascade txt2img style transfer.json
│ ├── ultracascade txt2img.json
│ ├── wan img2vid 720p (fp8 fast).json
│ ├── wan txt2img (fp8 fast).json
│ └── wan vid2vid.json
├── flux/
│ ├── controlnet.py
│ ├── layers.py
│ ├── math.py
│ ├── model.py
│ └── redux.py
├── helper.py
├── helper_sigma_preview_image_preproc.py
├── hidream/
│ └── model.py
├── images.py
├── latent_images.py
├── latents.py
├── legacy/
│ ├── __init__.py
│ ├── conditioning.py
│ ├── constants.py
│ ├── deis_coefficients.py
│ ├── flux/
│ │ ├── controlnet.py
│ │ ├── layers.py
│ │ ├── math.py
│ │ ├── model.py
│ │ └── redux.py
│ ├── helper.py
│ ├── latents.py
│ ├── legacy_sampler_rk.py
│ ├── legacy_samplers.py
│ ├── models.py
│ ├── noise_classes.py
│ ├── noise_sigmas_timesteps_scaling.py
│ ├── phi_functions.py
│ ├── rk_coefficients.py
│ ├── rk_guide_func.py
│ ├── rk_method.py
│ ├── rk_sampler.py
│ ├── samplers.py
│ ├── samplers_extensions.py
│ ├── samplers_tiled.py
│ ├── sigmas.py
│ └── tiling.py
├── lightricks/
│ ├── model.py
│ ├── symmetric_patchifier.py
│ └── vae/
│ ├── causal_conv3d.py
│ ├── causal_video_autoencoder.py
│ ├── conv_nd_factory.py
│ ├── dual_conv3d.py
│ └── pixel_norm.py
├── loaders.py
├── misc_scripts/
│ └── replace_metadata.py
├── models.py
├── nodes_latents.py
├── nodes_misc.py
├── nodes_precision.py
├── requirements.txt
├── res4lyf.py
├── rk_method_beta.py
├── samplers_extensions.py
├── sd/
│ ├── attention.py
│ └── openaimodel.py
├── sd35/
│ └── mmdit.py
├── sigmas.py
├── style_transfer.py
├── wan/
│ ├── model.py
│ └── vae.py
└── web/
└── js/
├── RES4LYF_dynamicWidgets.js
├── conditioningToBase64.js
└── res4lyf.default.json
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitignore
================================================
__pycache__/
.idea/
.vscode/
.tmp
.cache
tests/
/*.json
*.config.json
================================================
FILE: LICENSE
================================================
The use of this software or any derivative work for the purpose of
providing a commercial service, such as (but not limited to) an
AI image generation service, is strictly prohibited without obtaining
permission and/or a separate commercial license from the copyright holder.
This includes any service that charges users directly or indirectly for
access to this software's functionality, whether standalone or integrated
into a larger product.
GNU AFFERO GENERAL PUBLIC LICENSE
Version 3, 19 November 2007
Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The GNU Affero General Public License is a free, copyleft license for
software and other kinds of works, specifically designed to ensure
cooperation with the community in the case of network server software.
The licenses for most software and other practical works are designed
to take away your freedom to share and change the works. By contrast,
our General Public Licenses are intended to guarantee your freedom to
share and change all versions of a program--to make sure it remains free
software for all its users.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
them if you wish), that you receive source code or can get it if you
want it, that you can change the software or use pieces of it in new
free programs, and that you know you can do these things.
Developers that use our General Public Licenses protect your rights
with two steps: (1) assert copyright on the software, and (2) offer
you this License which gives you legal permission to copy, distribute
and/or modify the software.
A secondary benefit of defending all users' freedom is that
improvements made in alternate versions of the program, if they
receive widespread use, become available for other developers to
incorporate. Many developers of free software are heartened and
encouraged by the resulting cooperation. However, in the case of
software used on network servers, this result may fail to come about.
The GNU General Public License permits making a modified version and
letting the public access it on a server without ever releasing its
source code to the public.
The GNU Affero General Public License is designed specifically to
ensure that, in such cases, the modified source code becomes available
to the community. It requires the operator of a network server to
provide the source code of the modified version running there to the
users of that server. Therefore, public use of a modified version, on
a publicly accessible server, gives the public access to the source
code of the modified version.
An older license, called the Affero General Public License and
published by Affero, was designed to accomplish similar goals. This is
a different license, not a version of the Affero GPL, but Affero has
released a new version of the Affero GPL which permits relicensing under
this license.
The precise terms and conditions for copying, distribution and
modification follow.
TERMS AND CONDITIONS
0. Definitions.
"This License" refers to version 3 of the GNU Affero General Public License.
"Copyright" also means copyright-like laws that apply to other kinds of
works, such as semiconductor masks.
"The Program" refers to any copyrightable work licensed under this
License. Each licensee is addressed as "you". "Licensees" and
"recipients" may be individuals or organizations.
To "modify" a work means to copy from or adapt all or part of the work
in a fashion requiring copyright permission, other than the making of an
exact copy. The resulting work is called a "modified version" of the
earlier work or a work "based on" the earlier work.
A "covered work" means either the unmodified Program or a work based
on the Program.
To "propagate" a work means to do anything with it that, without
permission, would make you directly or secondarily liable for
infringement under applicable copyright law, except executing it on a
computer or modifying a private copy. Propagation includes copying,
distribution (with or without modification), making available to the
public, and in some countries other activities as well.
To "convey" a work means any kind of propagation that enables other
parties to make or receive copies. Mere interaction with a user through
a computer network, with no transfer of a copy, is not conveying.
An interactive user interface displays "Appropriate Legal Notices"
to the extent that it includes a convenient and prominently visible
feature that (1) displays an appropriate copyright notice, and (2)
tells the user that there is no warranty for the work (except to the
extent that warranties are provided), that licensees may convey the
work under this License, and how to view a copy of this License. If
the interface presents a list of user commands or options, such as a
menu, a prominent item in the list meets this criterion.
1. Source Code.
The "source code" for a work means the preferred form of the work
for making modifications to it. "Object code" means any non-source
form of a work.
A "Standard Interface" means an interface that either is an official
standard defined by a recognized standards body, or, in the case of
interfaces specified for a particular programming language, one that
is widely used among developers working in that language.
The "System Libraries" of an executable work include anything, other
than the work as a whole, that (a) is included in the normal form of
packaging a Major Component, but which is not part of that Major
Component, and (b) serves only to enable use of the work with that
Major Component, or to implement a Standard Interface for which an
implementation is available to the public in source code form. A
"Major Component", in this context, means a major essential component
(kernel, window system, and so on) of the specific operating system
(if any) on which the executable work runs, or a compiler used to
produce the work, or an object code interpreter used to run it.
The "Corresponding Source" for a work in object code form means all
the source code needed to generate, install, and (for an executable
work) run the object code and to modify the work, including scripts to
control those activities. However, it does not include the work's
System Libraries, or general-purpose tools or generally available free
programs which are used unmodified in performing those activities but
which are not part of the work. For example, Corresponding Source
includes interface definition files associated with source files for
the work, and the source code for shared libraries and dynamically
linked subprograms that the work is specifically designed to require,
such as by intimate data communication or control flow between those
subprograms and other parts of the work.
The Corresponding Source need not include anything that users
can regenerate automatically from other parts of the Corresponding
Source.
The Corresponding Source for a work in source code form is that
same work.
2. Basic Permissions.
All rights granted under this License are granted for the term of
copyright on the Program, and are irrevocable provided the stated
conditions are met. This License explicitly affirms your unlimited
permission to run the unmodified Program. The output from running a
covered work is covered by this License only if the output, given its
content, constitutes a covered work. This License acknowledges your
rights of fair use or other equivalent, as provided by copyright law.
You may make, run and propagate covered works that you do not
convey, without conditions so long as your license otherwise remains
in force. You may convey covered works to others for the sole purpose
of having them make modifications exclusively for you, or provide you
with facilities for running those works, provided that you comply with
the terms of this License in conveying all material for which you do
not control copyright. Those thus making or running the covered works
for you must do so exclusively on your behalf, under your direction
and control, on terms that prohibit them from making any copies of
your copyrighted material outside their relationship with you.
Conveying under any other circumstances is permitted solely under
the conditions stated below. Sublicensing is not allowed; section 10
makes it unnecessary.
3. Protecting Users' Legal Rights From Anti-Circumvention Law.
No covered work shall be deemed part of an effective technological
measure under any applicable law fulfilling obligations under article
11 of the WIPO copyright treaty adopted on 20 December 1996, or
similar laws prohibiting or restricting circumvention of such
measures.
When you convey a covered work, you waive any legal power to forbid
circumvention of technological measures to the extent such circumvention
is effected by exercising rights under this License with respect to
the covered work, and you disclaim any intention to limit operation or
modification of the work as a means of enforcing, against the work's
users, your or third parties' legal rights to forbid circumvention of
technological measures.
4. Conveying Verbatim Copies.
You may convey verbatim copies of the Program's source code as you
receive it, in any medium, provided that you conspicuously and
appropriately publish on each copy an appropriate copyright notice;
keep intact all notices stating that this License and any
non-permissive terms added in accord with section 7 apply to the code;
keep intact all notices of the absence of any warranty; and give all
recipients a copy of this License along with the Program.
You may charge any price or no price for each copy that you convey,
and you may offer support or warranty protection for a fee.
5. Conveying Modified Source Versions.
You may convey a work based on the Program, or the modifications to
produce it from the Program, in the form of source code under the
terms of section 4, provided that you also meet all of these conditions:
a) The work must carry prominent notices stating that you modified
it, and giving a relevant date.
b) The work must carry prominent notices stating that it is
released under this License and any conditions added under section
7. This requirement modifies the requirement in section 4 to
"keep intact all notices".
c) You must license the entire work, as a whole, under this
License to anyone who comes into possession of a copy. This
License will therefore apply, along with any applicable section 7
additional terms, to the whole of the work, and all its parts,
regardless of how they are packaged. This License gives no
permission to license the work in any other way, but it does not
invalidate such permission if you have separately received it.
d) If the work has interactive user interfaces, each must display
Appropriate Legal Notices; however, if the Program has interactive
interfaces that do not display Appropriate Legal Notices, your
work need not make them do so.
A compilation of a covered work with other separate and independent
works, which are not by their nature extensions of the covered work,
and which are not combined with it such as to form a larger program,
in or on a volume of a storage or distribution medium, is called an
"aggregate" if the compilation and its resulting copyright are not
used to limit the access or legal rights of the compilation's users
beyond what the individual works permit. Inclusion of a covered work
in an aggregate does not cause this License to apply to the other
parts of the aggregate.
6. Conveying Non-Source Forms.
You may convey a covered work in object code form under the terms
of sections 4 and 5, provided that you also convey the
machine-readable Corresponding Source under the terms of this License,
in one of these ways:
a) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by the
Corresponding Source fixed on a durable physical medium
customarily used for software interchange.
b) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by a
written offer, valid for at least three years and valid for as
long as you offer spare parts or customer support for that product
model, to give anyone who possesses the object code either (1) a
copy of the Corresponding Source for all the software in the
product that is covered by this License, on a durable physical
medium customarily used for software interchange, for a price no
more than your reasonable cost of physically performing this
conveying of source, or (2) access to copy the
Corresponding Source from a network server at no charge.
c) Convey individual copies of the object code with a copy of the
written offer to provide the Corresponding Source. This
alternative is allowed only occasionally and noncommercially, and
only if you received the object code with such an offer, in accord
with subsection 6b.
d) Convey the object code by offering access from a designated
place (gratis or for a charge), and offer equivalent access to the
Corresponding Source in the same way through the same place at no
further charge. You need not require recipients to copy the
Corresponding Source along with the object code. If the place to
copy the object code is a network server, the Corresponding Source
may be on a different server (operated by you or a third party)
that supports equivalent copying facilities, provided you maintain
clear directions next to the object code saying where to find the
Corresponding Source. Regardless of what server hosts the
Corresponding Source, you remain obligated to ensure that it is
available for as long as needed to satisfy these requirements.
e) Convey the object code using peer-to-peer transmission, provided
you inform other peers where the object code and Corresponding
Source of the work are being offered to the general public at no
charge under subsection 6d.
A separable portion of the object code, whose source code is excluded
from the Corresponding Source as a System Library, need not be
included in conveying the object code work.
A "User Product" is either (1) a "consumer product", which means any
tangible personal property which is normally used for personal, family,
or household purposes, or (2) anything designed or sold for incorporation
into a dwelling. In determining whether a product is a consumer product,
doubtful cases shall be resolved in favor of coverage. For a particular
product received by a particular user, "normally used" refers to a
typical or common use of that class of product, regardless of the status
of the particular user or of the way in which the particular user
actually uses, or expects or is expected to use, the product. A product
is a consumer product regardless of whether the product has substantial
commercial, industrial or non-consumer uses, unless such uses represent
the only significant mode of use of the product.
"Installation Information" for a User Product means any methods,
procedures, authorization keys, or other information required to install
and execute modified versions of a covered work in that User Product from
a modified version of its Corresponding Source. The information must
suffice to ensure that the continued functioning of the modified object
code is in no case prevented or interfered with solely because
modification has been made.
If you convey an object code work under this section in, or with, or
specifically for use in, a User Product, and the conveying occurs as
part of a transaction in which the right of possession and use of the
User Product is transferred to the recipient in perpetuity or for a
fixed term (regardless of how the transaction is characterized), the
Corresponding Source conveyed under this section must be accompanied
by the Installation Information. But this requirement does not apply
if neither you nor any third party retains the ability to install
modified object code on the User Product (for example, the work has
been installed in ROM).
The requirement to provide Installation Information does not include a
requirement to continue to provide support service, warranty, or updates
for a work that has been modified or installed by the recipient, or for
the User Product in which it has been modified or installed. Access to a
network may be denied when the modification itself materially and
adversely affects the operation of the network or violates the rules and
protocols for communication across the network.
Corresponding Source conveyed, and Installation Information provided,
in accord with this section must be in a format that is publicly
documented (and with an implementation available to the public in
source code form), and must require no special password or key for
unpacking, reading or copying.
7. Additional Terms.
"Additional permissions" are terms that supplement the terms of this
License by making exceptions from one or more of its conditions.
Additional permissions that are applicable to the entire Program shall
be treated as though they were included in this License, to the extent
that they are valid under applicable law. If additional permissions
apply only to part of the Program, that part may be used separately
under those permissions, but the entire Program remains governed by
this License without regard to the additional permissions.
When you convey a copy of a covered work, you may at your option
remove any additional permissions from that copy, or from any part of
it. (Additional permissions may be written to require their own
removal in certain cases when you modify the work.) You may place
additional permissions on material, added by you to a covered work,
for which you have or can give appropriate copyright permission.
Notwithstanding any other provision of this License, for material you
add to a covered work, you may (if authorized by the copyright holders of
that material) supplement the terms of this License with terms:
a) Disclaiming warranty or limiting liability differently from the
terms of sections 15 and 16 of this License; or
b) Requiring preservation of specified reasonable legal notices or
author attributions in that material or in the Appropriate Legal
Notices displayed by works containing it; or
c) Prohibiting misrepresentation of the origin of that material, or
requiring that modified versions of such material be marked in
reasonable ways as different from the original version; or
d) Limiting the use for publicity purposes of names of licensors or
authors of the material; or
e) Declining to grant rights under trademark law for use of some
trade names, trademarks, or service marks; or
f) Requiring indemnification of licensors and authors of that
material by anyone who conveys the material (or modified versions of
it) with contractual assumptions of liability to the recipient, for
any liability that these contractual assumptions directly impose on
those licensors and authors.
All other non-permissive additional terms are considered "further
restrictions" within the meaning of section 10. If the Program as you
received it, or any part of it, contains a notice stating that it is
governed by this License along with a term that is a further
restriction, you may remove that term. If a license document contains
a further restriction but permits relicensing or conveying under this
License, you may add to a covered work material governed by the terms
of that license document, provided that the further restriction does
not survive such relicensing or conveying.
If you add terms to a covered work in accord with this section, you
must place, in the relevant source files, a statement of the
additional terms that apply to those files, or a notice indicating
where to find the applicable terms.
Additional terms, permissive or non-permissive, may be stated in the
form of a separately written license, or stated as exceptions;
the above requirements apply either way.
8. Termination.
You may not propagate or modify a covered work except as expressly
provided under this License. Any attempt otherwise to propagate or
modify it is void, and will automatically terminate your rights under
this License (including any patent licenses granted under the third
paragraph of section 11).
However, if you cease all violation of this License, then your
license from a particular copyright holder is reinstated (a)
provisionally, unless and until the copyright holder explicitly and
finally terminates your license, and (b) permanently, if the copyright
holder fails to notify you of the violation by some reasonable means
prior to 60 days after the cessation.
Moreover, your license from a particular copyright holder is
reinstated permanently if the copyright holder notifies you of the
violation by some reasonable means, this is the first time you have
received notice of violation of this License (for any work) from that
copyright holder, and you cure the violation prior to 30 days after
your receipt of the notice.
Termination of your rights under this section does not terminate the
licenses of parties who have received copies or rights from you under
this License. If your rights have been terminated and not permanently
reinstated, you do not qualify to receive new licenses for the same
material under section 10.
9. Acceptance Not Required for Having Copies.
You are not required to accept this License in order to receive or
run a copy of the Program. Ancillary propagation of a covered work
occurring solely as a consequence of using peer-to-peer transmission
to receive a copy likewise does not require acceptance. However,
nothing other than this License grants you permission to propagate or
modify any covered work. These actions infringe copyright if you do
not accept this License. Therefore, by modifying or propagating a
covered work, you indicate your acceptance of this License to do so.
10. Automatic Licensing of Downstream Recipients.
Each time you convey a covered work, the recipient automatically
receives a license from the original licensors, to run, modify and
propagate that work, subject to this License. You are not responsible
for enforcing compliance by third parties with this License.
An "entity transaction" is a transaction transferring control of an
organization, or substantially all assets of one, or subdividing an
organization, or merging organizations. If propagation of a covered
work results from an entity transaction, each party to that
transaction who receives a copy of the work also receives whatever
licenses to the work the party's predecessor in interest had or could
give under the previous paragraph, plus a right to possession of the
Corresponding Source of the work from the predecessor in interest, if
the predecessor has it or can get it with reasonable efforts.
You may not impose any further restrictions on the exercise of the
rights granted or affirmed under this License. For example, you may
not impose a license fee, royalty, or other charge for exercise of
rights granted under this License, and you may not initiate litigation
(including a cross-claim or counterclaim in a lawsuit) alleging that
any patent claim is infringed by making, using, selling, offering for
sale, or importing the Program or any portion of it.
11. Patents.
A "contributor" is a copyright holder who authorizes use under this
License of the Program or a work on which the Program is based. The
work thus licensed is called the contributor's "contributor version".
A contributor's "essential patent claims" are all patent claims
owned or controlled by the contributor, whether already acquired or
hereafter acquired, that would be infringed by some manner, permitted
by this License, of making, using, or selling its contributor version,
but do not include claims that would be infringed only as a
consequence of further modification of the contributor version. For
purposes of this definition, "control" includes the right to grant
patent sublicenses in a manner consistent with the requirements of
this License.
Each contributor grants you a non-exclusive, worldwide, royalty-free
patent license under the contributor's essential patent claims, to
make, use, sell, offer for sale, import and otherwise run, modify and
propagate the contents of its contributor version.
In the following three paragraphs, a "patent license" is any express
agreement or commitment, however denominated, not to enforce a patent
(such as an express permission to practice a patent or covenant not to
sue for patent infringement). To "grant" such a patent license to a
party means to make such an agreement or commitment not to enforce a
patent against the party.
If you convey a covered work, knowingly relying on a patent license,
and the Corresponding Source of the work is not available for anyone
to copy, free of charge and under the terms of this License, through a
publicly available network server or other readily accessible means,
then you must either (1) cause the Corresponding Source to be so
available, or (2) arrange to deprive yourself of the benefit of the
patent license for this particular work, or (3) arrange, in a manner
consistent with the requirements of this License, to extend the patent
license to downstream recipients. "Knowingly relying" means you have
actual knowledge that, but for the patent license, your conveying the
covered work in a country, or your recipient's use of the covered work
in a country, would infringe one or more identifiable patents in that
country that you have reason to believe are valid.
If, pursuant to or in connection with a single transaction or
arrangement, you convey, or propagate by procuring conveyance of, a
covered work, and grant a patent license to some of the parties
receiving the covered work authorizing them to use, propagate, modify
or convey a specific copy of the covered work, then the patent license
you grant is automatically extended to all recipients of the covered
work and works based on it.
A patent license is "discriminatory" if it does not include within
the scope of its coverage, prohibits the exercise of, or is
conditioned on the non-exercise of one or more of the rights that are
specifically granted under this License. You may not convey a covered
work if you are a party to an arrangement with a third party that is
in the business of distributing software, under which you make payment
to the third party based on the extent of your activity of conveying
the work, and under which the third party grants, to any of the
parties who would receive the covered work from you, a discriminatory
patent license (a) in connection with copies of the covered work
conveyed by you (or copies made from those copies), or (b) primarily
for and in connection with specific products or compilations that
contain the covered work, unless you entered into that arrangement,
or that patent license was granted, prior to 28 March 2007.
Nothing in this License shall be construed as excluding or limiting
any implied license or other defenses to infringement that may
otherwise be available to you under applicable patent law.
12. No Surrender of Others' Freedom.
If conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot convey a
covered work so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you may
not convey it at all. For example, if you agree to terms that obligate you
to collect a royalty for further conveying from those to whom you convey
the Program, the only way you could satisfy both those terms and this
License would be to refrain entirely from conveying the Program.
13. Remote Network Interaction; Use with the GNU General Public License.
Notwithstanding any other provision of this License, if you modify the
Program, your modified version must prominently offer all users
interacting with it remotely through a computer network (if your version
supports such interaction) an opportunity to receive the Corresponding
Source of your version by providing access to the Corresponding Source
from a network server at no charge, through some standard or customary
means of facilitating copying of software. This Corresponding Source
shall include the Corresponding Source for any work covered by version 3
of the GNU General Public License that is incorporated pursuant to the
following paragraph.
Notwithstanding any other provision of this License, you have
permission to link or combine any covered work with a work licensed
under version 3 of the GNU General Public License into a single
combined work, and to convey the resulting work. The terms of this
License will continue to apply to the part which is the covered work,
but the work with which it is combined will remain governed by version
3 of the GNU General Public License.
14. Revised Versions of this License.
The Free Software Foundation may publish revised and/or new versions of
the GNU Affero General Public License from time to time. Such new versions
will be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the
Program specifies that a certain numbered version of the GNU Affero General
Public License "or any later version" applies to it, you have the
option of following the terms and conditions either of that numbered
version or of any later version published by the Free Software
Foundation. If the Program does not specify a version number of the
GNU Affero General Public License, you may choose any version ever published
by the Free Software Foundation.
If the Program specifies that a proxy can decide which future
versions of the GNU Affero General Public License can be used, that proxy's
public statement of acceptance of a version permanently authorizes you
to choose that version for the Program.
Later license versions may give you additional or different
permissions. However, no additional obligations are imposed on any
author or copyright holder as a result of your choosing to follow a
later version.
15. Disclaimer of Warranty.
THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
16. Limitation of Liability.
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
SUCH DAMAGES.
17. Interpretation of Sections 15 and 16.
If the disclaimer of warranty and limitation of liability provided
above cannot be given local legal effect according to their terms,
reviewing courts shall apply local law that most closely approximates
an absolute waiver of all civil liability in connection with the
Program, unless a warranty or assumption of liability accompanies a
copy of the Program in return for a fee.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
state the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
Copyright (C)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published
by the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see .
Also add information on how to contact you by electronic and paper mail.
If your software can interact with users remotely through a computer
network, you should also make sure that it provides a way for users to
get its source. For example, if your program is a web application, its
interface could display a "Source" link that leads users to an archive
of the code. There are many ways you could offer source, and different
solutions will be better for different programs; see section 13 for the
specific requirements.
You should also get your employer (if you work as a programmer) or school,
if any, to sign a "copyright disclaimer" for the program, if necessary.
For more information on this, and how to apply and follow the GNU AGPL, see
.
================================================
FILE: README.md
================================================
# SUPERIOR SAMPLING WITH RES4LYF: THE POWER OF BONGMATH
RES_3M vs. Uni-PC (WAN). Typically only 20 steps are needed with RES samplers. Far more are needed with Uni-PC and other common samplers, and they never reach the same level of quality.


# INSTALLATION
If you are using a venv, you will need to first run from within your ComfyUI folder (that contains your "venv" folder):
_Linux:_
source venv/bin/activate
_Windows:_
venv\Scripts\activate
_Then, "cd" into your "custom_nodes" folder and run the following commands:_
git clone https://github.com/ClownsharkBatwing/RES4LYF/
cd RES4LYF
_If you are using a venv, run these commands:_
pip install -r requirements.txt
_Alternatively, if you are using the portable version of ComfyUI you will need to replace "pip" with the path to your embedded pip executable. For example, on Windows:_
X:\path\to\your\comfy_portable_folder\python_embedded\Scripts\pip.exe install -r requirements.txt
# IMPORTANT UPDATE INFO
The previous versions will remain available but with "Legacy" prepended to their names.
If you wish to use the sampler menu shown below, you will need to install https://github.com/rgthree/rgthree-comfy (which I highly recommend you have regardless).

If these menus do not show up after restarting ComfyUI and refreshing the page (hit F5, not just "r") verify that these menus are enabled in the rgthree settings (click the gear in the bottom left of ComfyUI, select rgthree, and ensure "Auto Nest Subdirectories" is checked):

# NEW VERSION DOCUMENTATION
I have prepared a detailed explanation of many of the concepts of sampling with examples in this workflow. There's also many tips, explanations of parameters, and all of the most important nodes are laid out for you to see. Some new workflow-enhancing tricks like "chainsamplers" are demonstrated, and **regional AND temporal prompting** are explained (supporting Flux, HiDream, SD3.5, AuraFlow, and WAN - you can even change the conditioning on a frame-by-frame basis!).
[example_workflows/intro to clownsampling.json](https://github.com/ClownsharkBatwing/RES4LYF/blob/main/example_workflows/intro%20to%20clownsampling.json)

# STYLE TRANSFER
Supported models: HiDream, Flux, Chroma, AuraFlow, SD1.5, SDXL, SD3.5, Stable Cascade, LTXV, and WAN. Also supported: Stable Cascade (and UltraPixel) which has an excellent understanding of style (https://github.com/ClownsharkBatwing/UltraCascade).
Currently, best results are with HiDream or Chroma, or Flux with a style lora (Flux Dev is very lacking with style knowledge). Include some mention of the style you wish to use in the prompt. (Try with the guide off to confirm the prompt is not doing the heavy lifting!)

For example, the prompt for the below was simply "a gritty illustration of a japanese woman with traditional hair in traditional clothes". Mostly you just need to make clear whether it's supposed to be a photo or an illustration, etc. so that the conditioning isn't fighting the style guide (every model has its inherent biases).

**COMPOSITION GUIDE; OUTPUT; STYLE GUIDE**


# KILL FLUX BLUR (and HiDream blur)
**Consecutive seeds, no cherrypicking.**

# REGIONAL CONDITIONING
Unlimited zones! Over 10 zones have been used in one image before.
Currently supported models: HiDream, Flux, Chroma, SD3.5, SD1.5, SDXL, AuraFlow, and WAN.
Masks can be drawn freely, or more traditional rigid ones may be used, such as in this example:




# TEMPORAL CONDITIONING
Unlimited zones! Ability to change the prompt for each frame.
Currently supported models: WAN.


# VIDEO 2 VIDEO EDITING
Viable with any video model, demo with WAN:

# PREVIOUS VERSION NODE DOCUMENTATION
At the heart of this repository is the "ClownsharKSampler", which was specifically designed to support both rectified flow and probability flow models. It features 69 different selectable samplers (44 explicit, 18 fully implicit, 7 diagonally implicit) all available in both ODE or SDE modes with 20 noise types, 9 noise scaling modes, and options for implicit Runge-Kutta sampling refinement steps. Several new explicit samplers are implemented, most notably RES_2M, RES_3S, and RES_5S. Additionally, img2img capabilities include both latent image guidance and unsampling/resampling (via new forms of rectified noise inversion).
A particular emphasis of this project has been to facilitate modulating parameters vs. time, which can facilitate large gains in image quality from the sampling process. To this end, a wide variety of sigma, latent, and noise manipulation nodes are included.
Much of this work remains experimental and is subject to further changes.
# ClownSampler

# SharkSampler

# ClownsharKSampler

This is an all-in-one sampling node designed for convenience without compromising on control or quality.
There are several key sections to the parameters which will be explained below.
## INPUTS

The only two mandatory inputs here are "model" and "latent_image".
**POSITIVE and NEGATIVE:** If you connect nothing to either of these inputs, the node will automatically generate null conditioning. If you are unsampling, you actually don't need to hook up any conditioning at all (and will set CFG = 1.0). In most cases, merely using the positive conditioning will suffice, unless you really need to use a specific negative prompt.
**SIGMAS:** If a sigmas scheduler node is connected to this input, it will override the scheduler and steps settings chosen within the node.
## NOISE SETTINGS

**NOISE_TYPE_INIT:** This sets the initial noise type applied to the latent image.
**NOISE_TYPE_SDE:** This sets the noise type used during SDE sampling. Note that SDE sampling is identical to ODE sampling in most ways - the difference is that noise is added after each step. It's like a form of carefully controlled continuous noise injection.
**NOISE_MODE_SDE:** This determines what method is used for scaling the amount of noise to be added based on the "eta" setting below. They are listed in order of strength of the effect.
**ETA:** This controls how much noise is added after each step. Note that for most of the noise modes, anything equal to or greater than 1.0 will trigger internal scaling to prevent NaN errors. The exception is the noise mode "exp" which allows for settings far above 1.0.
**NOISE_SEED:** Largely identical to the setting in KSampler. Set to -1 to have it increment the most recently used seed (by the workflow) by 1.
**CONTROL_AFTER_GENERATE:** Self-explanatory. I recommend setting to "fixed" or "increment" (as you don't have to reload the workflow to regenerate something, you can just decrement it by one).
## SAMPLER SETTINGS

**SAMPLER_MODE:** In virtually all situations, use "standard". However, if you are unsampling, set to "unsample", and if you are resampling (the stage after unsampling), set to "resample". Both of these modes will disable noise addition within ComfyUI, which is essential for these methods to work properly.
**SAMPLER_NAME:** This is used similarly to the KSampler setting. This selects the explicit sampler type. Note the use of numbers and letters at the end of each sampler name: "2m, 3m, 2s, 3s, 5s, etc."
Samplers that end in "s" use substeps between each step. One ending with "2s" has two stages per step, therefore costs two model calls per step (Euler costs one - model calls are what determine inference time). "3s" would take three model calls per step, and therefore take three times as long to run as Euler. However, the increase in accuracy can be very dramatic, especially when using noise (SDE sampling). The "res" family of samplers are particularly notable (they are effectively refinements of the dpmpp family, with new, higher order, much more accurate versions implemented here).
Samplers that end in "m" are "multistep" samplers, which instead of issuing new model calls for substeps, recycle previous steps as estimations for these substeps. They're less accurate, but all run at Euler speed (one model call per step). Sometimes this can be an advantage, as multistep samplers tend to converge more linearly toward a target image. This can be useful for img2img transformations, unsampling, or when using latent image guides.
**IMPLICIT_SAMPLER_NAME:** This is very useful with SD3.5 Medium for improving coherence, reducing artifacts and mutations, etc. It may be difficult to use with a model like Flux unless you plan on setting up a queue of generations and walking away. It will use the explicit step type as a predictor for each of the implicit substeps, so if you choose a slow explicit sampler, you will be waiting a long time. Euler, res_2m, deis_2m, etc. will often suffice as a predictor for implicit sampling, though any sampler may be used. Try "res_5s" as your explicit sampler type, and "gauss-legendre_5s", if you wish to demonstrate your commitment to climate change (and image quality).
Setting this to "none" has the same effect as setting implicit_steps = 0.
## SCHEDULER AND DENOISE SETTINGS

These are identical in most ways to the settings by the same name in KSampler.
**SCHEDULER:** There is one extra sigma scheduler offered by default: "beta57" which is the beta schedule with modified parameters (alpha = 0.5, beta = 0.7).
**IMPLICIT_STEPS:** This controls the number of implicit steps to run. Note that it will double, triple, etc. the runtime as you increase the stepcount. Typically, gains diminish quickly after 2-3 implicit steps.
**DENOISE:** This is identical to the KSampler setting. Controls the amount of noise removed from the image. Note that with this method, the effect will change significantly depending on your choice of scheduler.
**DENOISE_ALT:** Instead of splitting the sigma schedule like "denoise", this multiplies them. The results are different, but track more closely from one scheduler to another when using the same value. This can be particularly useful for img2img workflows.
**CFG:** This is identical to the KSampler setting. Typically, you'll set this to 1.0 (to disable it) when using Flux, if you're using Flux guidance. However, the effect is quite nice when using dedistilled models if you use "CLIP Text Encode" without any Flux guidance, and set CFG to 3.0.
If you've never quite understood CFG, you can think of it this way. Imagine you're walking down the street and see what looks like an enticing music festival in the distance (your positive conditioning). You're on the fence about attending, but then, suddenly, a horde of pickleshark cannibals come storming out of a nearby bar (your negative conditioning). Together, the two team up to drive you toward the music festival. That's CFG.
## SHIFT SETTINGS

These are present for convenience as they are used in virtually every workflow.
**SHIFT:** This is the same as "shift" for the ModelSampling nodes for SD3.5, AuraFlow, etc., and is equivalent to "max_shift" for Flux. Set this value to -1 to disable setting shift (or max_shift) within the node.
**BASE_SHIFT:** This is only used by Flux. Set this value to -1 to disable setting base_shift within the node.
**SHIFT_SCALING:** This changes how the shift values are calculated. "exponential" is the default used by Flux, whereas "linear" is the default used by SD3.5 and AuraFlow. In most cases, "exponential" leads to better results, though "linear" has some niche uses.
# Sampler and noise mode list
## Explicit samplers
Bolded samplers are added as options to the sampler dropdown in ComfyUI (an ODE and SDE version for each).
**res_2m**
**res_2/3/5s**
**deis_2/3/4m**
ralston_2/3/4s
dpmpp_2/3m
dpmpp_sde_2s
dpmpp_2/3s
midpoint_2s
heun_2/3s
houwen-wray_3s
kutta_3s
ssprk3_3s
rk38_4s
rk4_4s
dormand-prince_6s
dormand-prince_13s
bogacki-shampine_7s
ddim
euler
## Fully Implicit Samplers
gauss-legendre_2/3/4/5s
radau_(i/ii)a_2/3s
lobatto_iii(a/b/c/d/star)_2/3s
## Diagonally Implicit Samplers
kraaijevanger_spijker_2s
qin_zhang_2s
pareschi_russo_2s
pareschi_russo_alt_2s
crouzeix_2/3s
irk_exp_diag_2s (features an exponential integrator)
# PREVIOUS FLUX WORKFLOWS
## TXT2IMG:
This uses my amateur cell phone lora, which is freely available (https://huggingface.co/ClownsharkBatwing/CSBW_Style/blob/main/amateurphotos_1_amateurcellphonephoto_recapt2.safetensors). It significantly reduces the plastic, blurred look of Flux Dev.


## INPAINTING:


## UNSAMPLING (Dual guides with masks):


# PREVIOUS WORKFLOWS
**THE FOLLOWING WORKFLOWS ARE FOR A PREVIOUS VERSION OF THE NODE.**
These will still work! You will, however, need to manually delete and recreate the sampler and guide nodes and input the settings as they appear in the screenshots. The layout of the nodes has been changed slightly. To replicate their behavior precisely, add to the new extra_options box in ClownsharKSampler: truncate_conditioning=true (if that setting was used in the screenshot for the node).

**TXT2IMG Workflow:**


**TXT2IMG Workflow (Latent Image Guides):**


Input image:
https://github.com/ClownsharkBatwing/RES4LYF/blob/main/workflows/txt2img%20guided%20SD35M%20input.png
**TXT2IMG Workflow (Dual Guides with Masking):**


Input images and mask:
https://github.com/ClownsharkBatwing/RES4LYF/blob/main/workflows/txt2img%20dual%20guides%20with%20mask%20SD35M%20input1.png
https://github.com/ClownsharkBatwing/RES4LYF/blob/main/workflows/txt2img%20dual%20guides%20with%20mask%20SD35M%20input2.png
https://github.com/ClownsharkBatwing/RES4LYF/blob/main/workflows/txt2img%20dual%20guides%20with%20mask%20SD35M%20mask.png
**IMG2IMG Workflow (Unsampling):**


Input image:
https://github.com/ClownsharkBatwing/RES4LYF/blob/main/workflows/img2img%20unsampling%20SD35L%20input.png
**IMG2IMG Workflow (Unsampling with SDXL):**


Input image:
https://github.com/ClownsharkBatwing/RES4LYF/blob/main/workflows/img2img%20unsampling%20SDXL%20input.png
**IMG2IMG Workflow (Unsampling with latent image guide):**


Input image:
https://github.com/ClownsharkBatwing/RES4LYF/blob/main/workflows/img2img%20guided%20unsampling%20SD35M%20input.png
**IMG2IMG Workflow (Unsampling with dual latent image guides and masking):**


Input images and mask:
https://github.com/ClownsharkBatwing/RES4LYF/blob/main/workflows/img2img%20dual%20guided%20masked%20unsampling%20SD35M%20input1.png
https://github.com/ClownsharkBatwing/RES4LYF/blob/main/workflows/img2img%20dual%20guided%20masked%20unsampling%20SD35M%20input2.png
https://github.com/ClownsharkBatwing/RES4LYF/blob/main/workflows/img2img%20dual%20guided%20masked%20unsampling%20SD35M%20mask.png
================================================
FILE: __init__.py
================================================
import importlib
import os
from . import loaders
from . import sigmas
from . import conditioning
from . import images
from . import models
from . import helper_sigma_preview_image_preproc
from . import nodes_misc
from . import nodes_latents
from . import nodes_precision
import torch
from math import *
from comfy.samplers import SchedulerHandler, SCHEDULER_HANDLERS, SCHEDULER_NAMES
# Register the custom "bong_tangent" sigma scheduler with ComfyUI so it shows
# up alongside the built-in schedulers (karras, beta, etc.). Guarded so a
# repeated import of this package does not register it twice.
new_scheduler_name = "bong_tangent"
if new_scheduler_name not in SCHEDULER_HANDLERS:
    bong_tangent_handler = SchedulerHandler(handler=sigmas.bong_tangent_scheduler, use_ms=True)
    SCHEDULER_HANDLERS[new_scheduler_name] = bong_tangent_handler
    SCHEDULER_NAMES.append(new_scheduler_name)
from .res4lyf import RESplain
# Determinism switches intentionally left disabled (they carry a performance
# cost); kept here for easy debugging of nondeterministic results.
#torch.use_deterministic_algorithms(True)
#torch.backends.cudnn.deterministic = True
#torch.backends.cudnn.benchmark = False
# NOTE(review): the bare `res4lyf` name resolves here only because the
# `from .res4lyf import RESplain` above binds the submodule as an attribute
# of this package's namespace — confirm this import stays above this call.
res4lyf.init()
# Names of samplers whose penultimate sigma should be dropped by KSampler.
# Currently empty; populated here if any future sampler needs that treatment.
discard_penultimate_sigma_samplers = set()
def add_samplers():
    """Register the module-level ``extra_samplers`` with ComfyUI's KSampler.

    Each new sampler name is inserted into ``KSampler.SAMPLERS`` just after
    "uni_pc_bh2" (expected to sit at the end of the stock list) and its
    callable is attached to ``k_diffusion_sampling`` as ``sample_<name>``.
    Also merges ``discard_penultimate_sigma_samplers`` into KSampler's set
    when that attribute exists.
    """
    from comfy.samplers import KSampler, k_diffusion_sampling

    if hasattr(KSampler, "DISCARD_PENULTIMATE_SIGMA_SAMPLERS"):
        KSampler.DISCARD_PENULTIMATE_SIGMA_SAMPLERS |= discard_penultimate_sigma_samplers

    registered = 0
    for name in extra_samplers:
        if name in KSampler.SAMPLERS:
            continue
        try:
            # Anchor insertion point; raises ValueError if the stock list
            # is missing "uni_pc_bh2", in which case the sampler is skipped.
            anchor = KSampler.SAMPLERS.index("uni_pc_bh2")
            KSampler.SAMPLERS.insert(anchor + 1, name)
            setattr(k_diffusion_sampling, "sample_{}".format(name), extra_samplers[name])
            registered += 1
        except ValueError as _err:
            pass

    if registered > 0:
        import importlib
        # NOTE(review): reload re-executes k_diffusion_sampling, which looks
        # like it would drop the setattr'd sample_* functions — kept as-is to
        # preserve the original behavior; verify this is intentional.
        importlib.reload(k_diffusion_sampling)
# Registry of extra sampler callables keyed by name; filled in by the
# zampler/beta/legacy loaders below. The reversal is a no-op on an empty
# dict but preserves the historical insertion-order flip.
extra_samplers = {}
extra_samplers = dict(reversed(list(extra_samplers.items())))
# Master registry mapping ComfyUI node names to their implementing classes.
# Extended further below by the optional zampler/beta/legacy loaders.
NODE_CLASS_MAPPINGS = {
# Model loaders
"FluxLoader" : loaders.FluxLoader,
"SD35Loader" : loaders.SD35Loader,
"ClownModelLoader" : loaders.RES4LYFModelLoader,
# Text utility nodes
"TextBox1" : nodes_misc.TextBox1,
"TextBox2" : nodes_misc.TextBox2,
"TextBox3" : nodes_misc.TextBox3,
"TextConcatenate" : nodes_misc.TextConcatenate,
"TextBoxConcatenate" : nodes_misc.TextBoxConcatenate,
"TextLoadFile" : nodes_misc.TextLoadFile,
"TextShuffle" : nodes_misc.TextShuffle,
"TextShuffleAndTruncate" : nodes_misc.TextShuffleAndTruncate,
"TextTruncateTokens" : nodes_misc.TextTruncateTokens,
"SeedGenerator" : nodes_misc.SeedGenerator,
# Conditioning: regional/temporal prompting and conditioning math
"ClownRegionalConditioning" : conditioning.ClownRegionalConditioning,
"ClownRegionalConditionings" : conditioning.ClownRegionalConditionings,
"ClownRegionalConditioning2" : conditioning.ClownRegionalConditioning2,
"ClownRegionalConditioning3" : conditioning.ClownRegionalConditioning3,
"ClownRegionalConditioning_AB" : conditioning.ClownRegionalConditioning_AB,
"ClownRegionalConditioning_ABC" : conditioning.ClownRegionalConditioning_ABC,
"CLIPTextEncodeFluxUnguided" : conditioning.CLIPTextEncodeFluxUnguided,
"ConditioningOrthoCollin" : conditioning.ConditioningOrthoCollin,
"ConditioningAverageScheduler" : conditioning.ConditioningAverageScheduler,
"ConditioningMultiply" : conditioning.ConditioningMultiply,
"ConditioningAdd" : conditioning.ConditioningAdd,
"Conditioning Recast FP64" : conditioning.Conditioning_Recast64,
"StableCascade_StageB_Conditioning64" : conditioning.StableCascade_StageB_Conditioning64,
"ConditioningZeroAndTruncate" : conditioning.ConditioningZeroAndTruncate,
"ConditioningTruncate" : conditioning.ConditioningTruncate,
"StyleModelApplyStyle" : conditioning.StyleModelApplyStyle,
"CrossAttn_EraseReplace_HiDream" : conditioning.CrossAttn_EraseReplace_HiDream,
"ConditioningDownsample (T5)" : conditioning.ConditioningDownsampleT5,
"ConditioningToBase64" : conditioning.ConditioningToBase64,
"Base64ToConditioning" : conditioning.Base64ToConditioning,
"ConditioningBatch4" : conditioning.ConditioningBatch4,
"ConditioningBatch8" : conditioning.ConditioningBatch8,
"TemporalMaskGenerator" : conditioning.TemporalMaskGenerator,
"TemporalSplitAttnMask" : conditioning.TemporalSplitAttnMask,
"TemporalSplitAttnMask (Midframe)" : conditioning.TemporalSplitAttnMask_Midframe,
"TemporalCrossAttnMask" : conditioning.TemporalCrossAttnMask,
# Precision control
"Set Precision" : nodes_precision.set_precision,
"Set Precision Universal" : nodes_precision.set_precision_universal,
"Set Precision Advanced" : nodes_precision.set_precision_advanced,
# Latent manipulation and noise generation
"LatentUpscaleWithVAE" : helper_sigma_preview_image_preproc.LatentUpscaleWithVAE,
"LatentNoised" : nodes_latents.LatentNoised,
"LatentNoiseList" : nodes_latents.LatentNoiseList,
"AdvancedNoise" : nodes_latents.AdvancedNoise,
"LatentNoiseBatch_perlin" : nodes_latents.LatentNoiseBatch_perlin,
"LatentNoiseBatch_fractal" : nodes_latents.LatentNoiseBatch_fractal,
"LatentNoiseBatch_gaussian" : nodes_latents.LatentNoiseBatch_gaussian,
"LatentNoiseBatch_gaussian_channels" : nodes_latents.LatentNoiseBatch_gaussian_channels,
"LatentBatch_channels" : nodes_latents.LatentBatch_channels,
"LatentBatch_channels_16" : nodes_latents.LatentBatch_channels_16,
"Latent Get Channel Means" : nodes_latents.latent_get_channel_means,
"Latent Match Channelwise" : nodes_latents.latent_channelwise_match,
"Latent to RawX" : nodes_latents.latent_to_raw_x,
"Latent Clear State Info" : nodes_latents.latent_clear_state_info,
"Latent Replace State Info" : nodes_latents.latent_replace_state_info,
"Latent Display State Info" : nodes_latents.latent_display_state_info,
"Latent Transfer State Info" : nodes_latents.latent_transfer_state_info,
"Latent TrimVideo State Info" : nodes_latents.TrimVideoLatent_state_info,
"Latent to Cuda" : nodes_latents.latent_to_cuda,
"Latent Batcher" : nodes_latents.latent_batch,
"Latent Normalize Channels" : nodes_latents.latent_normalize_channels,
"Latent Channels From To" : nodes_latents.latent_mean_channels_from_to,
"LatentPhaseMagnitude" : nodes_latents.LatentPhaseMagnitude,
"LatentPhaseMagnitudeMultiply" : nodes_latents.LatentPhaseMagnitudeMultiply,
"LatentPhaseMagnitudeOffset" : nodes_latents.LatentPhaseMagnitudeOffset,
"LatentPhaseMagnitudePower" : nodes_latents.LatentPhaseMagnitudePower,
# Masks and frame handling
"MaskFloatToBoolean" : nodes_latents.MaskFloatToBoolean,
"MaskToggle" : nodes_latents.MaskToggle,
"MaskEdge" : nodes_latents.MaskEdge,
#"MaskEdgeRatio" : nodes_latents.MaskEdgeRatio,
"Frames Masks Uninterpolate" : nodes_latents.Frames_Masks_Uninterpolate,
"Frames Masks ZeroOut" : nodes_latents.Frames_Masks_ZeroOut,
"Frames Latent ReverseOrder" : nodes_latents.Frames_Latent_ReverseOrder,
"EmptyLatentImage64" : nodes_latents.EmptyLatentImage64,
"EmptyLatentImageCustom" : nodes_latents.EmptyLatentImageCustom,
"StableCascade_StageC_VAEEncode_Exact": nodes_latents.StableCascade_StageC_VAEEncode_Exact,
# VAE helpers and sigma previews
"PrepForUnsampling" : helper_sigma_preview_image_preproc.VAEEncodeAdvanced,
"VAEEncodeAdvanced" : helper_sigma_preview_image_preproc.VAEEncodeAdvanced,
"VAEStyleTransferLatent" : helper_sigma_preview_image_preproc.VAEStyleTransferLatent,
"SigmasPreview" : helper_sigma_preview_image_preproc.SigmasPreview,
"SigmasSchedulePreview" : helper_sigma_preview_image_preproc.SigmasSchedulePreview,
# Model compilation and patching
"TorchCompileModelFluxAdv" : models.TorchCompileModelFluxAdvanced,
"TorchCompileModelAura" : models.TorchCompileModelAura,
"TorchCompileModelSD35" : models.TorchCompileModelSD35,
"TorchCompileModels" : models.TorchCompileModels,
"ClownpileModelWanVideo" : models.ClownpileModelWanVideo,
"ModelTimestepPatcher" : models.ModelSamplingAdvanced,
"ModelSamplingAdvanced" : models.ModelSamplingAdvanced,
"ModelSamplingAdvancedResolution" : models.ModelSamplingAdvancedResolution,
"FluxGuidanceDisable" : models.FluxGuidanceDisable,
"ReWanPatcher" : models.ReWanPatcher,
"ReFluxPatcher" : models.ReFluxPatcher,
"ReChromaPatcher" : models.ReChromaPatcher,
"ReSD35Patcher" : models.ReSD35Patcher,
"ReAuraPatcher" : models.ReAuraPatcher,
"ReLTXVPatcher" : models.ReLTXVPatcher,
"ReHiDreamPatcher" : models.ReHiDreamPatcher,
"ReSDPatcher" : models.ReSDPatcher,
"ReReduxPatcher" : models.ReReduxPatcher,
"ReWanPatcherAdvanced" : models.ReWanPatcherAdvanced,
"ReFluxPatcherAdvanced" : models.ReFluxPatcherAdvanced,
"ReChromaPatcherAdvanced" : models.ReChromaPatcherAdvanced,
"ReSD35PatcherAdvanced" : models.ReSD35PatcherAdvanced,
"ReAuraPatcherAdvanced" : models.ReAuraPatcherAdvanced,
"ReLTXVPatcherAdvanced" : models.ReLTXVPatcherAdvanced,
"ReHiDreamPatcherAdvanced" : models.ReHiDreamPatcherAdvanced,
"LayerPatcher" : loaders.LayerPatcher,
"FluxOrthoCFGPatcher" : models.FluxOrthoCFGPatcher,
"UNetSave" : models.UNetSave,
# Sigma schedule manipulation
"Sigmas Recast" : sigmas.set_precision_sigmas,
"Sigmas Noise Inversion" : sigmas.sigmas_noise_inversion,
"Sigmas From Text" : sigmas.sigmas_from_text,
"Sigmas Variance Floor" : sigmas.sigmas_variance_floor,
"Sigmas Truncate" : sigmas.sigmas_truncate,
"Sigmas Start" : sigmas.sigmas_start,
"Sigmas Split" : sigmas.sigmas_split,
"Sigmas Split Value" : sigmas.sigmas_split_value,
"Sigmas Concat" : sigmas.sigmas_concatenate,
"Sigmas Pad" : sigmas.sigmas_pad,
"Sigmas Unpad" : sigmas.sigmas_unpad,
"Sigmas SetFloor" : sigmas.sigmas_set_floor,
"Sigmas DeleteBelowFloor" : sigmas.sigmas_delete_below_floor,
"Sigmas DeleteDuplicates" : sigmas.sigmas_delete_consecutive_duplicates,
"Sigmas Cleanup" : sigmas.sigmas_cleanup,
"Sigmas Mult" : sigmas.sigmas_mult,
"Sigmas Modulus" : sigmas.sigmas_modulus,
"Sigmas Quotient" : sigmas.sigmas_quotient,
"Sigmas Add" : sigmas.sigmas_add,
"Sigmas Power" : sigmas.sigmas_power,
"Sigmas Abs" : sigmas.sigmas_abs,
"Sigmas2 Mult" : sigmas.sigmas2_mult,
"Sigmas2 Add" : sigmas.sigmas2_add,
"Sigmas Rescale" : sigmas.sigmas_rescale,
"Sigmas Count" : sigmas.sigmas_count,
"Sigmas Resample" : sigmas.sigmas_interpolate,
"Sigmas Math1" : sigmas.sigmas_math1,
"Sigmas Math3" : sigmas.sigmas_math3,
"Sigmas Iteration Karras" : sigmas.sigmas_iteration_karras,
"Sigmas Iteration Polyexp" : sigmas.sigmas_iteration_polyexp,
# New Sigma Nodes
"Sigmas Lerp" : sigmas.sigmas_lerp,
"Sigmas InvLerp" : sigmas.sigmas_invlerp,
"Sigmas ArcSine" : sigmas.sigmas_arcsine,
"Sigmas LinearSine" : sigmas.sigmas_linearsine,
"Sigmas Append" : sigmas.sigmas_append,
"Sigmas ArcCosine" : sigmas.sigmas_arccosine,
"Sigmas ArcTangent" : sigmas.sigmas_arctangent,
"Sigmas CrossProduct" : sigmas.sigmas_crossproduct,
"Sigmas DotProduct" : sigmas.sigmas_dotproduct,
"Sigmas Fmod" : sigmas.sigmas_fmod,
"Sigmas Frac" : sigmas.sigmas_frac,
"Sigmas If" : sigmas.sigmas_if,
"Sigmas Logarithm2" : sigmas.sigmas_logarithm2,
"Sigmas SmoothStep" : sigmas.sigmas_smoothstep,
"Sigmas SquareRoot" : sigmas.sigmas_squareroot,
"Sigmas TimeStep" : sigmas.sigmas_timestep,
"Sigmas Sigmoid" : sigmas.sigmas_sigmoid,
"Sigmas Easing" : sigmas.sigmas_easing,
"Sigmas Hyperbolic" : sigmas.sigmas_hyperbolic,
"Sigmas Gaussian" : sigmas.sigmas_gaussian,
"Sigmas Percentile" : sigmas.sigmas_percentile,
"Sigmas KernelSmooth" : sigmas.sigmas_kernel_smooth,
"Sigmas QuantileNorm" : sigmas.sigmas_quantile_norm,
"Sigmas AdaptiveStep" : sigmas.sigmas_adaptive_step,
"Sigmas Chaos" : sigmas.sigmas_chaos,
"Sigmas ReactionDiffusion" : sigmas.sigmas_reaction_diffusion,
"Sigmas Attractor" : sigmas.sigmas_attractor,
"Sigmas CatmullRom" : sigmas.sigmas_catmull_rom,
"Sigmas LambertW" : sigmas.sigmas_lambert_w,
"Sigmas ZetaEta" : sigmas.sigmas_zeta_eta,
"Sigmas GammaBeta" : sigmas.sigmas_gamma_beta,
"Sigmas GaussianCDF" : sigmas.sigmas_gaussian_cdf,
"Sigmas StepwiseMultirate" : sigmas.sigmas_stepwise_multirate,
"Sigmas HarmonicDecay" : sigmas.sigmas_harmonic_decay,
"Sigmas AdaptiveNoiseFloor" : sigmas.sigmas_adaptive_noise_floor,
"Sigmas CollatzIteration" : sigmas.sigmas_collatz_iteration,
"Sigmas ConwaySequence" : sigmas.sigmas_conway_sequence,
"Sigmas GilbreathSequence" : sigmas.sigmas_gilbreath_sequence,
"Sigmas CNFInverse" : sigmas.sigmas_cnf_inverse,
"Sigmas RiemannianFlow" : sigmas.sigmas_riemannian_flow,
"Sigmas LangevinDynamics" : sigmas.sigmas_langevin_dynamics,
"Sigmas PersistentHomology" : sigmas.sigmas_persistent_homology,
"Sigmas NormalizingFlows" : sigmas.sigmas_normalizing_flows,
# Schedulers
"ClownScheduler" : sigmas.ClownScheduler, # for modulating parameters
"Tan Scheduler" : sigmas.tan_scheduler,
"Tan Scheduler 2" : sigmas.tan_scheduler_2stage,
"Tan Scheduler 2 Simple" : sigmas.tan_scheduler_2stage_simple,
"Constant Scheduler" : sigmas.constant_scheduler,
"Linear Quadratic Advanced" : sigmas.linear_quadratic_advanced,
# Image utilities
"SetImageSizeWithScale" : nodes_misc.SetImageSizeWithScale,
"SetImageSize" : nodes_misc.SetImageSize,
"Mask Bounding Box Aspect Ratio" : images.MaskBoundingBoxAspectRatio,
"Image Get Color Swatches" : images.Image_Get_Color_Swatches,
"Masks From Color Swatches" : images.Masks_From_Color_Swatches,
"Masks From Colors" : images.Masks_From_Colors,
"Masks Unpack 4" : images.Masks_Unpack4,
"Masks Unpack 8" : images.Masks_Unpack8,
"Masks Unpack 16" : images.Masks_Unpack16,
"Image Sharpen FS" : images.ImageSharpenFS,
"Image Channels LAB" : images.Image_Channels_LAB,
"Image Median Blur" : images.ImageMedianBlur,
"Image Gaussian Blur" : images.ImageGaussianBlur,
"Image Pair Split" : images.Image_Pair_Split,
"Image Crop Location Exact" : images.Image_Crop_Location_Exact,
"Film Grain" : images.Film_Grain,
"Frequency Separation Linear Light" : images.Frequency_Separation_Linear_Light,
"Frequency Separation Hard Light" : images.Frequency_Separation_Hard_Light,
"Frequency Separation Hard Light LAB" : images.Frequency_Separation_Hard_Light_LAB,
"Frame Select" : images.Frame_Select,
"Frames Slice" : images.Frames_Slice,
"Frames Concat" : images.Frames_Concat,
"Mask Sketch" : images.MaskSketch,
"Image Grain Add" : images.Image_Grain_Add,
"Image Repeat Tile To Size" : images.ImageRepeatTileToSize,
# Frame/latent selection helpers
"Frames Concat Masks" : nodes_latents.Frames_Concat_Masks,
"Frame Select Latent" : nodes_latents.Frame_Select_Latent,
"Frames Slice Latent" : nodes_latents.Frames_Slice_Latent,
"Frames Concat Latent" : nodes_latents.Frames_Concat_Latent,
"Frame Select Latent Raw" : nodes_latents.Frame_Select_Latent_Raw,
"Frames Slice Latent Raw" : nodes_latents.Frames_Slice_Latent_Raw,
"Frames Concat Latent Raw" : nodes_latents.Frames_Concat_Latent_Raw,
}
# Display-name overrides for the UI; the optional loaders below may add entries.
NODE_DISPLAY_NAME_MAPPINGS = {}

# Directory of client-side extension scripts served to the ComfyUI frontend.
WEB_DIRECTORY = "./web/js"

# Tracks which optional sampler packs were successfully imported.
flags = {
    "zampler": False,
    "beta_samplers": False,
    "legacy_samplers": False,
}
# Optional experimental "zampler" pack: only loaded when the gate file
# zampler_test_code.txt sits next to this __init__.py.
file_path = os.path.join(os.path.dirname(__file__), "zampler_test_code.txt")
if os.path.exists(file_path):
    try:
        from .zampler import add_zamplers
        NODE_CLASS_MAPPINGS, extra_samplers = add_zamplers(NODE_CLASS_MAPPINGS, extra_samplers)
        flags["zampler"] = True
        RESplain("Importing zampler.")
    except ImportError:
        # Relative import failed — retry via absolute module paths, covering
        # installs where the package folder name differs in case.
        try:
            import importlib
            for candidate in ["RES4LYF.zampler", "res4lyf.zampler"]:
                try:
                    module = importlib.import_module(candidate)
                    add_zamplers = module.add_zamplers
                    NODE_CLASS_MAPPINGS, extra_samplers = add_zamplers(NODE_CLASS_MAPPINGS, extra_samplers)
                    flags["zampler"] = True
                    RESplain(f"Importing zampler via {candidate}.")
                    break
                except ImportError:
                    continue
            else:
                # for-else: runs only when no candidate path imported cleanly.
                raise ImportError("Zampler module not found in any path")
        except Exception as e:
            print(f"(RES4LYF) Failed to import zamplers: {e}")
# Beta samplers: relative import first, then absolute-import fallback under
# either casing of the package directory name.
try:
    from .beta import add_beta
    NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS, extra_samplers = add_beta(NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS, extra_samplers)
    flags["beta_samplers"] = True
    RESplain("Importing beta samplers.")
except ImportError:
    try:
        import importlib
        for module_name in ["RES4LYF.beta", "res4lyf.beta"]:
            try:
                beta_module = importlib.import_module(module_name)
                add_beta = beta_module.add_beta
                # Bug fix: call with the same 3-arg signature as the primary
                # path above. The previous 2-arg call raised a TypeError that
                # the outer `except Exception` silently reported as a failed
                # import, so this fallback could never succeed.
                NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS, extra_samplers = add_beta(NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS, extra_samplers)
                flags["beta_samplers"] = True
                RESplain(f"Importing beta samplers via {module_name}.")
                break
            except ImportError:
                continue
        else:
            # for/else: no candidate module path imported successfully.
            raise ImportError("Beta module not found in any path")
    except Exception as e:
        print(f"(RES4LYF) Failed to import beta samplers: {e}")
# Legacy samplers: relative import first, then absolute-import fallback under
# either casing of the package directory name.
try:
    from .legacy import add_legacy
    NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS, extra_samplers = add_legacy(NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS, extra_samplers)
    flags["legacy_samplers"] = True
    RESplain("Importing legacy samplers.")
except ImportError:
    try:
        import importlib
        for module_name in ["RES4LYF.legacy", "res4lyf.legacy"]:
            try:
                legacy_module = importlib.import_module(module_name)
                add_legacy = legacy_module.add_legacy
                # Bug fix: call with the same 3-arg signature as the primary
                # path above. The previous 2-arg call raised a TypeError that
                # the outer `except Exception` silently reported as a failed
                # import, so this fallback could never succeed.
                NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS, extra_samplers = add_legacy(NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS, extra_samplers)
                flags["legacy_samplers"] = True
                RESplain(f"Importing legacy samplers via {module_name}.")
                break
            except ImportError:
                continue
        else:
            # for/else: no candidate module path imported successfully.
            raise ImportError("Legacy module not found in any path")
    except Exception as e:
        print(f"(RES4LYF) Failed to import legacy samplers: {e}")
# Register all collected extra samplers with ComfyUI.
add_samplers()
__all__ = ["NODE_CLASS_MAPPINGS", "NODE_DISPLAY_NAME_MAPPINGS", "WEB_DIRECTORY"]
================================================
FILE: attention_masks.py
================================================
import torch
import torch.nn.functional as F
from torch import Tensor
from typing import Optional, Callable, Tuple, Dict, Any, Union, TYPE_CHECKING, TypeVar
from einops import rearrange
import copy
import base64
import comfy.supported_models
import node_helpers
import gc
from .sigmas import get_sigmas
from .helper import initialize_or_scale, precision_tool, get_res4lyf_scheduler_list
from .latents import get_orthogonal, get_collinear, get_edge_mask, checkerboard_variable
from .res4lyf import RESplain
from .beta.constants import MAX_STEPS
def fp_not(tensor):
    """Fuzzy-logic NOT: complement of a soft mask value in [0, 1]."""
    return 1 - tensor
def fp_or(tensor1, tensor2):
    """Fuzzy-logic OR: elementwise maximum of two soft masks."""
    return torch.max(tensor1, tensor2)
def fp_and(tensor1, tensor2):
    """Fuzzy-logic AND: elementwise minimum of two soft masks."""
    return torch.min(tensor1, tensor2)
def fp_and2(tensor1, tensor2):
    """
    Blend two square masks triangularly: tensor1 supplies the strict upper
    triangle, tensor2 the strict lower triangle, and the main diagonal is
    forced to 1.
    """
    upper = torch.triu(torch.ones_like(tensor1), diagonal=1)   # strict upper triangle
    lower = torch.tril(torch.ones_like(tensor2), diagonal=-1)  # strict lower triangle
    blended = tensor1 * upper + tensor2 * lower
    blended.diagonal().fill_(1.0)
    return blended
class CoreAttnMask:
    """
    Wraps an attention mask with optional gating by diffusion sigma range and
    transformer block range. The mask is parked on `idle_device` and moved to
    `work_device` only when returned.
    """
    def __init__(self, mask, mask_type=None, start_sigma=None, end_sigma=None, start_block=0, end_block=-1, idle_device='cpu', work_device='cuda'):
        self.mask        = mask.to(idle_device)  # stored off the compute device until needed
        self.start_sigma = start_sigma
        self.end_sigma   = end_sigma
        self.start_block = start_block
        self.end_block   = end_block
        self.work_device = work_device
        self.idle_device = idle_device
        self.mask_type   = mask_type

    def set_sigma_range(self, start_sigma, end_sigma):
        # Sigmas decrease over sampling, so "in range" means start >= sigma > end.
        self.start_sigma = start_sigma
        self.end_sigma   = end_sigma

    def set_block_range(self, start_block, end_block):
        # end_block <= 0 means "no upper bound" (see __call__).
        self.start_block = start_block
        self.end_block   = end_block

    def __call__(self, weight=1.0, mask_type=None, transformer_options=None, block_idx=0):
        """
        Return mask if block_idx is in range, sigma passed via transformer_options is in range, else return None. If no range is specified, return mask.
        """
        if block_idx < self.start_block:
            return None
        if block_idx > self.end_block and self.end_block > 0:
            return None
        mask_type = self.mask_type if mask_type is None else mask_type

        def _mask():
            # "gradient*" masks are continuous weights; all others are boolean.
            m = self.mask.to(self.work_device)
            return m * weight if mask_type.startswith("gradient") else m > 0

        if transformer_options is None:
            return _mask()
        if self.start_sigma is not None and self.end_sigma is not None:
            # Bug fix: only dereference start_sigma after confirming the range
            # is set. Previously the line below ran unconditionally and raised
            # AttributeError (None.device) whenever transformer_options was
            # supplied without a configured sigma range.
            sigma = transformer_options['sigmas'][0].to(self.start_sigma.device)
            if self.start_sigma >= sigma > self.end_sigma:
                return _mask()
            return None
        return _mask()
class BaseAttentionMask:
    """
    Shared bookkeeping for the regional attention mask builders below.
    Tracks latent geometry (t, h, w, img_len), per-region context lengths and
    spatial masks, and the generated CoreAttnMask. Subclasses override
    generate() to actually build a mask.
    """

    def __init__(self, mask_type="gradient", edge_width=0, edge_width_list=None, use_self_attn_mask_list=None, dtype=torch.float16):
        self.t                       = 1
        self.img_len                 = 0
        self.text_len                = 0
        self.text_off                = 0
        self.h                       = 0
        self.w                       = 0
        self.text_register_tokens    = 0
        self.context_lens            = []
        self.context_lens_list       = []
        self.masks                   = []
        self.num_regions             = 0
        self.attn_mask               = None
        self.mask_type               = mask_type
        self.edge_width              = edge_width
        self.edge_width_list         = edge_width_list
        self.use_self_attn_mask_list = use_self_attn_mask_list
        # "gradient" masks carry fractional weights; all other types are boolean.
        self.dtype = dtype if mask_type == "gradient" else torch.bool

    def set_latent(self, latent):
        """Record latent geometry; the token grid is the latent spatial size halved."""
        if latent.ndim == 5:
            self.b, self.c, self.t, self.h, self.w = latent.shape
        elif latent.ndim == 4:
            self.b, self.c, self.h, self.w = latent.shape
        # 2x2 patch embedding: e.g. 1024x1024 rgb -> 128x128 latent -> 64x64 img tokens
        self.h //= 2
        self.w //= 2
        self.img_len = self.h * self.w

    def add_region(self, context, mask):
        """Register one (context, spatial mask) region; totals track all regions so far."""
        self.context_lens.append(context.shape[-2])
        self.masks.append(mask)
        self.text_len = sum(self.context_lens)
        self.text_off = self.text_len
        self.num_regions += 1

    def add_region_sizes(self, context_size_list, mask):
        """Register a region given its per-encoder context sizes (e.g. [t5, llama])."""
        self.context_lens.append(sum(context_size_list))
        self.context_lens_list.append(context_size_list)
        self.masks.append(mask)
        self.text_len = sum(sum(sizes) for sizes in self.context_lens_list)
        self.text_off = self.text_len
        self.num_regions += 1

    def add_regions(self, contexts, masks):
        """Register several regions pairwise."""
        for region in zip(contexts, masks):
            self.add_region(*region)

    def clear_regions(self):
        """Forget all registered regions and reset the token totals."""
        self.context_lens = []
        self.masks        = []
        self.text_len     = 0
        self.text_off     = 0
        self.num_regions  = 0

    def generate(self):
        # Base class placeholder; subclasses build the real mask.
        print("Initializing ergosphere.")

    def get(self, **kwargs):
        """Delegate to the generated CoreAttnMask (sigma/block gating applies)."""
        return self.attn_mask(**kwargs)

    def attn_mask_recast(self, dtype):
        """Recast the stored mask tensor in place if it is not already `dtype`."""
        current = self.attn_mask.mask
        if current.dtype != dtype:
            self.attn_mask.mask = current.to(dtype)
class FullAttentionMask(BaseAttentionMask):
    # Builds one dense (txt+img) x (txt+img) attention mask combining, per region:
    # txt->txt self attention, txt<->img cross attention limited to the region's
    # spatial mask, and img->img self attention limited to the region.
    def generate(self, mask_type=None, dtype=None):
        """
        Build the combined mask from all registered regions and store it as a
        CoreAttnMask in self.attn_mask. Mask-type suffixes (_A/_B/_C/_masked/...)
        select which regions additionally get unrestricted img->img attention;
        edge widths dilate region boundaries into the self-attention mask.
        """
        mask_type = self.mask_type if mask_type is None else mask_type
        dtype = self.dtype if dtype is None else dtype
        text_off = self.text_off
        text_len = self.text_len
        img_len = self.img_len
        t = self.t
        h = self.h
        w = self.w
        if self.edge_width_list is None:
            self.edge_width_list = [self.edge_width] * self.num_regions
        # Rows: text tokens then t*img tokens; columns likewise.
        attn_mask = torch.zeros((text_off+t*img_len, text_len+t*img_len), dtype=dtype)
        #cross_self_mask = torch.zeros((t*img_len, t*img_len), dtype=torch.float16)
        prev_len = 0
        for context_len, mask in zip(self.context_lens, self.masks):
            # Downsample the spatial mask to the token grid, then broadcast it
            # along columns for cross attention (context_len) and img self
            # attention (img_len).
            img2txt_mask = F.interpolate(mask.unsqueeze(0).to(torch.float16), (h, w), mode='nearest-exact').to(dtype).flatten().unsqueeze(1).repeat(1, context_len)
            img2txt_mask_sq = F.interpolate(mask.unsqueeze(0).to(torch.float16), (h, w), mode='nearest-exact').to(dtype).flatten().unsqueeze(1).repeat(1, img_len)
            curr_len = prev_len + context_len
            attn_mask[prev_len:curr_len, prev_len:curr_len] = 1.0 # self TXT 2 TXT
            attn_mask[prev_len:curr_len, text_len: ] = img2txt_mask.transpose(-1, -2).repeat(1,t) # cross TXT 2 regional IMG # txt2img_mask
            attn_mask[text_off: , prev_len:curr_len] = img2txt_mask.repeat(t,1) # cross regional IMG 2 TXT
            # img->img self attention: both query and key must fall in the region.
            attn_mask[text_off:, text_len:] = fp_or(attn_mask[text_off:, text_len:], fp_and(img2txt_mask_sq.repeat(t,t), img2txt_mask_sq.transpose(-1, -2).repeat(t,t))) # img2txt_mask_sq, txt2img_mask_sq
            #cross_self_mask[:,:] = fp_or(cross_self_mask, fp_and(img2txt_mask_sq.repeat(t,t), (1-img2txt_mask_sq).transpose(-1, -2).repeat(t,t)))
            prev_len = curr_len
        # Suffix handling: open full img->img self attention from the selected
        # region(s) (A=first, B=second, C/unmasked=last).
        if self.mask_type.endswith("_masked") or self.mask_type.endswith("_A") or self.mask_type.endswith("_AB") or self.mask_type.endswith("_AC") or self.mask_type.endswith("_A,unmasked"):
            img2txt_mask_sq = F.interpolate(self.masks[0].unsqueeze(0).to(torch.float16), (h, w), mode='nearest-exact').to(dtype).flatten().unsqueeze(1).repeat(1, img_len)
            attn_mask[text_off:, text_len:] = fp_or(attn_mask[text_off:, text_len:], img2txt_mask_sq)
        if self.mask_type.endswith("_unmasked") or self.mask_type.endswith("_C") or self.mask_type.endswith("_BC") or self.mask_type.endswith("_AC") or self.mask_type.endswith("_B,unmasked") or self.mask_type.endswith("_A,unmasked"):
            img2txt_mask_sq = F.interpolate(self.masks[-1].unsqueeze(0).to(torch.float16), (h, w), mode='nearest-exact').to(dtype).flatten().unsqueeze(1).repeat(1, img_len)
            attn_mask[text_off:, text_len:] = fp_or(attn_mask[text_off:, text_len:], img2txt_mask_sq)
        if self.mask_type.endswith("_B") or self.mask_type.endswith("_AB") or self.mask_type.endswith("_BC") or self.mask_type.endswith("_B,unmasked"):
            img2txt_mask_sq = F.interpolate(self.masks[1].unsqueeze(0).to(torch.float16), (h, w), mode='nearest-exact').to(dtype).flatten().unsqueeze(1).repeat(1, img_len)
            attn_mask[text_off:, text_len:] = fp_or(attn_mask[text_off:, text_len:], img2txt_mask_sq)
        # Boundary softening: let tokens near region edges self-attend across regions.
        if self.edge_width > 0:
            edge_mask = torch.zeros_like(self.masks[0])
            for mask in self.masks:
                edge_mask = fp_or(edge_mask, get_edge_mask(mask, dilation=self.edge_width))
            img2txt_mask_sq = F.interpolate(edge_mask.unsqueeze(0).to(torch.float16), (h, w), mode='nearest-exact').to(dtype).flatten().unsqueeze(1).repeat(1, img_len)
            attn_mask[text_off:, text_len:] = fp_or(attn_mask[text_off:, text_len:], img2txt_mask_sq)
        elif self.edge_width_list is not None:
            edge_mask = torch.zeros_like(self.masks[0])
            for mask, edge_width in zip(self.masks, self.edge_width_list):
                if edge_width != 0:
                    edge_mask_new = get_edge_mask(mask, dilation=abs(edge_width))
                    edge_mask = fp_or(edge_mask, fp_and(edge_mask_new, mask)) #fp_and here is to ensure edge_mask only grows into the region for current mask
            img2txt_mask_sq = F.interpolate(edge_mask.unsqueeze(0).to(torch.float16), (h, w), mode='nearest-exact').to(dtype).flatten().unsqueeze(1).repeat(1, img_len)
            attn_mask[text_off:, text_len:] = fp_or(attn_mask[text_off:, text_len:], img2txt_mask_sq)
        # Regions flagged False in use_self_attn_mask_list get their img->img
        # self attention fully opened instead of restricted.
        if self.use_self_attn_mask_list is not None:
            for mask, use_self_attn_mask in zip(self.masks, self.use_self_attn_mask_list):
                if not use_self_attn_mask:
                    img2txt_mask_sq = F.interpolate(mask.unsqueeze(0).to(torch.float16), (h, w), mode='nearest-exact').to(dtype).flatten().unsqueeze(1).repeat(1, img_len)
                    attn_mask[text_off:, text_len:] = fp_or(attn_mask[text_off:, text_len:], img2txt_mask_sq)
        #cmask = torch.zeros((text_len+t*img_len), dtype=torch.bfloat16)
        #cmask[text_len:] = cross_self_mask #cmask[text_len:] + 0.25 * cross_self_mask
        #self.cross_self_mask = CoreAttnMask(cmask[None,None,...,None], mask_type=mask_type) # shape: 1, 1, txt_len+img_len, 1
        #self.cross_self_mask = CoreAttnMask(cross_self_mask[None,None,...,None], mask_type=mask_type) # shape: 1, 1, txt_len+img_len, 1
        #self.cross_self_mask = CoreAttnMask(cross_self_mask[None,None,...,None], mask_type=mask_type) # shape: 1, 1, txt_len+img_len, 1
        """
        cross_self_mask = F.interpolate(self.masks[0].unsqueeze(0).to(torch.bfloat16), (h, w), mode='nearest-exact').to(torch.bfloat16).flatten()#.unsqueeze(1) # .repeat(1, img_len)
        edge_mask = get_edge_mask(self.masks[0], dilation=80)
        edge_mask = F.interpolate(edge_mask.unsqueeze(0).to(torch.bfloat16), (h, w), mode='nearest-exact').flatten().unsqueeze(1).repeat(1, img_len)
        attn_mask[text_off:, text_len:] = F.interpolate((1-self.masks[0]).unsqueeze(0).to(torch.float16), (h, w), mode='nearest-exact').to(dtype).flatten().unsqueeze(1).repeat(1, img_len)
        attn_mask = attn_mask.to(torch.bfloat16)
        edge_mask = edge_mask.to(torch.bfloat16)"""
        # NOTE(review): img2txt_mask_sq is unbound here if no regions were added
        # and none of the branches above ran — assumes at least one region; confirm.
        self.cross_self_mask = CoreAttnMask(torch.zeros_like(img2txt_mask_sq).to(torch.bfloat16).squeeze(), mask_type=mask_type)
        self.attn_mask = CoreAttnMask(attn_mask, mask_type=mask_type)
class FullAttentionMaskHiDream(BaseAttentionMask):
    # HiDream variant: builds separate per-encoder (t5 / llama) cross-attention
    # columns from context_lens_list, then reshuffles the full mask into an
    # image-tokens-first layout.
    def generate(self, mask_type=None, dtype=None):
        """
        Build the HiDream attention mask from all registered regions and store
        it as a CoreAttnMask in self.attn_mask. Also caches the img->img
        portion in self.self_attn_mask for gen_edge_mask().
        """
        mask_type = self.mask_type if mask_type is None else mask_type
        dtype = self.dtype if dtype is None else dtype
        text_off = self.text_off
        text_len = self.text_len
        img_len = self.img_len
        t = self.t
        h = self.h
        w = self.w
        if self.edge_width_list is None:
            self.edge_width_list = [self.edge_width] * self.num_regions
        attn_mask = torch.zeros((text_off+t*img_len, text_len+t*img_len), dtype=dtype)
        reg_num = 0
        prev_len = 0
        # img->img self attention restricted to each region (query and key both inside).
        for context_len, mask in zip(self.context_lens, self.masks):
            img2txt_mask_sq = F.interpolate(mask.unsqueeze(0).to(torch.float16), (h, w), mode='nearest-exact').to(dtype).flatten().unsqueeze(1).repeat(1, img_len)
            curr_len = prev_len + context_len
            attn_mask[text_off:, text_len:] = fp_or(attn_mask[text_off:, text_len:], fp_and(img2txt_mask_sq.repeat(t,t), img2txt_mask_sq.transpose(-1,-2).repeat(t,t))) # img2txt_mask_sq, txt2img_mask_sq
            prev_len = curr_len
            reg_num += 1
        # Cache the region-restricted img->img mask for gen_edge_mask().
        self.self_attn_mask = attn_mask[text_off:, text_len:].clone()
        # Suffix handling: open full img->img self attention from selected regions
        # (A=first, B=second, C/unmasked=last).
        if self.mask_type.endswith("_masked") or self.mask_type.endswith("_A") or self.mask_type.endswith("_AB") or self.mask_type.endswith("_AC") or self.mask_type.endswith("_A,unmasked"):
            img2txt_mask_sq = F.interpolate(self.masks[0].unsqueeze(0).to(torch.float16), (h, w), mode='nearest-exact').to(dtype).flatten().unsqueeze(1).repeat(1, img_len)
            attn_mask[text_off:, text_len:] = fp_or(attn_mask[text_off:, text_len:], img2txt_mask_sq)
        if self.mask_type.endswith("_unmasked") or self.mask_type.endswith("_C") or self.mask_type.endswith("_BC") or self.mask_type.endswith("_AC") or self.mask_type.endswith("_B,unmasked") or self.mask_type.endswith("_A,unmasked"):
            img2txt_mask_sq = F.interpolate(self.masks[-1].unsqueeze(0).to(torch.float16), (h, w), mode='nearest-exact').to(dtype).flatten().unsqueeze(1).repeat(1, img_len)
            attn_mask[text_off:, text_len:] = fp_or(attn_mask[text_off:, text_len:], img2txt_mask_sq)
        if self.mask_type.endswith("_B") or self.mask_type.endswith("_AB") or self.mask_type.endswith("_BC") or self.mask_type.endswith("_B,unmasked"):
            img2txt_mask_sq = F.interpolate(self.masks[1].unsqueeze(0).to(torch.float16), (h, w), mode='nearest-exact').to(dtype).flatten().unsqueeze(1).repeat(1, img_len)
            attn_mask[text_off:, text_len:] = fp_or(attn_mask[text_off:, text_len:], img2txt_mask_sq)
        # Boundary softening for img->img self attention.
        if self.edge_width > 0:
            edge_mask = torch.zeros_like(self.masks[0])
            for mask in self.masks:
                edge_mask_new = get_edge_mask(mask, dilation=abs(self.edge_width))
                edge_mask = fp_or(edge_mask, edge_mask_new)
                #edge_mask = fp_or(edge_mask, get_edge_mask(mask, dilation=self.edge_width))
            img2txt_mask_sq = F.interpolate(edge_mask.unsqueeze(0).to(torch.float16), (h, w), mode='nearest-exact').to(dtype).flatten().unsqueeze(1).repeat(1, img_len)
            attn_mask[text_off:, text_len:] = fp_or(attn_mask[text_off:, text_len:], img2txt_mask_sq)
        elif self.edge_width < 0: # edge masks using cross-attn too
            edge_mask = torch.zeros_like(self.masks[0])
            for mask in self.masks:
                edge_mask = fp_or(edge_mask, get_edge_mask(mask, dilation=abs(self.edge_width)))
            img2txt_mask_sq = F.interpolate(edge_mask.unsqueeze(0).to(torch.float16), (h, w), mode='nearest-exact').to(dtype).flatten().unsqueeze(1).repeat(1, img_len)
            attn_mask[text_off:, text_len:] = fp_or(attn_mask[text_off:, text_len:], img2txt_mask_sq)
        elif self.edge_width_list is not None:
            edge_mask = torch.zeros_like(self.masks[0])
            for mask, edge_width in zip(self.masks, self.edge_width_list):
                if edge_width != 0:
                    edge_mask_new = get_edge_mask(mask, dilation=abs(edge_width))
                    edge_mask = fp_or(edge_mask, fp_and(edge_mask_new, mask)) #fp_and here is to ensure edge_mask only grows into the region for current mask
            img2txt_mask_sq = F.interpolate(edge_mask.unsqueeze(0).to(torch.float16), (h, w), mode='nearest-exact').to(dtype).flatten().unsqueeze(1).repeat(1, img_len)
            attn_mask[text_off:, text_len:] = fp_or(attn_mask[text_off:, text_len:], img2txt_mask_sq)
        # Regions flagged False get fully open img->img self attention.
        if self.use_self_attn_mask_list is not None:
            for mask, use_self_attn_mask in zip(self.masks, self.use_self_attn_mask_list):
                if not use_self_attn_mask:
                    img2txt_mask_sq = F.interpolate(mask.unsqueeze(0).to(torch.float16), (h, w), mode='nearest-exact').to(dtype).flatten().unsqueeze(1).repeat(1, img_len)
                    attn_mask[text_off:, text_len:] = fp_or(attn_mask[text_off:, text_len:], img2txt_mask_sq)
        # Per-encoder cross attention columns: context_lens_list holds
        # [t5_len, llama_len] per region (sublist[0]=t5, sublist[1]=llama).
        text_len_t5 = sum(sublist[0] for sublist in self.context_lens_list)
        img2txt_mask_t5 = torch.empty((img_len, text_len_t5)).to(attn_mask)
        offset_t5_start = 0
        reg_num_slice = 0
        for context_len, mask_slice, edge_width in zip(self.context_lens, self.masks, self.edge_width_list):
            if self.edge_width < 0: # edge masks using cross-attn too
                mask_slice = fp_or(mask_slice, get_edge_mask(mask_slice, dilation=abs(self.edge_width)))
            if edge_width < 0: # edge masks using cross-attn too
                mask_slice = fp_or(mask_slice, get_edge_mask(mask_slice, dilation=abs(edge_width)))
            slice_len = self.context_lens_list[reg_num_slice][0]
            offset_t5_end = offset_t5_start + slice_len
            img2txt_mask_slice = F.interpolate(mask_slice.unsqueeze(0).to(torch.float16), (h, w), mode='nearest-exact').to(dtype).flatten().unsqueeze(1).repeat(1, slice_len)
            img2txt_mask_t5[:, offset_t5_start:offset_t5_end] = img2txt_mask_slice
            offset_t5_start = offset_t5_end
            reg_num_slice += 1
        text_len_llama = sum(sublist[1] for sublist in self.context_lens_list)
        img2txt_mask_llama = torch.empty((img_len, text_len_llama)).to(attn_mask)
        offset_llama_start = 0
        reg_num_slice = 0
        for context_len, mask_slice, edge_width in zip(self.context_lens, self.masks, self.edge_width_list):
            if self.edge_width < 0: # edge masks using cross-attn too
                mask_slice = fp_or(mask_slice, get_edge_mask(mask_slice, dilation=abs(self.edge_width)))
            if edge_width < 0: # edge masks using cross-attn too
                mask_slice = fp_or(mask_slice, get_edge_mask(mask_slice, dilation=abs(edge_width)))
            slice_len = self.context_lens_list[reg_num_slice][1]
            offset_llama_end = offset_llama_start + slice_len
            img2txt_mask_slice = F.interpolate(mask_slice.unsqueeze(0).to(torch.float16), (h, w), mode='nearest-exact').to(dtype).flatten().unsqueeze(1).repeat(1, slice_len)
            img2txt_mask_llama[:, offset_llama_start:offset_llama_end] = img2txt_mask_slice
            offset_llama_start = offset_llama_end
            reg_num_slice += 1
        # llama columns repeated x2 — NOTE(review): presumably the llama embeds
        # appear twice in the HiDream token stream; confirm against the model.
        img2txt_mask = torch.cat([img2txt_mask_t5, img2txt_mask_llama.repeat(1,2)], dim=-1)
        # Reshuffle into image-tokens-first layout: img->img block to top-left,
        # cross attention to the trailing text rows/columns.
        attn_mask[:-text_off , :-text_len ] = attn_mask[text_off:, text_len:].clone()
        attn_mask[:-text_off , -text_len:] = img2txt_mask
        attn_mask[ -text_off:, :-text_len ] = img2txt_mask.transpose(-2,-1)
        attn_mask[img_len:,img_len:] = 1.0 # txt -> txt "self-cross" attn is critical with hidream in most cases. checkerboard strategies are generally poo
        # mask cross attention between text embeds
        flat = [v for group in zip(*self.context_lens_list) for v in group]
        checkvar = checkerboard_variable(flat)
        attn_mask[img_len:, img_len:] = checkvar
        self.attn_mask = CoreAttnMask(attn_mask, mask_type=mask_type)
        #flat = [v for group in zip(*self.context_lens_list) for v in group]

    def gen_edge_mask(self, block_idx):
        """
        Rebuild the mask with randomly-scaled edge dilation per call (edge
        widths are multiplied by a fresh uniform random factor), intended for
        per-block variation; returns the mask on 'cuda'. Falls back to the
        cached mask when no edge_width_list is set.
        """
        mask_type = self.mask_type
        dtype = self.dtype
        text_off = self.text_off
        text_len = self.text_len
        img_len = self.img_len
        t = self.t
        h = self.h
        w = self.w
        if self.edge_width_list is None:
            return self.attn_mask.mask
        else:
            #attn_mask = self.attn_mask.mask.clone()
            attn_mask = torch.zeros_like(self.attn_mask.mask)
            # Start from the cached region-restricted img->img mask.
            attn_mask[text_off:, text_len:] = self.self_attn_mask.clone()
            edge_mask = torch.zeros_like(self.masks[0])
            for mask, edge_width in zip(self.masks, self.edge_width_list):
                #edge_width *= (block_idx/48)
                # Randomize dilation each call; block_idx is currently unused.
                edge_width *= torch.rand(1).item()
                edge_width = int(edge_width)
                if edge_width != 0:
                    #edge_width *= (block_idx/48)
                    #edge_width = int(edge_width)
                    edge_mask_new = get_edge_mask(mask, dilation=abs(edge_width))
                    edge_mask = fp_or(edge_mask, fp_and(edge_mask_new, mask)) #fp_and here is to ensure edge_mask only grows into the region for current mask
            img2txt_mask_sq = F.interpolate(edge_mask.unsqueeze(0).to(torch.float16), (h, w), mode='nearest-exact').to(dtype).flatten().unsqueeze(1).repeat(1, img_len)
            attn_mask[text_off:, text_len:] = fp_or(attn_mask[text_off:, text_len:], img2txt_mask_sq)
            if self.use_self_attn_mask_list is not None:
                for mask, use_self_attn_mask in zip(self.masks, self.use_self_attn_mask_list):
                    if not use_self_attn_mask:
                        img2txt_mask_sq = F.interpolate(mask.unsqueeze(0).to(torch.float16), (h, w), mode='nearest-exact').to(dtype).flatten().unsqueeze(1).repeat(1, img_len)
                        attn_mask[text_off:, text_len:] = fp_or(attn_mask[text_off:, text_len:], img2txt_mask_sq)
            # Cross attention columns rebuilt exactly as in generate().
            text_len_t5 = sum(sublist[0] for sublist in self.context_lens_list)
            img2txt_mask_t5 = torch.empty((img_len, text_len_t5)).to(attn_mask)
            offset_t5_start = 0
            reg_num_slice = 0
            for context_len, mask_slice, edge_width in zip(self.context_lens, self.masks, self.edge_width_list):
                if self.edge_width < 0: # edge masks using cross-attn too
                    mask_slice = fp_or(mask_slice, get_edge_mask(mask_slice, dilation=abs(self.edge_width)))
                if edge_width < 0: # edge masks using cross-attn too
                    mask_slice = fp_or(mask_slice, get_edge_mask(mask_slice, dilation=abs(edge_width)))
                slice_len = self.context_lens_list[reg_num_slice][0]
                offset_t5_end = offset_t5_start + slice_len
                img2txt_mask_slice = F.interpolate(mask_slice.unsqueeze(0).to(torch.float16), (h, w), mode='nearest-exact').to(dtype).flatten().unsqueeze(1).repeat(1, slice_len)
                img2txt_mask_t5[:, offset_t5_start:offset_t5_end] = img2txt_mask_slice
                offset_t5_start = offset_t5_end
                reg_num_slice += 1
            text_len_llama = sum(sublist[1] for sublist in self.context_lens_list)
            img2txt_mask_llama = torch.empty((img_len, text_len_llama)).to(attn_mask)
            offset_llama_start = 0
            reg_num_slice = 0
            for context_len, mask_slice, edge_width in zip(self.context_lens, self.masks, self.edge_width_list):
                if self.edge_width < 0: # edge masks using cross-attn too
                    mask_slice = fp_or(mask_slice, get_edge_mask(mask_slice, dilation=abs(self.edge_width)))
                if edge_width < 0: # edge masks using cross-attn too
                    mask_slice = fp_or(mask_slice, get_edge_mask(mask_slice, dilation=abs(edge_width)))
                slice_len = self.context_lens_list[reg_num_slice][1]
                offset_llama_end = offset_llama_start + slice_len
                img2txt_mask_slice = F.interpolate(mask_slice.unsqueeze(0).to(torch.float16), (h, w), mode='nearest-exact').to(dtype).flatten().unsqueeze(1).repeat(1, slice_len)
                img2txt_mask_llama[:, offset_llama_start:offset_llama_end] = img2txt_mask_slice
                offset_llama_start = offset_llama_end
                reg_num_slice += 1
            img2txt_mask = torch.cat([img2txt_mask_t5, img2txt_mask_llama.repeat(1,2)], dim=-1)
            # Reshuffle into image-tokens-first layout (see generate()).
            attn_mask[:-text_off , :-text_len ] = attn_mask[text_off:, text_len:].clone()
            attn_mask[:-text_off , -text_len:] = img2txt_mask
            attn_mask[ -text_off:, :-text_len ] = img2txt_mask.transpose(-2,-1)
            attn_mask[img_len:,img_len:] = 1.0 # txt -> txt "self-cross" attn is critical with hidream in most cases. checkerboard strategies are generally poo
            # mask cross attention between text embeds
            flat = [v for group in zip(*self.context_lens_list) for v in group]
            checkvar = checkerboard_variable(flat)
            attn_mask[img_len:, img_len:] = checkvar
            return attn_mask.to('cuda')
class RegionalContext:
    """
    Accumulates per-region conditioning tensors (context embeds, CLIP features,
    llama3 embeds, per-region lists) and serves them concatenated on the work
    device.
    """

    def __init__(self, idle_device='cpu', work_device='cuda'):
        self.context          = None
        self.clip_fea         = None
        self.llama3           = None
        self.context_list     = []
        self.clip_fea_list    = []
        self.clip_pooled_list = []
        self.llama3_list      = []
        self.t5_list          = []
        self.pooled_output    = None
        self.idle_device      = idle_device
        self.work_device      = work_device

    def add_region(self, context, pooled_output=None, clip_fea=None):
        """Append one region's context (concatenated along the token dim)."""
        if self.context is None:
            self.context = context
        else:
            self.context = torch.cat([self.context, context], dim=1)
        self.context_list.append(context)
        if pooled_output is not None:
            self.clip_pooled_list.append(pooled_output)
        if clip_fea is not None:
            self.add_region_clip_fea(clip_fea)

    def add_region_clip_fea(self, clip_fea):
        """Append one region's CLIP features (concatenated along the token dim)."""
        if self.clip_fea is None:
            self.clip_fea = clip_fea
        else:
            self.clip_fea = torch.cat([self.clip_fea, clip_fea], dim=1)
        self.clip_fea_list.append(clip_fea)

    def add_region_llama3(self, llama3):
        """Append one region's llama3 embeds along dim -2 (base shape 1,32,128,4096)."""
        if self.llama3 is None:
            self.llama3 = llama3
        else:
            self.llama3 = torch.cat([self.llama3, llama3], dim=-2)

    def add_region_hidream(self, t5, llama3):
        """Track per-region t5/llama3 embeds without concatenating."""
        self.t5_list.append(t5)
        self.llama3_list.append(llama3)

    def clear_regions(self):
        """Drop all accumulated conditioning and reset to the empty state."""
        for name in ("context", "clip_fea", "llama3"):
            if getattr(self, name) is not None:
                setattr(self, name, None)
        self.t5_list = []
        self.llama3_list = []

    def get(self):
        """Concatenated context on the work device."""
        return self.context.to(self.work_device)

    def get_clip_fea(self):
        """Concatenated CLIP features on the work device, or None if unset."""
        return None if self.clip_fea is None else self.clip_fea.to(self.work_device)

    def get_llama3(self):
        """Concatenated llama3 embeds on the work device, or None if unset."""
        return None if self.llama3 is None else self.llama3.to(self.work_device)
class CrossAttentionMask(BaseAttentionMask):
    # Builds only the img->txt cross-attention mask (t*img_len x text_len);
    # no img->img self attention.
    def generate(self, mask_type=None, dtype=None):
        """
        Build the cross-attention mask from all registered regions and store it
        as a CoreAttnMask in self.attn_mask. Region masks of varying rank are
        normalized: ndim 4 with a leading dim > 1 is treated as a stacked
        [cross, ...] mask pair padded/truncated to self.t frames.
        NOTE(review): squeeze_/unsqueeze_ mutate the caller-owned mask tensors
        in place — confirm callers do not reuse them afterwards.
        """
        mask_type = self.mask_type if mask_type is None else mask_type
        dtype = self.dtype if dtype is None else dtype
        text_off = self.text_off
        text_len = self.text_len
        img_len = self.img_len
        t = self.t
        h = self.h
        w = self.w
        cross_attn_mask = torch.zeros((t * img_len, text_len), dtype=dtype)
        prev_len = 0
        for context_len, mask in zip(self.context_lens, self.masks):
            cross_mask, self_mask = None, None
            # Normalize mask rank and determine its temporal extent t_mask.
            if mask.ndim == 6:
                mask.squeeze_(0)
            if mask.ndim == 3:
                t_mask = mask.shape[0]
            elif mask.ndim == 4:
                if mask.shape[0] > 1:
                    # Dedicated cross-attn mask supplied as the first slice;
                    # pad or truncate its temporal dim to self.t.
                    cross_mask = mask[0]
                    if cross_mask.shape[-3] > self.t:
                        cross_mask = cross_mask[:self.t,...]
                    elif cross_mask.shape[-3] < self.t:
                        cross_mask = F.pad(cross_mask.permute(1,2,0), [0,self.t-cross_mask.shape[-3]], value=0).permute(2,0,1)
                    t_mask = self.t
                else:
                    t_mask = mask.shape[-3]
                    mask.squeeze_(0)
            elif mask.ndim == 5:
                t_mask = mask.shape[-3]
            else:
                t_mask = 1
                mask.unsqueeze_(0)
            # Downsample to the (t_mask, h, w) token grid and flatten to a column.
            if cross_mask is not None:
                img2txt_mask = F.interpolate(cross_mask.unsqueeze(0).unsqueeze(0).to(torch.float16), (t_mask, h, w), mode='nearest-exact').to(dtype).flatten().unsqueeze(1)
            else:
                img2txt_mask = F.interpolate( mask.unsqueeze(0).unsqueeze(0).to(torch.float16), (t_mask, h, w), mode='nearest-exact').to(dtype).flatten().unsqueeze(1)
            if t_mask == 1: # ...why only if == 1?
                img2txt_mask = img2txt_mask.repeat(1, context_len)
            curr_len = prev_len + context_len
            # Static (t_mask==1) masks are tiled across all t frames; temporal
            # masks already cover t*img_len rows.
            if t_mask == 1:
                cross_attn_mask[:, prev_len:curr_len] = img2txt_mask.repeat(t,1)
            else:
                cross_attn_mask[:, prev_len:curr_len] = img2txt_mask
            prev_len = curr_len
        self.attn_mask = CoreAttnMask(cross_attn_mask, mask_type=mask_type)
class SplitAttentionMask(BaseAttentionMask):
    # Builds cross-attn (t*img_len x text_len) and img self-attn
    # (t*img_len x t*img_len) masks separately, then concatenates them
    # column-wise into one (t*img_len x (text_len + t*img_len)) mask.
    def generate(self, mask_type=None, dtype=None):
        """
        Build the split attention mask from all registered regions and store it
        as a CoreAttnMask in self.attn_mask. Region masks of ndim 4 with a
        leading dim > 1 supply separate [cross, self] mask slices, each
        padded/truncated to self.t frames.
        NOTE(review): squeeze_/unsqueeze_ mutate caller-owned mask tensors in
        place — confirm callers do not reuse them afterwards.
        """
        mask_type = self.mask_type if mask_type is None else mask_type
        dtype = self.dtype if dtype is None else dtype
        text_off = self.text_off
        text_len = self.text_len
        img_len = self.img_len
        t = self.t
        h = self.h
        w = self.w
        if self.edge_width_list is None:
            self.edge_width_list = [self.edge_width] * self.num_regions
        cross_attn_mask = torch.zeros((t * img_len, text_len), dtype=dtype)
        self_attn_mask = torch.zeros((t * img_len, t * img_len), dtype=dtype)
        prev_len = 0
        self_masks = []
        for context_len, mask in zip(self.context_lens, self.masks):
            cross_mask, self_mask = None, None
            # Normalize mask rank and determine its temporal extent t_mask.
            if mask.ndim == 6:
                mask.squeeze_(0)
            if mask.ndim == 3:
                t_mask = mask.shape[0]
            elif mask.ndim == 4:
                if mask.shape[0] > 1:
                    # Slice 0 is the cross-attn mask, slice 1 the self-attn
                    # mask; both padded/truncated to self.t frames.
                    cross_mask = mask[0]
                    if cross_mask.shape[-3] > self.t:
                        cross_mask = cross_mask[:self.t,...]
                    elif cross_mask.shape[-3] < self.t:
                        cross_mask = F.pad(cross_mask.permute(1,2,0), [0,self.t-cross_mask.shape[-3]], value=0).permute(2,0,1)
                    self_mask = mask[1]
                    if self_mask.shape[-3] > self.t:
                        self_mask = self_mask[:self.t,...]
                    elif self_mask.shape[-3] < self.t:
                        self_mask = F.pad(self_mask.permute(1,2,0), [0,self.t-self_mask.shape[-3]], value=0).permute(2,0,1)
                    t_mask = self.t
                else:
                    t_mask = mask.shape[-3]
                    mask.squeeze_(0)
            elif mask.ndim == 5:
                t_mask = mask.shape[-3]
            else:
                t_mask = 1
                mask.unsqueeze_(0)
            # Cross-attn column for this region's context tokens.
            if cross_mask is not None:
                img2txt_mask = F.interpolate(cross_mask.unsqueeze(0).unsqueeze(0).to(torch.float16), (t_mask, h, w), mode='nearest-exact').to(dtype).flatten().unsqueeze(1)
            else:
                img2txt_mask = F.interpolate( mask.unsqueeze(0).unsqueeze(0).to(torch.float16), (t_mask, h, w), mode='nearest-exact').to(dtype).flatten().unsqueeze(1)
            if t_mask == 1: # ...why only if == 1?
                img2txt_mask = img2txt_mask.repeat(1, context_len)
            curr_len = prev_len + context_len
            if t_mask == 1:
                cross_attn_mask[:, prev_len:curr_len] = img2txt_mask.repeat(t,1)
            else:
                cross_attn_mask[:, prev_len:curr_len] = img2txt_mask
            # Region-restricted img->img self attention (query and key inside).
            if self_mask is not None:
                img2txt_mask_sq = F.interpolate(self_mask.unsqueeze(0).to(torch.float16), (h, w), mode='nearest-exact').to(dtype).flatten().unsqueeze(1).repeat(1, t_mask * img_len)
            else:
                img2txt_mask_sq = F.interpolate( mask.unsqueeze(0).to(torch.float16), (h, w), mode='nearest-exact').to(dtype).flatten().unsqueeze(1).repeat(1, t_mask * img_len)
            self_masks.append(img2txt_mask_sq)
            if t_mask > 1:
                self_attn_mask = fp_or(self_attn_mask, fp_and(img2txt_mask_sq, img2txt_mask_sq.transpose(-1,-2)))
            else:
                # NOTE(review): the .repeat(t,t) here sits outside fp_and on the
                # transposed operand only — shapes look asymmetric vs the other
                # classes; confirm intended (possible misplaced paren).
                self_attn_mask = fp_or(self_attn_mask, fp_and(img2txt_mask_sq.repeat(t,t), img2txt_mask_sq.transpose(-1,-2)).repeat(t,t))
            prev_len = curr_len
        # Suffix handling: open full img->img self attention from selected
        # regions (A=first, B=second, C/unmasked=last).
        if self.mask_type.endswith("_masked") or self.mask_type.endswith("_A") or self.mask_type.endswith("_AB") or self.mask_type.endswith("_AC") or self.mask_type.endswith("_A,unmasked"):
            self_attn_mask = fp_or(self_attn_mask, self_masks[0])
        if self.mask_type.endswith("_unmasked") or self.mask_type.endswith("_C") or self.mask_type.endswith("_BC") or self.mask_type.endswith("_AC") or self.mask_type.endswith("_B,unmasked") or self.mask_type.endswith("_A,unmasked"):
            self_attn_mask = fp_or(self_attn_mask, self_masks[-1])
        if self.mask_type.endswith("_B") or self.mask_type.endswith("_AB") or self.mask_type.endswith("_BC") or self.mask_type.endswith("_B,unmasked"):
            self_attn_mask = fp_or(self_attn_mask, self_masks[1])
        # Boundary softening for img->img self attention.
        if self.edge_width > 0:
            edge_mask = torch.zeros_like(self.masks[0])
            for mask in self.masks:
                edge_mask_new = get_edge_mask(mask, dilation=abs(self.edge_width))
                edge_mask = fp_or(edge_mask, edge_mask_new)
                #edge_mask = fp_or(edge_mask, get_edge_mask(mask, dilation=self.edge_width))
            img2txt_mask_sq = F.interpolate(edge_mask.unsqueeze(0).to(torch.float16), (h, w), mode='nearest-exact').to(dtype).flatten().unsqueeze(1).repeat(1, t_mask * img_len)
            self_attn_mask = fp_or(self_attn_mask, img2txt_mask_sq)
        elif self.edge_width_list is not None:
            edge_mask = torch.zeros_like(self.masks[0])
            for mask, edge_width in zip(self.masks, self.edge_width_list):
                if edge_width != 0:
                    edge_mask_new = get_edge_mask(mask, dilation=abs(edge_width))
                    edge_mask = fp_or(edge_mask, fp_and(edge_mask_new, mask)) #fp_and here is to ensure edge_mask only grows into the region for current mask
            img2txt_mask_sq = F.interpolate(edge_mask.unsqueeze(0).to(torch.float16), (h, w), mode='nearest-exact').to(dtype).flatten().unsqueeze(1).repeat(1, t_mask * img_len)
            self_attn_mask = fp_or(self_attn_mask, img2txt_mask_sq)
        # Regions flagged False get fully open img->img self attention.
        if self.use_self_attn_mask_list is not None:
            for mask, use_self_attn_mask in zip(self.masks, self.use_self_attn_mask_list):
                if not use_self_attn_mask:
                    img2txt_mask_sq = F.interpolate(mask.unsqueeze(0).to(torch.float16), (h, w), mode='nearest-exact').to(dtype).flatten().unsqueeze(1).repeat(1, t_mask * img_len)
                    self_attn_mask = fp_or(self_attn_mask, img2txt_mask_sq)
        attn_mask = torch.cat([cross_attn_mask, self_attn_mask], dim=1)
        self.attn_mask = CoreAttnMask(attn_mask, mask_type=mask_type)
================================================
FILE: aura/mmdit.py
================================================
#AuraFlow MMDiT
#Originally written by the AuraFlow Authors
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
#from comfy.ldm.modules.attention import optimized_attention
from comfy.ldm.modules.attention import attention_pytorch
import comfy.ops
import comfy.ldm.common_dit
from ..helper import ExtraOptions
from typing import Dict, Optional, Tuple, List
from ..latents import slerp_tensor, interpolate_spd, tile_latent, untile_latent, gaussian_blur_2d, median_blur_2d
from ..style_transfer import apply_scattersort_masked, apply_scattersort_tiled, adain_seq_inplace, adain_patchwise_row_batch_med, adain_patchwise_row_batch
from einops import rearrange
def modulate(x, shift, scale):
    # AdaLN-style affine modulation: identity-plus-scale, then shift.
    # shift/scale are (B, D) and broadcast over the sequence dimension.
    scaled = x * (scale.unsqueeze(1) + 1)
    return scaled + shift.unsqueeze(1)
def find_multiple(n: int, k: int) -> int:
    """Round ``n`` up to the nearest multiple of ``k``."""
    remainder = n % k
    return n if remainder == 0 else n + k - remainder
class MLP(nn.Module): # not executed directly with ReAura?
    """SwiGLU feed-forward block: silu(W1 x) * (W2 x), projected back to dim."""

    def __init__(self, dim, hidden_dim=None, dtype=None, device=None, operations=None) -> None:
        super().__init__()
        hidden_dim = 4 * dim if hidden_dim is None else hidden_dim
        # 2/3 scaling keeps parameter count comparable to a plain 4x MLP;
        # width is rounded up to a multiple of 256 for hardware friendliness.
        n_hidden = find_multiple(int(2 * hidden_dim / 3), 256)

        self.c_fc1 = operations.Linear(dim, n_hidden, bias=False, dtype=dtype, device=device)
        self.c_fc2 = operations.Linear(dim, n_hidden, bias=False, dtype=dtype, device=device)
        self.c_proj = operations.Linear(n_hidden, dim, bias=False, dtype=dtype, device=device)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        gated = F.silu(self.c_fc1(x)) * self.c_fc2(x)
        return self.c_proj(gated)
class MultiHeadLayerNorm(nn.Module):
    """LayerNorm over the last dimension with a learnable gain, computed in
    float32 for numerical stability and cast back to the input dtype.

    Adapted from https://github.com/huggingface/transformers/blob/e5f71ecaae50ea476d1e12351003790273c4b2ed/src/transformers/models/cohere/modeling_cohere.py#L78
    """

    def __init__(self, hidden_size=None, eps=1e-5, dtype=None, device=None):
        super().__init__()
        self.weight = nn.Parameter(torch.empty(hidden_size, dtype=dtype, device=device))
        self.variance_epsilon = eps

    def forward(self, hidden_states):
        orig_dtype = hidden_states.dtype
        h = hidden_states.to(torch.float32)

        mu = h.mean(-1, keepdim=True)
        var = (h - mu).pow(2).mean(-1, keepdim=True)
        normed = (h - mu) * torch.rsqrt(var + self.variance_epsilon)

        normed = self.weight.to(torch.float32) * normed
        return normed.to(orig_dtype)
class ReSingleAttention(nn.Module):
    """Multi-head self-attention over a single token stream, with QK-norm."""

    def __init__(self, dim, n_heads, mh_qknorm=False, dtype=None, device=None, operations=None):
        super().__init__()

        self.n_heads = n_heads
        self.head_dim = dim // n_heads

        # this is for cond
        self.w1q = operations.Linear(dim, dim, bias=False, dtype=dtype, device=device)
        self.w1k = operations.Linear(dim, dim, bias=False, dtype=dtype, device=device)
        self.w1v = operations.Linear(dim, dim, bias=False, dtype=dtype, device=device)
        self.w1o = operations.Linear(dim, dim, bias=False, dtype=dtype, device=device)

        def _qk_norm():
            # per-head affine norm when mh_qknorm, plain non-affine LayerNorm otherwise
            if mh_qknorm:
                return MultiHeadLayerNorm((self.n_heads, self.head_dim), dtype=dtype, device=device)
            return operations.LayerNorm(self.head_dim, elementwise_affine=False, dtype=dtype, device=device)

        self.q_norm1 = _qk_norm()
        self.k_norm1 = _qk_norm()

    def forward(self, c, mask=None):
        bsz, seqlen1, _ = c.shape

        q = self.w1q(c).view(bsz, seqlen1, self.n_heads, self.head_dim)
        k = self.w1k(c).view(bsz, seqlen1, self.n_heads, self.head_dim)
        v = self.w1v(c).view(bsz, seqlen1, self.n_heads, self.head_dim)

        q = self.q_norm1(q)
        k = self.k_norm1(k)

        # attention_pytorch expects (B, H, T, D) when skip_reshape=True
        out = attention_pytorch(
            q.permute(0, 2, 1, 3),
            k.permute(0, 2, 1, 3),
            v.permute(0, 2, 1, 3),
            self.n_heads,
            skip_reshape=True,
            mask=mask,
        )
        return self.w1o(out)
class ReDoubleAttention(nn.Module):
    """Joint attention over the conditioning stream (c) and image stream (x):
    each stream is projected with its own weights, the sequences are
    concatenated, attended once, then split and projected back per stream."""

    def __init__(self, dim, n_heads, mh_qknorm=False, dtype=None, device=None, operations=None):
        super().__init__()

        self.n_heads = n_heads
        self.head_dim = dim // n_heads

        # this is for cond 1 (one) not l (L)
        self.w1q = operations.Linear(dim, dim, bias=False, dtype=dtype, device=device)
        self.w1k = operations.Linear(dim, dim, bias=False, dtype=dtype, device=device)
        self.w1v = operations.Linear(dim, dim, bias=False, dtype=dtype, device=device)
        self.w1o = operations.Linear(dim, dim, bias=False, dtype=dtype, device=device)

        # this is for x
        self.w2q = operations.Linear(dim, dim, bias=False, dtype=dtype, device=device)
        self.w2k = operations.Linear(dim, dim, bias=False, dtype=dtype, device=device)
        self.w2v = operations.Linear(dim, dim, bias=False, dtype=dtype, device=device)
        self.w2o = operations.Linear(dim, dim, bias=False, dtype=dtype, device=device)

        def _qk_norm():
            # per-head affine norm when mh_qknorm, plain non-affine LayerNorm otherwise
            if mh_qknorm:
                return MultiHeadLayerNorm((self.n_heads, self.head_dim), dtype=dtype, device=device)
            return operations.LayerNorm(self.head_dim, elementwise_affine=False, dtype=dtype, device=device)

        self.q_norm1 = _qk_norm()
        self.k_norm1 = _qk_norm()
        self.q_norm2 = _qk_norm()
        self.k_norm2 = _qk_norm()

    def forward(self, c, x, mask=None):
        bsz, seqlen1, _ = c.shape
        bsz, seqlen2, _ = x.shape

        cq = self.w1q(c).view(bsz, seqlen1, self.n_heads, self.head_dim)
        ck = self.w1k(c).view(bsz, seqlen1, self.n_heads, self.head_dim)
        cv = self.w1v(c).view(bsz, seqlen1, self.n_heads, self.head_dim)
        cq = self.q_norm1(cq)
        ck = self.k_norm1(ck)

        xq = self.w2q(x).view(bsz, seqlen2, self.n_heads, self.head_dim)
        xk = self.w2k(x).view(bsz, seqlen2, self.n_heads, self.head_dim)
        xv = self.w2v(x).view(bsz, seqlen2, self.n_heads, self.head_dim)
        xq = self.q_norm2(xq)
        xk = self.k_norm2(xk)

        # attend over the concatenated [cond, image] sequence
        q = torch.cat([cq, xq], dim=1)
        k = torch.cat([ck, xk], dim=1)
        v = torch.cat([cv, xv], dim=1)

        out = attention_pytorch(q.permute(0, 2, 1, 3), k.permute(0, 2, 1, 3), v.permute(0, 2, 1, 3), self.n_heads, skip_reshape=True, mask=mask)

        c_out, x_out = out.split([seqlen1, seqlen2], dim=1)
        return self.w1o(c_out), self.w2o(x_out)
class ReMMDiTBlock(nn.Module):
    """Double-stream MMDiT block: AdaLN-modulated joint attention followed by
    per-stream gated MLPs. With is_last=True the cond stream only gets a
    2*dim modulation and no MLP is allocated."""

    def __init__(self, dim, heads=8, global_conddim=1024, is_last=False, dtype=None, device=None, operations=None):
        super().__init__()

        self.normC1 = operations.LayerNorm(dim, elementwise_affine=False, dtype=dtype, device=device)
        self.normC2 = operations.LayerNorm(dim, elementwise_affine=False, dtype=dtype, device=device)
        if not is_last:
            self.mlpC = MLP(dim, hidden_dim=dim * 4, dtype=dtype, device=device, operations=operations)
            self.modC = nn.Sequential(
                nn.SiLU(),
                operations.Linear(global_conddim, 6 * dim, bias=False, dtype=dtype, device=device),
            )
        else:
            self.modC = nn.Sequential(
                nn.SiLU(),
                operations.Linear(global_conddim, 2 * dim, bias=False, dtype=dtype, device=device),
            )

        self.normX1 = operations.LayerNorm(dim, elementwise_affine=False, dtype=dtype, device=device)
        self.normX2 = operations.LayerNorm(dim, elementwise_affine=False, dtype=dtype, device=device)
        self.mlpX = MLP(dim, hidden_dim=dim * 4, dtype=dtype, device=device, operations=operations)
        self.modX = nn.Sequential(
            nn.SiLU(),
            operations.Linear(global_conddim, 6 * dim, bias=False, dtype=dtype, device=device),
        )

        self.attn = ReDoubleAttention(dim, heads, dtype=dtype, device=device, operations=operations)
        self.is_last = is_last

    def forward(self, c, x, global_cond, mask=None, **kwargs):
        cres, xres = c, x

        # cond-stream modulation parameters (attn + mlp)
        cshift_msa, cscale_msa, cgate_msa, cshift_mlp, cscale_mlp, cgate_mlp = self.modC(global_cond).chunk(6, dim=1)
        c = modulate(self.normC1(c), cshift_msa, cscale_msa)

        # image-stream modulation parameters (attn + mlp)
        xshift_msa, xscale_msa, xgate_msa, xshift_mlp, xscale_mlp, xgate_mlp = self.modX(global_cond).chunk(6, dim=1)
        x = modulate(self.normX1(x), xshift_msa, xscale_msa)

        c, x = self.attn(c, x, mask=mask)

        # gated attention residual, then gated MLP residual — cond stream
        c = self.normC2(cres + cgate_msa.unsqueeze(1) * c)
        c = cres + cgate_mlp.unsqueeze(1) * self.mlpC(modulate(c, cshift_mlp, cscale_mlp))

        # gated attention residual, then gated MLP residual — image stream
        x = self.normX2(xres + xgate_msa.unsqueeze(1) * x)
        x = xres + xgate_mlp.unsqueeze(1) * self.mlpX(modulate(x, xshift_mlp, xscale_mlp))

        return c, x
class ReDiTBlock(nn.Module):
    # like MMDiTBlock, but operates on a single concatenated stream
    def __init__(self, dim, heads=8, global_conddim=1024, dtype=None, device=None, operations=None):
        super().__init__()

        self.norm1 = operations.LayerNorm(dim, elementwise_affine=False, dtype=dtype, device=device)
        self.norm2 = operations.LayerNorm(dim, elementwise_affine=False, dtype=dtype, device=device)
        self.modCX = nn.Sequential(
            nn.SiLU(),
            operations.Linear(global_conddim, 6 * dim, bias=False, dtype=dtype, device=device),
        )
        self.attn = ReSingleAttention(dim, heads, dtype=dtype, device=device, operations=operations)
        self.mlp = MLP(dim, hidden_dim=dim * 4, dtype=dtype, device=device, operations=operations)

    def forward(self, cx, global_cond, mask=None, **kwargs):
        residual = cx

        mods = self.modCX(global_cond).chunk(6, dim=1)
        shift_msa, scale_msa, gate_msa, shift_mlp, scale_mlp, gate_mlp = mods

        # attention with gated residual
        attn_out = self.attn(modulate(self.norm1(cx), shift_msa, scale_msa), mask=mask)
        cx = self.norm2(residual + gate_msa.unsqueeze(1) * attn_out)

        # MLP with gated residual
        return residual + gate_mlp.unsqueeze(1) * self.mlp(modulate(cx, shift_mlp, scale_mlp))
class TimestepEmbedder(nn.Module):
    """Embeds scalar timesteps into a conditioning vector: sinusoidal
    frequency features followed by a two-layer MLP."""

    def __init__(self, hidden_size, frequency_embedding_size=256, dtype=None, device=None, operations=None):
        super().__init__()
        self.mlp = nn.Sequential(
            operations.Linear(frequency_embedding_size, hidden_size, dtype=dtype, device=device),
            nn.SiLU(),
            operations.Linear(hidden_size, hidden_size, dtype=dtype, device=device),
        )
        self.frequency_embedding_size = frequency_embedding_size

    @staticmethod
    def timestep_embedding(t, dim, max_period=10000):
        half = dim // 2
        # geometric frequency ladder; the 1000x factor matches AuraFlow's
        # timestep scaling convention.
        exponent = -math.log(max_period) * torch.arange(start=0, end=half) / half
        freqs = 1000 * torch.exp(exponent).to(t.device)
        args = t[:, None] * freqs[None]
        embedding = torch.cat([torch.cos(args), torch.sin(args)], dim=-1)
        if dim % 2:
            # odd target dims get one zero-padded column
            embedding = torch.cat([embedding, torch.zeros_like(embedding[:, :1])], dim=-1)
        return embedding

    def forward(self, t, dtype):
        freq_emb = self.timestep_embedding(t, self.frequency_embedding_size).to(dtype)
        return self.mlp(freq_emb)
class ReMMDiT(nn.Module):
    # AuraFlow-style MMDiT backbone: n_double_layers double-stream (text+image)
    # blocks followed by single-stream blocks over the concatenated sequence.
    def __init__(
        self,
        in_channels=4,
        out_channels=4,
        patch_size=2,
        dim=3072,
        n_layers=36,
        n_double_layers=4,
        n_heads=12,
        global_conddim=3072,
        cond_seq_dim=2048,
        max_seq=32 * 32,
        device=None,
        dtype=None,
        operations=None,
    ):
        super().__init__()
        self.dtype = dtype

        # timestep -> global conditioning vector used for AdaLN modulation
        self.t_embedder = TimestepEmbedder(global_conddim, dtype=dtype, device=device, operations=operations)

        self.cond_seq_linear = operations.Linear(
            cond_seq_dim, dim, bias=False, dtype=dtype, device=device
        )  # linear for something like text sequence.
        self.init_x_linear = operations.Linear(
            patch_size * patch_size * in_channels, dim, dtype=dtype, device=device
        )  # init linear for patchified image.

        # learned absolute positional encoding over max_seq image tokens, and
        # 8 register tokens prepended to the conditioning sequence
        self.positional_encoding = nn.Parameter(torch.empty(1, max_seq, dim, dtype=dtype, device=device))
        self.register_tokens = nn.Parameter(torch.empty(1, 8, dim, dtype=dtype, device=device))

        self.double_layers = nn.ModuleList([])
        self.single_layers = nn.ModuleList([])

        # NOTE(review): is_last compares against n_layers - 1, but double-layer
        # indices only reach n_double_layers - 1, so is_last is never True here
        # unless n_double_layers == n_layers — confirm intent.
        for idx in range(n_double_layers):
            self.double_layers.append(
                ReMMDiTBlock(dim, n_heads, global_conddim, is_last=(idx == n_layers - 1), dtype=dtype, device=device, operations=operations)
            )

        for idx in range(n_double_layers, n_layers):
            self.single_layers.append(
                ReDiTBlock(dim, n_heads, global_conddim, dtype=dtype, device=device, operations=operations)
            )

        self.final_linear = operations.Linear(
            dim, patch_size * patch_size * out_channels, bias=False, dtype=dtype, device=device
        )

        # final shift/scale modulation applied just before the output projection
        self.modF = nn.Sequential(
            nn.SiLU(),
            operations.Linear(global_conddim, 2 * dim, bias=False, dtype=dtype, device=device),
        )

        self.out_channels = out_channels
        self.patch_size = patch_size
        self.n_double_layers = n_double_layers
        self.n_layers = n_layers

        # positional grid is assumed square: max_seq = h_max * w_max
        self.h_max = round(max_seq**0.5)
        self.w_max = round(max_seq**0.5)
@torch.no_grad()
def extend_pe(self, init_dim=(16, 16), target_dim=(64, 64)):
    """Grow the learned positional-encoding grid from init_dim to target_dim
    via bilinear interpolation, updating h_max/w_max in place."""
    d0, d1 = init_dim
    flat_pe = self.positional_encoding.data.squeeze(0)[: d0 * d1]
    grid_pe = flat_pe.view(d0, d1, -1).permute(2, 0, 1)

    # resize the (C, H, W) grid to the new spatial extent
    grid_pe = F.interpolate(grid_pe.unsqueeze(0), size=target_dim, mode="bilinear")

    new_pe = grid_pe.squeeze(0).permute(1, 2, 0).flatten(0, 1)
    self.positional_encoding.data = new_pe.unsqueeze(0).contiguous()
    self.h_max, self.w_max = target_dim
def pe_selection_index_based_on_dim(self, h, w):
    """Return the flattened indices of a centered (h//p) x (w//p) window of
    the stored positional-encoding grid."""
    h_p = h // self.patch_size
    w_p = w // self.patch_size
    index_grid = torch.arange(self.positional_encoding.shape[1]).view(self.h_max, self.w_max)

    # crop a centered window from the full index grid
    top = self.h_max // 2 - h_p // 2
    left = self.w_max // 2 - w_p // 2
    window = index_grid[top : top + h_p, left : left + w_p]
    return window.flatten()
def unpatchify(self, x, h, w):
    """Reassemble (B, h*w, p*p*C) patch tokens into (B, C, h*p, w*p) images."""
    c = self.out_channels
    p = self.patch_size

    patches = x.reshape(shape=(x.shape[0], h, w, p, p, c))
    # n h w p q c -> n c (h p) (w q)
    patches = torch.einsum("nhwpqc->nchpwq", patches)
    return patches.reshape(shape=(patches.shape[0], c, h * p, w * p))
def patchify(self, x):
    """Pad (B, C, H, W) up to a multiple of patch_size and flatten it into
    (B, N, C*p*p) patch tokens."""
    B, C, H, W = x.size()
    x = comfy.ldm.common_dit.pad_to_patch_size(x, (self.patch_size, self.patch_size))
    # NOTE(review): (H + 1) // patch_size only equals ceil(H / p) for p == 2 —
    # confirm this model is never built with another patch size.
    x = x.view(
        B,
        C,
        (H + 1) // self.patch_size,
        self.patch_size,
        (W + 1) // self.patch_size,
        self.patch_size,
    )
    return x.permute(0, 2, 4, 1, 3, 5).flatten(-3).flatten(1, 2)
def apply_pos_embeds(self, x, h, w):
    """Add a centered (and, if needed, up-scaled) crop of the positional
    encoding to the patch-token sequence x."""
    h = (h + 1) // self.patch_size
    w = (w + 1) // self.patch_size

    max_dim = max(h, w)
    cur_dim = self.h_max
    pos = comfy.ops.cast_to_input(self.positional_encoding.reshape(1, cur_dim, cur_dim, -1), x)

    # upscale the PE grid when the requested area exceeds the stored grid
    if max_dim > cur_dim:
        pos = F.interpolate(pos.movedim(-1, 1), (max_dim, max_dim), mode="bilinear").movedim(1, -1)
        cur_dim = max_dim

    # centered crop down to (h, w)
    top = (cur_dim - h) // 2
    left = (cur_dim - w) // 2
    pos = pos[:, top : top + h, left : left + w]
    return x + pos.reshape(1, -1, self.positional_encoding.shape[-1])
def forward(self, x, timestep, context, transformer_options={}, **kwargs):
    """Run the denoiser once per cond/uncond batch entry, with optional
    regional attention masking and post-hoc style guidance.

    Returns eps (float32) with the same batch layout as x. All optional
    behavior (regional masks, AttnMask/RegContext, y0 style guidance via
    scattersort/AdaIN/WCT, frequency separation) is driven by
    transformer_options.

    BUGFIX: the regional-conditioning mask cast below previously read
    `img.dtype` / `img.device`, but `img` is never defined in this method
    (the patchified latent is `x` here), so supplying
    'reg_cond_mask_expanded' raised a NameError. It now casts against `x`.
    """
    x_orig = x.clone()
    context_orig = context.clone()
    SIGMA = timestep[0].unsqueeze(0) #/ 1000
    EO = transformer_options.get("ExtraOptions", ExtraOptions(""))
    if EO is not None:
        EO.mute = True

    y0_style_pos = transformer_options.get("y0_style_pos")
    y0_style_neg = transformer_options.get("y0_style_neg")

    y0_style_pos_weight    = transformer_options.get("y0_style_pos_weight", 0.0)
    y0_style_pos_synweight = transformer_options.get("y0_style_pos_synweight", 0.0)
    y0_style_pos_synweight *= y0_style_pos_weight

    y0_style_neg_weight    = transformer_options.get("y0_style_neg_weight", 0.0)
    y0_style_neg_synweight = transformer_options.get("y0_style_neg_synweight", 0.0)
    y0_style_neg_synweight *= y0_style_neg_weight

    out_list = []
    # process each cond/uncond entry separately so regional masks can differ
    for i in range(len(transformer_options['cond_or_uncond'])):
        UNCOND = transformer_options['cond_or_uncond'][i] == 1

        x = x_orig[i][None,...].clone()
        context = context_orig.clone()

        patches_replace = transformer_options.get("patches_replace", {})

        # patchify x, add PE
        b, c, h, w = x.shape
        h_len = ((h + (self.patch_size // 2)) // self.patch_size) # h_len 96
        w_len = ((w + (self.patch_size // 2)) // self.patch_size) # w_len 96

        x = self.init_x_linear(self.patchify(x)) # B, T_x, D
        x = self.apply_pos_embeds(x, h, w)

        if UNCOND:
            transformer_options['reg_cond_weight'] = transformer_options.get("regional_conditioning_weight", 0.0)
            transformer_options['reg_cond_floor']  = transformer_options.get("regional_conditioning_floor", 0.0)
            transformer_options['reg_cond_mask_orig'] = transformer_options.get('regional_conditioning_mask_orig')
            AttnMask   = transformer_options.get('AttnMask', None)
            RegContext = transformer_options.get('RegContext', None)
            if AttnMask is not None and transformer_options['reg_cond_weight'] > 0.0:
                AttnMask.attn_mask_recast(x.dtype)
                context_tmp = RegContext.get().to(context.dtype)
                #context_tmp = 0 * context_tmp.clone()
                # If it's not a perfect factor, repeat and slice:
                # tile the uncond context up to the regional context length
                A = context[i][None,...].clone()
                B = context_tmp
                context_tmp = A.repeat(1, (B.shape[1] // A.shape[1]) + 1, 1)[:, :B.shape[1], :]
            else:
                context_tmp = context[i][None,...].clone()

        elif UNCOND == False:
            transformer_options['reg_cond_weight'] = transformer_options.get("regional_conditioning_weight", 0.0)
            transformer_options['reg_cond_floor']  = transformer_options.get("regional_conditioning_floor", 0.0)
            transformer_options['reg_cond_mask_orig'] = transformer_options.get('regional_conditioning_mask_orig')
            AttnMask   = transformer_options.get('AttnMask', None)
            RegContext = transformer_options.get('RegContext', None)
            if AttnMask is not None and transformer_options['reg_cond_weight'] > 0.0:
                AttnMask.attn_mask_recast(x.dtype)
                context_tmp = RegContext.get().to(context.dtype)
            else:
                context_tmp = context[i][None,...].clone()

        if context_tmp is None:
            context_tmp = context[i][None,...].clone()

        # process conditions for MMDiT Blocks
        #c_seq = context # B, T_c, D_c
        c_seq = context_tmp # B, T_c, D_c
        t = timestep

        c = self.cond_seq_linear(c_seq) # B, T_c, D # 1,256,2048 ->
        # prepend the 8 learned register tokens to the conditioning sequence
        c = torch.cat([comfy.ops.cast_to_input(self.register_tokens, c).repeat(c.size(0), 1, 1), c], dim=1) #1,256,3072 -> 1,264,3072

        global_cond = self.t_embedder(t, x.dtype) # B, D
        global_cond = global_cond[i][None]

        weight = transformer_options['reg_cond_weight'] if 'reg_cond_weight' in transformer_options else 0.0
        floor  = transformer_options['reg_cond_floor']  if 'reg_cond_floor'  in transformer_options else 0.0
        floor  = min(floor, weight)

        reg_cond_mask_expanded = transformer_options.get('reg_cond_mask_expanded')
        # BUGFIX: was img.dtype / img.device — `img` does not exist in this scope.
        reg_cond_mask_expanded = reg_cond_mask_expanded.to(x.dtype).to(x.device) if reg_cond_mask_expanded is not None else None
        reg_cond_mask = None

        AttnMask = transformer_options.get('AttnMask')
        mask = None
        if AttnMask is not None and weight > 0:
            mask = AttnMask.get(weight=weight) #mask_obj[0](transformer_options, weight.item())
            mask_type_bool = type(mask[0][0].item()) == bool if mask is not None else False
            if not mask_type_bool:
                mask = mask.to(x.dtype)
            # pad the mask for the 8 register tokens prepended above
            if mask_type_bool:
                mask = F.pad(mask, (8, 0, 8, 0), value=True)
                #mask = F.pad(mask, (0, 8, 0, 8), value=True)
            else:
                mask = F.pad(mask, (8, 0, 8, 0), value=1.0)
                text_len = context.shape[1] # mask_obj[0].text_len
                # NOTE(review): `floor` is built from floats above but `.to(...)`
                # assumes a tensor — confirm upstream always supplies tensors here.
                mask[text_len:,text_len:] = torch.clamp(mask[text_len:,text_len:], min=floor.to(mask.device)) #ORIGINAL SELF-ATTN REGION BLEED
            reg_cond_mask = reg_cond_mask_expanded.unsqueeze(0).clone() if reg_cond_mask_expanded is not None else None

        mask_type_bool = type(mask[0][0].item()) == bool if mask is not None else False
        total_layers = len(self.double_layers) + len(self.single_layers)

        blocks_replace = patches_replace.get("dit", {}) # context 1,259,2048 x 1,4032,3072
        if len(self.double_layers) > 0:
            # NOTE: loop variable `i` shadows the outer batch index; safe because
            # the batch index is not read again later in this iteration.
            for i, layer in enumerate(self.double_layers):
                # demote a boolean mask to float once weight drops below the
                # layer's normalized depth
                if mask_type_bool and weight < (i / (total_layers-1)) and mask is not None:
                    mask = mask.to(x.dtype)
                if ("double_block", i) in blocks_replace:
                    def block_wrap(args):
                        out = {}
                        out["txt"], out["img"] = layer( args["txt"],
                                                        args["img"],
                                                        args["vec"])
                        return out
                    out = blocks_replace[("double_block", i)]({"img": x, "txt": c, "vec": global_cond}, {"original_block": block_wrap})
                    c = out["txt"]
                    x = out["img"]
                else:
                    c, x = layer(c, x, global_cond, mask=mask, **kwargs)

        if len(self.single_layers) > 0:
            c_len = c.size(1)
            cx = torch.cat([c, x], dim=1)
            for i, layer in enumerate(self.single_layers):
                if mask_type_bool and weight < ((len(self.double_layers) + i) / (total_layers-1)) and mask is not None:
                    mask = mask.to(x.dtype)
                if ("single_block", i) in blocks_replace:
                    def block_wrap(args):
                        out = {}
                        out["img"] = layer(args["img"], args["vec"])
                        return out
                    out = blocks_replace[("single_block", i)]({"img": cx, "vec": global_cond}, {"original_block": block_wrap})
                    cx = out["img"]
                else:
                    cx = layer(cx, global_cond, mask=mask, **kwargs)

            # drop the cond tokens again; only image tokens are decoded
            x = cx[:, c_len:]

        fshift, fscale = self.modF(global_cond).chunk(2, dim=1)

        x = modulate(x, fshift, fscale)
        x = self.final_linear(x)
        x = self.unpatchify(x, (h + 1) // self.patch_size, (w + 1) // self.patch_size)[:,:,:h,:w]
        out_list.append(x)

    eps = torch.stack(out_list, dim=0).squeeze(dim=1)

    freqsep_lowpass_method    = transformer_options.get("freqsep_lowpass_method")
    freqsep_sigma             = transformer_options.get("freqsep_sigma")
    freqsep_kernel_size       = transformer_options.get("freqsep_kernel_size")
    freqsep_inner_kernel_size = transformer_options.get("freqsep_inner_kernel_size")
    freqsep_stride            = transformer_options.get("freqsep_stride")

    freqsep_lowpass_weight  = transformer_options.get("freqsep_lowpass_weight")
    freqsep_highpass_weight = transformer_options.get("freqsep_highpass_weight")
    freqsep_mask            = transformer_options.get("freqsep_mask")

    # NOTE(review): self.style_dtype, self.Retrojector, self.StyleWCT are expected
    # to be attached externally before style guidance is requested — confirm.
    dtype = eps.dtype if self.style_dtype is None else self.style_dtype

    # NOTE(review): below, UNCOND retains the value from the LAST loop iteration —
    # confirm this matches the intended cond/uncond ordering.
    if y0_style_pos is not None:
        y0_style_pos_weight    = transformer_options.get("y0_style_pos_weight")
        y0_style_pos_synweight = transformer_options.get("y0_style_pos_synweight")
        y0_style_pos_synweight *= y0_style_pos_weight
        y0_style_pos_mask      = transformer_options.get("y0_style_pos_mask")
        y0_style_pos_mask_edge = transformer_options.get("y0_style_pos_mask_edge")

        y0_style_pos = y0_style_pos.to(dtype)
        x = x_orig.clone().to(dtype)
        #x = x.to(dtype)
        eps = eps.to(dtype)
        eps_orig = eps.clone()

        sigma = SIGMA #t_orig[0].to(torch.float32) / 1000
        denoised = x - sigma * eps

        denoised_embed = self.Retrojector.embed(denoised)
        y0_adain_embed = self.Retrojector.embed(y0_style_pos)

        if transformer_options['y0_style_method'] == "scattersort":
            tile_h, tile_w = transformer_options.get('y0_style_tile_height'), transformer_options.get('y0_style_tile_width')
            pad = transformer_options.get('y0_style_tile_padding')
            if pad is not None and tile_h is not None and tile_w is not None:
                denoised_spatial = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)
                y0_adain_spatial = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)

                if EO("scattersort_median_LP"):
                    # scattersort only the low-pass band, keep the high-pass band
                    denoised_spatial_LP = median_blur_2d(denoised_spatial, kernel_size=EO("scattersort_median_LP",7))
                    y0_adain_spatial_LP = median_blur_2d(y0_adain_spatial, kernel_size=EO("scattersort_median_LP",7))

                    denoised_spatial_HP = denoised_spatial - denoised_spatial_LP
                    y0_adain_spatial_HP = y0_adain_spatial - y0_adain_spatial_LP

                    denoised_spatial_LP = apply_scattersort_tiled(denoised_spatial_LP, y0_adain_spatial_LP, tile_h, tile_w, pad)

                    denoised_spatial = denoised_spatial_LP + denoised_spatial_HP
                    denoised_embed = rearrange(denoised_spatial, "b c h w -> b (h w) c")
                else:
                    denoised_spatial = apply_scattersort_tiled(denoised_spatial, y0_adain_spatial, tile_h, tile_w, pad)
                    denoised_embed = rearrange(denoised_spatial, "b c h w -> b (h w) c")
            else:
                denoised_embed = apply_scattersort_masked(denoised_embed, y0_adain_embed, y0_style_pos_mask, y0_style_pos_mask_edge, h_len, w_len)

        elif transformer_options['y0_style_method'] == "AdaIN":
            if freqsep_mask is not None:
                freqsep_mask = freqsep_mask.view(1, 1, *freqsep_mask.shape[-2:]).float()
                freqsep_mask = F.interpolate(freqsep_mask.float(), size=(h_len, w_len), mode='nearest-exact')

            if hasattr(self, "adain_tile"):
                tile_h, tile_w = self.adain_tile

                denoised_pretile = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)
                y0_adain_pretile = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)

                # alternate between offset and non-offset tilings across calls
                # to avoid visible tile seams
                if self.adain_flag:
                    h_off = tile_h // 2
                    w_off = tile_w // 2
                    denoised_pretile = denoised_pretile[:,:,h_off:-h_off, w_off:-w_off]
                    self.adain_flag = False
                else:
                    h_off = 0
                    w_off = 0
                    self.adain_flag = True

                tiles,    orig_shape, grid, strides = tile_latent(denoised_pretile, tile_size=(tile_h,tile_w))
                y0_tiles, orig_shape, grid, strides = tile_latent(y0_adain_pretile, tile_size=(tile_h,tile_w))

                tiles_out = []
                for i in range(tiles.shape[0]):
                    tile = tiles[i].unsqueeze(0)
                    y0_tile = y0_tiles[i].unsqueeze(0)

                    tile    = rearrange(tile,    "b c h w -> b (h w) c", h=tile_h, w=tile_w)
                    y0_tile = rearrange(y0_tile, "b c h w -> b (h w) c", h=tile_h, w=tile_w)

                    tile = adain_seq_inplace(tile, y0_tile)
                    tiles_out.append(rearrange(tile, "b (h w) c -> b c h w", h=tile_h, w=tile_w))

                tiles_out_tensor = torch.cat(tiles_out, dim=0)
                tiles_out_tensor = untile_latent(tiles_out_tensor, orig_shape, grid, strides)

                if h_off == 0:
                    denoised_pretile = tiles_out_tensor
                else:
                    denoised_pretile[:,:,h_off:-h_off, w_off:-w_off] = tiles_out_tensor
                denoised_embed = rearrange(denoised_pretile, "b c h w -> b (h w) c", h=h_len, w=w_len)

            elif freqsep_lowpass_method is not None and freqsep_lowpass_method.endswith("pw"): #EO("adain_pw"):
                denoised_spatial = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)
                y0_adain_spatial = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)

                if   freqsep_lowpass_method == "median_pw":
                    denoised_spatial_new = adain_patchwise_row_batch_med(denoised_spatial.clone(), y0_adain_spatial.clone().repeat(denoised_spatial.shape[0],1,1,1), sigma=freqsep_sigma, kernel_size=freqsep_kernel_size, use_median_blur=True, lowpass_weight=freqsep_lowpass_weight, highpass_weight=freqsep_highpass_weight)
                elif freqsep_lowpass_method == "gaussian_pw":
                    denoised_spatial_new = adain_patchwise_row_batch(denoised_spatial.clone(), y0_adain_spatial.clone().repeat(denoised_spatial.shape[0],1,1,1), sigma=freqsep_sigma, kernel_size=freqsep_kernel_size)
                denoised_embed = rearrange(denoised_spatial_new, "b c h w -> b (h w) c", h=h_len, w=w_len)

            elif freqsep_lowpass_method is not None:
                denoised_spatial = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)
                y0_adain_spatial = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)

                if   freqsep_lowpass_method == "median":
                    denoised_spatial_LP = median_blur_2d(denoised_spatial, kernel_size=freqsep_kernel_size)
                    y0_adain_spatial_LP = median_blur_2d(y0_adain_spatial, kernel_size=freqsep_kernel_size)
                elif freqsep_lowpass_method == "gaussian":
                    denoised_spatial_LP = gaussian_blur_2d(denoised_spatial, sigma=freqsep_sigma, kernel_size=freqsep_kernel_size)
                    y0_adain_spatial_LP = gaussian_blur_2d(y0_adain_spatial, sigma=freqsep_sigma, kernel_size=freqsep_kernel_size)

                denoised_spatial_HP = denoised_spatial - denoised_spatial_LP

                if EO("adain_fs_uhp"):
                    # split the high-pass band again and swap in the style's
                    # ultra-high-pass component
                    y0_adain_spatial_HP = y0_adain_spatial - y0_adain_spatial_LP

                    denoised_spatial_ULP = gaussian_blur_2d(denoised_spatial, sigma=EO("adain_fs_uhp_sigma", 1.0), kernel_size=EO("adain_fs_uhp_kernel_size", 3))
                    y0_adain_spatial_ULP = gaussian_blur_2d(y0_adain_spatial, sigma=EO("adain_fs_uhp_sigma", 1.0), kernel_size=EO("adain_fs_uhp_kernel_size", 3))

                    denoised_spatial_UHP = denoised_spatial_HP - denoised_spatial_ULP
                    y0_adain_spatial_UHP = y0_adain_spatial_HP - y0_adain_spatial_ULP

                    #denoised_spatial_HP = y0_adain_spatial_ULP + denoised_spatial_UHP
                    denoised_spatial_HP = denoised_spatial_ULP + y0_adain_spatial_UHP

                denoised_spatial_new = freqsep_lowpass_weight * y0_adain_spatial_LP + freqsep_highpass_weight * denoised_spatial_HP
                denoised_embed = rearrange(denoised_spatial_new, "b c h w -> b (h w) c", h=h_len, w=w_len)

            else:
                denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed)
                for adain_iter in range(EO("style_iter", 0)):
                    denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed)
                    denoised_embed = self.Retrojector.embed(self.Retrojector.unembed(denoised_embed))
                    denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed)

        elif transformer_options['y0_style_method'] == "WCT":
            self.StyleWCT.set(y0_adain_embed)
            denoised_embed = self.StyleWCT.get(denoised_embed)

            if transformer_options.get('y0_standard_guide') is not None:
                y0_standard_guide = transformer_options.get('y0_standard_guide')

                y0_standard_guide_embed = self.Retrojector.embed(y0_standard_guide)
                f_cs = self.StyleWCT.get(y0_standard_guide_embed)
                self.y0_standard_guide = self.Retrojector.unembed(f_cs)

            if transformer_options.get('y0_inv_standard_guide') is not None:
                y0_inv_standard_guide = transformer_options.get('y0_inv_standard_guide')

                y0_inv_standard_guide_embed = self.Retrojector.embed(y0_inv_standard_guide)
                f_cs = self.StyleWCT.get(y0_inv_standard_guide_embed)
                self.y0_inv_standard_guide = self.Retrojector.unembed(f_cs)

        denoised_approx = self.Retrojector.unembed(denoised_embed)

        eps = (x - denoised_approx) / sigma

        if not UNCOND:
            if eps.shape[0] == 2:
                eps[1] = eps_orig[1] + y0_style_pos_weight    * (eps[1] - eps_orig[1])
                eps[0] = eps_orig[0] + y0_style_pos_synweight * (eps[0] - eps_orig[0])
            else:
                eps[0] = eps_orig[0] + y0_style_pos_weight * (eps[0] - eps_orig[0])
        elif eps.shape[0] == 1 and UNCOND:
            eps[0] = eps_orig[0] + y0_style_pos_synweight * (eps[0] - eps_orig[0])

        eps = eps.float()

    if y0_style_neg is not None:
        y0_style_neg_weight    = transformer_options.get("y0_style_neg_weight")
        y0_style_neg_synweight = transformer_options.get("y0_style_neg_synweight")
        y0_style_neg_synweight *= y0_style_neg_weight
        y0_style_neg_mask      = transformer_options.get("y0_style_neg_mask")
        y0_style_neg_mask_edge = transformer_options.get("y0_style_neg_mask_edge")

        y0_style_neg = y0_style_neg.to(dtype)
        x = x.to(dtype)
        eps = eps.to(dtype)
        eps_orig = eps.clone()

        sigma = SIGMA #t_orig[0].to(torch.float32) / 1000
        denoised = x - sigma * eps

        denoised_embed = self.Retrojector.embed(denoised)
        y0_adain_embed = self.Retrojector.embed(y0_style_neg)

        if transformer_options['y0_style_method'] == "scattersort":
            tile_h, tile_w = transformer_options.get('y0_style_tile_height'), transformer_options.get('y0_style_tile_width')
            pad = transformer_options.get('y0_style_tile_padding')
            if pad is not None and tile_h is not None and tile_w is not None:
                denoised_spatial = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)
                y0_adain_spatial = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)

                denoised_spatial = apply_scattersort_tiled(denoised_spatial, y0_adain_spatial, tile_h, tile_w, pad)
                denoised_embed = rearrange(denoised_spatial, "b c h w -> b (h w) c")
            else:
                denoised_embed = apply_scattersort_masked(denoised_embed, y0_adain_embed, y0_style_neg_mask, y0_style_neg_mask_edge, h_len, w_len)

        elif transformer_options['y0_style_method'] == "AdaIN":
            denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed)
            for adain_iter in range(EO("style_iter", 0)):
                denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed)
                denoised_embed = self.Retrojector.embed(self.Retrojector.unembed(denoised_embed))
                denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed)

        elif transformer_options['y0_style_method'] == "WCT":
            self.StyleWCT.set(y0_adain_embed)
            denoised_embed = self.StyleWCT.get(denoised_embed)

        denoised_approx = self.Retrojector.unembed(denoised_embed)

        if UNCOND:
            eps = (x - denoised_approx) / sigma
            eps[0] = eps_orig[0] + y0_style_neg_weight * (eps[0] - eps_orig[0])
            if eps.shape[0] == 2:
                eps[1] = eps_orig[1] + y0_style_neg_synweight * (eps[1] - eps_orig[1])
        elif eps.shape[0] == 1 and not UNCOND:
            eps[0] = eps_orig[0] + y0_style_neg_synweight * (eps[0] - eps_orig[0])

        eps = eps.float()

    return eps
def unpatchify2(x: torch.Tensor, H: int, W: int, patch_size: int) -> torch.Tensor:
    """
    Invert patchify:
        x: (B, N, C*p*p)
    returns: (B, C, H, W), slicing off any padding

    N must equal ceil(H/p) * ceil(W/p); the right/bottom padding introduced
    by patchify is cropped away before returning.
    """
    B, N, CPP = x.shape
    p = patch_size
    Hp = math.ceil(H / p)  # patch rows (including any padded row)
    Wp = math.ceil(W / p)  # patch cols (including any padded col)
    C = CPP // (p * p)
    assert N == Hp * Wp, f"Expected N={Hp*Wp} patches, got {N}"
    # (B, N, C*p*p) -> (B, Hp, Wp, C, p, p); the original intermediate
    # view to (B, Hp, Wp, CPP) was a redundant no-op and is removed.
    x = x.view(B, Hp, Wp, C, p, p)
    # interleave the patch grid and intra-patch axes back into image layout
    x = x.permute(0, 3, 1, 4, 2, 5)  # (B, C, Hp, p, Wp, p)
    imgs = x.reshape(B, C, Hp * p, Wp * p)
    # crop off padding added during patchify
    return imgs[:, :, :H, :W]
================================================
FILE: beta/__init__.py
================================================
from . import rk_sampler_beta
from . import samplers
from . import samplers_extensions
def add_beta(NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS, extra_samplers):
    """Register the beta sampler nodes, UI display names, and extra samplers.

    Mutates the three mappings in place (and also returns them):
      - NODE_CLASS_MAPPINGS:        node key -> node class
      - NODE_DISPLAY_NAME_MAPPINGS: node key -> label shown in the UI
      - extra_samplers:             sampler name -> sampler function
    """
    # Node classes exposed to the host application.
    NODE_CLASS_MAPPINGS.update({
        #"SharkSampler"                    : samplers.SharkSampler,
        #"SharkSamplerAdvanced_Beta"       : samplers.SharkSampler, #SharkSamplerAdvanced_Beta,
        "SharkOptions_Beta"                : samplers_extensions.SharkOptions_Beta,
        "ClownOptions_SDE_Beta"            : samplers_extensions.ClownOptions_SDE_Beta,
        "ClownOptions_DetailBoost_Beta"    : samplers_extensions.ClownOptions_DetailBoost_Beta,
        "ClownGuide_Style_Beta"            : samplers_extensions.ClownGuide_Style_Beta,
        "ClownGuide_Style_EdgeWidth"       : samplers_extensions.ClownGuide_Style_EdgeWidth,
        "ClownGuide_Style_TileSize"        : samplers_extensions.ClownGuide_Style_TileSize,
        "ClownGuide_Beta"                  : samplers_extensions.ClownGuide_Beta,
        "ClownGuides_Beta"                 : samplers_extensions.ClownGuides_Beta,
        "ClownGuidesAB_Beta"               : samplers_extensions.ClownGuidesAB_Beta,
        "ClownGuides_Sync"                 : samplers_extensions.ClownGuides_Sync,
        "ClownGuides_Sync_Advanced"        : samplers_extensions.ClownGuides_Sync_Advanced,
        "ClownGuide_FrequencySeparation"   : samplers_extensions.ClownGuide_FrequencySeparation,
        "SharkOptions_GuiderInput"         : samplers_extensions.SharkOptions_GuiderInput,
        "ClownOptions_ImplicitSteps_Beta"  : samplers_extensions.ClownOptions_ImplicitSteps_Beta,
        "ClownOptions_Cycles_Beta"         : samplers_extensions.ClownOptions_Cycles_Beta,
        "SharkOptions_GuideCond_Beta"      : samplers_extensions.SharkOptions_GuideCond_Beta,
        "SharkOptions_GuideConds_Beta"     : samplers_extensions.SharkOptions_GuideConds_Beta,
        "ClownOptions_Tile_Beta"           : samplers_extensions.ClownOptions_Tile_Beta,
        "ClownOptions_Tile_Advanced_Beta"  : samplers_extensions.ClownOptions_Tile_Advanced_Beta,
        "ClownGuide_Mean_Beta"             : samplers_extensions.ClownGuide_Mean_Beta,
        "ClownGuide_AdaIN_MMDiT_Beta"      : samplers_extensions.ClownGuide_AdaIN_MMDiT_Beta,
        "ClownGuide_AttnInj_MMDiT_Beta"    : samplers_extensions.ClownGuide_AttnInj_MMDiT_Beta,
        "ClownGuide_StyleNorm_Advanced_HiDream" : samplers_extensions.ClownGuide_StyleNorm_Advanced_HiDream,
        "ClownOptions_SDE_Mask_Beta"       : samplers_extensions.ClownOptions_SDE_Mask_Beta,
        "ClownOptions_StepSize_Beta"       : samplers_extensions.ClownOptions_StepSize_Beta,
        "ClownOptions_SigmaScaling_Beta"   : samplers_extensions.ClownOptions_SigmaScaling_Beta,
        "ClownOptions_Momentum_Beta"       : samplers_extensions.ClownOptions_Momentum_Beta,
        "ClownOptions_SwapSampler_Beta"    : samplers_extensions.ClownOptions_SwapSampler_Beta,
        "ClownOptions_ExtraOptions_Beta"   : samplers_extensions.ClownOptions_ExtraOptions_Beta,
        "ClownOptions_Automation_Beta"     : samplers_extensions.ClownOptions_Automation_Beta,
        "SharkOptions_UltraCascade_Latent_Beta" : samplers_extensions.SharkOptions_UltraCascade_Latent_Beta,
        "SharkOptions_StartStep_Beta"      : samplers_extensions.SharkOptions_StartStep_Beta,
        "ClownOptions_Combine"             : samplers_extensions.ClownOptions_Combine,
        "ClownOptions_Frameweights"        : samplers_extensions.ClownOptions_Frameweights,
        "ClownOptions_FlowGuide"           : samplers_extensions.ClownOptions_FlowGuide,
        "ClownStyle_Block_MMDiT"           : samplers_extensions.ClownStyle_Block_MMDiT,
        "ClownStyle_MMDiT"                 : samplers_extensions.ClownStyle_MMDiT,
        "ClownStyle_Attn_MMDiT"            : samplers_extensions.ClownStyle_Attn_MMDiT,
        "ClownStyle_Boost"                 : samplers_extensions.ClownStyle_Boost,
        "ClownStyle_UNet"                  : samplers_extensions.ClownStyle_UNet,
        "ClownStyle_Block_UNet"            : samplers_extensions.ClownStyle_Block_UNet,
        "ClownStyle_Attn_UNet"             : samplers_extensions.ClownStyle_Attn_UNet,
        "ClownStyle_ResBlock_UNet"         : samplers_extensions.ClownStyle_ResBlock_UNet,
        "ClownStyle_SpatialBlock_UNet"     : samplers_extensions.ClownStyle_SpatialBlock_UNet,
        "ClownStyle_TransformerBlock_UNet" : samplers_extensions.ClownStyle_TransformerBlock_UNet,
        "ClownSamplerSelector_Beta"        : samplers_extensions.ClownSamplerSelector_Beta,
        "SharkSampler_Beta"                : samplers.SharkSampler_Beta,
        "SharkChainsampler_Beta"           : samplers.SharkChainsampler_Beta,
        "ClownsharKSampler_Beta"           : samplers.ClownsharKSampler_Beta,
        "ClownsharkChainsampler_Beta"      : samplers.ClownsharkChainsampler_Beta,
        "ClownSampler_Beta"                : samplers.ClownSampler_Beta,
        "ClownSamplerAdvanced_Beta"        : samplers.ClownSamplerAdvanced_Beta,
        "BongSampler"                      : samplers.BongSampler,
    })

    # Standalone sampler functions (wrappers defined below in this module);
    # the *_ode variants run with stochastic noise disabled.
    extra_samplers.update({
        "res_2m"     : sample_res_2m,
        "res_3m"     : sample_res_3m,
        "res_2s"     : sample_res_2s,
        "res_3s"     : sample_res_3s,
        "res_5s"     : sample_res_5s,
        "res_6s"     : sample_res_6s,
        "res_2m_ode" : sample_res_2m_ode,
        "res_3m_ode" : sample_res_3m_ode,
        "res_2s_ode" : sample_res_2s_ode,
        "res_3s_ode" : sample_res_3s_ode,
        "res_5s_ode" : sample_res_5s_ode,
        "res_6s_ode" : sample_res_6s_ode,
        "deis_2m"    : sample_deis_2m,
        "deis_3m"    : sample_deis_3m,
        "deis_2m_ode": sample_deis_2m_ode,
        "deis_3m_ode": sample_deis_3m_ode,
        "rk_beta": rk_sampler_beta.sample_rk_beta,
    })

    # Human-readable labels; keys without an entry here fall back to the raw key.
    NODE_DISPLAY_NAME_MAPPINGS.update({
        #"SharkSampler"                    : "SharkSampler",
        #"SharkSamplerAdvanced_Beta"       : "SharkSamplerAdvanced",
        "SharkSampler_Beta"                : "SharkSampler",
        "SharkChainsampler_Beta"           : "SharkChainsampler",
        "BongSampler"                      : "BongSampler",
        "ClownsharKSampler_Beta"           : "ClownsharKSampler",
        "ClownsharkChainsampler_Beta"      : "ClownsharkChainsampler",
        "ClownSampler_Beta"                : "ClownSampler",
        "ClownSamplerAdvanced_Beta"        : "ClownSamplerAdvanced",
        "ClownGuide_Mean_Beta"             : "ClownGuide Mean",
        "ClownGuide_AdaIN_MMDiT_Beta"      : "ClownGuide AdaIN (HiDream)",
        "ClownGuide_AttnInj_MMDiT_Beta"    : "ClownGuide AttnInj (HiDream)",
        "ClownGuide_StyleNorm_Advanced_HiDream" : "ClownGuide_StyleNorm_Advanced_HiDream",
        "ClownGuide_Style_Beta"            : "ClownGuide Style",
        "ClownGuide_Beta"                  : "ClownGuide",
        "ClownGuides_Beta"                 : "ClownGuides",
        "ClownGuides_Sync"                 : "ClownGuides Sync",
        "ClownGuides_Sync_Advanced"        : "ClownGuides Sync_Advanced",
        "ClownGuidesAB_Beta"               : "ClownGuidesAB",
        "ClownSamplerSelector_Beta"        : "ClownSamplerSelector",
        "ClownOptions_SDE_Mask_Beta"       : "ClownOptions SDE Mask",
        "ClownOptions_SDE_Beta"            : "ClownOptions SDE",
        "ClownOptions_StepSize_Beta"       : "ClownOptions Step Size",
        "ClownOptions_DetailBoost_Beta"    : "ClownOptions Detail Boost",
        "ClownOptions_SigmaScaling_Beta"   : "ClownOptions Sigma Scaling",
        "ClownOptions_Momentum_Beta"       : "ClownOptions Momentum",
        "ClownOptions_ImplicitSteps_Beta"  : "ClownOptions Implicit Steps",
        "ClownOptions_Cycles_Beta"         : "ClownOptions Cycles",
        "ClownOptions_SwapSampler_Beta"    : "ClownOptions Swap Sampler",
        "ClownOptions_ExtraOptions_Beta"   : "ClownOptions Extra Options",
        "ClownOptions_Automation_Beta"     : "ClownOptions Automation",
        "SharkOptions_GuideCond_Beta"      : "SharkOptions Guide Cond",
        "SharkOptions_GuideConds_Beta"     : "SharkOptions Guide Conds",
        "SharkOptions_Beta"                : "SharkOptions",
        "SharkOptions_StartStep_Beta"      : "SharkOptions Start Step",
        "SharkOptions_UltraCascade_Latent_Beta" : "SharkOptions UltraCascade Latent",
        "ClownOptions_Combine"             : "ClownOptions Combine",
        "ClownOptions_Frameweights"        : "ClownOptions Frameweights",
        "SharkOptions_GuiderInput"         : "SharkOptions Guider Input",
        "ClownOptions_Tile_Beta"           : "ClownOptions Tile",
        "ClownOptions_Tile_Advanced_Beta"  : "ClownOptions Tile Advanced",
    })
    return NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS, extra_samplers
# Thin wrappers that bind sample_rk_beta to a specific RK solver type so each
# can be registered as a standalone sampler. The *_ode variants pass
# eta=0.0 / eta_substep=0.0, disabling the stochastic (SDE) noise component.
def sample_res_2m(model, x, sigmas, extra_args=None, callback=None, disable=None):
    return rk_sampler_beta.sample_rk_beta(model, x, sigmas, None, extra_args, callback, disable, rk_type="res_2m",)
def sample_res_3m(model, x, sigmas, extra_args=None, callback=None, disable=None):
    return rk_sampler_beta.sample_rk_beta(model, x, sigmas, None, extra_args, callback, disable, rk_type="res_3m",)
def sample_res_2s(model, x, sigmas, extra_args=None, callback=None, disable=None):
    return rk_sampler_beta.sample_rk_beta(model, x, sigmas, None, extra_args, callback, disable, rk_type="res_2s",)
def sample_res_3s(model, x, sigmas, extra_args=None, callback=None, disable=None):
    return rk_sampler_beta.sample_rk_beta(model, x, sigmas, None, extra_args, callback, disable, rk_type="res_3s",)
def sample_res_5s(model, x, sigmas, extra_args=None, callback=None, disable=None):
    return rk_sampler_beta.sample_rk_beta(model, x, sigmas, None, extra_args, callback, disable, rk_type="res_5s",)
def sample_res_6s(model, x, sigmas, extra_args=None, callback=None, disable=None):
    return rk_sampler_beta.sample_rk_beta(model, x, sigmas, None, extra_args, callback, disable, rk_type="res_6s",)
def sample_res_2m_ode(model, x, sigmas, extra_args=None, callback=None, disable=None):
    return rk_sampler_beta.sample_rk_beta(model, x, sigmas, None, extra_args, callback, disable, rk_type="res_2m", eta=0.0, eta_substep=0.0, )
def sample_res_3m_ode(model, x, sigmas, extra_args=None, callback=None, disable=None):
    return rk_sampler_beta.sample_rk_beta(model, x, sigmas, None, extra_args, callback, disable, rk_type="res_3m", eta=0.0, eta_substep=0.0, )
def sample_res_2s_ode(model, x, sigmas, extra_args=None, callback=None, disable=None):
    return rk_sampler_beta.sample_rk_beta(model, x, sigmas, None, extra_args, callback, disable, rk_type="res_2s", eta=0.0, eta_substep=0.0, )
def sample_res_3s_ode(model, x, sigmas, extra_args=None, callback=None, disable=None):
    return rk_sampler_beta.sample_rk_beta(model, x, sigmas, None, extra_args, callback, disable, rk_type="res_3s", eta=0.0, eta_substep=0.0, )
def sample_res_5s_ode(model, x, sigmas, extra_args=None, callback=None, disable=None):
    return rk_sampler_beta.sample_rk_beta(model, x, sigmas, None, extra_args, callback, disable, rk_type="res_5s", eta=0.0, eta_substep=0.0, )
def sample_res_6s_ode(model, x, sigmas, extra_args=None, callback=None, disable=None):
    return rk_sampler_beta.sample_rk_beta(model, x, sigmas, None, extra_args, callback, disable, rk_type="res_6s", eta=0.0, eta_substep=0.0, )
def sample_deis_2m(model, x, sigmas, extra_args=None, callback=None, disable=None):
    return rk_sampler_beta.sample_rk_beta(model, x, sigmas, None, extra_args, callback, disable, rk_type="deis_2m",)
def sample_deis_3m(model, x, sigmas, extra_args=None, callback=None, disable=None):
    return rk_sampler_beta.sample_rk_beta(model, x, sigmas, None, extra_args, callback, disable, rk_type="deis_3m",)
def sample_deis_2m_ode(model, x, sigmas, extra_args=None, callback=None, disable=None):
    return rk_sampler_beta.sample_rk_beta(model, x, sigmas, None, extra_args, callback, disable, rk_type="deis_2m", eta=0.0, eta_substep=0.0, )
def sample_deis_3m_ode(model, x, sigmas, extra_args=None, callback=None, disable=None):
    return rk_sampler_beta.sample_rk_beta(model, x, sigmas, None, extra_args, callback, disable, rk_type="deis_3m", eta=0.0, eta_substep=0.0, )
================================================
FILE: beta/constants.py
================================================
# Hard upper bound on sampler step counts (used to size schedules/automation).
MAX_STEPS = 10000

# Strategies for refining implicit solver iterations.
IMPLICIT_TYPE_NAMES = [
    "rebound",
    "retro-eta",
    "bongmath",
    "predictor-corrector",
]

# Guide modes exposed by the simplified beta guide nodes.
GUIDE_MODE_NAMES_BETA_SIMPLE = [
    "flow",
    "sync",
    "lure",
    "data",
    "epsilon",
    "inversion",
    "pseudoimplicit",
    "fully_pseudoimplicit",
    "none",
]

# Names of the per-frame weighting configs a guide can carry.
FRAME_WEIGHTS_CONFIG_NAMES = [
    "frame_weights",
    "frame_weights_inv",
    "frame_targets"
]

# Temporal shapes for frame-weight curves.
FRAME_WEIGHTS_DYNAMICS_NAMES = [
    "constant",
    "linear",
    "ease_out",
    "ease_in",
    "middle",
    "trough",
]

# Pacing presets for how quickly frame weights transition over the schedule.
FRAME_WEIGHTS_SCHEDULE_NAMES = [
    "moderate_early",
    "moderate_late",
    "fast_early",
    "fast_late",
    "slow_early",
    "slow_late",
]

# Full list of pseudoimplicit guide mode variants
# (_cw = channelwise, _projection = projected update).
GUIDE_MODE_NAMES_PSEUDOIMPLICIT = [
    "pseudoimplicit",
    "pseudoimplicit_cw",
    "pseudoimplicit_projection",
    "pseudoimplicit_projection_cw",
    "fully_pseudoimplicit",
    "fully_pseudoimplicit_projection",
    "fully_pseudoimplicit_cw",
    "fully_pseudoimplicit_projection_cw"
]
================================================
FILE: beta/deis_coefficients.py
================================================
# Adapted from: https://github.com/zju-pi/diff-sampler/blob/main/gits-main/solver_utils.py
# fixed the calcs for "rhoab" which suffered from an off-by-one error and made some other minor corrections
import torch
import numpy as np
# A pytorch reimplementation of DEIS (https://github.com/qsh-zh/deis).
#############################
### Utils for DEIS solver ###
#############################
#----------------------------------------------------------------------------
# Transfer from the input time (sigma) used in EDM to that (t) used in DEIS.
def edm2t(edm_steps, epsilon_s=1e-3, sigma_min=0.002, sigma_max=80):
    """Transfer EDM sigma steps to the time variable t used by the VP-SDE/DEIS.

    Returns (t_steps, beta_min, beta_max) where beta_max = vp_beta_d + vp_beta_min.
    All arithmetic is forced onto CPU copies of the inputs.
    """
    # sigma(t) for the VP schedule (unused here, kept for reference) and its inverse
    vp_sigma = lambda beta_d, beta_min: lambda t: (np.e ** (0.5 * beta_d * (t ** 2) + beta_min * t) - 1) ** 0.5
    vp_sigma_inv = lambda beta_d, beta_min: lambda sigma: ((beta_min ** 2 + 2 * beta_d * (sigma ** 2 + 1).log()).sqrt() - beta_min) / beta_d
    # solve the linear beta schedule so that sigma(epsilon_s)=sigma_min and sigma(1)=sigma_max
    vp_beta_d = 2 * (np.log(torch.tensor(sigma_min).cpu() ** 2 + 1) / epsilon_s - np.log(torch.tensor(sigma_max).cpu() ** 2 + 1)) / (epsilon_s - 1)
    vp_beta_min = np.log(torch.tensor(sigma_max).cpu() ** 2 + 1) - 0.5 * vp_beta_d
    # map every EDM sigma through the inverse schedule to get its VP time
    t_steps = vp_sigma_inv(vp_beta_d.clone().detach().cpu(), vp_beta_min.clone().detach().cpu())(edm_steps.clone().detach().cpu())
    return t_steps, vp_beta_min, vp_beta_d + vp_beta_min
#----------------------------------------------------------------------------
def cal_poly(prev_t, j, taus):
    """Evaluate the j-th Lagrange basis polynomial over the nodes in prev_t
    at the points taus (equals 1 at prev_t[j], 0 at every other node)."""
    basis = 1
    node_j = prev_t[j]
    for k, node_k in enumerate(prev_t):
        if k == j:
            continue
        basis = basis * ((taus - node_k) / (node_j - node_k))
    return basis
#----------------------------------------------------------------------------
# Transfer from t to alpha_t.
def t2alpha_fn(beta_0, beta_1, t):
    """Map VP-SDE time t to alpha_t for a linear beta schedule from beta_0 to beta_1."""
    log_alpha = -0.5 * t ** 2 * (beta_1 - beta_0) - t * beta_0
    return torch.exp(log_alpha)
#----------------------------------------------------------------------------
def cal_integrand(beta_0, beta_1, taus):
    """Compute the DEIS 'tab' integrand -0.5 * dlog(alpha)/dtau / sqrt(alpha*(1-alpha)).

    Uses autograd to differentiate log(alpha(tau)) w.r.t. tau. Inputs are
    cloned inside inference_mode(False) so gradients can be taken even when
    the caller runs under inference mode.
    """
    with torch.inference_mode(mode=False):
        # clones escape inference-mode tensor restrictions
        taus = taus.clone()
        beta_0 = beta_0.clone()
        beta_1 = beta_1.clone()
        with torch.enable_grad():
            taus.requires_grad_(True)
            alpha = t2alpha_fn(beta_0, beta_1, taus)
            log_alpha = alpha.log()
            # sum() gives a scalar so .backward() populates taus.grad elementwise
            log_alpha.sum().backward()
            d_log_alpha_dtau = taus.grad
        integrand = -0.5 * d_log_alpha_dtau / torch.sqrt(alpha * (1 - alpha))
    return integrand
#----------------------------------------------------------------------------
def get_deis_coeff_list(t_steps, max_order, N=10000, deis_mode='tab'):
    """
    Get the coefficient list for DEIS sampling.
    Args:
        t_steps: A pytorch tensor. The time steps for sampling.
        max_order: A `int`. Maximum order of the solver. 1 <= max_order <= 4
        N: A `int`. Use how many points to perform the numerical integration when deis_mode=='tab'.
        deis_mode: A `str`. Select between 'tab' and 'rhoab'. Type of DEIS.
    Returns:
        A list with one entry per step interval: an empty list for first-order
        steps, otherwise a list of `order` multistep coefficients.
    """
    if deis_mode == 'tab':
        # numerically integrate the exact integrand over each interval
        t_steps, beta_0, beta_1 = edm2t(t_steps)
        C = []
        for i, (t_cur, t_next) in enumerate(zip(t_steps[:-1], t_steps[1:])):
            order = min(i+1, max_order)   # warm up: order grows with available history
            if order == 1:
                C.append([])
            else:
                taus = torch.linspace(t_cur, t_next, N)   # split the interval for integral approximation
                dtau = (t_next - t_cur) / N
                prev_t = t_steps[[i - k for k in range(order)]]   # current + previous time points
                coeff_temp = []
                integrand = cal_integrand(beta_0, beta_1, taus)
                for j in range(order):
                    poly = cal_poly(prev_t, j, taus)
                    # Riemann-sum approximation of integral(integrand * Lagrange basis)
                    coeff_temp.append(torch.sum(integrand * poly) * dtau)
                C.append(coeff_temp)

    elif deis_mode == 'rhoab':
        # Analytical solution, second order
        def get_def_integral_2(a, b, start, end, c):
            coeff = (end**3 - start**3) / 3 - (end**2 - start**2) * (a + b) / 2 + (end - start) * a * b
            return coeff / ((c - a) * (c - b))

        # Analytical solution, third order
        def get_def_integral_3(a, b, c, start, end, d):
            coeff = (end**4 - start**4) / 4 - (end**3 - start**3) * (a + b + c) / 3 \
                + (end**2 - start**2) * (a*b + a*c + b*c) / 2 - (end - start) * a * b * c
            return coeff / ((d - a) * (d - b) * (d - c))

        C = []
        for i, (t_cur, t_next) in enumerate(zip(t_steps[:-1], t_steps[1:])):
            order = min(i+1, max_order) #fixed order calcs
            if order == 1:
                C.append([])
            else:
                # order+1 history points: the current step plus `order` previous steps
                prev_t = t_steps[[i - k for k in range(order+1)]]
                if order == 2:
                    # closed-form integrals of the linear Lagrange basis
                    coeff_cur = ((t_next - prev_t[1])**2 - (t_cur - prev_t[1])**2) / (2 * (t_cur - prev_t[1]))
                    coeff_prev1 = (t_next - t_cur)**2 / (2 * (prev_t[1] - t_cur))
                    coeff_temp = [coeff_cur, coeff_prev1]
                elif order == 3:
                    coeff_cur = get_def_integral_2(prev_t[1], prev_t[2], t_cur, t_next, t_cur)
                    coeff_prev1 = get_def_integral_2(t_cur, prev_t[2], t_cur, t_next, prev_t[1])
                    coeff_prev2 = get_def_integral_2(t_cur, prev_t[1], t_cur, t_next, prev_t[2])
                    coeff_temp = [coeff_cur, coeff_prev1, coeff_prev2]
                elif order == 4:
                    coeff_cur = get_def_integral_3(prev_t[1], prev_t[2], prev_t[3], t_cur, t_next, t_cur)
                    coeff_prev1 = get_def_integral_3(t_cur, prev_t[2], prev_t[3], t_cur, t_next, prev_t[1])
                    coeff_prev2 = get_def_integral_3(t_cur, prev_t[1], prev_t[3], t_cur, t_next, prev_t[2])
                    coeff_prev3 = get_def_integral_3(t_cur, prev_t[1], prev_t[2], t_cur, t_next, prev_t[3])
                    coeff_temp = [coeff_cur, coeff_prev1, coeff_prev2, coeff_prev3]
                C.append(coeff_temp)
    return C
================================================
FILE: beta/noise_classes.py
================================================
import torch
import torch.nn.functional as F
from torch import nn, Tensor, Generator, lerp
from torch.nn.functional import unfold
from torch.distributions import StudentT, Laplace
import numpy as np
import pywt
import functools
from typing import Callable, Tuple
from math import pi
from comfy.k_diffusion.sampling import BrownianTreeNoiseSampler
from ..res4lyf import RESplain
# Set this to "True" if you have installed OpenSimplex. Recommended to install without dependencies due to conflicting packages: pip3 install opensimplex --no-deps
OPENSIMPLEX_ENABLE = False
if OPENSIMPLEX_ENABLE:
from opensimplex import OpenSimplex
class PrecisionTool:
    """Decorator helper that casts tensor arguments to a configured precision.

    Wrapped functions receive their tensor args/kwargs (including tensors
    nested inside dicts) cast to fp64/fp32/fp16 and moved to the device of
    the first tensor found among the top-level positional/keyword arguments.
    An unrecognized cast_type makes the wrapper a transparent pass-through.
    """
    def __init__(self, cast_type='fp64'):
        self.cast_type = cast_type

    def cast_tensor(self, func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            if self.cast_type not in ['fp64', 'fp32', 'fp16']:
                return func(*args, **kwargs)

            # target device = device of the first tensor argument (args first,
            # then kwargs), or None if no top-level tensor is present
            target_device = None
            for candidate in list(args) + list(kwargs.values()):
                if torch.is_tensor(candidate):
                    target_device = candidate.device
                    break

            dtype_map = {'fp64': torch.float64, 'fp32': torch.float32, 'fp16': torch.float16}

            # recursively recast tensors, descending into nested dictionaries
            def convert(value):
                if torch.is_tensor(value):
                    return value.to(dtype_map[self.cast_type]).to(target_device)
                if isinstance(value, dict):
                    return {key: convert(item) for key, item in value.items()}
                return value

            converted_args = [convert(arg) for arg in args]
            converted_kwargs = {key: convert(value) for key, value in kwargs.items()}
            return func(*converted_args, **converted_kwargs)
        return wrapper

    def set_cast_type(self, new_value):
        # anything other than the three known precisions falls back to fp64
        self.cast_type = new_value if new_value in ['fp64', 'fp32', 'fp16'] else 'fp64'
precision_tool = PrecisionTool(cast_type='fp64')
def noise_generator_factory(cls, **fixed_params):
    """Build a constructor for cls with fixed_params pre-bound;
    per-call keyword arguments override the fixed ones."""
    def create_instance(**kwargs):
        merged = dict(fixed_params)
        merged.update(kwargs)
        return cls(**merged)
    return create_instance
def like(x):
    """Collect x's shape/dtype/layout/device as keyword args for tensor constructors."""
    return dict(size=x.shape, dtype=x.dtype, layout=x.layout, device=x.device)
def scale_to_range(x, scaled_min = -1.73, scaled_max = 1.73): #1.73 is roughly the square root of 3
    """Linearly rescale x so its minimum maps to scaled_min and its maximum to scaled_max."""
    shifted = x - x.min()
    span = scaled_max - scaled_min
    return scaled_min + shifted * span / (x.max() - x.min())
def normalize(x):
    """Standardize x to zero mean and unit (sample) standard deviation."""
    centered = x - x.mean()
    return centered / x.std()
class NoiseGenerator:
def __init__(self, x=None, size=None, dtype=None, layout=None, device=None, seed=42, generator=None, sigma_min=None, sigma_max=None):
self.seed = seed
if x is not None:
self.x = x
self.size = x.shape
self.dtype = x.dtype
self.layout = x.layout
self.device = x.device
else:
self.x = torch.zeros(size, dtype, layout, device)
# allow overriding parameters imported from latent 'x' if specified
if size is not None:
self.size = size
if dtype is not None:
self.dtype = dtype
if layout is not None:
self.layout = layout
if device is not None:
self.device = device
self.sigma_max = sigma_max.to(device) if isinstance(sigma_max, torch.Tensor) else sigma_max
self.sigma_min = sigma_min.to(device) if isinstance(sigma_min, torch.Tensor) else sigma_min
self.last_seed = seed #- 1 #adapt for update being called during initialization, which increments last_seed
if generator is None:
self.generator = torch.Generator(device=self.device).manual_seed(seed)
else:
self.generator = generator
def __call__(self):
raise NotImplementedError("This method got clownsharked!")
def update(self, **kwargs):
#if not isinstance(self, BrownianNoiseGenerator):
# self.last_seed += 1
updated_values = []
for attribute_name, value in kwargs.items():
if value is not None:
setattr(self, attribute_name, value)
updated_values.append(getattr(self, attribute_name))
return tuple(updated_values)
class BrownianNoiseGenerator(NoiseGenerator):
    """Noise from a Brownian tree, sampled per (sigma, sigma_next) interval.

    A fresh BrownianTreeNoiseSampler is built on every call from the stored
    latent, sigma bounds, and seed.
    """
    def __call__(self, *, sigma=None, sigma_next=None, **kwargs):
        sampler = BrownianTreeNoiseSampler(
            self.x, self.sigma_min, self.sigma_max,
            seed=self.seed, cpu=(self.device.type == 'cpu'),
        )
        return sampler(sigma, sigma_next)
class FractalNoiseGenerator(NoiseGenerator):
    """Gaussian noise reshaped in frequency space with a power-law spectral
    density k / f^(alpha*scale), then rescaled to unit std.

    alpha > 0 boosts low frequencies (pink/brown-leaning); alpha < 0 boosts
    high frequencies. Handles 4D (b,c,h,w) and 5D (b,c,t,h,w) sizes.
    """
    def __init__(self, x=None, size=None, dtype=None, layout=None, device=None, seed=42, generator=None, sigma_min=None, sigma_max=None,
                alpha=0.0, k=1.0, scale=0.1):
        super().__init__(x, size, dtype, layout, device, seed, generator, sigma_min, sigma_max)
        self.update(alpha=alpha, k=k, scale=scale)

    def __call__(self, *, alpha=None, k=None, scale=None, **kwargs):
        # per-call overrides; None keeps the stored value
        self.update(alpha=alpha, k=k, scale=scale)
        self.last_seed += 1

        if len(self.size) == 5:
            b, c, t, h, w = self.size
        else:
            b, c, h, w = self.size

        noise = torch.normal(mean=0.0, std=1.0, size=self.size, dtype=self.dtype, layout=self.layout, device=self.device, generator=self.generator)

        # radial frequency-magnitude grid, clamped to avoid division by zero at DC
        y_freq = torch.fft.fftfreq(h, 1/h, device=self.device)
        x_freq = torch.fft.fftfreq(w, 1/w, device=self.device)
        if len(self.size) == 5:
            t_freq = torch.fft.fftfreq(t, 1/t, device=self.device)
            freq = torch.sqrt(t_freq[:, None, None]**2 + y_freq[None, :, None]**2 + x_freq[None, None, :]**2).clamp(min=1e-10)
        else:
            freq = torch.sqrt(y_freq[:, None]**2 + x_freq[None, :]**2).clamp(min=1e-10)

        spectral_density = self.k / torch.pow(freq, self.alpha * self.scale)
        # NOTE(review): for 5D sizes freq is 3-D, so [0, 0] zeroes the whole
        # [0, 0, :] line rather than just the DC bin — confirm intended.
        spectral_density[0, 0] = 0

        # filter the white noise by the spectral density in the Fourier domain
        noise_fft = torch.fft.fftn(noise)
        modified_fft = noise_fft * spectral_density
        noise = torch.fft.ifftn(modified_fft).real

        return noise / torch.std(noise)
class SimplexNoiseGenerator(NoiseGenerator):
    """OpenSimplex-based structured noise (requires OPENSIMPLEX_ENABLE and the
    `opensimplex` package). Advances its internal OpenSimplex seed every call.
    """
    def __init__(self, x=None, size=None, dtype=None, layout=None, device=None, seed=42, generator=None, sigma_min=None, sigma_max=None,
                scale=0.01):
        super().__init__(x, size, dtype, layout, device, seed, generator, sigma_min, sigma_max)
        self.noise = OpenSimplex(seed=seed)
        self.scale = scale

    def __call__(self, *, scale=None, **kwargs):
        self.update(scale=scale)
        self.last_seed += 1

        if len(self.size) == 5:
            b, c, t, h, w = self.size
        else:
            b, c, h, w = self.size

        # noise3array is sampled on integer grids of width/height/channels;
        # NOTE(review): presumably the result is indexed (c, h, w) and the
        # batch dim is only added via unsqueeze below — confirm for b > 1.
        noise_array = self.noise.noise3array(np.arange(w),np.arange(h),np.arange(c))
        self.noise = OpenSimplex(seed=self.noise.get_seed()+1)   # reseed for the next call
        noise_tensor = torch.from_numpy(noise_array).to(self.device)
        noise_tensor = torch.unsqueeze(noise_tensor, dim=0)
        if len(self.size) == 5:
            noise_tensor = torch.unsqueeze(noise_tensor, dim=0)
        return noise_tensor / noise_tensor.std()
        #return normalize(scale_to_range(noise_tensor))
class HiresPyramidNoiseGenerator(NoiseGenerator):
    """Pyramid noise: a uniform base plus up to 4 octaves of gaussian noise
    drawn at randomly growing resolutions, downsampled back and discounted
    per octave, finally rescaled to unit std.
    """
    def __init__(self, x=None, size=None, dtype=None, layout=None, device=None, seed=42, generator=None, sigma_min=None, sigma_max=None,
                discount=0.7, mode='nearest-exact'):
        super().__init__(x, size, dtype, layout, device, seed, generator, sigma_min, sigma_max)
        self.update(discount=discount, mode=mode)

    def __call__(self, *, discount=None, mode=None, **kwargs):
        self.update(discount=discount, mode=mode)
        self.last_seed += 1

        if len(self.size) == 5:
            b, c, t, h, w = self.size
            orig_h, orig_w, orig_t = h, w, t
            # NOTE(review): for 5D input nn.Upsample expects size=(t, h, w);
            # (orig_h, orig_w, orig_t) looks transposed — confirm for
            # non-cubic latents.
            u = nn.Upsample(size=(orig_h, orig_w, orig_t), mode=self.mode).to(self.device)
        else:
            b, c, h, w = self.size
            orig_h, orig_w = h, w
            orig_t = t = 1   # keeps the 15x cap check below valid in the 4D case
            u = nn.Upsample(size=(orig_h, orig_w), mode=self.mode).to(self.device)

        # uniform base in [-1.73, 1.73] (~unit variance)
        noise = ((torch.rand(size=self.size, dtype=self.dtype, layout=self.layout, device=self.device, generator=self.generator) - 0.5) * 2 * 1.73)
        for i in range(4):
            r = torch.rand(1, device=self.device, generator=self.generator).item() * 2 + 2   # growth factor in [2, 4)
            # grow the sampling resolution each octave, capped at 15x original
            h, w = min(orig_h * 15, int(h * (r ** i))), min(orig_w * 15, int(w * (r ** i)))
            if len(self.size) == 5:
                t = min(orig_t * 15, int(t * (r ** i)))
                new_noise = torch.randn((b, c, t, h, w), dtype=self.dtype, layout=self.layout, device=self.device, generator=self.generator)
            else:
                new_noise = torch.randn((b, c, h, w), dtype=self.dtype, layout=self.layout, device=self.device, generator=self.generator)
            upsampled_noise = u(new_noise)
            noise += upsampled_noise * self.discount ** i
            if h >= orig_h * 15 or w >= orig_w * 15 or t >= orig_t * 15:
                break # if resolution is too high
        return noise / noise.std()
class PyramidNoiseGenerator(NoiseGenerator):
    """Pyramid noise built by summing 5 octaves of gaussian noise drawn at
    doubling oversampled resolutions, interpolated back down, each with
    decreasing std (0.5**i) and discount**i weighting; rescaled to unit std.
    """
    def __init__(self, x=None, size=None, dtype=None, layout=None, device=None, seed=42, generator=None, sigma_min=None, sigma_max=None,
                discount=0.8, mode='nearest-exact'):
        super().__init__(x, size, dtype, layout, device, seed, generator, sigma_min, sigma_max)
        self.update(discount=discount, mode=mode)

    def __call__(self, *, discount=None, mode=None, **kwargs):
        self.update(discount=discount, mode=mode)
        self.last_seed += 1

        x = torch.zeros(self.size, dtype=self.dtype, layout=self.layout, device=self.device)
        if len(self.size) == 5:
            b, c, t, h, w = self.size
            orig_h, orig_w, orig_t = h, w, t
        else:
            b, c, h, w = self.size
            orig_h, orig_w = h, w

        r = 1
        for i in range(5):
            r *= 2   # each octave doubles the oversampling factor
            if len(self.size) == 5:
                scaledSize = (b, c, t * r, h * r, w * r)
                # NOTE(review): for 5D input interpolate expects size=(t, h, w);
                # (orig_h, orig_w, orig_t) looks transposed — confirm for
                # non-cubic latents.
                origSize = (orig_h, orig_w, orig_t)
            else:
                scaledSize = (b, c, h * r, w * r)
                origSize = (orig_h, orig_w)
            x += torch.nn.functional.interpolate(
                torch.normal(mean=0, std=0.5 ** i, size=scaledSize, dtype=self.dtype, layout=self.layout, device=self.device, generator=self.generator),
                size=origSize, mode=self.mode
            ) * self.discount ** i
        return x / x.std()
class InterpolatedPyramidNoiseGenerator(NoiseGenerator):
    """Like HiresPyramidNoiseGenerator, but uses F.interpolate per octave and
    normalizes by the RMS of the octave multipliers before the final unit-std
    rescale.
    """
    def __init__(self, x=None, size=None, dtype=None, layout=None, device=None, seed=42, generator=None, sigma_min=None, sigma_max=None,
                discount=0.7, mode='nearest-exact'):
        super().__init__(x, size, dtype, layout, device, seed, generator, sigma_min, sigma_max)
        self.update(discount=discount, mode=mode)

    def __call__(self, *, discount=None, mode=None, **kwargs):
        self.update(discount=discount, mode=mode)
        self.last_seed += 1

        if len(self.size) == 5:
            b, c, t, h, w = self.size
            orig_t, orig_h, orig_w = t, h, w
        else:
            b, c, h, w = self.size
            orig_h, orig_w = h, w
            t = orig_t = 1   # keeps the cap check below valid in the 4D case

        # uniform base in [-1.73, 1.73] (~unit variance)
        noise = ((torch.rand(size=self.size, dtype=self.dtype, layout=self.layout, device=self.device, generator=self.generator) - 0.5) * 2 * 1.73)
        multipliers = [1]
        for i in range(4):
            r = torch.rand(1, device=self.device, generator=self.generator).item() * 2 + 2   # growth factor in [2, 4)
            # grow the sampling resolution each octave, capped at 15x original
            h, w = min(orig_h * 15, int(h * (r ** i))), min(orig_w * 15, int(w * (r ** i)))
            if len(self.size) == 5:
                t = min(orig_t * 15, int(t * (r ** i)))
                new_noise = torch.randn((b, c, t, h, w), dtype=self.dtype, layout=self.layout, device=self.device, generator=self.generator)
                upsampled_noise = nn.functional.interpolate(new_noise, size=(orig_t, orig_h, orig_w), mode=self.mode)
            else:
                new_noise = torch.randn((b, c, h, w), dtype=self.dtype, layout=self.layout, device=self.device, generator=self.generator)
                upsampled_noise = nn.functional.interpolate(new_noise, size=(orig_h, orig_w), mode=self.mode)
            noise += upsampled_noise * self.discount ** i
            multipliers.append( self.discount ** i)
            if h >= orig_h * 15 or w >= orig_w * 15 or (len(self.size) == 5 and t >= orig_t * 15):
                break # if resolution is too high
        # divide by the RMS of the blend multipliers to keep variance ~constant
        noise = noise / sum([m ** 2 for m in multipliers]) ** 0.5
        return noise / noise.std()
class CascadeBPyramidNoiseGenerator(NoiseGenerator):
    """Cascade-style pyramid noise: gaussian base plus progressively
    downsampled gaussian offsets (upsampled back to full size) at geometric
    weights 0.75**i, normalized by the RMS of the blend multipliers.

    Only 4D (b, c, h, w) sizes are supported; 5D raises NotImplementedError.
    """
    def __init__(self, x=None, size=None, dtype=None, layout=None, device=None, seed=42, generator=None, sigma_min=None, sigma_max=None,
                levels=10, mode='nearest', size_range=[1,16]):
        super().__init__(x, size, dtype, layout, device, seed, generator, sigma_min, sigma_max)
        self.update(epsilon=x, levels=levels, mode=mode, size_range=size_range)

    def __call__(self, *, levels=10, mode='nearest', size_range=[1,16], **kwargs):
        self.update(levels=levels, mode=mode)
        if len(self.size) == 5:
            raise NotImplementedError("CascadeBPyramidNoiseGenerator is not implemented for 5D tensors (eg. video).")
        self.last_seed += 1
        b, c, h, w = self.size

        epsilon = torch.randn(self.size, dtype=self.dtype, layout=self.layout, device=self.device, generator=self.generator)
        multipliers = [1]
        for i in range(1, levels):
            m = 0.75 ** i
            # BUGFIX: the width previously reused size(-2) (the height), which
            # produced square offsets for non-square latents; width now comes
            # from size(-1).
            h, w = int(epsilon.size(-2) // (2 ** i)), int(epsilon.size(-1) // (2 ** i))
            # only blend octaves whose downsampled size falls in size_range
            if size_range is None or (size_range[0] <= h <= size_range[1] or size_range[0] <= w <= size_range[1]):
                offset = torch.randn(epsilon.size(0), epsilon.size(1), h, w, device=self.device, generator=self.generator)
                epsilon = epsilon + torch.nn.functional.interpolate(offset, size=epsilon.shape[-2:], mode=self.mode) * m
                multipliers.append(m)
            if h <= 1 or w <= 1:
                break
        # divide by the RMS of the blend multipliers to keep variance ~constant
        epsilon = epsilon / sum([m ** 2 for m in multipliers]) ** 0.5
        return epsilon
class UniformNoiseGenerator(NoiseGenerator):
    """Uniform noise on [mean - scale, mean + scale]; the default scale of
    1.73 (~sqrt(3)) gives approximately unit variance."""
    def __init__(self, x=None, size=None, dtype=None, layout=None, device=None, seed=42, generator=None, sigma_min=None, sigma_max=None,
                mean=0.0, scale=1.73):
        super().__init__(x, size, dtype, layout, device, seed, generator, sigma_min, sigma_max)
        self.update(mean=mean, scale=scale)

    def __call__(self, *, mean=None, scale=None, **kwargs):
        # per-call overrides; None keeps the stored value
        self.update(mean=mean, scale=scale)
        self.last_seed += 1
        uniform_draw = torch.rand(self.size, dtype=self.dtype, layout=self.layout, device=self.device, generator=self.generator)
        return self.scale * 2 * (uniform_draw - 0.5) + self.mean
class GaussianNoiseGenerator(NoiseGenerator):
    """Gaussian noise, re-standardized to exactly zero mean / unit std per draw.
    (The mean/std parameters are stored via update() but the output is always
    standardized.)"""
    def __init__(self, x=None, size=None, dtype=None, layout=None, device=None, seed=42, generator=None, sigma_min=None, sigma_max=None,
                mean=0.0, std=1.0):
        super().__init__(x, size, dtype, layout, device, seed, generator, sigma_min, sigma_max)
        self.update(mean=mean, std=std)

    def __call__(self, *, mean=None, std=None, **kwargs):
        # per-call overrides; None keeps the stored value
        self.update(mean=mean, std=std)
        self.last_seed += 1
        draw = torch.randn(self.size, dtype=self.dtype, layout=self.layout, device=self.device, generator=self.generator)
        return (draw - draw.mean()) / draw.std()
class GaussianBackwardsNoiseGenerator(NoiseGenerator):
    """Gaussian noise that rewinds the generator seed by one on every call,
    walking a noise sequence backwards (e.g. for unsampling/inversion)."""
    def __init__(self, x=None, size=None, dtype=None, layout=None, device=None, seed=42, generator=None, sigma_min=None, sigma_max=None,
                mean=0.0, std=1.0):
        super().__init__(x, size, dtype, layout, device, seed, generator, sigma_min, sigma_max)
        self.update(mean=mean, std=std)

    def __call__(self, *, mean=None, std=None, **kwargs):
        self.update(mean=mean, std=std)
        self.last_seed += 1
        RESplain("GaussianBackwards last seed:", self.generator.initial_seed())
        # rewind: reseed with (current seed - 1) before drawing
        self.generator.manual_seed(self.generator.initial_seed() - 1)
        noise = torch.randn(self.size, dtype=self.dtype, layout=self.layout, device=self.device, generator=self.generator)
        # standardize to exactly zero mean / unit std
        return (noise - noise.mean()) / noise.std()
class LaplacianNoiseGenerator(NoiseGenerator):
    """Gaussian/4 base plus Laplace(loc, scale) noise, rescaled to unit std.

    torch.distributions does not accept a Generator, so the global RNG is
    temporarily seeded from this generator's seed and restored afterwards to
    keep the draw reproducible without perturbing global state.
    """
    def __init__(self, x=None, size=None, dtype=None, layout=None, device=None, seed=42, generator=None, sigma_min=None, sigma_max=None,
                loc=0, scale=1.0):
        super().__init__(x, size, dtype, layout, device, seed, generator, sigma_min, sigma_max)
        self.update(loc=loc, scale=scale)

    def __call__(self, *, loc=None, scale=None, **kwargs):
        self.update(loc=loc, scale=scale)
        self.last_seed += 1
        # b, c, h, w = self.size
        # orig_h, orig_w = h, w

        # low-amplitude gaussian base
        noise = torch.randn(self.size, dtype=self.dtype, layout=self.layout, device=self.device, generator=self.generator) / 4.0
        # seed the global RNG for the Laplace rsample, then restore it below
        rng_state = torch.random.get_rng_state()
        torch.manual_seed(self.generator.initial_seed())
        laplacian_noise = Laplace(loc=self.loc, scale=self.scale).rsample(self.size).to(self.device)
        self.generator.manual_seed(self.generator.initial_seed() + 1)   # advance for the next call
        torch.random.set_rng_state(rng_state)

        noise += laplacian_noise
        return noise / noise.std()
class StudentTNoiseGenerator(NoiseGenerator):
    # Heavy-tailed noise: Student-t samples clamped at the per-batch 75th
    # percentile of |x|, signed-square-root compressed, then standardized.
    def __init__(self, x=None, size=None, dtype=None, layout=None, device=None, seed=42, generator=None, sigma_min=None, sigma_max=None,
                 loc=0, scale=0.2, df=1):
        super().__init__(x, size, dtype, layout, device, seed, generator, sigma_min, sigma_max)
        self.update(loc=loc, scale=scale, df=df)

    def __call__(self, *, loc=None, scale=None, df=None, **kwargs):
        self.update(loc=loc, scale=scale, df=df)
        self.last_seed += 1
        # b, c, h, w = self.size
        # orig_h, orig_w = h, w
        # StudentT.rsample draws from torch's global RNG: save it, reseed from
        # this generator's seed for determinism, restore afterwards.
        rng_state = torch.random.get_rng_state()
        torch.manual_seed(self.generator.initial_seed())
        noise = StudentT(loc=self.loc, scale=self.scale, df=self.df).rsample(self.size)
        if not isinstance(self, BrownianNoiseGenerator):  # NOTE(review): extra seed bump for non-Brownian use — purpose unclear, confirm
            self.last_seed += 1
        # Clamp at the 75th percentile of |noise| per batch element to tame tails.
        s = torch.quantile(noise.flatten(start_dim=1).abs(), 0.75, dim=-1)
        if len(self.size) == 5:
            # 5D (e.g. video latents b, c, t, h, w): broadcast over 4 trailing dims.
            s = s.reshape(*s.shape, 1, 1, 1, 1)
        else:
            s = s.reshape(*s.shape, 1, 1, 1)
        noise = noise.clamp(-s, s)
        # Signed square root compresses magnitudes while preserving sign.
        noise_latent = torch.copysign(torch.pow(torch.abs(noise), 0.5), noise).to(self.device)
        self.generator.manual_seed(self.generator.initial_seed() + 1)
        torch.random.set_rng_state(rng_state)
        return (noise_latent - noise_latent.mean()) / noise_latent.std()
class WaveletNoiseGenerator(NoiseGenerator):
    """Gaussian noise round-tripped through a wavelet decompose/reconstruct."""

    def __init__(self, x=None, size=None, dtype=None, layout=None, device=None, seed=42, generator=None, sigma_min=None, sigma_max=None,
                 wavelet='haar'):
        super().__init__(x, size, dtype, layout, device, seed, generator, sigma_min, sigma_max)
        self.update(wavelet=wavelet)

    def __call__(self, *, wavelet=None, **kwargs):
        self.update(wavelet=wavelet)
        self.last_seed += 1
        base = torch.randn(self.size, dtype=self.dtype, layout=self.layout, device=self.device, generator=self.generator)
        # pywt works on CPU arrays; decompose and immediately reconstruct.
        decomposition = pywt.wavedecn(base.to('cpu'), wavelet=self.wavelet, mode='periodization')
        reconstructed = pywt.waverecn(decomposition, wavelet=self.wavelet, mode='periodization')
        out = torch.tensor(reconstructed, dtype=self.dtype, device=self.device)
        # Standardize to zero mean / unit std before returning.
        out = (out - out.mean()) / out.std()
        return out
class PerlinNoiseGenerator(NoiseGenerator):
    """Perlin gradient noise: two octaves added onto attenuated Gaussian noise."""

    def __init__(self, x=None, size=None, dtype=None, layout=None, device=None, seed=42, generator=None, sigma_min=None, sigma_max=None,
                 detail=0.0):
        super().__init__(x, size, dtype, layout, device, seed, generator, sigma_min, sigma_max)
        self.update(detail=detail)

    @staticmethod
    def get_positions(block_shape: Tuple[int, int]) -> Tensor:
        """Cell-relative sample positions, shape (1, bh, bw, 1, 1, 2)."""
        bh, bw = block_shape
        positions = torch.stack(
            torch.meshgrid(
                [(torch.arange(b) + 0.5) / b for b in (bw, bh)],
                indexing="xy",
            ),
            -1,
        ).view(1, bh, bw, 1, 1, 2)
        return positions

    @staticmethod
    def unfold_grid(vectors: Tensor) -> Tensor:
        """Gather the 4 corner gradient vectors for every grid cell."""
        batch_size, _, gpy, gpx = vectors.shape
        return (
            unfold(vectors, (2, 2))
            .view(batch_size, 2, 4, -1)
            .permute(0, 2, 3, 1)
            .view(batch_size, 4, gpy - 1, gpx - 1, 2)
        )

    @staticmethod
    def smooth_step(t: Tensor) -> Tensor:
        # Classic smoothstep interpolant: 3t^2 - 2t^3.
        return t * t * (3.0 - 2.0 * t)

    @staticmethod
    def perlin_noise_tensor(
        self,
        vectors: Tensor, positions: Tensor, step: Callable = None
    ) -> Tensor:
        """Blend corner-gradient dot products into one noise plane per batch.

        NOTE(review): declared @staticmethod yet takes `self`; callers pass
        the instance explicitly (see perlin_noise below).
        """
        if step is None:
            step = self.smooth_step

        batch_size = vectors.shape[0]
        # grid height, grid width
        gh, gw = vectors.shape[2:4]
        # block height, block width
        bh, bw = positions.shape[1:3]

        for i in range(2):
            if positions.shape[i + 3] not in (1, vectors.shape[i + 2]):
                raise Exception(
                    f"Blocks shapes do not match: vectors ({vectors.shape[1]}, {vectors.shape[2]}), positions {gh}, {gw})"
                )

        if positions.shape[0] not in (1, batch_size):
            raise Exception(
                f"Batch sizes do not match: vectors ({vectors.shape[0]}), positions ({positions.shape[0]})"
            )

        vectors = vectors.view(batch_size, 4, 1, gh * gw, 2)
        positions = positions.view(positions.shape[0], bh * bw, -1, 2)

        step_x = step(positions[..., 0])
        step_y = step(positions[..., 1])

        # Bilinear interpolation of the four corner dot products.
        row0 = lerp(
            (vectors[:, 0] * positions).sum(dim=-1),
            (vectors[:, 1] * (positions - positions.new_tensor((1, 0)))).sum(dim=-1),
            step_x,
        )
        row1 = lerp(
            (vectors[:, 2] * (positions - positions.new_tensor((0, 1)))).sum(dim=-1),
            (vectors[:, 3] * (positions - positions.new_tensor((1, 1)))).sum(dim=-1),
            step_x,
        )
        noise = lerp(row0, row1, step_y)
        return (
            noise.view(
                batch_size,
                bh,
                bw,
                gh,
                gw,
            )
            .permute(0, 3, 1, 4, 2)
            .reshape(batch_size, gh * bh, gw * bw)
        )

    def perlin_noise(
        self,
        grid_shape: Tuple[int, int],
        out_shape: Tuple[int, int],
        batch_size: int = 1,
        generator: Generator = None,
        *args,
        **kwargs,
    ) -> Tensor:
        """One Perlin octave of shape (gh*bh, gw*bw); batch dim squeezed if 1.

        Raises when `out_shape` is not an exact multiple of `grid_shape`.
        NOTE(review): the `generator` argument is ignored; self.generator is used.
        """
        gh, gw = grid_shape  # grid height and width
        oh, ow = out_shape   # output height and width
        bh, bw = oh // gh, ow // gw  # block height and width
        if oh != bh * gh:
            raise Exception(f"Output height {oh} must be divisible by grid height {gh}")
        # Fixed: was `if ow != bw * gw != 0:` — the chained comparison let
        # ow < gw (bw * gw == 0) slip through; mirror the height check above.
        if ow != bw * gw:
            raise Exception(f"Output width {ow} must be divisible by grid width {gw}")

        # Random gradient angles on grid points (uniform over [0, 2*pi)).
        angle = torch.empty(
            [batch_size] + [s + 1 for s in grid_shape], device=self.device, *args, **kwargs
        ).uniform_(to=2.0 * pi, generator=self.generator)
        # random vectors on grid points
        vectors = self.unfold_grid(torch.stack((torch.cos(angle), torch.sin(angle)), dim=1))
        # positions inside grid cells [0, 1)
        positions = self.get_positions((bh, bw)).to(vectors)
        return self.perlin_noise_tensor(self, vectors, positions).squeeze(0)

    def __call__(self, *, detail=None, **kwargs):
        self.update(detail=detail)  # currently unused
        self.last_seed += 1
        if len(self.size) == 5:
            # Video latents (b, c, t, h, w): add two Perlin octaves per frame.
            b, c, t, h, w = self.size
            noise = torch.randn(self.size, dtype=self.dtype, layout=self.layout, device=self.device, generator=self.generator) / 2.0
            for tt in range(t):
                for i in range(2):
                    perlin_slice = self.perlin_noise((h, w), (h, w), batch_size=c, generator=self.generator).to(self.device)
                    perlin_expanded = perlin_slice.unsqueeze(0).unsqueeze(2)
                    noise[:, :, tt:tt+1, :, :] += perlin_expanded
        else:
            b, c, h, w = self.size
            noise = torch.randn(self.size, dtype=self.dtype, layout=self.layout, device=self.device, generator=self.generator) / 2.0
            for i in range(2):
                noise += self.perlin_noise((h, w), (h, w), batch_size=c, generator=self.generator).to(self.device)
        return noise / noise.std()
from functools import partial
# Registry of noise generator classes keyed by the noise-type string exposed
# in the UI. Insertion order matters: NOISE_GENERATOR_NAMES below is derived
# from it. Parameterized variants are pre-bound via noise_generator_factory.
NOISE_GENERATOR_CLASSES = {
    "fractal"               : FractalNoiseGenerator,
    "gaussian"              : GaussianNoiseGenerator,
    "gaussian_backwards"    : GaussianBackwardsNoiseGenerator,
    "uniform"               : UniformNoiseGenerator,
    "pyramid-cascade_B"     : CascadeBPyramidNoiseGenerator,
    "pyramid-interpolated"  : InterpolatedPyramidNoiseGenerator,
    "pyramid-bilinear"      : noise_generator_factory(PyramidNoiseGenerator,      mode='bilinear'),
    "pyramid-bicubic"       : noise_generator_factory(PyramidNoiseGenerator,      mode='bicubic'),
    "pyramid-nearest"       : noise_generator_factory(PyramidNoiseGenerator,      mode='nearest'),
    "hires-pyramid-bilinear": noise_generator_factory(HiresPyramidNoiseGenerator, mode='bilinear'),
    "hires-pyramid-bicubic" : noise_generator_factory(HiresPyramidNoiseGenerator, mode='bicubic'),
    "hires-pyramid-nearest" : noise_generator_factory(HiresPyramidNoiseGenerator, mode='nearest'),
    "brownian"              : BrownianNoiseGenerator,
    "laplacian"             : LaplacianNoiseGenerator,
    "studentt"              : StudentTNoiseGenerator,
    "wavelet"               : WaveletNoiseGenerator,
    "perlin"                : PerlinNoiseGenerator,
}

# Reduced registry for "simple" sampler nodes. Colored-noise entries are
# FractalNoiseGenerator with a fixed spectral exponent alpha
# (brown=2.0 ... ultraviolet_C=-5.0). "none" maps to plain Gaussian noise.
NOISE_GENERATOR_CLASSES_SIMPLE = {
    "none"                  : GaussianNoiseGenerator,
    "brownian"              : BrownianNoiseGenerator,
    "gaussian"              : GaussianNoiseGenerator,
    "gaussian_backwards"    : GaussianBackwardsNoiseGenerator,
    "laplacian"             : LaplacianNoiseGenerator,
    "perlin"                : PerlinNoiseGenerator,
    "studentt"              : StudentTNoiseGenerator,
    "uniform"               : UniformNoiseGenerator,
    "wavelet"               : WaveletNoiseGenerator,
    "brown"                 : noise_generator_factory(FractalNoiseGenerator, alpha=2.0),
    "pink"                  : noise_generator_factory(FractalNoiseGenerator, alpha=1.0),
    "white"                 : noise_generator_factory(FractalNoiseGenerator, alpha=0.0),
    "blue"                  : noise_generator_factory(FractalNoiseGenerator, alpha=-1.0),
    "violet"                : noise_generator_factory(FractalNoiseGenerator, alpha=-2.0),
    "ultraviolet_A"         : noise_generator_factory(FractalNoiseGenerator, alpha=-3.0),
    "ultraviolet_B"         : noise_generator_factory(FractalNoiseGenerator, alpha=-4.0),
    "ultraviolet_C"         : noise_generator_factory(FractalNoiseGenerator, alpha=-5.0),
    "hires-pyramid-bicubic" : noise_generator_factory(HiresPyramidNoiseGenerator, mode='bicubic'),
    "hires-pyramid-bilinear": noise_generator_factory(HiresPyramidNoiseGenerator, mode='bilinear'),
    "hires-pyramid-nearest" : noise_generator_factory(HiresPyramidNoiseGenerator, mode='nearest'),
    "pyramid-bicubic"       : noise_generator_factory(PyramidNoiseGenerator, mode='bicubic'),
    "pyramid-bilinear"      : noise_generator_factory(PyramidNoiseGenerator, mode='bilinear'),
    "pyramid-nearest"       : noise_generator_factory(PyramidNoiseGenerator, mode='nearest'),
    "pyramid-interpolated"  : InterpolatedPyramidNoiseGenerator,
    "pyramid-cascade_B"     : CascadeBPyramidNoiseGenerator,
}

# Simplex noise needs the optional opensimplex package.
if OPENSIMPLEX_ENABLE:
    NOISE_GENERATOR_CLASSES.update({
        "simplex": SimplexNoiseGenerator,
    })

# UI dropdown option tuples, in registry order.
NOISE_GENERATOR_NAMES        = tuple(NOISE_GENERATOR_CLASSES.keys())
NOISE_GENERATOR_NAMES_SIMPLE = tuple(NOISE_GENERATOR_CLASSES_SIMPLE.keys())
@precision_tool.cast_tensor
def prepare_noise(latent_image, seed, noise_type, noise_inds=None, alpha=1.0, k=1.0): # adapted from comfy/sample.py: https://github.com/comfyanonymous/ComfyUI
    """Build noise matching `latent_image` using the named generator.

    `noise_inds` (optional) selects per-batch-item noise draws, discarding
    intermediate generations so a given seed/index pair is reproducible.
    """
    generator_cls = NOISE_GENERATOR_CLASSES.get(noise_type)
    noise_func = generator_cls(x=latent_image, seed=seed, sigma_min=0.0291675, sigma_max=14.614642) # WARNING: HARDCODED SDXL SIGMA RANGE!

    if noise_type == "fractal":
        noise_func.alpha = alpha
        noise_func.k     = k

    # from here until return is very similar to comfy/sample.py
    if noise_inds is None:
        return noise_func(sigma=14.614642, sigma_next=0.0291675)

    unique_inds, inverse = np.unique(noise_inds, return_inverse=True)
    per_item_shape = [1] + list(latent_image.size())[1:]
    kept = []
    for idx in range(unique_inds[-1] + 1):
        candidate = noise_func(size=per_item_shape,
                               dtype=latent_image.dtype,
                               layout=latent_image.layout,
                               device=latent_image.device)
        if idx in unique_inds:
            kept.append(candidate)
    ordered = [kept[pos] for pos in inverse]
    return torch.cat(ordered, axis=0)
================================================
FILE: beta/phi_functions.py
================================================
import torch
import math
from typing import Optional
# Remainder solution
def _phi(j, neg_h):
remainder = torch.zeros_like(neg_h)
for k in range(j):
remainder += (neg_h)**k / math.factorial(k)
phi_j_h = ((neg_h).exp() - remainder) / (neg_h)**j
return phi_j_h
def calculate_gamma(c2, c3):
    """Free-parameter gamma from the RK node fractions c2, c3."""
    numerator = 3 * c3**3 - 2 * c3
    denominator = c2 * (2 - 3 * c2)
    return numerator / denominator
# Exact analytic solution originally calculated by Clybius. https://github.com/Clybius/ComfyUI-Extra-Samplers/tree/main
def _gamma(n: int,) -> int:
"""
https://en.wikipedia.org/wiki/Gamma_function
for every positive integer n,
Γ(n) = (n-1)!
"""
return math.factorial(n-1)
def _incomplete_gamma(s: int, x: float, gamma_s: Optional[int] = None) -> float:
"""
https://en.wikipedia.org/wiki/Incomplete_gamma_function#Special_values
if s is a positive integer,
Γ(s, x) = (s-1)!*∑{k=0..s-1}(x^k/k!)
"""
if gamma_s is None:
gamma_s = _gamma(s)
sum_: float = 0
# {k=0..s-1} inclusive
for k in range(s):
numerator: float = x**k
denom: int = math.factorial(k)
quotient: float = numerator/denom
sum_ += quotient
incomplete_gamma_: float = sum_ * math.exp(-x) * gamma_s
return incomplete_gamma_
def phi(j: int, neg_h: float, ):
    """Exact analytic phi_j(-h) for integer j > 0.

    From Lemma 1 of https://arxiv.org/abs/2308.02157:
        phi_j(-h) = e^{-h} * (-h)^{-j} * (1 - Γ(j, -h) / Γ(j))
    using the integer-order gamma and upper incomplete gamma helpers above.
    (Analytic solution originally calculated by Clybius:
    https://github.com/Clybius/ComfyUI-Extra-Samplers/tree/main)
    """
    assert j > 0
    complete = _gamma(j)
    incomplete = _incomplete_gamma(j, neg_h, gamma_s=complete)
    return math.exp(neg_h) * neg_h**-j * (1 - incomplete / complete)
from mpmath import mp, mpf, factorial, exp
mp.dps = 80 # e.g. 80 decimal digits (~ float256)
def phi_mpmath_series(j: int, neg_h: float) -> float:
    """Arbitrary-precision phi_j(-h) via the remainder-series definition,
    evaluated with mpmath mpf arithmetic and returned as a float."""
    order = int(j)
    z = mpf(float(neg_h))
    # Partial sum of exp's Taylor series: sum_{k=0..j-1} z^k / k!
    partial = mp.mpf('0')
    for k in range(order):
        partial += (z**k) / factorial(k)
    return float((exp(z) - partial) / (z**order))
class Phi:
    # Memoizing evaluator for phi_j(-h * c_i), used when computing exponential
    # Runge-Kutta coefficients.
    def __init__(self, h, c, analytic_solution=False):
        # h: step size; c: sequence of RK node fractions c_i.
        self.h = h
        self.c = c
        self.cache = {}
        if analytic_solution:
            #self.phi_f = superphi
            # High-precision mpmath series; promote h and c to mpf so the
            # whole evaluation stays at mp.dps digits.
            self.phi_f = phi_mpmath_series
            self.h = mpf(float(h))
            self.c = [mpf(c_val) for c_val in c]
            #self.c = c
            #self.phi_f = phi
        else:
            self.phi_f = phi
            #self.phi_f = _phi # remainder method

    def __call__(self, j, i=-1):
        # Return phi_j(-h * c_i); i < 0 selects c = 1 (the full step).
        # Results are cached per (j, i).
        if (j, i) in self.cache:
            return self.cache[(j, i)]
        if i < 0:
            c = 1
        else:
            c = self.c[i - 1]  # NOTE(review): i appears to be 1-based here — confirm against callers
        if c == 0:
            # phi of a zero node is defined as 0 here (degenerate stage).
            self.cache[(j, i)] = 0
            return 0
        if j == 0 and type(c) in {float, torch.Tensor}:
            # phi_0(-hc) = exp(-hc); only the float/tensor case short-circuits
            # (int c == 1 and mpf values fall through to phi_f).
            result = math.exp(float(-self.h * c))
        else:
            result = self.phi_f(j, -self.h * c)
        self.cache[(j, i)] = result
        return result
from mpmath import mp, mpf, gamma, gammainc
def superphi(j: int, neg_h: float, ):
    """phi_j(-h) via mpmath's gamma and lower incomplete gamma."""
    complete = gamma(j)
    # Upper incomplete Γ(j, -h) = Γ(j) - lower incomplete γ(j, -h).
    upper_incomplete = complete - gammainc(j, 0, float(neg_h))
    prefactor = float(math.exp(float(neg_h)) * neg_h**-j)
    return float(prefactor * (1 - upper_incomplete / complete))
================================================
FILE: beta/rk_coefficients_beta.py
================================================
import torch
from torch import Tensor
import copy
import math
from mpmath import mp, mpf, factorial, exp
mp.dps = 80
from typing import Optional, Callable, Tuple, Dict, Any, Union, TYPE_CHECKING, TypeVar
from .deis_coefficients import get_deis_coeff_list
from .phi_functions import phi, Phi, calculate_gamma
from ..helper import ExtraOptions, get_extra_options_kv, extra_options_flag
from itertools import permutations, combinations
import random
from einops import rearrange, einsum
from ..res4lyf import get_display_sampler_category
# Samplers with free parameters (c1, c2, c3)
# 1 2 3
# X res_2s
# X X res_3s
# X res_3s_alt
# X res_3s_strehmel_weiner
# X dpmpp_2s (dpmpp_sde_2s has c2=1.0)
# X X dpmpp_3s
# X X irk_exp_diag_2s
# Name prefixes identifying exponential-integrator sampler families.
RK_EXPONENTIAL_PREFIXES = (
    "res",
    "dpmpp",
    "ddim",
    "pec",
    "etdrk",
    "lawson",
    "abnorsett",
)

def is_exponential(rk_type: str) -> bool:
    """True when `rk_type` names an exponential-integrator sampler."""
    return any(rk_type.startswith(prefix) for prefix in RK_EXPONENTIAL_PREFIXES)
# Master catalog of RK samplers, each prefixed with its UI category folder
# ("multistep/", "exponential/", "hybrid/", "linear/", "diag_implicit/",
# "fully_implicit/"). Order matters: it is the display order, and the
# derived *_NO_FOLDERS lists and *_FOLDER_MAP dicts below are built from it.
RK_SAMPLER_NAMES_BETA_FOLDERS = ["none",
                    "multistep/res_2m",
                    "multistep/res_3m",
                    "multistep/dpmpp_2m",
                    "multistep/dpmpp_3m",
                    "multistep/abnorsett_2m",
                    "multistep/abnorsett_3m",
                    "multistep/abnorsett_4m",
                    "multistep/deis_2m",
                    "multistep/deis_3m",
                    "multistep/deis_4m",

                    "exponential/res_2s_rkmk2e",
                    "exponential/res_2s",
                    "exponential/res_2s_stable",
                    "exponential/res_3s",
                    "exponential/res_3s_non-monotonic",
                    "exponential/res_3s_alt",
                    "exponential/res_3s_cox_matthews",
                    "exponential/res_3s_lie",
                    "exponential/res_3s_sunstar",
                    "exponential/res_3s_strehmel_weiner",
                    "exponential/res_4s_krogstad",
                    "exponential/res_4s_krogstad_alt",
                    "exponential/res_4s_strehmel_weiner",
                    "exponential/res_4s_strehmel_weiner_alt",
                    "exponential/res_4s_cox_matthews",
                    "exponential/res_4s_cfree4",
                    "exponential/res_4s_friedli",
                    "exponential/res_4s_minchev",
                    "exponential/res_4s_munthe-kaas",
                    "exponential/res_5s",
                    "exponential/res_5s_hochbruck-ostermann",
                    "exponential/res_6s",
                    "exponential/res_8s",
                    "exponential/res_8s_alt",
                    "exponential/res_10s",
                    "exponential/res_15s",
                    "exponential/res_16s",
                    "exponential/etdrk2_2s",
                    "exponential/etdrk3_a_3s",
                    "exponential/etdrk3_b_3s",
                    "exponential/etdrk4_4s",
                    "exponential/etdrk4_4s_alt",
                    "exponential/dpmpp_2s",
                    "exponential/dpmpp_sde_2s",
                    "exponential/dpmpp_3s",
                    "exponential/lawson2a_2s",
                    "exponential/lawson2b_2s",
                    "exponential/lawson4_4s",
                    "exponential/lawson41-gen_4s",
                    "exponential/lawson41-gen-mod_4s",
                    "exponential/ddim",

                    "hybrid/pec423_2h2s",
                    "hybrid/pec433_2h3s",
                    "hybrid/abnorsett2_1h2s",
                    "hybrid/abnorsett3_2h2s",
                    "hybrid/abnorsett4_3h2s",
                    "hybrid/lawson42-gen-mod_1h4s",
                    "hybrid/lawson43-gen-mod_2h4s",
                    "hybrid/lawson44-gen-mod_3h4s",
                    "hybrid/lawson45-gen-mod_4h4s",

                    "linear/ralston_2s",
                    "linear/ralston_3s",
                    "linear/ralston_4s",
                    "linear/midpoint_2s",
                    "linear/heun_2s",
                    "linear/heun_3s",
                    "linear/houwen-wray_3s",
                    "linear/kutta_3s",
                    "linear/ssprk3_3s",
                    "linear/ssprk4_4s",
                    "linear/rk38_4s",
                    "linear/rk4_4s",
                    "linear/rk5_7s",
                    "linear/rk6_7s",
                    "linear/bogacki-shampine_4s",
                    "linear/bogacki-shampine_7s",
                    "linear/dormand-prince_6s",
                    "linear/dormand-prince_13s",
                    "linear/tsi_7s",
                    #"verner_robust_16s",
                    "linear/euler",

                    "diag_implicit/irk_exp_diag_2s",
                    "diag_implicit/kraaijevanger_spijker_2s",
                    "diag_implicit/qin_zhang_2s",
                    "diag_implicit/pareschi_russo_2s",
                    "diag_implicit/pareschi_russo_alt_2s",
                    "diag_implicit/crouzeix_2s",
                    "diag_implicit/crouzeix_3s",
                    "diag_implicit/crouzeix_3s_alt",

                    "fully_implicit/gauss-legendre_2s",
                    "fully_implicit/gauss-legendre_3s",
                    "fully_implicit/gauss-legendre_4s",
                    "fully_implicit/gauss-legendre_4s_alternating_a",
                    "fully_implicit/gauss-legendre_4s_ascending_a",
                    "fully_implicit/gauss-legendre_4s_alt",
                    "fully_implicit/gauss-legendre_5s",
                    "fully_implicit/gauss-legendre_5s_ascending",
                    #"gauss-legendre_diag_8s",
                    "fully_implicit/radau_ia_2s",
                    "fully_implicit/radau_ia_3s",
                    "fully_implicit/radau_iia_2s",
                    "fully_implicit/radau_iia_3s",
                    "fully_implicit/radau_iia_3s_alt",
                    "fully_implicit/radau_iia_5s",
                    "fully_implicit/radau_iia_7s",
                    "fully_implicit/radau_iia_9s",
                    "fully_implicit/radau_iia_11s",
                    "fully_implicit/lobatto_iiia_2s",
                    "fully_implicit/lobatto_iiia_3s",
                    "fully_implicit/lobatto_iiia_4s",
                    "fully_implicit/lobatto_iiib_2s",
                    "fully_implicit/lobatto_iiib_3s",
                    "fully_implicit/lobatto_iiib_4s",
                    "fully_implicit/lobatto_iiic_2s",
                    "fully_implicit/lobatto_iiic_3s",
                    "fully_implicit/lobatto_iiic_4s",
                    "fully_implicit/lobatto_iiic_star_2s",
                    "fully_implicit/lobatto_iiic_star_3s",
                    "fully_implicit/lobatto_iiid_2s",
                    "fully_implicit/lobatto_iiid_3s",
                    ]
# Flat (category-free) counterpart of the full sampler catalog ("none" excluded).
RK_SAMPLER_NAMES_BETA_NO_FOLDERS = [
    full_name.split("/")[-1] for full_name in RK_SAMPLER_NAMES_BETA_FOLDERS[1:]
]

# Implicit samplers keep their category prefix; "use_explicit" is a sentinel
# meaning "run the chosen explicit sampler instead".
IRK_SAMPLER_NAMES_BETA_FOLDERS = ["none", "use_explicit"]
for full_name in RK_SAMPLER_NAMES_BETA_FOLDERS[1:]:
    if "implicit" in full_name and "/" in full_name:
        IRK_SAMPLER_NAMES_BETA_FOLDERS.append(full_name)

IRK_SAMPLER_NAMES_BETA_NO_FOLDERS = [
    full_name.split("/")[-1] for full_name in IRK_SAMPLER_NAMES_BETA_FOLDERS[1:]
]

# base sampler name -> category folder ("" when the name has no folder).
RK_SAMPLER_FOLDER_MAP = {}
for full_name in RK_SAMPLER_NAMES_BETA_FOLDERS:
    folder_part, _, base_part = full_name.rpartition("/")
    RK_SAMPLER_FOLDER_MAP[base_part] = folder_part

IRK_SAMPLER_FOLDER_MAP = {}
for full_name in IRK_SAMPLER_NAMES_BETA_FOLDERS:
    folder_part, _, base_part = full_name.rpartition("/")
    IRK_SAMPLER_FOLDER_MAP[base_part] = folder_part
class DualFormatList(list):
    """A list whose membership test matches names with or without category prefixes."""

    def __contains__(self, item):
        # Exact match first (plain list semantics).
        if list.__contains__(self, item):
            return True
        if isinstance(item, str) and "/" in item:
            # Prefixed query: match any stored name that shares the base name.
            tail = item.split("/")[-1]
            for candidate in self:
                if candidate.endswith(tail):
                    return True
            return False
        # Bare query: match any stored "<folder>/<item>" entry.
        for candidate in self:
            if isinstance(candidate, str) and candidate.endswith("/" + item):
                return True
        return False
def get_sampler_name_list(nameOnly = False) -> list:
    """All sampler names for display, category-prefixed when enabled.

    Returns a DualFormatList so membership tests accept either format.
    """
    display_names = []
    for base_name in RK_SAMPLER_FOLDER_MAP:
        if get_display_sampler_category() and not nameOnly:
            entry = f"{RK_SAMPLER_FOLDER_MAP[base_name]}/{base_name}"
        else:
            entry = base_name
        # Entries with no folder ("none") would otherwise render as "/none".
        if entry[0] == "/":
            entry = entry[1:]
        display_names.append(entry)
    return DualFormatList(display_names)
def get_default_sampler_name(nameOnly = False) -> str:
    """Default sampler ("res_2m"), category-prefixed when display categories are on."""
    default_sampler_name = "res_2m"
    if default_sampler_name in RK_SAMPLER_FOLDER_MAP:
        if get_display_sampler_category() and not nameOnly:
            return f"{RK_SAMPLER_FOLDER_MAP[default_sampler_name]}/{default_sampler_name}"
        return default_sampler_name
    return default_sampler_name
def get_implicit_sampler_name_list(nameOnly = False) -> list:
    """Implicit sampler names for display, category-prefixed when enabled.

    Returns a DualFormatList so membership tests accept either format.
    """
    display_names = []
    for base_name in IRK_SAMPLER_FOLDER_MAP:
        if get_display_sampler_category() and not nameOnly:
            entry = f"{IRK_SAMPLER_FOLDER_MAP[base_name]}/{base_name}"
        else:
            entry = base_name
        # Entries with no folder ("none", "use_explicit") would render as "/name".
        if entry[0] == "/":
            entry = entry[1:]
        display_names.append(entry)
    return DualFormatList(display_names)
def get_default_implicit_sampler_name(nameOnly = False) -> str:
    """Default implicit sampler name, category-prefixed when enabled.

    NOTE(review): "explicit_diagonal" does not appear in IRK_SAMPLER_FOLDER_MAP
    (which holds "none"/"use_explicit" and the implicit samplers), so the
    lookup never matches and the bare default is always returned — confirm
    whether "use_explicit" was intended.
    """
    default_sampler_value = "explicit_diagonal"
    if default_sampler_value in IRK_SAMPLER_FOLDER_MAP:
        if get_display_sampler_category() and not nameOnly:
            return f"{IRK_SAMPLER_FOLDER_MAP[default_sampler_value]}/{default_sampler_value}"
        return default_sampler_value
    return default_sampler_value
def get_full_sampler_name(sampler_name_in: str) -> str:
    """Return the category-prefixed name for a bare sampler name.

    Names that already carry a folder prefix are returned unchanged;
    names not found in RK_SAMPLER_FOLDER_MAP are returned as given.
    """
    if "/" in sampler_name_in and sampler_name_in[0] != "/":
        return sampler_name_in
    for sampler_name in RK_SAMPLER_FOLDER_MAP:
        if sampler_name == sampler_name_in:
            folder_name = RK_SAMPLER_FOLDER_MAP[sampler_name]
            return f"{folder_name}/{sampler_name}"
    # Fix: the fall-through previously returned the leaked loop variable
    # `sampler_name` (i.e. the *last* map key) for unknown names; return the
    # input unchanged instead.
    return sampler_name_in
def process_sampler_name(sampler_name_in):
    """Split a (possibly category-prefixed) sampler name into
    (explicit sampler name, implicit sampler name).

    Implicit samplers run under the "euler" explicit sampler; explicit
    samplers use the "use_explicit" sentinel in the implicit slot.
    (A previously computed `full_sampler_name` local was unused and has
    been removed — get_full_sampler_name has no side effects.)
    """
    processed_name = sampler_name_in.split("/")[-1] if "/" in sampler_name_in else sampler_name_in
    if sampler_name_in.startswith(("fully_implicit", "diag_implicit")):
        return "euler", processed_name
    return processed_name, "use_explicit"
# Diagonally-implicit RK constants from Crouzeix & Raviart (1980); A-stable.
# See pg 100 in "Solving Ordinary Differential Equations II".
alpha_crouzeix = (2/(3**0.5)) * math.cos(math.pi / 18)
gamma_crouzeix = (1/(3**0.5)) * math.cos(math.pi / 18) + 1/2 # Crouzeix & Raviart 1980; A-stable; pg 100 in Solving Ordinary Differential Equations II
delta_crouzeix = 1 / (6 * (2 * gamma_crouzeix - 1)**2) # Crouzeix & Raviart 1980; A-stable; pg 100 in Solving Ordinary Differential Equations II
rk_coeff = {
"gauss-legendre_diag_8s": ( # https://github.com/SciML/IRKGaussLegendre.jl/blob/master/src/IRKCoefficients.jl Antoñana, M., Makazaga, J., Murua, Ander. "Reducing and monitoring round-off error propagation for symplectic implicit Runge-Kutta schemes." Numerical Algorithms. 2017.
[
[
0.5,
0,0,0,0,0,0,0,
],
[
1.0818949631055814971365081647359309e00,
0.5,
0,0,0,0,0,0,
],
[
9.5995729622205494766003095439844678e-01,
1.0869589243008327233290709646162480e00,
0.5,
0,0,0,0,0,
],
[
1.0247213458032003748680445816450829e00,
9.5505887369737431186016905653386876e-01,
1.0880938387323083134422138713913203e00,
0.5,
0,0,0,0,
],
[
9.8302382676362890697311829123888390e-01,
1.0287597754747493109782305570410685e00,
9.5383453518519996588326911440754302e-01,
1.0883471611098277842507073806008045e00,
0.5,
0,0,0,
],
[
1.0122259141132982060539425317219435e00,
9.7998287236359129082628958290257329e-01,
1.0296038730649779374630125982121223e00,
9.5383453518519996588326911440754302e-01,
1.0880938387323083134422138713913203e00,
0.5,
0,0,
],
[
9.9125143323080263118822334698608777e-01,
1.0140743558891669291459735166525994e00,
9.7998287236359129082628958290257329e-01,
1.0287597754747493109782305570410685e00,
9.5505887369737431186016905653386876e-01,
1.0869589243008327233290709646162480e00,
0.5,
0,
],
[
1.0054828082532158826793409353214951e00,
9.9125143323080263118822334698608777e-01,
1.0122259141132982060539425317219435e00,
9.8302382676362890697311829123888390e-01,
1.0247213458032003748680445816450829e00,
9.5995729622205494766003095439844678e-01,
1.0818949631055814971365081647359309e00,
0.5,
],
],
[
[
5.0614268145188129576265677154981094e-02,
1.1119051722668723527217799721312045e-01,
1.5685332293894364366898110099330067e-01,
1.8134189168918099148257522463859781e-01,
1.8134189168918099148257522463859781e-01,
1.5685332293894364366898110099330067e-01,
1.1119051722668723527217799721312045e-01,
5.0614268145188129576265677154981094e-02,]
],
[
1.9855071751231884158219565715263505e-02, # 0.019855071751231884158219565715263505
1.0166676129318663020422303176208480e-01,
2.3723379504183550709113047540537686e-01,
4.0828267875217509753026192881990801e-01,
5.9171732124782490246973807118009203e-01,
7.6276620495816449290886952459462321e-01,
8.9833323870681336979577696823791522e-01,
9.8014492824876811584178043428473653e-01,
]
),
"gauss-legendre_5s": (
[
[4563950663 / 32115191526,
(310937500000000 / 2597974476091533 + 45156250000 * (739**0.5) / 8747388808389),
(310937500000000 / 2597974476091533 - 45156250000 * (739**0.5) / 8747388808389),
(5236016175 / 88357462711 + 709703235 * (739**0.5) / 353429850844),
(5236016175 / 88357462711 - 709703235 * (739**0.5) / 353429850844)],
[(4563950663 / 32115191526 - 38339103 * (739**0.5) / 6250000000),
(310937500000000 / 2597974476091533 + 9557056475401 * (739**0.5) / 3498955523355600000),
(310937500000000 / 2597974476091533 - 14074198220719489 * (739**0.5) / 3498955523355600000),
(5236016175 / 88357462711 + 5601362553163918341 * (739**0.5) / 2208936567775000000000),
(5236016175 / 88357462711 - 5040458465159165409 * (739**0.5) / 2208936567775000000000)],
[(4563950663 / 32115191526 + 38339103 * (739**0.5) / 6250000000),
(310937500000000 / 2597974476091533 + 14074198220719489 * (739**0.5) / 3498955523355600000),
(310937500000000 / 2597974476091533 - 9557056475401 * (739**0.5) / 3498955523355600000),
(5236016175 / 88357462711 + 5040458465159165409 * (739**0.5) / 2208936567775000000000),
(5236016175 / 88357462711 - 5601362553163918341 * (739**0.5) / 2208936567775000000000)],
[(4563950663 / 32115191526 - 38209 * (739**0.5) / 7938810),
(310937500000000 / 2597974476091533 - 359369071093750 * (739**0.5) / 70145310854471391),
(310937500000000 / 2597974476091533 - 323282178906250 * (739**0.5) / 70145310854471391),
(5236016175 / 88357462711 - 470139 * (739**0.5) / 1413719403376),
(5236016175 / 88357462711 - 44986764863 * (739**0.5) / 21205791050640)],
[(4563950663 / 32115191526 + 38209 * (739**0.5) / 7938810),
(310937500000000 / 2597974476091533 + 359369071093750 * (739**0.5) / 70145310854471391),
(310937500000000 / 2597974476091533 + 323282178906250 * (739**0.5) / 70145310854471391),
(5236016175 / 88357462711 + 44986764863 * (739**0.5) / 21205791050640),
(5236016175 / 88357462711 + 470139 * (739**0.5) / 1413719403376)],
],
[
[
4563950663 / 16057595763,
621875000000000 / 2597974476091533,
621875000000000 / 2597974476091533,
10472032350 / 88357462711,
10472032350 / 88357462711]
],
[
1 / 2,
1 / 2 - 99 * (739**0.5) / 10000, # smallest # 0.06941899716778028758987101075583196
1 / 2 + 99 * (739**0.5) / 10000, # largest
1 / 2 - (739**0.5) / 60,
1 / 2 + (739**0.5) / 60
]
),
"gauss-legendre_5s_ascending": (
[
[(4563950663 / 32115191526 - 38339103 * (739**0.5) / 6250000000),
(310937500000000 / 2597974476091533 + 9557056475401 * (739**0.5) / 3498955523355600000),
(310937500000000 / 2597974476091533 - 14074198220719489 * (739**0.5) / 3498955523355600000),
(5236016175 / 88357462711 + 5601362553163918341 * (739**0.5) / 2208936567775000000000),
(5236016175 / 88357462711 - 5040458465159165409 * (739**0.5) / 2208936567775000000000)],
[(4563950663 / 32115191526 - 38209 * (739**0.5) / 7938810),
(310937500000000 / 2597974476091533 - 359369071093750 * (739**0.5) / 70145310854471391),
(310937500000000 / 2597974476091533 - 323282178906250 * (739**0.5) / 70145310854471391),
(5236016175 / 88357462711 - 470139 * (739**0.5) / 1413719403376),
(5236016175 / 88357462711 - 44986764863 * (739**0.5) / 21205791050640)],
[4563950663 / 32115191526,
(310937500000000 / 2597974476091533 + 45156250000 * (739**0.5) / 8747388808389),
(310937500000000 / 2597974476091533 - 45156250000 * (739**0.5) / 8747388808389),
(5236016175 / 88357462711 + 709703235 * (739**0.5) / 353429850844),
(5236016175 / 88357462711 - 709703235 * (739**0.5) / 353429850844)],
[(4563950663 / 32115191526 + 38209 * (739**0.5) / 7938810),
(310937500000000 / 2597974476091533 + 359369071093750 * (739**0.5) / 70145310854471391),
(310937500000000 / 2597974476091533 + 323282178906250 * (739**0.5) / 70145310854471391),
(5236016175 / 88357462711 + 44986764863 * (739**0.5) / 21205791050640),
(5236016175 / 88357462711 + 470139 * (739**0.5) / 1413719403376)],
[(4563950663 / 32115191526 + 38339103 * (739**0.5) / 6250000000),
(310937500000000 / 2597974476091533 + 14074198220719489 * (739**0.5) / 3498955523355600000),
(310937500000000 / 2597974476091533 - 9557056475401 * (739**0.5) / 3498955523355600000),
(5236016175 / 88357462711 + 5040458465159165409 * (739**0.5) / 2208936567775000000000),
(5236016175 / 88357462711 - 5601362553163918341 * (739**0.5) / 2208936567775000000000)],
],
[
[621875000000000 / 2597974476091533,
10472032350 / 88357462711,
4563950663 / 16057595763,
10472032350 / 88357462711,
621875000000000 / 2597974476091533,]
],
[
1 / 2 - 99 * (739**0.5) / 10000, # smallest # 0.06941899716778028758987101075583196
1 / 2 - (739**0.5) / 60,
1 / 2,
1 / 2 + (739**0.5) / 60,
1 / 2 + 99 * (739**0.5) / 10000, # largest
]
),
"gauss-legendre_4s_alt": ( # https://ijstre.com/Publish/072016/371428231.pdf Four Point Gauss Quadrature Runge – Kuta Method Of Order 8 For Ordinary Differential Equations
[
[1633/18780 - 71*206**0.5/96717000,
134689/939000 - 927*206**0.5/78250,
171511/939000 - 927*206**0.5/78250,
1633/18780 - 121979*206**0.5/19343400,],
[7623/78250 - 1629507*206**0.5/257912000,
347013/21284000,
-118701/4256800,
7623/78250 + 1629507*206**0.5/257912000,],
[8978/117375 + 1629507*206**0.5/257912000,
4520423/12770400,
10410661/63852000,
8978/117375 + 1629507*206**0.5/257912000,],
[1633/18780 + 121979*206**0.5/19343400,
134689/939000 + 927*206**0.5/78250,
171511/939000 + 927*206**0.5/78250,
1633/18780 + 71*206**0.5/96717000,],
],
[
[1633/9390,
1531/4695,
1531/4695,
1633/9390,]
],
[
1/2 - 3*206**0.5 / 100, # 0.06941899716778028758987101075583196
33/100,
67/100,
1/2 + 3*206**0.5 / 100,
]
),
"gauss-legendre_4s": (
[
[1/4, 1/4 - 15**0.5 / 6, 1/4 + 15**0.5 / 6, 1/4],
[1/4 + 15**0.5 / 6, 1/4, 1/4 - 15**0.5 / 6, 1/4],
[1/4, 1/4 + 15**0.5 / 6, 1/4, 1/4 - 15**0.5 / 6],
[1/4 - 15**0.5 / 6, 1/4, 1/4 + 15**0.5 / 6, 1/4],
],
[
[
1/8,
3/8,
3/8,
1/8,]
],
[
1/2 - 15**0.5 / 10, # 0.11270166537925831148207346002176004
1/2 + 15**0.5 / 10,
1/2 + 15**0.5 / 10,
1/2 - 15**0.5 / 10
]
),
"gauss-legendre_4s_alternating_a": (
[
[1/4, 1/4 - 15**0.5 / 6, 1/4 + 15**0.5 / 6, 1/4],
[1/4 + 15**0.5 / 6, 1/4, 1/4 - 15**0.5 / 6, 1/4],
[1/4 - 15**0.5 / 6, 1/4, 1/4 + 15**0.5 / 6, 1/4],
[1/4, 1/4 + 15**0.5 / 6, 1/4, 1/4 - 15**0.5 / 6],
],
[
[
1/8,
3/8,
1/8,
3/8,]
],
[
1/2 - 15**0.5 / 10, # 0.11270166537925831148207346002176004
1/2 + 15**0.5 / 10,
1/2 - 15**0.5 / 10,
1/2 + 15**0.5 / 10,
]
),
"gauss-legendre_4s_ascending_a": (
[
[1/4 - 15**0.5 / 6, 1/4, 1/4 + 15**0.5 / 6, 1/4],
[1/4, 1/4 - 15**0.5 / 6, 1/4 + 15**0.5 / 6, 1/4],
[1/4, 1/4 + 15**0.5 / 6, 1/4, 1/4 - 15**0.5 / 6],
[1/4 + 15**0.5 / 6, 1/4, 1/4 - 15**0.5 / 6, 1/4],
],
[
[
1/8,
3/8,
1/8,
3/8,]
],
[
1/2 - 15**0.5 / 10,
1/2 - 15**0.5 / 10,
1/2 + 15**0.5 / 10,
1/2 + 15**0.5 / 10,
]
),
"gauss-legendre_3s": ( # Kunzmann-Butcher, IRK, order 6 https://www.math.umd.edu/~mariakc/SymplecticMethods.pdf
[
[5/36, 2/9 - 15**0.5 / 15, 5/36 - 15**0.5 / 30],
[5/36 + 15**0.5 / 24, 2/9, 5/36 - 15**0.5 / 24],
[5/36 + 15**0.5 / 30, 2/9 + 15**0.5 / 15, 5/36],
],
[
[5/18, 4/9, 5/18]
],
[1/2 - 15**0.5 / 10, 1/2, 1/2 + 15**0.5 / 10] # 0.11270166537925831148207346002176004
),
"gauss-legendre_2s": ( # Hammer-Hollingsworth, IRK, order 4 https://www.math.umd.edu/~mariakc/SymplecticMethods.pdf
[
[1/4, 1/4 - 3**0.5 / 6],
[1/4 + 3**0.5 / 6, 1/4],
],
[
[1/2, 1/2],
],
[1/2 - 3**0.5 / 6, 1/2 + 3**0.5 / 6] # 0.21132486540518711774542560974902127 # 1/2 - (1/2)*(1/3**0.5) 1/2 + (1/2)*(1/3**0.5)
),
"radau_iia_4s": (
[
[],
[],
[],
[],
],
[
[1/4, 1/4, 1/4, 1/4],
],
[(1/11)*(4-6**0.5), (1/11)*(4+6**0.5), 1/2, 1]
),
"radau_iia_11s": ( # https://github.com/ryanelandt/Radau.jl
[
[0.015280520789530369, -0.0057824996781311875, 0.00438010324638053, -0.0036210375473319026, 0.003092977042211754, -0.0026728314041491816, 0.0023050911672361017, -0.001955651803123845, 0.001593873849612843, -0.0011728625554916522, 0.00046993032567176855],
[0.03288397668119629, 0.03451351173940448, -0.009285420023734383, 0.00641324617083941, -0.005095455838865143, 0.0042460913690415955, -0.0035876743372353984, 0.003006834900018004, -0.0024326697483255453, 0.0017827773828584467, -0.0007131464180496306],
[0.029332502147155125, 0.0741624250777296, 0.0511486756872502, -0.012005023334430185, 0.00777794727524923, -0.005944695307870806, 0.004802655736401176, -0.003923600687657003, 0.003127328539609814, -0.0022731432208609507, 0.0009063777304940358],
[0.03111455337650569, 0.06578995121943092, 0.10929962691877611, 0.06381051663919307, -0.013853591907177828, 0.008557435524870741, -0.0063076358492939275, 0.004913357548166058, -0.0038139969541068734, 0.0027334306074068546, -0.0010839711153145738],
[0.03005269275666326, 0.07011284530154153, 0.09714692306747527, 0.1353916024839275, 0.07147107644479529, -0.014710238851905252, 0.008733191499420551, -0.00619941303527863, 0.004591640852897801, -0.003213330884490774, 0.001262857250740274],
[0.030728073929609766, 0.06751925856657341, 0.10334060375222286, 0.12083525997663601, 0.1503267876654705, 0.07350931976920085, -0.014512880052768446, 0.008296645645701008, -0.0056128275038367864, 0.003766229774466616, -0.001457705807615146],
[0.030292022376401242, 0.06914472100762357, 0.09972096441656238, 0.12801064060853223, 0.13493180383303127, 0.15289670039157693, 0.06975993047996924, -0.013274545709987746, 0.007258767272883859, -0.0044843888202694155, 0.0016878458203415244],
[0.03056654381836576, 0.06813851028407998, 0.10188107030389015, 0.12403361149690655, 0.14211431622263265, 0.13829395377418516, 0.14289135336320447, 0.06052636121446275, -0.011077739682117822, 0.005598667203856668, -0.0019877269625674446],
[0.030406629901865028, 0.06871880785022819, 0.10066095698900927, 0.12619527453091425, 0.13848875677027936, 0.14450773783254642, 0.13065188915037962, 0.1211140113707743, 0.046555483263607714, -0.008026200095719123, 0.002437640226261747],
[0.030484119381553945, 0.06843924691254653, 0.10124184869598654, 0.1251873187759311, 0.14011843430039864, 0.14190386755377057, 0.13500342651951197, 0.11262869537051934, 0.08930604389562254, 0.028969664972192485, -0.0033116985395201413],
[0.03046254890606557, 0.06851684106660112, 0.10108155427001221, 0.1254626888485642, 0.13968066655169153, 0.14258278197050367, 0.1339335430948421, 0.11443306192448831, 0.08565880960332992, 0.04992304095398403, 0.008264462809917356],
],
[
[0.03046254890606557, 0.06851684106660112, 0.10108155427001221, 0.1254626888485642, 0.13968066655169153, 0.14258278197050367, 0.1339335430948421, 0.11443306192448831, 0.08565880960332992, 0.04992304095398403, 0.008264462809917356],
],
[0.011917613432415597, 0.061732071877148124, 0.14711144964307024, 0.26115967600845624, 0.39463984688578685, 0.5367387657156606, 0.6759444616766651, 0.8009789210368988, 0.9017109877901468, 0.9699709678385136, 1.0]
),
"radau_iia_9s": ( # https://github.com/ryanelandt/Radau.jl
[
[0.022788378793458776, -0.008589639752938945, 0.0064510291769951465, -0.00525752869975012, 0.004388833809361376, -0.0036512155536904674, 0.0029404882137526148, -0.002149274163882554, 0.0008588433240576261],
[0.04890795244749932, 0.05070205048082808, -0.013523807196021316, 0.009209373774305071, -0.0071557133175369604, 0.005747246699432309, -0.004542582976394536, 0.003288161681791406, -0.0013090736941094112],
[0.04374276009157137, 0.10830189290274023, 0.07291956593742897, -0.016879877210016055, 0.010704551844802781, -0.007901946479238777, 0.005991406942179993, -0.0042480244399873135, 0.0016781498061495626],
[0.04624923745394712, 0.09656073072680009, 0.1542987697900386, 0.0867193693031384, -0.018451639643617873, 0.011036658729835513, -0.007673280940281649, 0.005228224999889903, -0.00203590583647778],
[0.044834436586910234, 0.10230684968594175, 0.13821763419236816, 0.18126393468214014, 0.09043360059943564, -0.018085063366782478, 0.010193387903855565, -0.006405265418866323, 0.0024271699384239612],
[0.045658755719323395, 0.09914547048938806, 0.14574704049699233, 0.16364828123387398, 0.18594458734451902, 0.08361326023153276, -0.015809936146309538, 0.00813825269404473, -0.002910469207795258],
[0.045200600187797244, 0.10085370671832047, 0.1419422367945749, 0.17118947183876332, 0.1697833861700019, 0.16776829117327952, 0.06707903432249304, -0.011792230536025322, 0.0036092462886493657],
[0.045416516657427734, 0.10006040244594375, 0.143652840987038, 0.16801908098069296, 0.17556076841841367, 0.15588627045003361, 0.12889391351650395, 0.04281082602522101, -0.004934574771244536],
[0.04535725246164146, 0.10027664901227598, 0.1431933481786156, 0.16884698348796479, 0.1741365013864833, 0.158421887835219, 0.12359468910229653, 0.0738270095231577, 0.012345679012345678],
],
[
[0.04535725246164146, 0.10027664901227598, 0.1431933481786156, 0.16884698348796479, 0.1741365013864833, 0.158421887835219, 0.12359468910229653, 0.0738270095231577, 0.012345679012345678],
],
[0.01777991514736345, 0.09132360789979396, 0.21430847939563075, 0.37193216458327233, 0.5451866848034267, 0.7131752428555694, 0.8556337429578544, 0.9553660447100302, 1.0]
),
"radau_iia_7s": ( # https://github.com/ryanelandt/Radau.jl
[
[0.03754626499392133, -0.0140393345564604, 0.0103527896007423, -0.008158322540275011, 0.006388413879534685, -0.004602326779148656, 0.0018289425614706437],
[0.08014759651561897, 0.08106206398589154, -0.021237992120711036, 0.014000291238817119, -0.010234185730090163, 0.0071534651513645905, -0.0028126393724067235],
[0.0720638469418819, 0.17106835498388662, 0.10961456404007211, -0.024619871728984055, 0.014760377043950817, -0.009575259396791401, 0.0036726783971383057],
[0.07570512581982441, 0.15409015514217114, 0.2271077366732024, 0.11747818703702478, -0.023810827153044174, 0.012709985533661206, -0.004608844281289633],
[0.07391234216319184, 0.16135560761594242, 0.2068672415521042, 0.23700711534269422, 0.10308679353381345, -0.018854139152580447, 0.0058589009748887914],
[0.07470556205979623, 0.1583072238724687, 0.21415342326720002, 0.21987784703186003, 0.19875212168063527, 0.06926550160550914, -0.00811600819772829],
[0.07449423555601031, 0.15910211573365074, 0.21235188950297781, 0.22355491450728324, 0.19047493682211558, 0.1196137446126562, 0.02040816326530612],
],
[
[0.07449423555601031, 0.15910211573365074, 0.21235188950297781, 0.22355491450728324, 0.19047493682211558, 0.1196137446126562, 0.02040816326530612],
],
[0.029316427159784893, 0.1480785996684843, 0.3369846902811543, 0.5586715187715501, 0.7692338620300545, 0.9269456713197411, 1.0]
),
"radau_iia_5s": ( # https://github.com/ryanelandt/Radau.jl
[
[0.07299886431790333, -0.02673533110794557, 0.018676929763984353, -0.01287910609330644, 0.005042839233882015],
[0.15377523147918246, 0.14621486784749352, -0.03644456890512809, 0.02123306311930472, -0.007935579902728777],
[0.14006304568480987, 0.29896712949128346, 0.16758507013524895, -0.03396910168661774, 0.010944288744192253],
[0.14489430810953477, 0.2765000687601592, 0.32579792291042103, 0.12875675325490976, -0.015708917378805327],
[0.14371356079122594, 0.28135601514946207, 0.31182652297574126, 0.22310390108357075, 0.04],
],
[
[0.14371356079122594, 0.28135601514946207, 0.31182652297574126, 0.22310390108357075, 0.04],
],
[0.05710419611451768, 0.2768430136381238, 0.5835904323689168, 0.8602401356562195, 1.0]
),
"radau_iia_3s": (
[
[11/45 - 7*6**0.5 / 360, 37/225 - 169*6**0.5 / 1800, -2/225 + 6**0.5 / 75],
[37/225 + 169*6**0.5 / 1800, 11/45 + 7*6**0.5 / 360, -2/225 - 6**0.5 / 75],
[4/9 - 6**0.5 / 36, 4/9 + 6**0.5 / 36, 1/9],
],
[
[4/9 - 6**0.5 / 36, 4/9 + 6**0.5 / 36, 1/9],
],
[2/5 - 6**0.5 / 10, 2/5 + 6**0.5 / 10, 1.]
),
"radau_iia_3s_alt": ( # https://www.unige.ch/~hairer/preprints/coimbra.pdf (page 7) Ehle [Eh69] and Axelsson [Ax69]
[
[(88 - 7*6**0.5) / 360, (296 - 169*6**0.5) / 1800, (-2 + 3 * 6**0.5) / 225],
[(296 + 169*6**0.5) / 1800, (88 + 7*6**0.5) / 360, (-2 - 3*6**0.5) / 225],
[(16 - 6**0.5) / 36, (16 + 6**0.5) / 36, 1/9],
],
[
[
(16 - 6**0.5) / 36,
(16 + 6**0.5) / 36,
1/9],
],
[
(4 - 6**0.5) / 10,
(4 + 6**0.5) / 10,
1.]
),
"radau_iia_2s": (
[
[5/12, -1/12],
[3/4, 1/4],
],
[
[3/4, 1/4],
],
[1/3, 1]
),
"radau_ia_3s": (
[
[1/9, (-1-6**0.5)/18, (-1+6**0.5)/18],
[1/9, 11/45 + 7*6**0.5/360, 11/45-43*6**0.5/360],
[1/9, 11/45-43*6**0.5/360, 11/45 + 7*6**0.5/360],
],
[
[1/9, 4/9 + 6**0.5/36, 4/9 - 6**0.5/36],
],
[0, 3/5-6**0.5/10, 3/5+6**0.5/10]
),
"radau_ia_2s": (
[
[1/4, -1/4],
[1/4, 5/12],
],
[
[1/4, 3/4],
],
[0, 2/3]
),
"lobatto_iiia_4s": ( #6th order
[
[0, 0, 0, 0],
[(11+5**0.5)/120, (25-5**0.5)/120, (25-13*5**0.5)/120, (-1+5**0.5)/120],
[(11-5**0.5)/120, (25+13*5**0.5)/120, (25+5**0.5)/120, (-1-5**0.5)/120],
[1/12, 5/12, 5/12, 1/12],
],
[
[1/12, 5/12, 5/12, 1/12],
],
[0, (5-5**0.5)/10, (5+5**0.5)/10, 1]
),
"lobatto_iiib_4s": ( #6th order
[
[1/12, (-1-5**0.5)/24, (-1+5**0.5)/24, 0],
[1/12, (25+5**0.5)/120, (25-13*5**0.5)/120, 0],
[1/12, (25+13*5**0.5)/120, (25-5**0.5)/120, 0],
[1/12, (11-5**0.5)/24, (11+5**0.5)/24, 0],
],
[
[1/12, 5/12, 5/12, 1/12],
],
[0, (5-5**0.5)/10, (5+5**0.5)/10, 1]
),
"lobatto_iiic_4s": ( #6th order
[
[1/12, (-5**0.5)/12, (5**0.5)/12, -1/12],
[1/12, 1/4, (10-7*5**0.5)/60, (5**0.5)/60],
[1/12, (10+7*5**0.5)/60, 1/4, (-5**0.5)/60],
[1/12, 5/12, 5/12, 1/12],
],
[
[1/12, 5/12, 5/12, 1/12],
],
[0, (5-5**0.5)/10, (5+5**0.5)/10, 1]
),
"lobatto_iiia_3s": (
[
[0, 0, 0],
[5/24, 1/3, -1/24],
[1/6, 2/3, 1/6],
],
[
[1/6, 2/3, 1/6],
],
[0, 1/2, 1]
),
"lobatto_iiia_2s": (
[
[0, 0],
[1/2, 1/2],
],
[
[1/2, 1/2],
],
[0, 1]
),
"lobatto_iiib_3s": (
[
[1/6, -1/6, 0],
[1/6, 1/3, 0],
[1/6, 5/6, 0],
],
[
[1/6, 2/3, 1/6],
],
[0, 1/2, 1]
),
"lobatto_iiib_2s": (
[
[1/2, 0],
[1/2, 0],
],
[
[1/2, 1/2],
],
[0, 1]
),
"lobatto_iiic_3s": (
[
[1/6, -1/3, 1/6],
[1/6, 5/12, -1/12],
[1/6, 2/3, 1/6],
],
[
[1/6, 2/3, 1/6],
],
[0, 1/2, 1]
),
"lobatto_iiic_2s": (
[
[1/2, -1/2],
[1/2, 1/2],
],
[
[1/2, 1/2],
],
[0, 1]
),
"lobatto_iiic_star_3s": (
[
[0, 0, 0],
[1/4, 1/4, 0],
[0, 1, 0],
],
[
[1/6, 2/3, 1/6],
],
[0, 1/2, 1]
),
"lobatto_iiic_star_2s": (
[
[0, 0],
[1, 0],
],
[
[1/2, 1/2],
],
[0, 1]
),
"lobatto_iiid_3s": (
[
[1/6, 0, -1/6],
[1/12, 5/12, 0],
[1/2, 1/3, 1/6],
],
[
[1/6, 2/3, 1/6],
],
[0, 1/2, 1]
),
"lobatto_iiid_2s": (
[
[1/2, 1/2],
[-1/2, 1/2],
],
[
[1/2, 1/2],
],
[0, 1]
),
"kraaijevanger_spijker_2s": ( #overshoots step
[
[1/2, 0],
[-1/2, 2],
],
[
[-1/2, 3/2],
],
[1/2, 3/2]
),
"qin_zhang_2s": (
[
[1/4, 0],
[1/2, 1/4],
],
[
[1/2, 1/2],
],
[1/4, 3/4]
),
"pareschi_russo_2s": (
[
[(1-2**0.5/2), 0],
[1-2*(1-2**0.5/2), (1-2**0.5/2)],
],
[
[1/2, 1/2],
],
[(1-2**0.5/2), 1-(1-2**0.5/2)]
),
"pareschi_russo_alt_2s": (
[
[(1-2**0.5/2), 0],
[1-(1-2**0.5/2), (1-2**0.5/2)],
],
[
[1-(1-2**0.5/2), (1-2**0.5/2)],
],
[(1-2**0.5/2), 1]
),
"crouzeix_3s_alt": ( # Crouzeix & Raviart 1980; A-stable; pg 100 in Solving Ordinary Differential Equations II
[
[gamma_crouzeix, 0, 0],
[1/2 - gamma_crouzeix, gamma_crouzeix, 0],
[2*gamma_crouzeix, 1-4*gamma_crouzeix, gamma_crouzeix],
],
[
[delta_crouzeix, 1-2*delta_crouzeix, delta_crouzeix],
],
[gamma_crouzeix, 1/2, 1-gamma_crouzeix],
),
"crouzeix_3s": (
[
[(1+alpha_crouzeix)/2, 0, 0],
[-alpha_crouzeix/2, (1+alpha_crouzeix)/2, 0],
[1+alpha_crouzeix, -(1+2*alpha_crouzeix), (1+alpha_crouzeix)/2],
],
[
[1/(6*alpha_crouzeix**2), 1-(1/(3*alpha_crouzeix**2)), 1/(6*alpha_crouzeix**2)],
],
[(1+alpha_crouzeix)/2, 1/2, (1-alpha_crouzeix)/2],
),
"crouzeix_2s": (
[
[1/2 + 3**0.5 / 6, 0],
[-(3**0.5 / 3), 1/2 + 3**0.5 / 6]
],
[
[1/2, 1/2],
],
[1/2 + 3**0.5 / 6, 1/2 - 3**0.5 / 6],
),
"verner_13s": ( #verner9. some values are missing, need to revise
[
[],
],
[
[],
],
[
0.03462,
0.09702435063878045,
0.14553652595817068,
0.561,
0.22900791159048503,
0.544992088409515,
0.645,
0.48375,
0.06757,
0.25,
0.6590650618730999,
0.8206,
0.9012,
]
),
"verner_robust_16s": (
[
[],
[0.04],
[-0.01988527319182291, 0.11637263332969652],
[0.0361827600517026, 0, 0.10854828015510781],
[2.272114264290177, 0, -8.526886447976398, 6.830772183686221],
[0.050943855353893744, 0, 0, 0.1755865049809071, 0.007022961270757467],
[0.1424783668683285, 0, 0, -0.3541799434668684, 0.07595315450295101, 0.6765157656337123],
[0.07111111111111111, 0, 0, 0, 0, 0.3279909287605898, 0.24089796012829906],
[0.07125, 0, 0, 0, 0, 0.32688424515752457, 0.11561575484247544, -0.03375],
[0.0482267732246581, 0, 0, 0, 0, 0.039485599804954, 0.10588511619346581, -0.021520063204743093, -0.10453742601833482],
[-0.026091134357549235, 0, 0, 0, 0, 0.03333333333333333, -0.1652504006638105, 0.03434664118368617, 0.1595758283215209, 0.21408573218281934],
[-0.03628423396255658, 0, 0, 0, 0, -1.0961675974272087, 0.1826035504321331, 0.07082254444170683, -0.02313647018482431, 0.2711204726320933, 1.3081337494229808],
[-0.5074635056416975, 0, 0, 0, 0, -6.631342198657237, -0.2527480100908801, -0.49526123800360955, 0.2932525545253887, 1.440108693768281, 6.237934498647056, 0.7270192054526988],
[0.6130118256955932, 0, 0, 0, 0, 9.088803891640463, -0.40737881562934486, 1.7907333894903747, 0.714927166761755, -1.4385808578417227, -8.26332931206474, -1.537570570808865, 0.34538328275648716],
[-1.2116979103438739, 0, 0, 0, 0, -19.055818715595954, 1.263060675389875, -6.913916969178458, -0.6764622665094981, 3.367860445026608, 18.00675164312591, 6.83882892679428, -1.0315164519219504, 0.4129106232130623],
[2.1573890074940536, 0, 0, 0, 0, 23.807122198095804, 0.8862779249216555, 13.139130397598764, -2.604415709287715, -5.193859949783872, -20.412340711541507, -12.300856252505723, 1.5215530950085394],
],
[
0.014588852784055396, 0, 0, 0, 0, 0, 0, 0.0020241978878893325, 0.21780470845697167,
0.12748953408543898, 0.2244617745463132, 0.1787254491259903, 0.07594344758096556,
0.12948458791975614, 0.029477447612619417, 0
],
[
0, 0.04, 0.09648736013787361, 0.1447310402068104, 0.576, 0.2272326564618766,
0.5407673435381234, 0.64, 0.48, 0.06754, 0.25, 0.6770920153543243, 0.8115,
0.906, 1, 1
],
),
"dormand-prince_13s": ( #non-monotonic
[
[],
[1/18],
[1/48, 1/16],
[1/32, 0, 3/32],
[5/16, 0, -75/64, 75/64],
[3/80, 0, 0, 3/16, 3/20],
[29443841/614563906, 0, 0, 77736538/692538347, -28693883/1125000000, 23124283/1800000000],
[16016141/946692911, 0, 0, 61564180/158732637, 22789713/633445777, 545815736/2771057229, -180193667/1043307555],
[39632708/573591083, 0, 0, -433636366/683701615, -421739975/2616292301, 100302831/723423059, 790204164/839813087, 800635310/3783071287],
[246121993/1340847787, 0, 0, -37695042795/15268766246, -309121744/1061227803, -12992083/490766935, 6005943493/2108947869, 393006217/1396673457, 123872331/1001029789],
[-1028468189/846180014, 0, 0, 8478235783/508512852, 1311729495/1432422823, -10304129995/1701304382, -48777925059/3047939560, 15336726248/1032824649, -45442868181/3398467696, 3065993473/597172653],
[185892177/718116043, 0, 0, -3185094517/667107341, -477755414/1098053517, -703635378/230739211, 5731566787/1027545527, 5232866602/850066563, -4093664535/808688257, 3962137247/1805957418, 65686358/487910083],
[403863854/491063109, 0, 0, -5068492393/434740067, -411421997/543043805, 652783627/914296604, 11173962825/925320556, -13158990841/6184727034, 3936647629/1978049680, -160528059/685178525, 248638103/1413531060],
],
[
[14005451/335480064, 0, 0, 0, 0, -59238493/1068277825, 181606767/758867731, 561292985/797845732, -1041891430/1371343529, 760417239/1151165299, 118820643/751138087, -528747749/2220607170, 1/4],
],
[0, 1/18, 1/12, 1/8, 5/16, 3/8, 59/400, 93/200, 5490023248 / 9719169821, 13/20, 1201146811 / 1299019798, 1, 1],
),
"dormand-prince_6s": (
[
[],
[1/5],
[3/40, 9/40],
[44/45, -56/15, 32/9],
[19372/6561, -25360/2187, 64448/6561, -212/729],
[9017/3168, -355/33, 46732/5247, 49/176, -5103/18656],
],
[
[35/384, 0, 500/1113, 125/192, -2187/6784, 11/84],
],
[0, 1/5, 3/10, 4/5, 8/9, 1],
),
"bogacki-shampine_7s": ( #5th order
[
[],
[1/6],
[2/27, 4/27],
[183/1372, -162/343, 1053/1372],
[68/297, -4/11, 42/143, 1960/3861],
[597/22528, 81/352, 63099/585728, 58653/366080, 4617/20480],
[174197/959244, -30942/79937, 8152137/19744439, 666106/1039181, -29421/29068, 482048/414219],
],
[
[587/8064, 0, 4440339/15491840, 24353/124800, 387/44800, 2152/5985, 7267/94080],
],
[0, 1/6, 2/9, 3/7, 2/3, 3/4, 1]
),
"bogacki-shampine_4s": ( #5th order
[
[],
[1/2],
[0, 3/4],
[2/9, 1/3, 4/9],
],
[
[2/9, 1/3, 4/9, 0],
],
[0, 1/2, 3/4, 1]
),
"tsi_7s": ( #5th order
[
[],
[0.161],
[-0.008480655492356989, 0.335480655492357],
[2.8971530571054935, -6.359448489975075, 4.3622954328695815],
[5.325864828439257, -11.748883564062828, 7.4955393428898365, -0.09249506636175525],
[5.86145544294642, -12.92096931784711, 8.159367898576159, -0.071584973281401, -0.02826905039406838],
[0.09646076681806523, 0.01, 0.4798896504144996, 1.379008574103742, -3.290069515436081, 2.324710524099774],
],
[
[0.09646076681806523, 0.01, 0.4798896504144996, 1.379008574103742, -3.290069515436081, 2.324710524099774, 0.0],
],
[0.0, 0.161, 0.327, 0.9, 0.9800255409045097, 1.0, 1.0],
),
"rk6_7s": ( #non-monotonic #5th order
[
[],
[1/3],
[0, 2/3],
[1/12, 1/3, -1/12],
[-1/16, 9/8, -3/16, -3/8],
[0, 9/8, -3/8, -3/4, 1/2],
[9/44, -9/11, 63/44, 18/11, 0, -16/11],
],
[
[11/120, 0, 27/40, 27/40, -4/15, -4/15, 11/120],
],
[0, 1/3, 2/3, 1/3, 1/2, 1/2, 1],
),
"rk5_7s": ( #5th order
[
[],
[1/5],
[3/40, 9/40],
[44/45, -56/15, 32/9],
[19372/6561, -25360/2187, 64448/6561, 212/729], #flipped 212 sign
[-9017/3168, -355/33, 46732/5247, 49/176, -5103/18656],
[35/384, 0, 500/1113, 125/192, -2187/6784, 11/84],
],
[
[5179/57600, 0, 7571/16695, 393/640, -92097/339200, 187/2100, 1/40],
],
[0, 1/5, 3/10, 4/5, 8/9, 1, 1],
),
"ssprk4_4s": ( #non-monotonic #https://link.springer.com/article/10.1007/s41980-022-00731-x
[
[],
[1/2],
[1/2, 1/2],
[1/6, 1/6, 1/6],
],
[
[1/6, 1/6, 1/6, 1/2],
],
[0, 1/2, 1, 1/2],
),
"rk4_4s": (
[
[],
[1/2],
[0, 1/2],
[0, 0, 1],
],
[
[1/6, 1/3, 1/3, 1/6],
],
[0, 1/2, 1/2, 1],
),
"rk38_4s": (
[
[],
[1/3],
[-1/3, 1],
[1, -1, 1],
],
[
[1/8, 3/8, 3/8, 1/8],
],
[0, 1/3, 2/3, 1],
),
"ralston_4s": (
[
[],
[2/5],
[(-2889+1428 * 5**0.5)/1024, (3785-1620 * 5**0.5)/1024],
[(-3365+2094 * 5**0.5)/6040, (-975-3046 * 5**0.5)/2552, (467040+203968*5**0.5)/240845],
],
[
[(263+24*5**0.5)/1812, (125-1000*5**0.5)/3828, (3426304+1661952*5**0.5)/5924787, (30-4*5**0.5)/123],
],
[0, 2/5, (14-3 * 5**0.5)/16, 1],
),
"heun_3s": (
[
[],
[1/3],
[0, 2/3],
],
[
[1/4, 0, 3/4],
],
[0, 1/3, 2/3],
),
"kutta_3s": (
[
[],
[1/2],
[-1, 2],
],
[
[1/6, 2/3, 1/6],
],
[0, 1/2, 1],
),
"ralston_3s": (
[
[],
[1/2],
[0, 3/4],
],
[
[2/9, 1/3, 4/9],
],
[0, 1/2, 3/4],
),
"houwen-wray_3s": (
[
[],
[8/15],
[1/4, 5/12],
],
[
[1/4, 0, 3/4],
],
[0, 8/15, 2/3],
),
"ssprk3_3s": ( #non-monotonic
[
[],
[1],
[1/4, 1/4],
],
[
[1/6, 1/6, 2/3],
],
[0, 1, 1/2],
),
"midpoint_2s": (
[
[],
[1/2],
],
[
[0, 1],
],
[0, 1/2],
),
"heun_2s": (
[
[],
[1],
],
[
[1/2, 1/2],
],
[0, 1],
),
"ralston_2s": (
[
[],
[2/3],
],
[
[1/4, 3/4],
],
[0, 2/3],
),
"euler": (
[
[],
],
[
[1],
],
[0],
),
}
def get_rk_methods_beta(rk_type : str,
h : Tensor,
c1 : float = 0.0,
c2 : float = 0.5,
c3 : float = 1.0,
h_prev : Optional[Tensor] = None,
step : int = 0,
sigmas : Optional[Tensor] = None,
sigma : Optional[Tensor] = None,
sigma_next : Optional[Tensor] = None,
sigma_down : Optional[Tensor] = None,
extra_options : Optional[str] = None
):
FSAL = False
multistep_stages = 0
hybrid_stages = 0
u = None
v = None
EO = ExtraOptions(extra_options)
use_analytic_solution = not EO("disable_analytic_solution")
multistep_initial_sampler = EO("multistep_initial_sampler", "", debugMode=1)
multistep_fallback_sampler = EO("multistep_fallback_sampler", "")
multistep_extra_initial_steps = EO("multistep_extra_initial_steps", 1)
#if RK_Method_Beta.is_exponential(rk_type):
if rk_type.startswith(("res", "dpmpp", "ddim", "pec", "etdrk", "lawson")):
h_no_eta = -torch.log(sigma_next/sigma)
h_prev1_no_eta = -torch.log(sigmas[step]/sigmas[step-1]) if step >= 1 else None
h_prev2_no_eta = -torch.log(sigmas[step]/sigmas[step-2]) if step >= 2 else None
h_prev3_no_eta = -torch.log(sigmas[step]/sigmas[step-3]) if step >= 3 else None
h_prev4_no_eta = -torch.log(sigmas[step]/sigmas[step-4]) if step >= 4 else None
else:
h_no_eta = sigma_next - sigma
h_prev1_no_eta = sigmas[step] - sigmas[step-1] if step >= 1 else None
h_prev2_no_eta = sigmas[step] - sigmas[step-2] if step >= 2 else None
h_prev3_no_eta = sigmas[step] - sigmas[step-3] if step >= 3 else None
h_prev4_no_eta = sigmas[step] - sigmas[step-4] if step >= 4 else None
if type(c1) == torch.Tensor:
c1 = c1.item()
if type(c2) == torch.Tensor:
c2 = c2.item()
if type(c3) == torch.Tensor:
c3 = c3.item()
if c1 == -1:
c1 = random.uniform(0, 1)
if c2 == -1:
c2 = random.uniform(0, 1)
if c3 == -1:
c3 = random.uniform(0, 1)
if rk_type[:4] == "deis":
order = int(rk_type[-2])
if step < order + multistep_extra_initial_steps:
if order == 4:
#rk_type = "res_4s_strehmel_weiner"
rk_type = "ralston_4s"
rk_type = multistep_initial_sampler if multistep_initial_sampler else rk_type
order = 3
elif order == 3:
#rk_type = "res_3s"
rk_type = "ralston_3s"
rk_type = multistep_initial_sampler if multistep_initial_sampler else rk_type
elif order == 2:
#rk_type = "res_2s"
rk_type = "ralston_2s"
rk_type = multistep_initial_sampler if multistep_initial_sampler else rk_type
else:
rk_type = "deis"
multistep_stages = order-1
if rk_type[-2:] == "2m": #multistep method
rk_type = rk_type[:-2] + "2s"
#if h_prev is not None and step >= 1:
if h_no_eta < 1.0:
if step >= 1 + multistep_extra_initial_steps:
multistep_stages = 1
c2 = (-h_prev1_no_eta / h_no_eta).item()
else:
rk_type = multistep_initial_sampler if multistep_initial_sampler else rk_type
if rk_type.startswith("abnorsett"):
rk_type = "res_2s"
rk_type = multistep_initial_sampler if multistep_initial_sampler else rk_type
else:
#rk_type = "res_2s"
rk_type = "euler" if sigma < 0.1 else "res_2s"
rk_type = multistep_fallback_sampler if multistep_fallback_sampler else rk_type
if rk_type[-2:] == "3m": #multistep method
rk_type = rk_type[:-2] + "3s"
#if h_prev2 is not None and step >= 2:
if h_no_eta < 1.0:
if step >= 2 + multistep_extra_initial_steps:
multistep_stages = 2
c2 = (-h_prev1_no_eta / h_no_eta).item()
c3 = (-h_prev2_no_eta / h_no_eta).item()
else:
rk_type = multistep_initial_sampler if multistep_initial_sampler else rk_type
if rk_type.startswith("abnorsett"):
rk_type = "res_3s"
rk_type = multistep_initial_sampler if multistep_initial_sampler else rk_type
else:
#rk_type = "res_3s"
rk_type = "euler" if sigma < 0.1 else "res_3s"
rk_type = multistep_fallback_sampler if multistep_fallback_sampler else rk_type
if rk_type[-2:] == "4m": #multistep method
rk_type = rk_type[:-2] + "4s"
#if h_prev2 is not None and step >= 2:
if h_no_eta < 1.0:
if step >= 3 + multistep_extra_initial_steps:
multistep_stages = 3
c2 = (-h_prev1_no_eta / h_no_eta).item()
c3 = (-h_prev2_no_eta / h_no_eta).item()
# WOULD NEED A C4 (POW) TO IMPLEMENT RES_4M IF IT EXISTED
else:
rk_type = multistep_initial_sampler if multistep_initial_sampler else rk_type
if rk_type == "res_4s":
rk_type = "res_4s_strehmel_weiner"
rk_type = multistep_initial_sampler if multistep_initial_sampler else rk_type
if rk_type.startswith("abnorsett"):
rk_type = "res_4s_strehmel_weiner"
rk_type = multistep_initial_sampler if multistep_initial_sampler else rk_type
else:
#rk_type = "res_4s_strehmel_weiner"
rk_type = "euler" if sigma < 0.1 else "res_4s_strehmel_weiner"
rk_type = multistep_fallback_sampler if multistep_fallback_sampler else rk_type
if rk_type[-3] == "h" and rk_type[-1] == "s": #hybrid method
if step < int(rk_type[-4]) + multistep_extra_initial_steps:
rk_type = "res_" + rk_type[-2:]
rk_type = multistep_initial_sampler if multistep_initial_sampler else rk_type
else:
hybrid_stages = int(rk_type[-4]) #+1 adjustment needed?
if rk_type == "res_4s":
rk_type = "res_4s_strehmel_weiner"
rk_type = multistep_initial_sampler if multistep_initial_sampler else rk_type
if rk_type == "res_1s":
rk_type = "res_2s"
rk_type = multistep_initial_sampler if multistep_initial_sampler else rk_type
if rk_type in rk_coeff:
a, b, ci = copy.deepcopy(rk_coeff[rk_type])
a = [row + [0] * (len(ci) - len(row)) for row in a]
match rk_type:
case "deis":
coeff_list = get_deis_coeff_list(sigmas, multistep_stages+1, deis_mode="rhoab")
coeff_list = [[elem / h for elem in inner_list] for inner_list in coeff_list]
if multistep_stages == 1:
b1, b2 = coeff_list[step]
a = [
[0, 0],
[0, 0],
]
b = [
[b1, b2],
]
ci = [0, 0]
if multistep_stages == 2:
b1, b2, b3 = coeff_list[step]
a = [
[0, 0, 0],
[0, 0, 0],
[0, 0, 0],
]
b = [
[b1, b2, b3],
]
ci = [0, 0, 0]
if multistep_stages == 3:
b1, b2, b3, b4 = coeff_list[step]
a = [
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
]
b = [
[b1, b2, b3, b4],
]
ci = [0, 0, 0, 0]
if multistep_stages > 0:
for i in range(len(b[0])):
b[0][i] *= ((sigma_down - sigma) / (sigma_next - sigma))
case "dormand-prince_6s":
FSAL = True
case "ddim":
b1 = phi(1, -h)
a = [
[0],
]
b = [
[b1],
]
ci = [0]
case "res_2s":
c2 = float(get_extra_options_kv("c2", str(c2), extra_options))
ci = [0, c2]
φ = Phi(h, ci, use_analytic_solution)
a2_1 = c2 * φ(1,2)
b2 = φ(2)/c2
b1 = φ(1) - b2
a = [
[0,0],
[a2_1, 0],
]
b = [
[b1, b2],
]
case "res_2s_stable":
c2 = 1.0 #float(get_extra_options_kv("c2", str(c2), extra_options))
ci = [0, c2]
φ = Phi(h, ci, use_analytic_solution)
a2_1 = c2 * φ(1,2)
b2 = φ(2)/c2
b1 = φ(1) - b2
a = [
[0,0],
[a2_1, 0],
]
b = [
[b1, b2],
]
case "res_2s_rkmk2e":
ci = [0, 1]
φ = Phi(h, ci, use_analytic_solution)
b2 = φ(2)
a = [
[0,0],
[0, 0],
]
b = [
[0, b2],
]
gen_first_col_exp(a, b, ci, φ)
case "abnorsett2_1h2s":
c1, c2 = 0, 1
ci = [c1, c2]
φ = Phi(h, ci, use_analytic_solution)
b1 = φ(1) #+ φ(2)
a = [
[0, 0],
[0, 0],
]
b = [
[0, 0],
]
if extra_options_flag("h_prev_h_h_no_eta", extra_options):
φ1 = Phi(h_prev1_no_eta * h/h_no_eta, ci)
elif extra_options_flag("h_only", extra_options):
φ1 = Phi(h, ci, use_analytic_solution)
else:
φ1 = Phi(h_prev1_no_eta, ci)
u1 = -φ1(2)
v1 = -φ1(2)
u = [
[0, 0],
[u1, 0],
]
v = [
[v1, 0],
]
gen_first_col_exp_uv(a, b, ci, u, v, φ)
case "abnorsett_2m":
c1, c2 = 0, 1
ci = [c1, c2]
φ = Phi(h, ci, use_analytic_solution)
a = [
[0, 0],
[0, 0],
]
b = [
[0, -φ(2)],
]
gen_first_col_exp(a, b, ci, φ)
case "abnorsett_3m":
c1, c2, c3 = 0, 0, 1
ci = [c1, c2, c3]
φ = Phi(h, ci, use_analytic_solution)
a = [
[0, 0, 0],
[0, 0, 0],
[0, 0, 0],
]
b = [
[0, -2*φ(2) - 2*φ(3), (1/2)*φ(2) + φ(3)],
]
gen_first_col_exp(a, b, ci, φ)
case "abnorsett_4m":
c1, c2, c3, c4 = 0, 0, 0, 1
ci = [c1, c2, c3, c4]
φ = Phi(h, ci, use_analytic_solution)
a = [
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
]
b = [
[0,
-3*φ(2) - 5*φ(3) - 3*φ(4),
(3/2)*φ(2) + 4*φ(3) + 3*φ(4),
(-1/3)*φ(2) - φ(3) - φ(4),
],
]
gen_first_col_exp(a, b, ci, φ)
case "abnorsett3_2h2s":
c1,c2 = 0,1
ci = [c1, c2]
φ = Phi(h, ci, use_analytic_solution)
b2 = 0
a = [
[0, 0],
[0, 0],
]
b = [
[0, 0],
]
if extra_options_flag("h_prev_h_h_no_eta", extra_options):
φ1 = Phi(h_prev1_no_eta * h/h_no_eta, ci)
φ2 = Phi(h_prev2_no_eta * h/h_no_eta, ci)
elif extra_options_flag("h_only", extra_options):
φ1 = Phi(h, ci, use_analytic_solution)
φ2 = Phi(h, ci, use_analytic_solution)
else:
φ1 = Phi(h_prev1_no_eta, ci)
φ2 = Phi(h_prev2_no_eta, ci)
u2_1 = -2*φ1(2) - 2*φ1(3)
u2_2 = (1/2)*φ2(2) + φ2(3)
v1 = u2_1 # -φ1(2) + φ1(3) + 3*φ1(4)
v2 = u2_2 # (1/6)*φ2(2) - φ2(4)
u = [
[ 0, 0],
[u2_1, u2_2],
]
v = [
[v1, v2],
]
gen_first_col_exp_uv(a, b, ci, u, v, φ)
case "pec423_2h2s": #https://ora.ox.ac.uk/objects/uuid:cc001282-4285-4ca2-ad06-31787b540c61/files/m611df1a355ca243beb09824b70e5e774
c1,c2 = 0,1
ci = [c1, c2]
φ = Phi(h, ci, use_analytic_solution)
b2 = (1/3)*φ(2) + φ(3) + φ(4)
a = [
[0, 0],
[0, 0],
]
b = [
[0, b2],
]
if extra_options_flag("h_prev_h_h_no_eta", extra_options):
φ1 = Phi(h_prev1_no_eta * h/h_no_eta, ci)
φ2 = Phi(h_prev2_no_eta * h/h_no_eta, ci)
elif extra_options_flag("h_only", extra_options):
φ1 = Phi(h, ci, use_analytic_solution)
φ2 = Phi(h, ci, use_analytic_solution)
else:
φ1 = Phi(h_prev1_no_eta, ci)
φ2 = Phi(h_prev2_no_eta, ci)
u2_1 = -2*φ1(2) - 2*φ1(3)
u2_2 = (1/2)*φ2(2) + φ2(3)
v1 = -φ1(2) + φ1(3) + 3*φ1(4)
v2 = (1/6)*φ2(2) - φ2(4)
u = [
[ 0, 0],
[u2_1, u2_2],
]
v = [
[v1, v2],
]
gen_first_col_exp_uv(a, b, ci, u, v, φ)
case "pec433_2h3s": #https://ora.ox.ac.uk/objects/uuid:cc001282-4285-4ca2-ad06-31787b540c61/files/m611df1a355ca243beb09824b70e5e774
c1,c2,c3 = 0, 1, 1
ci = [c1,c2,c3]
φ = Phi(h, ci, use_analytic_solution)
a3_2 = (1/3)*φ(2) + φ(3) + φ(4)
b2 = 0
b3 = (1/3)*φ(2) + φ(3) + φ(4)
a = [
[0, 0, 0],
[0, 0, 0],
[0, a3_2, 0],
]
b = [
[0, b2, b3],
]
if extra_options_flag("h_prev_h_h_no_eta", extra_options):
φ1 = Phi(h_prev1_no_eta * h/h_no_eta, ci)
φ2 = Phi(h_prev2_no_eta * h/h_no_eta, ci)
elif extra_options_flag("h_only", extra_options):
φ1 = Phi(h, ci, use_analytic_solution)
φ2 = Phi(h, ci, use_analytic_solution)
else:
φ1 = Phi(h_prev1_no_eta, ci)
φ2 = Phi(h_prev2_no_eta, ci)
u2_1 = -2*φ1(2) - 2*φ1(3)
u3_1 = -φ1(2) + φ1(3) + 3*φ1(4)
v1 = -φ1(2) + φ1(3) + 3*φ1(4)
u2_2 = (1/2)*φ2(2) + φ2(3)
u3_2 = (1/6)*φ2(2) - φ2(4)
v2 = (1/6)*φ2(2) - φ2(4)
u = [
[ 0, 0, 0],
[u2_1, u2_2, 0],
[u3_1, u3_2, 0],
]
v = [
[v1, v2, 0],
]
gen_first_col_exp_uv(a, b, ci, u, v, φ)
case "res_3s":
c2 = float(get_extra_options_kv("c2", str(c2), extra_options))
c3 = float(get_extra_options_kv("c3", str(c3), extra_options))
ci = [0,c2,c3]
φ = Phi(h, ci, use_analytic_solution)
gamma = calculate_gamma(c2, c3)
a3_2 = gamma * c2 * φ(2,2) + (c3 ** 2 / c2) * φ(2, 3)
b3 = (1 / (gamma * c2 + c3)) * φ(2)
b2 = gamma * b3 #simplified version of: b2 = (gamma / (gamma * c2 + c3)) * phi_2_h
a = [
[0, 0, 0],
[0, 0, 0],
[0, a3_2, 0],
]
b = [
[0, b2, b3],
]
a, b = gen_first_col_exp(a,b,ci,φ)
case "res_3s_non-monotonic":
c2 = float(get_extra_options_kv("c2", "1.0", extra_options))
c3 = float(get_extra_options_kv("c3", "0.5", extra_options))
ci = [0,c2,c3]
φ = Phi(h, ci, use_analytic_solution)
gamma = calculate_gamma(c2, c3)
a3_2 = gamma * c2 * φ(2,2) + (c3 ** 2 / c2) * φ(2, 3)
b3 = (1 / (gamma * c2 + c3)) * φ(2)
b2 = gamma * b3 #simplified version of: b2 = (gamma / (gamma * c2 + c3)) * phi_2_h
a = [
[0, 0, 0],
[0, 0, 0],
[0, a3_2, 0],
]
b = [
[0, b2, b3],
]
a, b = gen_first_col_exp(a,b,ci,φ)
case "res_3s_alt":
c2 = 1/3
c2 = float(get_extra_options_kv("c2", str(c2), extra_options))
c1,c2,c3 = 0, c2, 2/3
ci = [c1,c2,c3]
φ = Phi(h, ci, use_analytic_solution)
a = [
[0, 0, 0],
[0, 0, 0],
[0, (4/(9*c2)) * φ(2,3), 0],
]
b = [
[0, 0, (1/c3)*φ(2)],
]
a, b = gen_first_col_exp(a,b,ci,φ)
case "res_3s_strehmel_weiner": #
c2 = 1/2
c2 = float(get_extra_options_kv("c2", str(c2), extra_options))
ci = [0,c2,1]
φ = Phi(h, ci, use_analytic_solution)
a = [
[0, 0, 0],
[0, 0, 0],
[0, (1/c2) * φ(2,3), 0],
]
b = [
[0, 0, φ(2)],
]
a, b = gen_first_col_exp(a,b,ci,φ)
case "res_3s_cox_matthews": # Cox & Matthews; known as ETD3RK
c2 = 1/2 # must be 1/2
ci = [0,c2,1]
φ = Phi(h, ci, use_analytic_solution)
a = [
[0, 0, 0],
[0, 0, 0],
[0, (1/c2) * φ(1,3), 0], # paper said 2 * φ(1,3), but this is the same and more consistent with res_3s_strehmel_weiner
]
b = [
[0,
-8*φ(3) + 4*φ(2),
4*φ(3) - φ(2)],
]
a, b = gen_first_col_exp(a,b,ci,φ)
case "res_3s_lie": # Lie; known as ETD2CF3
c1,c2,c3 = 0, 1/3, 2/3
ci = [c1,c2,c3]
φ = Phi(h, ci, use_analytic_solution)
a = [
[0, 0, 0],
[0, 0, 0],
[0, (4/3)*φ(2,3), 0], # paper said 2 * φ(1,3), but this is the same and more consistent with res_3s_strehmel_weiner
]
b = [
[0,
6*φ(2) - 18*φ(3),
(-3/2)*φ(2) + 9*φ(3)],
]
a, b = gen_first_col_exp(a,b,ci,φ)
case "res_3s_sunstar": # https://arxiv.org/pdf/2410.00498 pg 5 (tableau 2.7)
c1,c2,c3 = 0, 1/3, 2/3
ci = [c1,c2,c3]
φ = Phi(h, ci, use_analytic_solution)
a = [
[0, 0, 0],
[0, 0, 0],
[0, (8/9)*φ(2,3), 0], # paper said 2 * φ(1,3), but this is the same and more consistent with res_3s_strehmel_weiner
]
b = [
[0,
0,
(3/2)*φ(2)],
]
a, b = gen_first_col_exp(a,b,ci,φ)
case "res_4s_cox_matthews": # weak 4th order, Cox & Matthews; unresolved issue, see below
c1,c2,c3,c4 = 0, 1/2, 1/2, 1
ci = [c1,c2,c3,c4]
φ = Phi(h, ci, use_analytic_solution)
a2_1 = c2 * φ(1,2)
a3_2 = c3 * φ(1,3)
a4_1 = (1/2) * φ(1,3) * (φ(0,3) - 1) # φ(0,3) == torch.exp(-h*c3)
a4_3 = φ(1,3)
b1 = φ(1) - 3*φ(2) + 4*φ(3)
b2 = 2*φ(2) - 4*φ(3)
b3 = 2*φ(2) - 4*φ(3)
b4 = 4*φ(3) - φ(2)
a = [
[0, 0,0,0],
[a2_1, 0,0,0],
[0, a3_2,0,0],
[a4_1, 0, a4_3,0],
]
b = [
[b1, b2, b3, b4],
]
case "res_4s_cfree4": # weak 4th order, Cox & Matthews; unresolved issue, see below
c1,c2,c3,c4 = 0, 1/2, 1/2, 1
ci = [c1,c2,c3,c4]
φ = Phi(h, ci, use_analytic_solution)
a2_1 = c2 * φ(1,2)
a3_2 = c3 * φ(1,2)
a4_1 = (1/2) * φ(1,2) * (φ(0,2) - 1) # φ(0,3) == torch.exp(-h*c3)
a4_3 = φ(1,2)
b1 = (1/2)*φ(1) - (1/3)*φ(1,2)
b2 = (1/3)*φ(1)
b3 = (1/3)*φ(1)
b4 = -(1/6)*φ(1) + (1/3)*φ(1,2)
a = [
[0, 0,0,0],
[a2_1, 0,0,0],
[0, a3_2,0,0],
[a4_1, 0, a4_3,0],
]
b = [
[b1, b2, b3, b4],
]
case "res_4s_friedli": # https://ora.ox.ac.uk/objects/uuid:cc001282-4285-4ca2-ad06-31787b540c61/files/m611df1a355ca243beb09824b70e5e774
c1,c2,c3,c4 = 0, 1/2, 1/2, 1
ci = [c1,c2,c3,c4]
φ = Phi(h, ci, use_analytic_solution)
a3_2 = 2*φ(2,2)
a4_2 = -(26/25)*φ(1) + (2/25)*φ(2)
a4_3 = (26/25)*φ(1) + (48/25)*φ(2)
b2 = 0
b3 = 4*φ(2) - 8*φ(3)
b4 = -φ(2) + 4*φ(3)
a = [
[0, 0,0,0],
[0, 0,0,0],
[0, a3_2,0,0],
[0, a4_2, a4_3,0],
]
b = [
[0, b2, b3, b4],
]
a, b = gen_first_col_exp(a,b,ci,φ)
case "res_4s_munthe-kaas": # unstable RKMK4t
c1,c2,c3,c4 = 0, 1/2, 1/2, 1
ci = [c1,c2,c3,c4]
φ = Phi(h, ci, use_analytic_solution)
a = [
[0, 0, 0, 0],
[c2*φ(1,2), 0, 0, 0],
[(h/8)*φ(1,2), (1/2)*(1-h/4)*φ(1,2), 0, 0],
[0, 0, φ(1), 0],
]
b = [
[
(1/6)*φ(1)*(1+h/2),
(1/3)*φ(1),
(1/3)*φ(1),
(1/6)*φ(1)*(1-h/2)
],
]
case "res_4s_krogstad": # weak 4th order, Krogstad
c1,c2,c3,c4 = 0, 1/2, 1/2, 1
ci = [c1,c2,c3,c4]
φ = Phi(h, ci, use_analytic_solution)
a = [
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, φ(2,3), 0, 0],
[0, 0, 2*φ(2,4), 0],
]
b = [
[
0,
2*φ(2) - 4*φ(3),
2*φ(2) - 4*φ(3),
-φ(2) + 4*φ(3)
],
]
#a = [row + [0] * (len(ci) - len(row)) for row in a]
a, b = gen_first_col_exp(a,b,ci,φ)
case "res_4s_krogstad_alt": # weak 4th order, Krogstad https://ora.ox.ac.uk/objects/uuid:cc001282-4285-4ca2-ad06-31787b540c61/files/m611df1a355ca243beb09824b70e5e774
c1,c2,c3,c4 = 0, 1/2, 1/2, 1
ci = [c1,c2,c3,c4]
φ = Phi(h, ci, use_analytic_solution)
a = [
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 4*φ(2,2), 0, 0],
[0, 0, 2*φ(2), 0],
]
b = [
[
0,
2*φ(2) - 4*φ(3),
2*φ(2) - 4*φ(3),
-φ(2) + 4*φ(3)
],
]
#a = [row + [0] * (len(ci) - len(row)) for row in a]
a, b = gen_first_col_exp(a,b,ci,φ)
case "res_4s_minchev": # https://ora.ox.ac.uk/objects/uuid:cc001282-4285-4ca2-ad06-31787b540c61/files/m611df1a355ca243beb09824b70e5e774
c1,c2,c3,c4 = 0, 1/2, 1/2, 1
ci = [c1,c2,c3,c4]
φ = Phi(h, ci, use_analytic_solution)
a3_2 = (4/25)*φ(1,2) + (24/25)*φ(2,2)
a4_2 = (21/5)*φ(2) - (108/5)*φ(3)
a4_3 = (1/20)*φ(1) - (33/10)*φ(2) + (123/5)*φ(3)
b2 = -(1/10)*φ(1) + (1/5)*φ(2) - 4*φ(3) + 12*φ(4)
b3 = (1/30)*φ(1) + (23/5)*φ(2) - 8*φ(3) - 4*φ(4)
b4 = (1/30)*φ(1) - (7/5)*φ(2) + 6*φ(3) - 4*φ(4)
a = [
[0, 0,0,0],
[0, 0,0,0],
[0, a3_2,0,0],
[0, 0, a4_3,0],
]
b = [
[0, b2, b3, b4],
]
a, b = gen_first_col_exp(a,b,ci,φ)
case "res_4s_strehmel_weiner": # weak 4th order, Strehmel & Weiner
c1,c2,c3,c4 = 0, 1/2, 1/2, 1
ci = [c1,c2,c3,c4]
φ = Phi(h, ci, use_analytic_solution)
a = [
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, c3*φ(2,3), 0, 0],
[0, -2*φ(2,4), 4*φ(2,4), 0],
]
b = [
[
0,
0,
4*φ(2) - 8*φ(3),
-φ(2) + 4*φ(3)
],
]
a, b = gen_first_col_exp(a,b,ci,φ)
case "res_4s_strehmel_weiner_alt": # weak 4th order, Strehmel & Weiner https://ora.ox.ac.uk/objects/uuid:cc001282-4285-4ca2-ad06-31787b540c61/files/m611df1a355ca243beb09824b70e5e774
c1,c2,c3,c4 = 0, 1/2, 1/2, 1
ci = [c1,c2,c3,c4]
φ = Phi(h, ci, use_analytic_solution)
a = [
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 2*φ(2,2), 0, 0],
[0, -2*φ(2), 4*φ(2), 0],
]
b = [
[
0,
0,
4*φ(2) - 8*φ(3),
-φ(2) + 4*φ(3)
],
]
a, b = gen_first_col_exp(a,b,ci,φ)
case "lawson2a_2s": # based on midpoint rule, stiff order 1 https://cds.cern.ch/record/848126/files/cer-002531460.pdf
c1,c2 = 0,1/2
ci = [c1, c2]
φ = Phi(h, ci, use_analytic_solution)
a2_1 = c2 * φ(0,2)
b2 = φ(0,2)
b1 = 0
a = [
[0,0],
[a2_1, 0],
]
b = [
[b1, b2],
]
case "lawson2b_2s": # based on trapezoidal rule, stiff order 1 https://cds.cern.ch/record/848126/files/cer-002531460.pdf
c1,c2 = 0,1
ci = [c1, c2]
φ = Phi(h, ci, use_analytic_solution)
a2_1 = φ(0)
b2 = 1/2
b1 = (1/2)*φ(0)
a = [
[0,0],
[a2_1, 0],
]
b = [
[b1, b2],
]
case "lawson4_4s":
c1,c2,c3,c4 = 0, 1/2, 1/2, 1
ci = [c1,c2,c3,c4]
φ = Phi(h, ci, use_analytic_solution)
a2_1 = c2 * φ(0,2)
a3_2 = 1/2
a4_3 = φ(0,2)
b1 = (1/6) * φ(0)
b2 = (1/3) * φ(0,2)
b3 = (1/3) * φ(0,2)
b4 = 1/6
a = [
[0, 0, 0, 0],
[a2_1, 0, 0, 0],
[0, a3_2, 0, 0],
[0, 0, a4_3, 0],
]
b = [
[b1,b2,b3,b4],
]
case "lawson41-gen_4s": # GenLawson4 https://ora.ox.ac.uk/objects/uuid:cc001282-4285-4ca2-ad06-31787b540c61/files/m611df1a355ca243beb09824b70e5e774
c1,c2,c3,c4 = 0, 1/2, 1/2, 1
ci = [c1,c2,c3,c4]
φ = Phi(h, ci, use_analytic_solution)
a3_2 = 1/2
a4_3 = φ(0,2)
b2 = (1/3) * φ(0,2)
b3 = (1/3) * φ(0,2)
b4 = 1/6
a = [
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, a3_2, 0, 0],
[0, 0, a4_3, 0],
]
b = [
[0,
b2,
b3,
b4,],
]
a, b = gen_first_col_exp(a,b,ci,φ)
case "lawson41-gen-mod_4s": # GenLawson4 https://ora.ox.ac.uk/objects/uuid:cc001282-4285-4ca2-ad06-31787b540c61/files/m611df1a355ca243beb09824b70e5e774
c1,c2,c3,c4 = 0, 1/2, 1/2, 1
ci = [c1,c2,c3,c4]
φ = Phi(h, ci, use_analytic_solution)
a3_2 = 1/2
a4_3 = φ(0,2)
b2 = (1/3) * φ(0,2)
b3 = (1/3) * φ(0,2)
b4 = φ(2) - (1/3)*φ(0,2)
a = [
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, a3_2, 0, 0],
[0, 0, a4_3, 0],
]
b = [
[0,
b2,
b3,
b4,],
]
a, b = gen_first_col_exp(a,b,ci,φ)
case "lawson42-gen-mod_1h4s": # GenLawson4 https://ora.ox.ac.uk/objects/uuid:cc001282-4285-4ca2-ad06-31787b540c61/files/m611df1a355ca243beb09824b70e5e774
c1,c2,c3,c4 = 0, 1/2, 1/2, 1
ci = [c1,c2,c3,c4]
φ = Phi(h, ci, use_analytic_solution)
a3_2 = 1/2
a4_3 = φ(0,2)
b2 = (1/3) * φ(0,2)
b3 = (1/3) * φ(0,2)
b4 = (1/2)*φ(2) + φ(3) - (1/4)*φ(0,2)
a = [
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, a3_2, 0, 0],
[0, 0, a4_3, 0],
]
b = [
[0, b2, b3, b4,],
]
if extra_options_flag("h_prev_h_h_no_eta", extra_options):
φ1 = Phi(h_prev1_no_eta * h/h_no_eta, ci, use_analytic_solution)
elif extra_options_flag("h_only", extra_options):
φ1 = Phi(h, ci, use_analytic_solution)
else:
φ1 = Phi(h_prev1_no_eta, ci, use_analytic_solution)
u2_1 = -φ1(2,2)
u3_1 = -φ1(2,2) + 1/4
u4_1 = -φ1(2) + (1/2)*φ1(0,2)
v1 = -(1/2)*φ1(2) + φ1(3) + (1/12)*φ1(0,2)
u = [
[ 0, 0, 0, 0],
[u2_1, 0, 0, 0],
[u3_1, 0, 0, 0],
[u4_1, 0, 0, 0],
]
v = [
[v1, 0, 0, 0,],
]
a, b = gen_first_col_exp_uv(a,b,ci,u,v,φ)
case "lawson43-gen-mod_2h4s": # GenLawson4 https://ora.ox.ac.uk/objects/uuid:cc001282-4285-4ca2-ad06-31787b540c61/files/m611df1a355ca243beb09824b70e5e774
c1,c2,c3,c4 = 0, 1/2, 1/2, 1
ci = [c1,c2,c3,c4]
φ = Phi(h, ci, use_analytic_solution)
a3_2 = 1/2
a4_3 = φ(0,2)
b3 = b2 = (1/3) * a4_3
b4 = (1/3)*φ(2) + φ(3) + φ(4) - (5/24)*φ(0,2)
a = [
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, a3_2, 0, 0],
[0, 0, a4_3, 0],
]
b = [
[0, b2, b3, b4,],
]
if extra_options_flag("h_prev_h_h_no_eta", extra_options):
φ1 = Phi(h_prev1_no_eta * h/h_no_eta, ci, use_analytic_solution)
φ2 = Phi(h_prev2_no_eta * h/h_no_eta, ci, use_analytic_solution)
elif extra_options_flag("h_only", extra_options):
φ1 = Phi(h, ci, use_analytic_solution)
φ2 = Phi(h, ci, use_analytic_solution)
else:
φ1 = Phi(h_prev1_no_eta, ci, use_analytic_solution)
φ2 = Phi(h_prev2_no_eta, ci, use_analytic_solution)
u2_1 = -2*φ1(2,2) - 2*φ1(3,2)
u3_1 = -2*φ1(2,2) - 2*φ1(3,2) + 5/8
u4_1 = -2*φ1(2) - 2*φ1(3) + (5/4)*φ1(0,2)
v1 = -φ1(2) + φ1(3) + 3*φ1(4) + (5/24)*φ1(0,2)
u2_2 = -(1/2)*φ2(2,2) + φ2(3,2)
u3_2 = (1/2)*φ2(2,2) + φ2(3,2) - 3/16
u4_2 = (1/2)*φ2(2) + φ2(3) - (3/8)*φ2(0,2)
v2 = (1/6)*φ2(2) - φ2(4) - (1/24)*φ2(0,2)
u = [
[ 0, 0, 0, 0],
[u2_1, u2_2, 0, 0],
[u3_1, u3_2, 0, 0],
[u4_1, u4_2, 0, 0],
]
v = [
[v1, v2, 0, 0,],
]
a, b = gen_first_col_exp_uv(a,b,ci,u,v,φ)
case "lawson44-gen-mod_3h4s": # GenLawson4 https://ora.ox.ac.uk/objects/uuid:cc001282-4285-4ca2-ad06-31787b540c61/files/m611df1a355ca243beb09824b70e5e774
c1,c2,c3,c4 = 0, 1/2, 1/2, 1
ci = [c1,c2,c3,c4]
φ = Phi(h, ci, use_analytic_solution)
a3_2 = 1/2
a4_3 = φ(0,2)
b3 = b2 = (1/3) * a4_3
b4 = (1/4)*φ(2) + (11/12)*φ(3) + (3/2)*φ(4) + φ(5) - (35/192)*φ(0,2)
a = [
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, a3_2, 0, 0],
[0, 0, a4_3, 0],
]
b = [
[0, b2, b3, b4,],
]
if extra_options_flag("h_prev_h_h_no_eta", extra_options):
φ1 = Phi(h_prev1_no_eta * h/h_no_eta, ci, use_analytic_solution)
φ2 = Phi(h_prev2_no_eta * h/h_no_eta, ci, use_analytic_solution)
φ3 = Phi(h_prev3_no_eta * h/h_no_eta, ci, use_analytic_solution)
elif extra_options_flag("h_only", extra_options):
φ1 = Phi(h, ci, use_analytic_solution)
φ2 = Phi(h, ci, use_analytic_solution)
φ3 = Phi(h, ci, use_analytic_solution)
else:
φ1 = Phi(h_prev1_no_eta, ci, use_analytic_solution)
φ2 = Phi(h_prev2_no_eta, ci, use_analytic_solution)
φ3 = Phi(h_prev3_no_eta, ci, use_analytic_solution)
u2_1 = -3*φ1(2,2) - 5*φ1(3,2) - 3*φ1(4,2)
u3_1 = u2_1 + 35/32
u4_1 = -3*φ1(2) - 5*φ1(3) - 3*φ1(4) + (35/16)*φ1(0,2)
v1 = -(3/2)*φ1(2) + (1/2)*φ1(3) + 6*φ1(4) + 6*φ1(5) + (35/96)*φ1(0,2)
u2_2 = (3/2)*φ2(2,2) + 4*φ2(3,2) + 3*φ2(4,2)
u3_2 = u2_2 - 21/32
u4_2 = (3/2)*φ2(2) + 4*φ2(3) + 3*φ2(4) - (21/16)*φ2(0,2)
v2 = (1/2)*φ2(2) + (1/3)*φ2(3) - 3*φ2(4) - 4*φ2(5) - (7/48)*φ2(0,2)
u2_3 = (-1/3)*φ3(2,2) - φ3(3,2) - φ3(4,2)
u3_3 = u2_3 + 5/32
u4_3 = -(1/3)*φ3(2) - φ3(3) - φ3(4) + (5/16)*φ3(0,2)
v3 = -(1/12)*φ3(2) - (1/12)*φ3(3) + (1/2)*φ3(4) + φ3(5) + (5/192)*φ3(0,2)
u = [
[ 0, 0, 0, 0],
[u2_1, u2_2, u2_3, 0],
[u3_1, u3_2, u3_3, 0],
[u4_1, u4_2, u4_3, 0],
]
v = [
[v1, v2, v3, 0,],
]
a, b = gen_first_col_exp_uv(a,b,ci,u,v,φ)
case "lawson45-gen-mod_4h4s": # GenLawson4 https://ora.ox.ac.uk/objects/uuid:cc001282-4285-4ca2-ad06-31787b540c61/files/m611df1a355ca243beb09824b70e5e774
c1,c2,c3,c4 = 0, 1/2, 1/2, 1
ci = [c1,c2,c3,c4]
φ = Phi(h, ci, use_analytic_solution)
a3_2 = 1/2
a4_3 = φ(0,2)
b2 = (1/3) * φ(0,2)
b3 = (1/3) * φ(0,2)
b4 = (12/59)*φ(2) + (50/59)*φ(3) + (105/59)*φ(4) + (120/59)*φ(5) - (60/59)*φ(6) - (157/944)*φ(0,2)
a = [
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, a3_2, 0, 0],
[0, 0, a4_3, 0],
]
b = [
[0, b2, b3, b4,],
]
if extra_options_flag("h_prev_h_h_no_eta", extra_options):
φ1 = Phi(h_prev1_no_eta * h/h_no_eta, ci, use_analytic_solution)
φ2 = Phi(h_prev2_no_eta * h/h_no_eta, ci, use_analytic_solution)
φ3 = Phi(h_prev3_no_eta * h/h_no_eta, ci, use_analytic_solution)
φ4 = Phi(h_prev4_no_eta * h/h_no_eta, ci, use_analytic_solution)
elif extra_options_flag("h_only", extra_options):
φ1 = Phi(h, ci, use_analytic_solution)
φ2 = Phi(h, ci, use_analytic_solution)
φ3 = Phi(h, ci, use_analytic_solution)
φ4 = Phi(h, ci, use_analytic_solution)
else:
φ1 = Phi(h_prev1_no_eta, ci, use_analytic_solution)
φ2 = Phi(h_prev2_no_eta, ci, use_analytic_solution)
φ3 = Phi(h_prev3_no_eta, ci, use_analytic_solution)
φ4 = Phi(h_prev4_no_eta, ci, use_analytic_solution)
u2_1 = -4*φ1(2,2) - (26/3)*φ1(3,2) - 9*φ1(4,2) - 4*φ1(5,2)
u3_1 = u2_1 + 105/64
u4_1 = -4*φ1(2) - (26/3)*φ1(3) - 9*φ1(4) - 4*φ1(5) + (105/32)*φ1(0,2)
v1 = -(116/59)*φ1(2) - (34/177)*φ1(3) + (519/59)*φ1(4) + (964/59)*φ1(5) - (600/59)*φ1(6) + (495/944)*φ1(0,2)
u2_2 = 3*φ2(2,2) + (19/2)*φ2(3,2) + 12*φ2(4,2) + 6*φ2(5,2)
u3_2 = u2_2 - 189/128
u4_2 = 3*φ2(2) + (19/2)*φ2(3) + 12*φ2(4) + 6*φ2(5) - (189/64)*φ2(0,2)
v2 = (57/59)*φ2(2) + (121/118)*φ2(3) - (342/59)*φ2(4) - (846/59)*φ2(5) + (600/59)*φ2(6) - (577/1888)*φ2(0,2)
u2_3 = -(4/3)*φ3(2,2) - (14/3)*φ3(3,2) - 7*φ3(4,2) - 4*φ3(5,2)
u3_3 = u2_3 + 45/64
u4_3 = -(4/3)*φ3(2) - (14/3)*φ3(3) - 7*φ3(4) - 4*φ3(5) +(45/32)*φ3(0,2)
v3 = -(56/177)*φ3(2) - (76/177)*φ3(3) + (112/59)*φ3(4) + (364/59)*φ3(5) - (300/59)*φ3(6) + (25/236)*φ3(0,2)
u2_4 = (1/4)*φ4(2,2) + (88/96)*φ4(3,2) + (3/2)*φ4(4,2) + φ4(5,2)
u3_4 = u2_4 - 35/256
u4_4 = (1/4)*φ4(2) + (11/12)*φ4(3) + (3/2)*φ4(4) + φ4(5) - (35/128)*φ4(0,2)
v4 = (11/236)*φ4(2) + (49/708)*φ4(3) - (33/118)*φ4(4) - (61/59)*φ4(5) + ( 60/59)*φ4(6) - (181/11328)*φ4(0,2)
u = [
[ 0, 0, 0, 0],
[u2_1, u2_2, u2_3, u2_4],
[u3_1, u3_2, u3_3, u3_4],
[u4_1, u4_2, u4_3, u4_4],
]
v = [
[v1, v2, v3, v4,],
]
a, b = gen_first_col_exp_uv(a,b,ci,u,v,φ)
case "etdrk2_2s": # https://arxiv.org/pdf/2402.15142v1
c1,c2 = 0, 1
ci = [c1,c2]
φ = Phi(h, ci, use_analytic_solution)
a = [
[0, 0],
[φ(1), 0],
]
b = [
[φ(1)-φ(2), φ(2)],
]
case "etdrk3_a_3s": #non-monotonic # https://arxiv.org/pdf/2402.15142v1
c1,c2,c3 = 0, 1, 2/3
ci = [c1,c2,c3]
φ = Phi(h, ci, use_analytic_solution)
a2_1 = c2*φ(1)
a3_2 = (4/9)*φ(2,3)
a3_1 = c3*φ(1,3) - a3_2
b2 = φ(2) - (1/2)*φ(1)
b3 = (3/4) * φ(1)
b1 = φ(1) - b2 - b3
a = [
[0, 0, 0],
[a2_1, 0, 0],
[a3_1, a3_2, 0 ]
]
b = [
[b1, b2, b3],
]
case "etdrk3_b_3s": # https://arxiv.org/pdf/2402.15142v1
c1,c2,c3 = 0, 4/9, 2/3
ci = [c1,c2,c3]
φ = Phi(h, ci, use_analytic_solution)
a2_1 = c2*φ(1,2)
a3_2 = φ(2,3)
a3_1 = c3*φ(1,3) - a3_2
b2 = 0
b3 = (3/2) * φ(2)
b1 = φ(1) - b2 - b3
a = [
[0, 0, 0],
[a2_1, 0, 0],
[a3_1, a3_2, 0 ]
]
b = [
[b1, b2, b3],
]
case "etdrk4_4s": # https://ora.ox.ac.uk/objects/uuid:cc001282-4285-4ca2-ad06-31787b540c61/files/m611df1a355ca243beb09824b70e5e774
c1,c2,c3,c4 = 0, 1/2, 1/2, 1
ci = [c1,c2,c3,c4]
φ = Phi(h, ci, use_analytic_solution)
a3_2 = φ(1,2)
a4_3 = 2*φ(1,2)
b2 = 2*φ(2) - 4*φ(3)
b3 = 2*φ(2) - 4*φ(3)
b4 = -φ(2) + 4*φ(3)
a = [
[0, 0,0,0],
[0, 0,0,0],
[0, a3_2,0,0],
[0, 0, a4_3,0],
]
b = [
[0, b2, b3, b4],
]
a, b = gen_first_col_exp(a,b,ci,φ)
case "etdrk4_4s_alt": # pg 70 col 1 computed with (4.9) https://ora.ox.ac.uk/objects/uuid:cc001282-4285-4ca2-ad06-31787b540c61/files/m611df1a355ca243beb09824b70e5e774
c1,c2,c3,c4 = 0, 1/2, 1/2, 1
ci = [c1,c2,c3,c4]
φ = Phi(h, ci, use_analytic_solution)
a2_1 = φ(1,2) #unsure about this, looks bad and is pretty different from col #1 implementations for everything else except the other 4s alt and 5s ostermann??? from the link
a3_1 = 0
a4_1 = φ(1) - 2*φ(1,2)
a3_2 = φ(1,2)
a4_3 = 2*φ(1,2)
b1 = φ(1) - 3*φ(2) + 4*φ(3)
b2 = 2*φ(2) - 4*φ(3)
b3 = 2*φ(2) - 4*φ(3)
b4 = -φ(2) + 4*φ(3)
a = [
[ 0, 0, 0,0],
[a2_1, 0, 0,0],
[a3_1, a3_2, 0,0],
[a4_1, 0, a4_3,0],
]
b = [
[0, b2, b3, b4],
]
#a, b = gen_first_col_exp(a,b,ci,φ)
case "dpmpp_2s":
c2 = float(get_extra_options_kv("c2", str(c2), extra_options))
ci = [0,c2]
φ = Phi(h, ci, use_analytic_solution)
b2 = (1/(2*c2)) * φ(1)
a = [
[0, 0],
[0, 0],
]
b = [
[0, b2],
]
a, b = gen_first_col_exp(a,b,ci,φ)
case "dpmpp_sde_2s":
c2 = 1.0 #hardcoded to 1.0 to more closely emulate the configuration for k-diffusion's implementation
ci = [0,c2]
φ = Phi(h, ci, use_analytic_solution)
b2 = (1/(2*c2)) * φ(1)
a = [
[0, 0],
[0, 0],
]
b = [
[0, b2],
]
a, b = gen_first_col_exp(a,b,ci,φ)
case "dpmpp_3s":
c2 = float(get_extra_options_kv("c2", str(c2), extra_options))
c3 = float(get_extra_options_kv("c3", str(c3), extra_options))
ci = [0,c2,c3]
φ = Phi(h, ci, use_analytic_solution)
a3_2 = (c3**2 / c2) * φ(2,3)
b3 = (1/c3) * φ(2)
a = [
[0, 0, 0],
[0, 0, 0],
[0, a3_2, 0],
]
b = [
[0, 0, b3],
]
a, b = gen_first_col_exp(a,b,ci,φ)
case "res_5s": #non-monotonic #4th order
c1, c2, c3, c4, c5 = 0, 1/2, 1/2, 1, 1/2
ci = [c1,c2,c3,c4,c5]
φ = Phi(h, ci, use_analytic_solution)
a3_2 = φ(2,3)
a4_2 = φ(2,4)
a5_2 = (1/2)*φ(2,5) - φ(3,4) + (1/4)*φ(2,4) - (1/2)*φ(3,5)
a4_3 = a4_2
a5_3 = a5_2
a5_4 = (1/4)*φ(2,5) - a5_2
b4 = -φ(2) + 4*φ(3)
b5 = 4*φ(2) - 8*φ(3)
a = [
[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0],
[0, a3_2, 0, 0, 0],
[0, a4_2, a4_3, 0, 0],
[0, a5_2, a5_3, a5_4, 0],
]
b = [
[0, 0, 0, b4, b5],
]
a, b = gen_first_col_exp(a,b,ci,φ)
case "res_5s_hochbruck-ostermann": #non-monotonic #4th order
c1, c2, c3, c4, c5 = 0, 1/2, 1/2, 1, 1/2
ci = [c1,c2,c3,c4,c5]
φ = Phi(h, ci, use_analytic_solution)
a3_2 = 4*φ(2,2)
a4_2 = φ(2)
a5_2 = (1/4)*φ(2) - φ(3) + 2*φ(2,2) - 4*φ(3,2)
a4_3 = φ(2)
a5_3 = a5_2
a5_4 = φ(2,2) - a5_2
b4 = -φ(2) + 4*φ(3)
b5 = 4*φ(2) - 8*φ(3)
a = [
[0, 0 , 0 , 0 , 0],
[0, 0 , 0 , 0 , 0],
[0, a3_2, 0 , 0 , 0],
[0, a4_2, a4_3, 0 , 0],
[0, a5_2, a5_3, a5_4, 0],
]
b = [
[0, 0, 0, b4, b5],
]
a, b = gen_first_col_exp(a,b,ci,φ)
case "res_6s": #non-monotonic #4th order
c1, c2, c3, c4, c5, c6 = 0, 1/2, 1/2, 1/3, 1/3, 5/6
ci = [c1, c2, c3, c4, c5, c6]
φ = Phi(h, ci, use_analytic_solution)
a2_1 = c2 * φ(1,2)
a3_1 = 0
a3_2 = (c3**2 / c2) * φ(2,3)
a4_1 = 0
a4_2 = (c4**2 / c2) * φ(2,4)
a4_3 = (c4**2 * φ(2,4) - a4_2 * c2) / c3
a5_1 = 0
a5_2 = 0 #zero
a5_3 = (-c4 * c5**2 * φ(2,5) + 2*c5**3 * φ(3,5)) / (c3 * (c3 - c4))
a5_4 = (-c3 * c5**2 * φ(2,5) + 2*c5**3 * φ(3,5)) / (c4 * (c4 - c3))
a6_1 = 0
a6_2 = 0 #zero
a6_3 = (-c4 * c6**2 * φ(2,6) + 2*c6**3 * φ(3,6)) / (c3 * (c3 - c4))
a6_4 = (-c3 * c6**2 * φ(2,6) + 2*c6**3 * φ(3,6)) / (c4 * (c4 - c3))
a6_5 = (c6**2 * φ(2,6) - a6_3*c3 - a6_4*c4) / c5
#a6_5_alt = (2*c6**3 * φ(3,6) - a6_3*c3**2 - a6_4*c4**2) / c5**2
b1 = 0
b2 = 0
b3 = 0
b4 = 0
b5 = (-c6*φ(2) + 2*φ(3)) / (c5 * (c5 - c6))
b6 = (-c5*φ(2) + 2*φ(3)) / (c6 * (c6 - c5))
a = [
[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0],
[0, a3_2, 0, 0, 0, 0],
[0, a4_2, a4_3, 0, 0, 0],
[0, a5_2, a5_3, a5_4, 0, 0],
[0, a6_2, a6_3, a6_4, a6_5, 0],
]
b = [
[0, b2, b3, b4, b5, b6],
]
a, b = gen_first_col_exp(a,b,ci,φ)
case "res_8s": #non-monotonic # this is not EXPRK5S8 https://ora.ox.ac.uk/objects/uuid:cc001282-4285-4ca2-ad06-31787b540c61/files/m611df1a355ca243beb09824b70e5e774
c1, c2, c3, c4, c5, c6, c7, c8 = 0, 1/2, 1/2, 1/4, 1/2, 1/5, 2/3, 1
ci = [c1, c2, c3, c4, c5, c6, c7, c8]
#φ = Phi(h, ci, analytic_solution=use_analytic_solution)
ci = [mpf(c_val) for c_val in ci]
c1, c2, c3, c4, c5, c6, c7, c8 = [c_val for c_val in ci]
φ = Phi(mpf(h.item()), ci, analytic_solution=use_analytic_solution)
a3_2 = (1/2) * φ(2,3)
a4_3 = (1/8) * φ(2,4)
a5_3 = (-1/2) * φ(2,5) + 2 * φ(3,5)
a5_4 = 2 * φ(2,5) - 4 * φ(3,5)
a6_4 = (8/25) * φ(2,6) - (32/125) * φ(3,6)
a6_5 = (2/25) * φ(2,6) - (1/2) * a6_4
a7_4 = (-125/162) * a6_4
a7_5 = (125/1944) * a6_4 - (16/27) * φ(2,7) + (320/81) * φ(3,7)
a7_6 = (3125/3888) * a6_4 + (100/27) * φ(2,7) - (800/81) * φ(3,7)
Φ = (5/32)*a6_4 - (1/28)*φ(2,6) + (36/175)*φ(2,7) - (48/25)*φ(3,7) + (6/175)*φ(4,6) + (192/35)*φ(4,7) + 6*φ(4,8)
a8_5 = (208/3)*φ(3,8) - (16/3) *φ(2,8) - 40*Φ
a8_6 = (-250/3)*φ(3,8) + (250/21)*φ(2,8) + (250/7)*Φ
a8_7 = -27*φ(3,8) + (27/14)*φ(2,8) + (135/7)*Φ
b6 = (125/14)*φ(2) - (625/14)*φ(3) + (1125/14)*φ(4)
b7 = (-27/14)*φ(2) + (162/7) *φ(3) - (405/7) *φ(4)
b8 = (1/2) *φ(2) - (13/2) *φ(3) + (45/2) *φ(4)
b1 = φ(1) - b6 - b7 - b8
a = [
[0 , 0 , 0 , 0 , 0 , 0 , 0 , 0],
[0 , 0 , 0 , 0 , 0 , 0 , 0 , 0],
[0 , a3_2, 0 , 0 , 0 , 0 , 0 , 0],
[0 , 0 , a4_3, 0 , 0 , 0 , 0 , 0],
[0 , 0 , a5_3, a5_4, 0 , 0 , 0 , 0],
[0 , 0 , 0 , a6_4, a6_5, 0 , 0 , 0],
[0 , 0 , 0 , a7_4, a7_5, a7_6, 0 , 0],
[0 , 0 , 0 , 0 , a8_5, a8_6, a8_7, 0],
]
b = [
[0, 0, 0, 0, 0, b6, b7, b8],
]
a, b = gen_first_col_exp(a,b,ci,φ)
a = [[float(val) for val in row] for row in a]
b = [[float(val) for val in row] for row in b]
ci = [c1, c2, c3, c4, c5, c6, c7, c8]
case "res_8s_alt": # this is EXPRK5S8 https://ora.ox.ac.uk/objects/uuid:cc001282-4285-4ca2-ad06-31787b540c61/files/m611df1a355ca243beb09824b70e5e774
c1, c2, c3, c4, c5, c6, c7, c8 = 0, 1/2, 1/2, 1/4, 1/2, 1/5, 2/3, 1
#ci = [c1, c2, c3, c4, c5, c6, c7, c8]
#φ = Phi(h, ci, analytic_solution=use_analytic_solution)
ci = [mpf(c_val) for c_val in ci]
c1, c2, c3, c4, c5, c6, c7, c8 = [c_val for c_val in ci]
φ = Phi(mpf(h.item()), ci, analytic_solution=use_analytic_solution)
a3_2 = 2*φ(2,2)
a4_3 = 2*φ(2,4)
a5_3 = -2*φ(2,2) + 16*φ(3,2)
a5_4 = 8*φ(2,2) - 32*φ(3,2)
a6_4 = 8*φ(2,6) - 32*φ(3,6)
a6_5 = -2*φ(2,6) + 16*φ(3,6)
a7_4 = (-125/162) * a6_4
a7_5 = (125/1944) * a6_4 - (4/3) * φ(2,7) + (40/3)*φ(3,7)
a7_6 = (3125/3888) * a6_4 + (25/3) * φ(2,7) - (100/3)*φ(3,7)
Φ = (5/32)*a6_4 - (25/28)*φ(2,6) + (81/175)*φ(2,7) - (162/25)*φ(3,7) + (150/7)*φ(4,6) + (972/35)*φ(4,7) + 6*φ(4)
a8_5 = -(16/3)*φ(2) + (208/3)*φ(3) - 40*Φ
a8_6 = (250/21)*φ(2) - (250/3)*φ(3) + (250/7)*Φ
a8_7 = (27/14)*φ(2) - 27*φ(3) + (135/7)*Φ
b6 = (125/14)*φ(2) - (625/14)*φ(3) + (1125/14)*φ(4)
b7 = (-27/14)*φ(2) + (162/7) *φ(3) - (405/7) *φ(4)
b8 = (1/2) *φ(2) - (13/2) *φ(3) + (45/2) *φ(4)
a = [
[0 , 0 , 0 , 0 , 0 , 0 , 0 , 0],
[0 , 0 , 0 , 0 , 0 , 0 , 0 , 0],
[0 , a3_2, 0 , 0 , 0 , 0 , 0 , 0],
[0 , 0 , a4_3, 0 , 0 , 0 , 0 , 0],
[0 , 0 , a5_3, a5_4, 0 , 0 , 0 , 0],
[0 , 0 , 0 , a6_4, a6_5, 0 , 0 , 0],
[0 , 0 , 0 , a7_4, a7_5, a7_6, 0 , 0],
[0 , 0 , 0 , 0 , a8_5, a8_6, a8_7, 0],
]
b = [
[0, 0, 0, 0, 0, b6, b7, b8],
]
a, b = gen_first_col_exp(a,b,ci,φ)
a = [[float(val) for val in row] for row in a]
b = [[float(val) for val in row] for row in b]
ci = [c1, c2, c3, c4, c5, c6, c7, c8]
case "res_10s":
c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 = 0, 1/2, 1/2, 1/3, 1/2, 1/3, 1/4, 3/10, 3/4, 1
ci = [c1, c2, c3, c4, c5, c6, c7, c8, c9, c10]
#φ = Phi(h, ci, analytic_solution=use_analytic_solution)
ci = [mpf(c_val) for c_val in ci]
c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 = [c_val for c_val in ci]
φ = Phi(mpf(h.item()), ci, analytic_solution=use_analytic_solution)
a3_2 = (c3**2 / c2) * φ(2,3)
a4_2 = (c4**2 / c2) * φ(2,4)
b8 = (c9*c10*φ(2) - 2*(c9+c10)*φ(3) + 6*φ(4)) / (c8 * (c8-c9) * (c8-c10))
b9 = (c8*c10*φ(2) - 2*(c8+c10)*φ(3) + 6*φ(4)) / (c9 * (c9-c8) * (c9-c10))
b10 = (c8*c9*φ(2) - 2*(c8+c9) *φ(3) + 6*φ(4)) / (c10 * (c10-c8) * (c10-c9))
a = [
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, a3_2, 0, 0, 0, 0, 0, 0, 0, 0],
[0, a4_2, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
]
b = [
[0, 0, 0, 0, 0, 0, 0, b8, b9, b10],
]
# a5_3, a5_4
# a6_3, a6_4
# a7_3, a7_4
for i in range(5, 8): # i=5,6,7 j,k ∈ {3, 4}, j != k
jk = [(3, 4), (4, 3)]
jk = list(permutations([3, 4], 2))
for j,k in jk:
a[i-1][j-1] = (-ci[i-1]**2 * ci[k-1] * φ(2,i) + 2*ci[i-1]**3 * φ(3,i)) / (ci[j-1] * (ci[j-1] - ci[k-1]))
for i in range(8, 11): # i=8,9,10 j,k,l ∈ {5, 6, 7}, j != k != l [ (5, 6, 7), (5, 7, 6), (6, 5, 7), (6, 7, 5), (7, 5, 6), (7, 6, 5)] 6 total coeff
jkl = list(permutations([5, 6, 7], 3))
for j,k,l in jkl:
a[i-1][j-1] = (ci[i-1]**2 * ci[k-1] * ci[l-1] * φ(2,i) - 2*ci[i-1]**3 * (ci[k-1] + ci[l-1]) * φ(3,i) + 6*ci[i-1]**4 * φ(4,i)) / (ci[j-1] * (ci[j-1] - ci[k-1]) * (ci[j-1] - ci[l-1]))
gen_first_col_exp(a, b, ci, φ)
a = [[float(val) for val in row] for row in a]
b = [[float(val) for val in row] for row in b]
c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 = 0, 1/2, 1/2, 1/3, 1/2, 1/3, 1/4, 3/10, 3/4, 1
ci = [c1, c2, c3, c4, c5, c6, c7, c8, c9, c10]
case "res_15s":
c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15 = 0, 1/2, 1/2, 1/3, 1/2, 1/5, 1/4, 18/25, 1/3, 3/10, 1/6, 90/103, 1/3, 3/10, 1/5
c1 = 0
c2 = c3 = c5 = 1/2
c4 = c9 = c13 = 1/3
c6 = c15 = 1/5
c7 = 1/4
c8 = 18/25
c10 = c14 = 3/10
c11 = 1/6
c12 = 90/103
c15 = 1/5
ci = [c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15]
ci = [mpf(c_val) for c_val in ci]
φ = Phi(mpf(h.item()), ci, analytic_solution=use_analytic_solution)
a = [[mpf(0) for _ in range(15)] for _ in range(15)]
b = [[mpf(0) for _ in range(15)]]
for i in range(3, 5): # i=3,4 j=2
j=2
a[i-1][j-1] = (ci[i-1]**2 / ci[j-1]) * φ(j,i)
for i in range(5, 8): # i=5,6,7 j,k ∈ {3, 4}, j != k
jk = list(permutations([3, 4], 2))
for j,k in jk:
a[i-1][j-1] = (-ci[i-1]**2 * ci[k-1] * φ(2,i) + 2*ci[i-1]**3 * φ(3,i)) / prod_diff(ci[j-1], ci[k-1])
for i in range(8, 12): # i=8,9,10,11 j,k,l ∈ {5, 6, 7}, j != k != l [ (5, 6, 7), (5, 7, 6), (6, 5, 7), (6, 7, 5), (7, 5, 6), (7, 6, 5)] 6 total coeff
jkl = list(permutations([5, 6, 7], 3))
for j,k,l in jkl:
a[i-1][j-1] = (ci[i-1]**2 * ci[k-1] * ci[l-1] * φ(2,i) - 2*ci[i-1]**3 * (ci[k-1] + ci[l-1]) * φ(3,i) + 6*ci[i-1]**4 * φ(4,i)) / (ci[j-1] * (ci[j-1] - ci[k-1]) * (ci[j-1] - ci[l-1]))
for i in range(12,16): # i=12,13,14,15
jkld = list(permutations([8,9,10,11], 4))
for j,k,l,d in jkld:
numerator = -ci[i-1]**2 * ci[d-1]*ci[k-1]*ci[l-1] * φ(2,i) + 2*ci[i-1]**3 * (ci[d-1]*ci[k-1] + ci[d-1]*ci[l-1] + ci[k-1]*ci[l-1]) * φ(3,i) - 6*ci[i-1]**4 * (ci[d-1] + ci[k-1] + ci[l-1]) * φ(4,i) + 24*ci[i-1]**5 * φ(5,i)
a[i-1][j-1] = numerator / prod_diff(ci[j-1], ci[k-1], ci[l-1], ci[d-1])
"""ijkl = list(permutations([12,13,14,15], 4))
for i,j,k,l in ijkl:
#numerator = -ci[j-1]*ci[k-1]*ci[l-1]*φ(2) + 2*(ci[j-1]*ci[k-1] + ci[j-1]*ci[l-1] + ci[k-1]*ci[l-1])*φ(3) - 6*(ci[j-1] + ci[k-1] + ci[l-1])*φ(4) + 24*φ(5)
#b[0][i-1] = numerator / prod_diff(ci[i-1], ci[j-1], ci[k-1], ci[l-1])
for jjj in range (2, 6): # 2,3,4,5
b[0][i-1] += mu_numerator(jjj, ci[j-1], ci[i-1], ci[k-1], ci[l-1]) * φ(jjj)
b[0][i-1] /= prod_diff(ci[i-1], ci[j-1], ci[k-1], ci[l-1])"""
ijkl = list(permutations([12,13,14,15], 4))
for i,j,k,l in ijkl:
numerator = 0
for jjj in range(2, 6): # 2, 3, 4, 5
numerator += mu_numerator(jjj, ci[j-1], ci[i-1], ci[k-1], ci[l-1]) * φ(jjj)
#print(i,j,k,l)
b[0][i-1] = numerator / prod_diff(ci[i-1], ci[j-1], ci[k-1], ci[l-1])
ijkl = list(permutations([12, 13, 14, 15], 4))
selected_permutations = {}
sign = 1
for i in range(12, 16):
results = []
for j, k, l, d in ijkl:
if i != j and i != k and i != l and i != d:
numerator = 0
for jjj in range(2, 6): # 2, 3, 4, 5
numerator += mu_numerator(jjj, ci[j-1], ci[i-1], ci[k-1], ci[l-1]) * φ(jjj)
theta_value = numerator / prod_diff(ci[i-1], ci[j-1], ci[k-1], ci[l-1])
results.append((theta_value, (i, j, k, l, d)))
results.sort(key=lambda x: abs(x[0]))
for theta_value, permutation in results:
if sign == 1 and theta_value > 0:
selected_permutations[i] = (theta_value, permutation)
sign *= -1
break
elif sign == -1 and theta_value < 0:
selected_permutations[i] = (theta_value, permutation)
sign *= -1
break
for i in range(12, 16):
if i in selected_permutations:
theta_value, (i, j, k, l, d) = selected_permutations[i]
b[0][i-1] = theta_value
for i in selected_permutations:
theta_value, permutation = selected_permutations[i]
print(f"i={i}")
print(f" Selected Theta: {theta_value:.6f}, Permutation: {permutation}")
gen_first_col_exp(a, b, ci, φ)
a = [[float(val) for val in row] for row in a]
b = [[float(val) for val in row] for row in b]
ci = [c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15]
case "res_16s": # 6th order without weakened order conditions
c1 = 0
c2 = c3 = c5 = c8 = c12 = 1/2
c4 = c11 = c15 = 1/3
c6 = c9 = c13 = 1/5
c7 = c10 = c14 = 1/4
c16 = 1
ci = [c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16]
ci = [mpf(c_val) for c_val in ci]
φ = Phi(mpf(h.item()), ci, analytic_solution=use_analytic_solution)
a3_2 = (1/2) * φ(2,3)
a = [[mpf(0) for _ in range(16)] for _ in range(16)]
b = [[mpf(0) for _ in range(16)]]
for i in range(3, 5): # i=3,4 j=2
j=2
a[i-1][j-1] = (ci[i-1]**2 / ci[j-1]) * φ(j,i)
for i in range(5, 8): # i=5,6,7 j,k ∈ {3, 4}, j != k
jk = list(permutations([3, 4], 2))
for j,k in jk:
a[i-1][j-1] = (-ci[i-1]**2 * ci[k-1] * φ(2,i) + 2*ci[i-1]**3 * φ(3,i)) / prod_diff(ci[j-1], ci[k-1])
for i in range(8, 12): # i=8,9,10,11 j,k,l ∈ {5, 6, 7}, j != k != l [ (5, 6, 7), (5, 7, 6), (6, 5, 7), (6, 7, 5), (7, 5, 6), (7, 6, 5)] 6 total coeff
jkl = list(permutations([5, 6, 7], 3))
for j,k,l in jkl:
a[i-1][j-1] = (ci[i-1]**2 * ci[k-1] * ci[l-1] * φ(2,i) - 2*ci[i-1]**3 * (ci[k-1] + ci[l-1]) * φ(3,i) + 6*ci[i-1]**4 * φ(4,i)) / (ci[j-1] * (ci[j-1] - ci[k-1]) * (ci[j-1] - ci[l-1]))
for i in range(12,17): # i=12,13,14,15,16
jkld = list(permutations([8,9,10,11], 4))
for j,k,l,d in jkld:
numerator = -ci[i-1]**2 * ci[d-1]*ci[k-1]*ci[l-1] * φ(2,i) + 2*ci[i-1]**3 * (ci[d-1]*ci[k-1] + ci[d-1]*ci[l-1] + ci[k-1]*ci[l-1]) * φ(3,i) - 6*ci[i-1]**4 * (ci[d-1] + ci[k-1] + ci[l-1]) * φ(4,i) + 24*ci[i-1]**5 * φ(5,i)
a[i-1][j-1] = numerator / prod_diff(ci[j-1], ci[k-1], ci[l-1], ci[d-1])
"""ijdkl = list(permutations([12,13,14,15,16], 5))
for i,j,d,k,l in ijdkl:
#numerator = -ci[j-1]*ci[k-1]*ci[l-1]*φ(2) + 2*(ci[j-1]*ci[k-1] + ci[j-1]*ci[l-1] + ci[k-1]*ci[l-1])*φ(3) - 6*(ci[j-1] + ci[k-1] + ci[l-1])*φ(4) + 24*φ(5)
b[0][i-1] = theta(2, ci[d-1], ci[i-1], ci[k-1], ci[j-1], ci[l-1]) * φ(2) + theta(3, ci[d-1], ci[i-1], ci[k-1], ci[j-1], ci[l-1])*φ(3) + theta(4, ci[d-1], ci[i-1], ci[k-1], ci[j-1], ci[l-1])*φ(4) + theta(5, ci[d-1], ci[i-1], ci[k-1], ci[j-1], ci[l-1])*φ(5) + theta(6, ci[d-1], ci[i-1], ci[k-1], ci[j-1], ci[l-1]) * φ(6)
#b[0][i-1] = numerator / prod_diff(ci[i-1], ci[j-1], ci[k-1], ci[l-1])"""
ijdkl = list(permutations([12,13,14,15,16], 5))
for i,j,d,k,l in ijdkl:
#numerator = -ci[j-1]*ci[k-1]*ci[l-1]*φ(2) + 2*(ci[j-1]*ci[k-1] + ci[j-1]*ci[l-1] + ci[k-1]*ci[l-1])*φ(3) - 6*(ci[j-1] + ci[k-1] + ci[l-1])*φ(4) + 24*φ(5)
#numerator = theta_numerator(2, ci[d-1], ci[i-1], ci[k-1], ci[j-1], ci[l-1]) * φ(2) + theta_numerator(3, ci[d-1], ci[i-1], ci[k-1], ci[j-1], ci[l-1])*φ(3) + theta_numerator(4, ci[d-1], ci[i-1], ci[k-1], ci[j-1], ci[l-1])*φ(4) + theta_numerator(5, ci[d-1], ci[i-1], ci[k-1], ci[j-1], ci[l-1])*φ(5) + theta_numerator(6, ci[d-1], ci[i-1], ci[k-1], ci[j-1], ci[l-1]) * φ(6)
#b[0][i-1] = numerator / (ci[i-1] *, ci[d-1], ci[j-1], ci[k-1], ci[l-1])
#b[0][i-1] = numerator / denominator(ci[i-1], ci[d-1], ci[j-1], ci[k-1], ci[l-1])
b[0][i-1] = theta(2, ci[d-1], ci[i-1], ci[k-1], ci[j-1], ci[l-1]) * φ(2) + theta(3, ci[d-1], ci[i-1], ci[k-1], ci[j-1], ci[l-1])*φ(3) + theta(4, ci[d-1], ci[i-1], ci[k-1], ci[j-1], ci[l-1])*φ(4) + theta(5, ci[d-1], ci[i-1], ci[k-1], ci[j-1], ci[l-1])*φ(5) + theta(6, ci[d-1], ci[i-1], ci[k-1], ci[j-1], ci[l-1]) * φ(6)
ijdkl = list(permutations([12,13,14,15,16], 5))
for i,j,d,k,l in ijdkl:
numerator = 0
for jjj in range(2, 7): # 2, 3, 4, 5, 6
numerator += theta_numerator(jjj, ci[d-1], ci[i-1], ci[k-1], ci[j-1], ci[l-1]) * φ(jjj)
#print(i,j,d,k,l)
b[0][i-1] = numerator / (ci[i-1] * (ci[i-1] - ci[k-1]) * (ci[i-1] - ci[j-1] * (ci[i-1] - ci[d-1]) * (ci[i-1] - ci[l-1])))
gen_first_col_exp(a, b, ci, φ)
a = [[float(val) for val in row] for row in a]
b = [[float(val) for val in row] for row in b]
ci = [c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16]
case "irk_exp_diag_2s":
c1 = 1/3
c2 = 2/3
c1 = float(get_extra_options_kv("c1", str(c1), extra_options))
c2 = float(get_extra_options_kv("c2", str(c2), extra_options))
lam = (1 - torch.exp(-c1 * h)) / h
a2_1 = ( torch.exp(c2*h) - torch.exp(c1*h)) / (h * torch.exp(2*c1*h))
b1 = (1 + c2*h + torch.exp(h) * (-1 + h - c2*h)) / ((c1-c2) * h**2 * torch.exp(c1*h))
b2 = -(1 + c1*h - torch.exp(h) * ( 1 - h + c1*h)) / ((c1-c2) * h**2 * torch.exp(c2*h))
a = [
[lam, 0],
[a2_1, lam],
]
b = [
[b1, b2],
]
ci = [c1, c2]
ci = ci[:]
#if rk_type.startswith("lob") == False:
ci.append(1)
if EO("exp2lin_override_coeff") and is_exponential(rk_type):
a = scale_all(a, -sigma.item())
b = scale_all(b, -sigma.item())
return a, b, u, v, ci, multistep_stages, hybrid_stages, FSAL
def scale_all(data, scalar):
    """Recursively multiply every numeric leaf of a nested structure by `scalar`.

    Lists are descended into element by element; tensors and plain numbers
    are scaled directly; anything else (None, strings, ...) is returned
    untouched so unknown entries pass through unscaled.
    """
    if isinstance(data, list):
        return [scale_all(entry, scalar) for entry in data]
    if isinstance(data, (torch.Tensor, float, int)):
        return data * scalar
    return data  # unknown type (None, str, ...): pass through unchanged
def gen_first_col_exp(a, b, c, φ):
    """Fill in the first column of an exponential-RK tableau in place.

    Each stage row of ``a`` is made to sum to ``c[i] * φ(1, i+1)`` and each
    row of ``b`` to ``φ(1)`` (the exponential row-sum consistency condition).
    Both ``a`` and ``b`` are mutated and returned.
    """
    for stage, c_stage in enumerate(c):
        a[stage][0] = c_stage * φ(1, stage + 1) - sum(a[stage])
    for row in b:
        row[0] = φ(1) - sum(row)
    return a, b
def gen_first_col_exp_uv(a, b, c, u, v, φ):
    """Variant of gen_first_col_exp for tableaus with multistep columns.

    The extra ``u`` (stage) and ``v`` (output) contributions count toward the
    row sums, so they are subtracted as well. Mutates ``a`` and ``b`` in place
    and returns them.
    """
    for stage, c_stage in enumerate(c):
        a[stage][0] = c_stage * φ(1, stage + 1) - sum(a[stage]) - sum(u[stage])
    for row_idx, row in enumerate(b):
        row[0] = φ(1) - sum(row) - sum(v[row_idx])
    return a, b
def rho(j, ci, ck, cl):
    """Weight of the order-j φ-function term over three nodes.

    Returns the j-dependent numerator divided by the shared node-difference
    product denominator(ci, ck, cl). Only j in {2, 3, 4} is defined.
    """
    if j == 2:
        num = ck * cl
    elif j == 3:
        num = -2 * (ck + cl)
    elif j == 4:
        num = 6
    return num / denominator(ci, ck, cl)
def mu(j, cd, ci, ck, cl):
    """Weight of the order-j φ-function term over four nodes.

    Returns the j-dependent numerator divided by the shared node-difference
    product denominator(ci, cd, ck, cl). Only j in {2, 3, 4, 5} is defined.
    """
    if j == 2:
        num = -cd * ck * cl
    elif j == 3:
        num = 2 * (cd * ck + cd * cl + ck * cl)
    elif j == 4:
        num = -6 * (cd + ck + cl)
    elif j == 5:
        num = 24
    return num / denominator(ci, cd, ck, cl)
def mu_numerator(j, cd, ci, ck, cl):
    """Numerator of the order-j mu() weight, with the division deliberately
    omitted (the caller supplies its own denominator).

    ``ci`` is unused here; it is retained for signature parity with mu().
    Only j in {2, 3, 4, 5} is defined.
    """
    if j == 2:
        result = -cd * ck * cl
    elif j == 3:
        result = 2 * (cd * ck + cd * cl + ck * cl)
    elif j == 4:
        result = -6 * (cd + ck + cl)
    elif j == 5:
        result = 24
    return result
def theta_numerator(j, cd, ci, ck, cj, cl):
    """Numerator of the order-j theta() weight, with the division by the
    node-difference product deliberately omitted.

    ``ci`` is unused here; it is retained for signature parity with theta().
    Only j in {2, 3, 4, 5, 6} is defined.
    """
    if j == 2:
        result = -cj * cd * ck * cl
    elif j == 3:
        result = 2 * (cj * ck * cd + cj * ck * cl + ck * cd * cl + cd * cl * cj)
    elif j == 4:
        result = -6 * (cj * ck + cj * cd + cj * cl + ck * cd + ck * cl + cd * cl)
    elif j == 5:
        result = 24 * (cj + ck + cl + cd)
    elif j == 6:
        result = -120
    return result
def theta(j, cd, ci, ck, cj, cl):
    """Weight of the order-j φ-function term over five nodes.

    Returns the j-dependent numerator (same expressions as theta_numerator)
    divided by ci * (ci - cj) * (ci - ck) * (ci - cl) * (ci - cd).
    Only j in {2, 3, 4, 5, 6} is defined.

    Fix: removed a second ``return`` statement after the first one — it was
    unreachable dead code (it computed the same denominator via the
    denominator() helper).
    """
    if j == 2:
        numerator = -cj * cd * ck * cl
    elif j == 3:
        numerator = 2 * (cj * ck * cd + cj * ck * cl + ck * cd * cl + cd * cl * cj)
    elif j == 4:
        numerator = -6 * (cj * ck + cj * cd + cj * cl + ck * cd + ck * cl + cd * cl)
    elif j == 5:
        numerator = 24 * (cj + ck + cl + cd)
    elif j == 6:
        numerator = -120
    return numerator / (ci * (ci - cj) * (ci - ck) * (ci - cl) * (ci - cd))
def prod_diff(cj, ck, cl=None, cd=None):
    """Product of cj with its differences against the supplied nodes.

    Two nodes:   cj * (cj - ck)
    Three nodes: cj * (cj - ck) * (cj - cl)
    Four nodes:  cj * (cj - ck) * (cj - cl) * (cj - cd)
    """
    base = cj * (cj - ck)
    if cl is None and cd is None:
        return base
    if cd is None:
        return base * (cj - cl)
    return base * (cj - cl) * (cj - cd)
def denominator(ci, *args):
    """Return ci * Π_k (ci - arg_k): the node-difference product used as the
    shared denominator by the rho/mu/theta interpolation weights.

    Fix: uses non-augmented multiplication. The original ``result *= ...``
    would mutate ``ci`` itself in place (through the ``result`` alias) when
    ``ci`` is a torch.Tensor, since augmented assignment on tensors is an
    in-place op.
    """
    result = ci
    for node in args:
        result = result * (ci - node)
    return result
def check_condition_4_2(nodes):
    """Check the order-4 quadrature condition for four collocation nodes.

    The alternating sum of the scaled elementary symmetric polynomials of the
    nodes (c12..c15) must equal 1/6; returns True when it does to within 1e-6.
    """
    c12, c13, c14, c15 = nodes
    alternating_sum = (
        (1 / 5) * (c12 + c13 + c14 + c15)
        - (1 / 4) * (c12 * c13 + c12 * c14 + c12 * c15 + c13 * c14 + c13 * c15 + c14 * c15)
        + (1 / 3) * (c12 * c13 * c14 + c12 * c13 * c15 + c12 * c14 * c15 + c13 * c14 * c15)
        - (1 / 2) * (c12 * c13 * c14 * c15)
    )
    return abs(alternating_sum - (1 / 6)) < 1e-6
================================================
FILE: beta/rk_guide_func_beta.py
================================================
import torch
import torch.nn.functional as F
from torch import Tensor
import itertools
import copy
from typing import Optional, Callable, Tuple, Dict, Any, Union, TYPE_CHECKING, TypeVar
if TYPE_CHECKING:
from .noise_classes import NoiseGenerator
NoiseGeneratorSubclass = TypeVar("NoiseGeneratorSubclass", bound="NoiseGenerator")
from einops import rearrange
from ..sigmas import get_sigmas
from ..helper import ExtraOptions, FrameWeightsManager, initialize_or_scale, is_video_model
from ..latents import normalize_zscore, get_collinear, get_orthogonal, get_cosine_similarity, get_pearson_similarity, \
get_slerp_weight_for_cossim, normalize_latent, hard_light_blend, slerp_tensor, get_orthogonal_noise_from_channelwise, get_edge_mask
from .rk_method_beta import RK_Method_Beta
from .constants import MAX_STEPS
from ..models import PRED
#from ..latents import hard_light_blend, normalize_latent
class LatentGuide:
def __init__(self,
        model,
        sigmas               : Tensor,
        UNSAMPLE             : bool,
        VE_MODEL             : bool,
        LGW_MASK_RESCALE_MIN : bool,
        extra_options        : str,
        device               : str         = 'cpu',
        dtype                : torch.dtype = torch.float64,
        frame_weights_mgr    : FrameWeightsManager = None,
        ):
    """Initialize the latent-guide container used by the RK samplers.

    Resolves the model's sampling object (for sigma_min/sigma_max), stores the
    sigma schedule on the requested device/dtype, and sets every guide latent,
    mask, weight schedule, and cutoff to an inert default. The real values are
    populated later by init_guides().

    Fix: raise a clear AttributeError when the model wrapper exposes neither
    ``model`` nor ``inner_model`` (previously ``model_sampling`` would be
    unbound and a confusing NameError was raised on the next line).
    """
    self.dtype  = dtype
    self.device = device
    self.model  = model

    # Locate model_sampling on either a ModelPatcher-style wrapper (.model)
    # or a guider-style wrapper (.inner_model.inner_model).
    if hasattr(model, "model"):
        model_sampling = model.model.model_sampling
    elif hasattr(model, "inner_model"):
        model_sampling = model.inner_model.inner_model.model_sampling
    else:
        raise AttributeError(
            "LatentGuide: cannot locate model_sampling; model has neither a "
            "'model' nor an 'inner_model' attribute."
        )

    self.sigma_min = model_sampling.sigma_min.to(dtype=dtype, device=device)
    self.sigma_max = model_sampling.sigma_max.to(dtype=dtype, device=device)
    self.sigmas    = sigmas.to(dtype=dtype, device=device)

    self.UNSAMPLE = UNSAMPLE
    self.VE_MODEL = VE_MODEL
    self.VIDEO    = is_video_model(model)
    # 0-dim torch.bool: True when the sigma schedule descends (sampling
    # rather than unsampling).
    self.SAMPLE   = (sigmas[0] > sigmas[1])

    # Guide latents — populated by init_guides().
    self.y0           = None
    self.y0_inv       = None
    self.y0_mean      = None
    self.y0_adain     = None
    self.y0_attninj   = None
    self.y0_style_pos = None
    self.y0_style_neg = None

    self.guide_mode = ""
    self.max_steps  = MAX_STEPS

    # Region masks, one per guide mode — populated by init_guides().
    self.mask           = None
    self.mask_inv       = None
    self.mask_sync      = None
    self.mask_drift_x   = None
    self.mask_drift_y   = None
    self.mask_lure_x    = None
    self.mask_lure_y    = None
    self.mask_mean      = None
    self.mask_adain     = None
    self.mask_attninj   = None
    self.mask_style_pos = None
    self.mask_style_neg = None

    self.x_lying_ = None
    self.s_lying_ = None

    self.LGW_MASK_RESCALE_MIN = LGW_MASK_RESCALE_MIN

    # Flags recording which guide inputs were actually supplied.
    self.HAS_LATENT_GUIDE           = False
    self.HAS_LATENT_GUIDE_INV       = False
    self.HAS_LATENT_GUIDE_MEAN      = False
    self.HAS_LATENT_GUIDE_ADAIN     = False
    self.HAS_LATENT_GUIDE_ATTNINJ   = False
    self.HAS_LATENT_GUIDE_STYLE_POS = False
    self.HAS_LATENT_GUIDE_STYLE_NEG = False

    # Per-step guide weights, one entry per sigma; zero means "guide off".
    self.lgw           = torch.full_like(sigmas, 0., dtype=dtype)
    self.lgw_inv       = torch.full_like(sigmas, 0., dtype=dtype)
    self.lgw_mean      = torch.full_like(sigmas, 0., dtype=dtype)
    self.lgw_adain     = torch.full_like(sigmas, 0., dtype=dtype)
    self.lgw_attninj   = torch.full_like(sigmas, 0., dtype=dtype)
    self.lgw_style_pos = torch.full_like(sigmas, 0., dtype=dtype)
    self.lgw_style_neg = torch.full_like(sigmas, 0., dtype=dtype)

    # Cosine-similarity targets and per-mode cutoffs (1.0 = never cut off).
    self.cossim_tgt     = torch.full_like(sigmas, 0., dtype=dtype)
    self.cossim_tgt_inv = torch.full_like(sigmas, 0., dtype=dtype)

    self.guide_cossim_cutoff_           = 1.0
    self.guide_bkg_cossim_cutoff_       = 1.0
    self.guide_mean_cossim_cutoff_      = 1.0
    self.guide_adain_cossim_cutoff_     = 1.0
    self.guide_attninj_cossim_cutoff_   = 1.0
    self.guide_style_pos_cossim_cutoff_ = 1.0
    self.guide_style_neg_cossim_cutoff_ = 1.0

    self.frame_weights_mgr = frame_weights_mgr
    self.frame_weights     = None
    self.frame_weights_inv = None

    #self.freqsep_lowpass_method = "none"
    #self.freqsep_sigma          = 0.
    #self.freqsep_kernel_size    = 0

    self.extra_options = extra_options
    self.EO            = ExtraOptions(extra_options)
def init_guides(self,
x : Tensor,
RK_IMPLICIT : bool,
guides : Optional[Tensor] = None,
noise_sampler : Optional["NoiseGeneratorSubclass"] = None,
batch_num : int = 0,
sigma_init = None,
guide_inversion_y0 = None,
guide_inversion_y0_inv = None,
) -> Tensor:
latent_guide_weight = 0.0
latent_guide_weight_inv = 0.0
latent_guide_weight_sync = 0.0
latent_guide_weight_sync_inv = 0.0
latent_guide_weight_drift_x = 0.0
latent_guide_weight_drift_x_inv = 0.0
latent_guide_weight_drift_y = 0.0
latent_guide_weight_drift_y_inv = 0.0
latent_guide_weight_lure_x = 0.0
latent_guide_weight_lure_x_inv = 0.0
latent_guide_weight_lure_y = 0.0
latent_guide_weight_lure_y_inv = 0.0
latent_guide_weight_mean = 0.0
latent_guide_weight_adain = 0.0
latent_guide_weight_attninj = 0.0
latent_guide_weight_style_pos = 0.0
latent_guide_weight_style_neg = 0.0
latent_guide_weights = torch.zeros_like(self.sigmas, dtype=self.dtype, device=self.device)
latent_guide_weights_inv = torch.zeros_like(self.sigmas, dtype=self.dtype, device=self.device)
latent_guide_weights_sync = torch.zeros_like(self.sigmas, dtype=self.dtype, device=self.device)
latent_guide_weights_sync_inv = torch.zeros_like(self.sigmas, dtype=self.dtype, device=self.device)
latent_guide_weights_drift_x = torch.zeros_like(self.sigmas, dtype=self.dtype, device=self.device)
latent_guide_weights_drift_x_inv = torch.zeros_like(self.sigmas, dtype=self.dtype, device=self.device)
latent_guide_weights_drift_y = torch.zeros_like(self.sigmas, dtype=self.dtype, device=self.device)
latent_guide_weights_drift_y_inv = torch.zeros_like(self.sigmas, dtype=self.dtype, device=self.device)
latent_guide_weights_lure_x = torch.zeros_like(self.sigmas, dtype=self.dtype, device=self.device)
latent_guide_weights_lure_x_inv = torch.zeros_like(self.sigmas, dtype=self.dtype, device=self.device)
latent_guide_weights_lure_y = torch.zeros_like(self.sigmas, dtype=self.dtype, device=self.device)
latent_guide_weights_lure_y_inv = torch.zeros_like(self.sigmas, dtype=self.dtype, device=self.device)
latent_guide_weights_mean = torch.zeros_like(self.sigmas, dtype=self.dtype, device=self.device)
latent_guide_weights_adain = torch.zeros_like(self.sigmas, dtype=self.dtype, device=self.device)
latent_guide_weights_attninj = torch.zeros_like(self.sigmas, dtype=self.dtype, device=self.device)
latent_guide_weights_style_pos = torch.zeros_like(self.sigmas, dtype=self.dtype, device=self.device)
latent_guide_weights_style_neg = torch.zeros_like(self.sigmas, dtype=self.dtype, device=self.device)
latent_guide = None
latent_guide_inv = None
latent_guide_mean = None
latent_guide_adain = None
latent_guide_attninj = None
latent_guide_style_pos = None
latent_guide_style_neg = None
self.drift_x_data = 0.0
self.drift_x_sync = 0.0
self.drift_y_data = 0.0
self.drift_y_sync = 0.0
self.drift_y_guide = 0.0
if guides is not None:
self.guide_mode = guides.get("guide_mode", "none")
if self.guide_mode.startswith("inversion"):
self.guide_mode = self.guide_mode.replace("inversion", "epsilon", 1)
else:
self.SAMPLE = True
self.UNSAMPLE = False
latent_guide_weight = guides.get("weight_masked", 0.)
latent_guide_weight_inv = guides.get("weight_unmasked", 0.)
latent_guide_weight_sync = guides.get("weight_masked_sync", 0.)
latent_guide_weight_sync_inv = guides.get("weight_unmasked_sync", 0.)
latent_guide_weight_drift_x = guides.get("weight_masked_drift_x", 0.)
latent_guide_weight_drift_x_inv = guides.get("weight_unmasked_drift_x", 0.)
latent_guide_weight_drift_y = guides.get("weight_masked_drift_y", 0.)
latent_guide_weight_drift_y_inv = guides.get("weight_unmasked_drift_y", 0.)
latent_guide_weight_lure_x = guides.get("weight_masked_lure_x", 0.)
latent_guide_weight_lure_x_inv = guides.get("weight_unmasked_lure_x", 0.)
latent_guide_weight_lure_y = guides.get("weight_masked_lure_y", 0.)
latent_guide_weight_lure_y_inv = guides.get("weight_unmasked_lure_y", 0.)
latent_guide_weight_mean = guides.get("weight_mean", 0.)
latent_guide_weight_adain = guides.get("weight_adain", 0.)
latent_guide_weight_attninj = guides.get("weight_attninj", 0.)
latent_guide_weight_style_pos = guides.get("weight_style_pos", 0.)
latent_guide_weight_style_neg = guides.get("weight_style_neg", 0.)
#latent_guide_synweight_style_pos = guides.get("synweight_style_pos", 0.)
#latent_guide_synweight_style_neg = guides.get("synweight_style_neg", 0.)
self.drift_x_data = guides.get("drift_x_data", 0.)
self.drift_x_sync = guides.get("drift_x_sync", 0.)
self.drift_y_data = guides.get("drift_y_data", 0.)
self.drift_y_sync = guides.get("drift_y_sync", 0.)
self.drift_y_guide = guides.get("drift_y_guide", 0.)
latent_guide_weights = guides.get("weights_masked")
latent_guide_weights_inv = guides.get("weights_unmasked")
latent_guide_weights_sync = guides.get("weights_masked_sync")
latent_guide_weights_sync_inv = guides.get("weights_unmasked_sync")
latent_guide_weights_drift_x = guides.get("weights_masked_drift_x")
latent_guide_weights_drift_x_inv = guides.get("weights_unmasked_drift_x")
latent_guide_weights_drift_y = guides.get("weights_masked_drift_y")
latent_guide_weights_drift_y_inv = guides.get("weights_unmasked_drift_y")
latent_guide_weights_lure_x = guides.get("weights_masked_lure_x")
latent_guide_weights_lure_x_inv = guides.get("weights_unmasked_lure_x")
latent_guide_weights_lure_y = guides.get("weights_masked_lure_y")
latent_guide_weights_lure_y_inv = guides.get("weights_unmasked_lure_y")
latent_guide_weights_mean = guides.get("weights_mean")
latent_guide_weights_adain = guides.get("weights_adain")
latent_guide_weights_attninj = guides.get("weights_attninj")
latent_guide_weights_style_pos = guides.get("weights_style_pos")
latent_guide_weights_style_neg = guides.get("weights_style_neg")
#latent_guide_synweights_style_p os = guides.get("synweights_style_pos")
#latent_guide_synweights_style_neg = guides.get("synweights_style_neg")
latent_guide = guides.get("guide_masked")
latent_guide_inv = guides.get("guide_unmasked")
latent_guide_mean = guides.get("guide_mean")
latent_guide_adain = guides.get("guide_adain")
latent_guide_attninj = guides.get("guide_attninj")
latent_guide_style_pos = guides.get("guide_style_pos")
latent_guide_style_neg = guides.get("guide_style_neg")
self.mask = guides.get("mask")
self.mask_inv = guides.get("unmask")
self.mask_sync = guides.get("mask_sync")
self.mask_drift_x = guides.get("mask_drift_x")
self.mask_drift_y = guides.get("mask_drift_y")
self.mask_lure_x = guides.get("mask_lure_x")
self.mask_lure_y = guides.get("mask_lure_y")
self.mask_mean = guides.get("mask_mean")
self.mask_adain = guides.get("mask_adain")
self.mask_attninj = guides.get("mask_attninj")
self.mask_style_pos = guides.get("mask_style_pos")
self.mask_style_neg = guides.get("mask_style_neg")
scheduler_ = guides.get("weight_scheduler_masked")
scheduler_inv_ = guides.get("weight_scheduler_unmasked")
scheduler_sync_ = guides.get("weight_scheduler_masked_sync")
scheduler_sync_inv_ = guides.get("weight_scheduler_unmasked_sync")
scheduler_drift_x_ = guides.get("weight_scheduler_masked_drift_x")
scheduler_drift_x_inv_ = guides.get("weight_scheduler_unmasked_drift_x")
scheduler_drift_y_ = guides.get("weight_scheduler_masked_drift_y")
scheduler_drift_y_inv_ = guides.get("weight_scheduler_unmasked_drift_y")
scheduler_lure_x_ = guides.get("weight_scheduler_masked_lure_x")
scheduler_lure_x_inv_ = guides.get("weight_scheduler_unmasked_lure_x")
scheduler_lure_y_ = guides.get("weight_scheduler_masked_lure_y")
scheduler_lure_y_inv_ = guides.get("weight_scheduler_unmasked_lure_y")
scheduler_mean_ = guides.get("weight_scheduler_mean")
scheduler_adain_ = guides.get("weight_scheduler_adain")
scheduler_attninj_ = guides.get("weight_scheduler_attninj")
scheduler_style_pos_ = guides.get("weight_scheduler_style_pos")
scheduler_style_neg_ = guides.get("weight_scheduler_style_neg")
start_steps_ = guides.get("start_step_masked", 0)
start_steps_inv_ = guides.get("start_step_unmasked", 0)
start_steps_sync_ = guides.get("start_step_masked_sync", 0)
start_steps_sync_inv_ = guides.get("start_step_unmasked_sync", 0)
start_steps_drift_x_ = guides.get("start_step_masked_drift_x", 0)
start_steps_drift_x_inv_ = guides.get("start_step_unmasked_drift_x", 0)
start_steps_drift_y_ = guides.get("start_step_masked_drift_y", 0)
start_steps_drift_y_inv_ = guides.get("start_step_unmasked_drift_y", 0)
start_steps_lure_x_ = guides.get("start_step_masked_lure_x", 0)
start_steps_lure_x_inv_ = guides.get("start_step_unmasked_lure_x", 0)
start_steps_lure_y_ = guides.get("start_step_masked_lure_y", 0)
start_steps_lure_y_inv_ = guides.get("start_step_unmasked_lure_y", 0)
start_steps_mean_ = guides.get("start_step_mean", 0)
start_steps_adain_ = guides.get("start_step_adain", 0)
start_steps_attninj_ = guides.get("start_step_attninj", 0)
start_steps_style_pos_ = guides.get("start_step_style_pos", 0)
start_steps_style_neg_ = guides.get("start_step_style_neg", 0)
steps_ = guides.get("end_step_masked", 1)
steps_inv_ = guides.get("end_step_unmasked", 1)
steps_sync_ = guides.get("end_step_masked_sync", 1)
steps_sync_inv_ = guides.get("end_step_unmasked_sync", 1)
steps_drift_x_ = guides.get("end_step_masked_drift_x", 1)
steps_drift_x_inv_ = guides.get("end_step_unmasked_drift_x", 1)
steps_drift_y_ = guides.get("end_step_masked_drift_y", 1)
steps_drift_y_inv_ = guides.get("end_step_unmasked_drift_y", 1)
steps_lure_x_ = guides.get("end_step_masked_lure_x", 1)
steps_lure_x_inv_ = guides.get("end_step_unmasked_lure_x", 1)
steps_lure_y_ = guides.get("end_step_masked_lure_y", 1)
steps_lure_y_inv_ = guides.get("end_step_unmasked_lure_y", 1)
steps_mean_ = guides.get("end_step_mean", 1)
steps_adain_ = guides.get("end_step_adain", 1)
steps_attninj_ = guides.get("end_step_attninj", 1)
steps_style_pos_ = guides.get("end_step_style_pos", 1)
steps_style_neg_ = guides.get("end_step_style_neg", 1)
self.guide_cossim_cutoff_ = guides.get("cutoff_masked", 1.)
self.guide_bkg_cossim_cutoff_ = guides.get("cutoff_unmasked", 1.)
self.guide_mean_cossim_cutoff_ = guides.get("cutoff_mean", 1.)
self.guide_adain_cossim_cutoff_ = guides.get("cutoff_adain", 1.)
self.guide_attninj_cossim_cutoff_ = guides.get("cutoff_attninj", 1.)
self.guide_style_pos_cossim_cutoff_ = guides.get("cutoff_style_pos", 1.)
self.guide_style_neg_cossim_cutoff_ = guides.get("cutoff_style_neg", 1.)
self.sync_lure_iter = guides.get("sync_lure_iter", 0)
self.sync_lure_sequence = guides.get("sync_lure_sequence")
#self.SYNC_SEPARATE = False
#if scheduler_sync_ is not None:
# self.SYNC_SEPARATE = True
self.SYNC_SEPARATE = True
if scheduler_sync_ is None and scheduler_ is not None:
latent_guide_weight_sync = latent_guide_weight
latent_guide_weight_sync_inv = latent_guide_weight_inv
latent_guide_weights_sync = latent_guide_weights
latent_guide_weights_sync_inv = latent_guide_weights_inv
scheduler_sync_ = scheduler_
scheduler_sync_inv_ = scheduler_inv_
start_steps_sync_ = start_steps_
start_steps_sync_inv_ = start_steps_inv_
steps_sync_ = steps_
steps_sync_inv_ = steps_inv_
self.SYNC_drift_X = True
if scheduler_drift_x_ is None and scheduler_ is not None:
self.SYNC_drift_X = False
latent_guide_weight_drift_x = latent_guide_weight
latent_guide_weight_drift_x_inv = latent_guide_weight_inv
latent_guide_weights_drift_x = latent_guide_weights
latent_guide_weights_drift_x_inv = latent_guide_weights_inv
scheduler_drift_x_ = scheduler_
scheduler_drift_x_inv_ = scheduler_inv_
start_steps_drift_x_ = start_steps_
start_steps_drift_x_inv_ = start_steps_inv_
steps_drift_x_ = steps_
steps_drift_x_inv_ = steps_inv_
self.SYNC_drift_Y = True
if scheduler_drift_y_ is None and scheduler_ is not None:
self.SYNC_drift_Y = False
latent_guide_weight_drift_y = latent_guide_weight
latent_guide_weight_drift_y_inv = latent_guide_weight_inv
latent_guide_weights_drift_y = latent_guide_weights
latent_guide_weights_drift_y_inv = latent_guide_weights_inv
scheduler_drift_y_ = scheduler_
scheduler_drift_y_inv_ = scheduler_inv_
start_steps_drift_y_ = start_steps_
start_steps_drift_y_inv_ = start_steps_inv_
steps_drift_y_ = steps_
steps_drift_y_inv_ = steps_inv_
self.SYNC_LURE_X = True
if scheduler_lure_x_ is None and scheduler_ is not None:
self.SYNC_LURE_X = False
latent_guide_weight_lure_x = latent_guide_weight
latent_guide_weight_lure_x_inv = latent_guide_weight_inv
latent_guide_weights_lure_x = latent_guide_weights
latent_guide_weights_lure_x_inv = latent_guide_weights_inv
scheduler_lure_x_ = scheduler_
scheduler_lure_x_inv_ = scheduler_inv_
start_steps_lure_x_ = start_steps_
start_steps_lure_x_inv_ = start_steps_inv_
steps_lure_x_ = steps_
steps_lure_x_inv_ = steps_inv_
self.SYNC_LURE_Y = True
if scheduler_lure_y_ is None and scheduler_ is not None:
self.SYNC_LURE_Y = False
latent_guide_weight_lure_y = latent_guide_weight
latent_guide_weight_lure_y_inv = latent_guide_weight_inv
latent_guide_weights_lure_y = latent_guide_weights
latent_guide_weights_lure_y_inv = latent_guide_weights_inv
scheduler_lure_y_ = scheduler_
scheduler_lure_y_inv_ = scheduler_inv_
start_steps_lure_y_ = start_steps_
start_steps_lure_y_inv_ = start_steps_inv_
steps_lure_y_ = steps_
steps_lure_y_inv_ = steps_inv_
if self.mask is not None and self.mask.shape [0] > 1 and self.VIDEO is False:
self.mask = self.mask [batch_num].unsqueeze(0)
if self.mask_inv is not None and self.mask_inv.shape[0] > 1 and self.VIDEO is False:
self.mask_inv = self.mask_inv[batch_num].unsqueeze(0)
if self.mask_sync is not None and self.mask_sync.shape[0] > 1 and self.VIDEO is False:
self.mask_sync = self.mask_sync[batch_num].unsqueeze(0)
if self.mask_drift_x is not None and self.mask_drift_x.shape[0] > 1 and self.VIDEO is False:
self.mask_drift_x = self.mask_drift_x[batch_num].unsqueeze(0)
if self.mask_drift_y is not None and self.mask_drift_y.shape[0] > 1 and self.VIDEO is False:
self.mask_drift_y = self.mask_drift_y[batch_num].unsqueeze(0)
if self.mask_lure_x is not None and self.mask_lure_x.shape[0] > 1 and self.VIDEO is False:
self.mask_lure_x = self.mask_lure_x[batch_num].unsqueeze(0)
if self.mask_lure_y is not None and self.mask_lure_y.shape[0] > 1 and self.VIDEO is False:
self.mask_lure_y = self.mask_lure_y[batch_num].unsqueeze(0)
if self.guide_mode.startswith("fully_") and not RK_IMPLICIT:
self.guide_mode = self.guide_mode[6:] # fully_pseudoimplicit is only supported for implicit samplers, default back to pseudoimplicit
guide_sigma_shift = self.EO("guide_sigma_shift", 0.0) # effectively hardcoding shift to 0 !!!!!!
if latent_guide_weights is None and scheduler_ is not None:
total_steps = steps_ - start_steps_
latent_guide_weights = get_sigmas(self.model, scheduler_, total_steps, 1.0, shift=guide_sigma_shift).to(dtype=self.dtype, device=self.device) / self.sigma_max
prepend = torch.zeros(start_steps_, dtype=self.dtype, device=self.device)
latent_guide_weights = torch.cat((prepend, latent_guide_weights.to(self.device)), dim=0)
if latent_guide_weights_inv is None and scheduler_inv_ is not None:
total_steps = steps_inv_ - start_steps_inv_
latent_guide_weights_inv = get_sigmas(self.model, scheduler_inv_, total_steps, 1.0, shift=guide_sigma_shift).to(dtype=self.dtype, device=self.device) / self.sigma_max
prepend = torch.zeros(start_steps_inv_, dtype=self.dtype, device=self.device)
latent_guide_weights_inv = torch.cat((prepend, latent_guide_weights_inv.to(self.device)), dim=0)
if latent_guide_weights_sync is None and scheduler_sync_ is not None:
total_steps = steps_sync_ - start_steps_sync_
latent_guide_weights_sync = get_sigmas(self.model, scheduler_sync_, total_steps, 1.0, shift=guide_sigma_shift).to(dtype=self.dtype, device=self.device) / self.sigma_max
prepend = torch.zeros(start_steps_sync_, dtype=self.dtype, device=self.device)
latent_guide_weights_sync = torch.cat((prepend, latent_guide_weights_sync.to(self.device)), dim=0)
if latent_guide_weights_sync_inv is None and scheduler_sync_inv_ is not None:
total_steps = steps_sync_inv_ - start_steps_sync_inv_
latent_guide_weights_sync_inv = get_sigmas(self.model, scheduler_sync_inv_, total_steps, 1.0, shift=guide_sigma_shift).to(dtype=self.dtype, device=self.device) / self.sigma_max
prepend = torch.zeros(start_steps_sync_inv_, dtype=self.dtype, device=self.device)
latent_guide_weights_sync_inv = torch.cat((prepend, latent_guide_weights_sync_inv.to(self.device)), dim=0)
if latent_guide_weights_drift_x is None and scheduler_drift_x_ is not None:
total_steps = steps_drift_x_ - start_steps_drift_x_
latent_guide_weights_drift_x = get_sigmas(self.model, scheduler_drift_x_, total_steps, 1.0, shift=guide_sigma_shift).to(dtype=self.dtype, device=self.device) / self.sigma_max
prepend = torch.zeros(start_steps_drift_x_, dtype=self.dtype, device=self.device)
latent_guide_weights_drift_x = torch.cat((prepend, latent_guide_weights_drift_x.to(self.device)), dim=0)
if latent_guide_weights_drift_x_inv is None and scheduler_drift_x_inv_ is not None:
total_steps = steps_drift_x_inv_ - start_steps_drift_x_inv_
latent_guide_weights_drift_x_inv = get_sigmas(self.model, scheduler_drift_x_inv_, total_steps, 1.0, shift=guide_sigma_shift).to(dtype=self.dtype, device=self.device) / self.sigma_max
prepend = torch.zeros(start_steps_drift_x_inv_, dtype=self.dtype, device=self.device)
latent_guide_weights_drift_x_inv = torch.cat((prepend, latent_guide_weights_drift_x_inv.to(self.device)), dim=0)
if latent_guide_weights_drift_y is None and scheduler_drift_y_ is not None:
total_steps = steps_drift_y_ - start_steps_drift_y_
latent_guide_weights_drift_y = get_sigmas(self.model, scheduler_drift_y_, total_steps, 1.0, shift=guide_sigma_shift).to(dtype=self.dtype, device=self.device) / self.sigma_max
prepend = torch.zeros(start_steps_drift_y_, dtype=self.dtype, device=self.device)
latent_guide_weights_drift_y = torch.cat((prepend, latent_guide_weights_drift_y.to(self.device)), dim=0)
if latent_guide_weights_drift_y_inv is None and scheduler_drift_y_inv_ is not None:
total_steps = steps_drift_y_inv_ - start_steps_drift_y_inv_
latent_guide_weights_drift_y_inv = get_sigmas(self.model, scheduler_drift_y_inv_, total_steps, 1.0, shift=guide_sigma_shift).to(dtype=self.dtype, device=self.device) / self.sigma_max
prepend = torch.zeros(start_steps_drift_y_inv_, dtype=self.dtype, device=self.device)
latent_guide_weights_drift_y_inv = torch.cat((prepend, latent_guide_weights_drift_y_inv.to(self.device)), dim=0)
if latent_guide_weights_lure_x is None and scheduler_lure_x_ is not None:
total_steps = steps_lure_x_ - start_steps_lure_x_
latent_guide_weights_lure_x = get_sigmas(self.model, scheduler_lure_x_, total_steps, 1.0, shift=guide_sigma_shift).to(dtype=self.dtype, device=self.device) / self.sigma_max
prepend = torch.zeros(start_steps_lure_x_, dtype=self.dtype, device=self.device)
latent_guide_weights_lure_x = torch.cat((prepend, latent_guide_weights_lure_x.to(self.device)), dim=0)
if latent_guide_weights_lure_x_inv is None and scheduler_lure_x_inv_ is not None:
total_steps = steps_lure_x_inv_ - start_steps_lure_x_inv_
latent_guide_weights_lure_x_inv = get_sigmas(self.model, scheduler_lure_x_inv_, total_steps, 1.0, shift=guide_sigma_shift).to(dtype=self.dtype, device=self.device) / self.sigma_max
prepend = torch.zeros(start_steps_lure_x_inv_, dtype=self.dtype, device=self.device)
latent_guide_weights_lure_x_inv = torch.cat((prepend, latent_guide_weights_lure_x_inv.to(self.device)), dim=0)
if latent_guide_weights_lure_y is None and scheduler_lure_y_ is not None:
total_steps = steps_lure_y_ - start_steps_lure_y_
latent_guide_weights_lure_y = get_sigmas(self.model, scheduler_lure_y_, total_steps, 1.0, shift=guide_sigma_shift).to(dtype=self.dtype, device=self.device) / self.sigma_max
prepend = torch.zeros(start_steps_lure_y_, dtype=self.dtype, device=self.device)
latent_guide_weights_lure_y = torch.cat((prepend, latent_guide_weights_lure_y.to(self.device)), dim=0)
if latent_guide_weights_lure_y_inv is None and scheduler_lure_y_inv_ is not None:
total_steps = steps_lure_y_inv_ - start_steps_lure_y_inv_
latent_guide_weights_lure_y_inv = get_sigmas(self.model, scheduler_lure_y_inv_, total_steps, 1.0, shift=guide_sigma_shift).to(dtype=self.dtype, device=self.device) / self.sigma_max
prepend = torch.zeros(start_steps_lure_y_inv_, dtype=self.dtype, device=self.device)
latent_guide_weights_lure_y_inv = torch.cat((prepend, latent_guide_weights_lure_y_inv.to(self.device)), dim=0)
if latent_guide_weights_mean is None and scheduler_mean_ is not None:
total_steps = steps_mean_ - start_steps_mean_
latent_guide_weights_mean = get_sigmas(self.model, scheduler_mean_, total_steps, 1.0, shift=guide_sigma_shift).to(dtype=self.dtype, device=self.device) / self.sigma_max
prepend = torch.zeros(start_steps_mean_, dtype=self.dtype, device=self.device)
latent_guide_weights_mean = torch.cat((prepend, latent_guide_weights_mean.to(self.device)), dim=0)
if latent_guide_weights_adain is None and scheduler_adain_ is not None:
total_steps = steps_adain_ - start_steps_adain_
latent_guide_weights_adain = get_sigmas(self.model, scheduler_adain_, total_steps, 1.0, shift=guide_sigma_shift).to(dtype=self.dtype, device=self.device) / self.sigma_max
prepend = torch.zeros(start_steps_adain_, dtype=self.dtype, device=self.device)
latent_guide_weights_adain = torch.cat((prepend, latent_guide_weights_adain.to(self.device)), dim=0)
if latent_guide_weights_attninj is None and scheduler_attninj_ is not None:
total_steps = steps_attninj_ - start_steps_attninj_
latent_guide_weights_attninj = get_sigmas(self.model, scheduler_attninj_, total_steps, 1.0, shift=guide_sigma_shift).to(dtype=self.dtype, device=self.device) / self.sigma_max
prepend = torch.zeros(start_steps_attninj_, dtype=self.dtype, device=self.device)
latent_guide_weights_attninj = torch.cat((prepend, latent_guide_weights_attninj.to(self.device)), dim=0)
if latent_guide_weights_style_pos is None and scheduler_style_pos_ is not None:
total_steps = steps_style_pos_ - start_steps_style_pos_
latent_guide_weights_style_pos = get_sigmas(self.model, scheduler_style_pos_, total_steps, 1.0, shift=guide_sigma_shift).to(dtype=self.dtype, device=self.device) / self.sigma_max
prepend = torch.zeros(start_steps_style_pos_, dtype=self.dtype, device=self.device)
latent_guide_weights_style_pos = torch.cat((prepend, latent_guide_weights_style_pos.to(self.device)), dim=0)
if latent_guide_weights_style_neg is None and scheduler_style_neg_ is not None:
total_steps = steps_style_neg_ - start_steps_style_neg_
latent_guide_weights_style_neg = get_sigmas(self.model, scheduler_style_neg_, total_steps, 1.0, shift=guide_sigma_shift).to(dtype=self.dtype, device=self.device) / self.sigma_max
prepend = torch.zeros(start_steps_style_neg_, dtype=self.dtype, device=self.device)
latent_guide_weights_style_neg = torch.cat((prepend, latent_guide_weights_style_neg.to(self.device)), dim=0)
if scheduler_ != "constant":
latent_guide_weights = initialize_or_scale(latent_guide_weights, latent_guide_weight, self.max_steps)
if scheduler_inv_ != "constant":
latent_guide_weights_inv = initialize_or_scale(latent_guide_weights_inv, latent_guide_weight_inv, self.max_steps)
if scheduler_sync_ != "constant":
latent_guide_weights_sync = initialize_or_scale(latent_guide_weights_sync, latent_guide_weight_sync, self.max_steps)
if scheduler_sync_inv_ != "constant":
latent_guide_weights_sync_inv = initialize_or_scale(latent_guide_weights_sync_inv, latent_guide_weight_sync_inv, self.max_steps)
latent_guide_weights_sync = 1 - latent_guide_weights_sync if latent_guide_weights_sync is not None else latent_guide_weights
latent_guide_weights_sync_inv = 1 - latent_guide_weights_sync_inv if latent_guide_weights_sync_inv is not None else latent_guide_weights_inv
latent_guide_weight_sync = 1 - latent_guide_weight_sync
latent_guide_weight_sync_inv = 1 - latent_guide_weight_sync_inv# these are more intuitive to use if these are reversed... so that sync weight = 1.0 means "maximum guide strength"
if scheduler_drift_x_ != "constant":
latent_guide_weights_drift_x = initialize_or_scale(latent_guide_weights_drift_x, latent_guide_weight_drift_x, self.max_steps)
if scheduler_drift_x_inv_ != "constant":
latent_guide_weights_drift_x_inv = initialize_or_scale(latent_guide_weights_drift_x_inv, latent_guide_weight_drift_x_inv, self.max_steps)
if scheduler_drift_y_ != "constant":
latent_guide_weights_drift_y = initialize_or_scale(latent_guide_weights_drift_y, latent_guide_weight_drift_y, self.max_steps)
if scheduler_drift_y_inv_ != "constant":
latent_guide_weights_drift_y_inv = initialize_or_scale(latent_guide_weights_drift_y_inv, latent_guide_weight_drift_y_inv, self.max_steps)
if scheduler_lure_x_ != "constant":
latent_guide_weights_lure_x = initialize_or_scale(latent_guide_weights_lure_x, latent_guide_weight_lure_x, self.max_steps)
if scheduler_lure_x_inv_ != "constant":
latent_guide_weights_lure_x_inv = initialize_or_scale(latent_guide_weights_lure_x_inv, latent_guide_weight_lure_x_inv, self.max_steps)
if scheduler_lure_y_ != "constant":
latent_guide_weights_lure_y = initialize_or_scale(latent_guide_weights_lure_y, latent_guide_weight_lure_y, self.max_steps)
if scheduler_lure_y_inv_ != "constant":
latent_guide_weights_lure_y_inv = initialize_or_scale(latent_guide_weights_lure_y_inv, latent_guide_weight_lure_y_inv, self.max_steps)
if scheduler_mean_ != "constant":
latent_guide_weights_mean = initialize_or_scale(latent_guide_weights_mean, latent_guide_weight_mean, self.max_steps)
if scheduler_adain_ != "constant":
latent_guide_weights_adain = initialize_or_scale(latent_guide_weights_adain, latent_guide_weight_adain, self.max_steps)
if scheduler_attninj_ != "constant":
latent_guide_weights_attninj = initialize_or_scale(latent_guide_weights_attninj, latent_guide_weight_attninj, self.max_steps)
if scheduler_style_pos_ != "constant":
latent_guide_weights_style_pos = initialize_or_scale(latent_guide_weights_style_pos, latent_guide_weight_style_pos, self.max_steps)
if scheduler_style_neg_ != "constant":
latent_guide_weights_style_neg = initialize_or_scale(latent_guide_weights_style_neg, latent_guide_weight_style_neg, self.max_steps)
latent_guide_weights [steps_ :] = 0
latent_guide_weights_inv [steps_inv_ :] = 0
latent_guide_weights_sync [steps_sync_ :] = 1 #one
latent_guide_weights_sync_inv [steps_sync_inv_ :] = 1 #one
latent_guide_weights_drift_x [steps_drift_x_ :] = 0
latent_guide_weights_drift_x_inv[steps_drift_x_inv_:] = 0
latent_guide_weights_drift_y [steps_drift_y_ :] = 0
latent_guide_weights_drift_y_inv[steps_drift_y_inv_:] = 0
latent_guide_weights_lure_x [steps_lure_x_ :] = 0
latent_guide_weights_lure_x_inv [steps_lure_x_inv_ :] = 0
latent_guide_weights_lure_y [steps_lure_y_ :] = 0
latent_guide_weights_lure_y_inv [steps_lure_y_inv_ :] = 0
latent_guide_weights_mean [steps_mean_ :] = 0
latent_guide_weights_adain [steps_adain_ :] = 0
latent_guide_weights_attninj [steps_attninj_ :] = 0
latent_guide_weights_style_pos [steps_style_pos_ :] = 0
latent_guide_weights_style_neg [steps_style_neg_ :] = 0
self.lgw = F.pad(latent_guide_weights, (0, self.max_steps), value=0.0)
self.lgw_inv = F.pad(latent_guide_weights_inv, (0, self.max_steps), value=0.0)
self.lgw_sync = F.pad(latent_guide_weights_sync, (0, self.max_steps), value=1.0) #one
self.lgw_sync_inv = F.pad(latent_guide_weights_sync_inv, (0, self.max_steps), value=1.0) #one
self.lgw_drift_x = F.pad(latent_guide_weights_drift_x, (0, self.max_steps), value=0.0)
self.lgw_drift_x_inv = F.pad(latent_guide_weights_drift_x_inv, (0, self.max_steps), value=0.0)
self.lgw_drift_y = F.pad(latent_guide_weights_drift_y, (0, self.max_steps), value=0.0)
self.lgw_drift_y_inv = F.pad(latent_guide_weights_drift_y_inv, (0, self.max_steps), value=0.0)
self.lgw_lure_x = F.pad(latent_guide_weights_lure_x, (0, self.max_steps), value=0.0)
self.lgw_lure_x_inv = F.pad(latent_guide_weights_lure_x_inv, (0, self.max_steps), value=0.0)
self.lgw_lure_y = F.pad(latent_guide_weights_lure_y, (0, self.max_steps), value=0.0)
self.lgw_lure_y_inv = F.pad(latent_guide_weights_lure_y_inv, (0, self.max_steps), value=0.0)
self.lgw_mean = F.pad(latent_guide_weights_mean, (0, self.max_steps), value=0.0)
self.lgw_adain = F.pad(latent_guide_weights_adain, (0, self.max_steps), value=0.0)
self.lgw_attninj = F.pad(latent_guide_weights_attninj, (0, self.max_steps), value=0.0)
self.lgw_style_pos = F.pad(latent_guide_weights_style_pos, (0, self.max_steps), value=0.0)
self.lgw_style_neg = F.pad(latent_guide_weights_style_neg, (0, self.max_steps), value=0.0)
mask, self.LGW_MASK_RESCALE_MIN = prepare_mask(x, self.mask, self.LGW_MASK_RESCALE_MIN)
self.mask = mask.to(dtype=self.dtype, device=self.device)
if self.mask_inv is not None:
mask_inv, self.LGW_MASK_RESCALE_MIN = prepare_mask(x, self.mask_inv, self.LGW_MASK_RESCALE_MIN)
self.mask_inv = mask_inv.to(dtype=self.dtype, device=self.device)
else:
self.mask_inv = (1-self.mask)
if self.mask_sync is not None:
mask_sync, self.LGW_MASK_RESCALE_MIN = prepare_mask(x, self.mask_sync, self.LGW_MASK_RESCALE_MIN)
self.mask_sync = mask_sync.to(dtype=self.dtype, device=self.device)
else:
self.mask_sync = self.mask
if self.mask_drift_x is not None:
mask_drift_x, self.LGW_MASK_RESCALE_MIN = prepare_mask(x, self.mask_drift_x, self.LGW_MASK_RESCALE_MIN)
self.mask_drift_x = mask_drift_x.to(dtype=self.dtype, device=self.device)
else:
self.mask_drift_x = self.mask
if self.mask_drift_y is not None:
mask_drift_y, self.LGW_MASK_RESCALE_MIN = prepare_mask(x, self.mask_drift_y, self.LGW_MASK_RESCALE_MIN)
self.mask_drift_y = mask_drift_y.to(dtype=self.dtype, device=self.device)
else:
self.mask_drift_y = self.mask
if self.mask_lure_x is not None:
mask_lure_x, self.LGW_MASK_RESCALE_MIN = prepare_mask(x, self.mask_lure_x, self.LGW_MASK_RESCALE_MIN)
self.mask_lure_x = mask_lure_x.to(dtype=self.dtype, device=self.device)
else:
self.mask_lure_x = self.mask
if self.mask_lure_y is not None:
mask_lure_y, self.LGW_MASK_RESCALE_MIN = prepare_mask(x, self.mask_lure_y, self.LGW_MASK_RESCALE_MIN)
self.mask_lure_y = mask_lure_y.to(dtype=self.dtype, device=self.device)
else:
self.mask_lure_y = self.mask
mask_style_pos, self.LGW_MASK_RESCALE_MIN = prepare_mask(x, self.mask_style_pos, self.LGW_MASK_RESCALE_MIN)
self.mask_style_pos = mask_style_pos.to(dtype=self.dtype, device=self.device)
mask_style_neg, self.LGW_MASK_RESCALE_MIN = prepare_mask(x, self.mask_style_neg, self.LGW_MASK_RESCALE_MIN)
self.mask_style_neg = mask_style_neg.to(dtype=self.dtype, device=self.device)
if latent_guide is not None:
self.HAS_LATENT_GUIDE = True
if type(latent_guide) is dict:
if latent_guide ['samples'].shape[0] > 1:
latent_guide['samples'] = latent_guide ['samples'][batch_num].unsqueeze(0)
latent_guide_samples = self.model.inner_model.inner_model.process_latent_in(latent_guide['samples']).clone().to(dtype=self.dtype, device=self.device)
elif type(latent_guide) is torch.Tensor:
latent_guide_samples = latent_guide.to(dtype=self.dtype, device=self.device)
else:
raise ValueError(f"Invalid latent type: {type(latent_guide)}")
if self.VIDEO and latent_guide_samples.shape[2] == 1:
latent_guide_samples = latent_guide_samples.repeat(1, 1, x.shape[2], 1, 1)
if self.SAMPLE:
self.y0 = latent_guide_samples
elif sigma_init != 0.0:
pass
elif self.UNSAMPLE: # and self.mask is not None:
mask = self.mask.to(x.device)
x = (1-mask) * x + mask * latent_guide_samples.to(x.device)
else:
x = latent_guide_samples.to(x.device)
else:
self.y0 = torch.zeros_like(x, dtype=self.dtype, device=self.device)
if latent_guide_inv is not None:
self.HAS_LATENT_GUIDE_INV = True
if type(latent_guide_inv) is dict:
if latent_guide_inv['samples'].shape[0] > 1:
latent_guide_inv['samples'] = latent_guide_inv['samples'][batch_num].unsqueeze(0)
latent_guide_inv_samples = self.model.inner_model.inner_model.process_latent_in(latent_guide_inv['samples']).clone().to(dtype=self.dtype, device=self.device)
elif type(latent_guide_inv) is torch.Tensor:
latent_guide_inv_samples = latent_guide_inv.to(dtype=self.dtype, device=self.device)
else:
raise ValueError(f"Invalid latent type: {type(latent_guide_inv)}")
if self.VIDEO and latent_guide_inv_samples.shape[2] == 1:
latent_guide_inv_samples = latent_guide_inv_samples.repeat(1, 1, x.shape[2], 1, 1)
if self.SAMPLE:
self.y0_inv = latent_guide_inv_samples
elif sigma_init != 0.0:
pass
elif self.UNSAMPLE: # and self.mask is not None:
mask_inv = self.mask_inv.to(x.device)
x = (1-mask_inv) * x + mask_inv * latent_guide_inv_samples.to(x.device) #fixed old approach, which was mask, (1-mask)
else:
x = latent_guide_inv_samples.to(x.device) #THIS COULD LEAD TO WEIRD BEHAVIOR! OVERWRITING X WITH LG_INV AFTER SETTING TO LG above!
else:
self.y0_inv = torch.zeros_like(x, dtype=self.dtype, device=self.device)
if latent_guide_mean is not None:
self.HAS_LATENT_GUIDE_MEAN = True
if type(latent_guide_mean) is dict:
if latent_guide_mean['samples'].shape[0] > 1:
latent_guide_mean['samples'] = latent_guide_mean['samples'][batch_num].unsqueeze(0)
latent_guide_mean_samples = self.model.inner_model.inner_model.process_latent_in(latent_guide_mean['samples']).clone().to(dtype=self.dtype, device=self.device)
elif type(latent_guide_mean) is torch.Tensor:
latent_guide_mean_samples = latent_guide_mean.to(dtype=self.dtype, device=self.device)
else:
raise ValueError(f"Invalid latent type: {type(latent_guide_mean)}")
if self.VIDEO and latent_guide_mean_samples.shape[2] == 1:
latent_guide_mean_samples = latent_guide_mean_samples.repeat(1, 1, x.shape[2], 1, 1)
self.y0_mean = latent_guide_mean_samples
"""if self.SAMPLE:
self.y0_mean = latent_guide_mean_samples
elif self.UNSAMPLE: # and self.mask is not None:
mask_mean = self.mask_mean.to(x.device)
x = (1-mask_mean) * x + mask_mean * latent_guide_mean_samples.to(x.device) #fixed old approach, which was mask, (1-mask) # NECESSARY?
else:
x = latent_guide_mean_samples.to(x.device) #THIS COULD LEAD TO WEIRD BEHAVIOR! OVERWRITING X WITH LG_MEAN AFTER SETTING TO LG above!"""
else:
self.y0_mean = torch.zeros_like(x, dtype=self.dtype, device=self.device)
if latent_guide_adain is not None:
self.HAS_LATENT_GUIDE_ADAIN = True
if type(latent_guide_adain) is dict:
if latent_guide_adain['samples'].shape[0] > 1:
latent_guide_adain['samples'] = latent_guide_adain['samples'][batch_num].unsqueeze(0)
latent_guide_adain_samples = self.model.inner_model.inner_model.process_latent_in(latent_guide_adain['samples']).clone().to(dtype=self.dtype, device=self.device)
elif type(latent_guide_adain) is torch.Tensor:
latent_guide_adain_samples = latent_guide_adain.to(dtype=self.dtype, device=self.device)
else:
raise ValueError(f"Invalid latent type: {type(latent_guide_adain)}")
if self.VIDEO and latent_guide_adain_samples.shape[2] == 1:
latent_guide_adain_samples = latent_guide_adain_samples.repeat(1, 1, x.shape[2], 1, 1)
self.y0_adain = latent_guide_adain_samples
"""if self.SAMPLE:
self.y0_adain = latent_guide_adain_samples
elif self.UNSAMPLE: # and self.mask is not None:
if self.mask_adain is not None:
mask_adain = self.mask_adain.to(x.device)
x = (1-mask_adain) * x + mask_adain * latent_guide_adain_samples.to(x.device) #fixed old approach, which was mask, (1-mask) # NECESSARY?
else:
x = latent_guide_adain_samples.to(x.device)
else:
x = latent_guide_adain_samples.to(x.device) #THIS COULD LEAD TO WEIRD BEHAVIOR! OVERWRITING X WITH LG_ADAIN AFTER SETTING TO LG above!"""
else:
self.y0_adain = torch.zeros_like(x, dtype=self.dtype, device=self.device)
if latent_guide_attninj is not None:
self.HAS_LATENT_GUIDE_ATTNINJ = True
if type(latent_guide_attninj) is dict:
if latent_guide_attninj['samples'].shape[0] > 1:
latent_guide_attninj['samples'] = latent_guide_attninj['samples'][batch_num].unsqueeze(0)
latent_guide_attninj_samples = self.model.inner_model.inner_model.process_latent_in(latent_guide_attninj['samples']).clone().to(dtype=self.dtype, device=self.device)
elif type(latent_guide_attninj) is torch.Tensor:
latent_guide_attninj_samples = latent_guide_attninj.to(dtype=self.dtype, device=self.device)
else:
raise ValueError(f"Invalid latent type: {type(latent_guide_attninj)}")
if self.VIDEO and latent_guide_attninj_samples.shape[2] == 1:
latent_guide_attninj_samples = latent_guide_attninj_samples.repeat(1, 1, x.shape[2], 1, 1)
self.y0_attninj = latent_guide_attninj_samples
"""if self.SAMPLE:
self.y0_attninj = latent_guide_attninj_samples
elif self.UNSAMPLE: # and self.mask is not None:
if self.mask_attninj is not None:
mask_attninj = self.mask_attninj.to(x.device)
x = (1-mask_attninj) * x + mask_attninj * latent_guide_attninj_samples.to(x.device) #fixed old approach, which was mask, (1-mask) # NECESSARY?
else:
x = latent_guide_attninj_samples.to(x.device)
else:
x = latent_guide_attninj_samples.to(x.device) #THIS COULD LEAD TO WEIRD BEHAVIOR! OVERWRITING X WITH LG_ADAIN AFTER SETTING TO LG above!"""
else:
self.y0_attninj = torch.zeros_like(x, dtype=self.dtype, device=self.device)
if latent_guide_style_pos is not None:
self.HAS_LATENT_GUIDE_STYLE_POS = True
if type(latent_guide_style_pos) is dict:
if latent_guide_style_pos['samples'].shape[0] > 1:
latent_guide_style_pos['samples'] = latent_guide_style_pos['samples'][batch_num].unsqueeze(0)
latent_guide_style_pos_samples = self.model.inner_model.inner_model.process_latent_in(latent_guide_style_pos['samples']).clone().to(dtype=self.dtype, device=self.device)
elif type(latent_guide_style_pos) is torch.Tensor:
latent_guide_style_pos_samples = latent_guide_style_pos.to(dtype=self.dtype, device=self.device)
else:
raise ValueError(f"Invalid latent type: {type(latent_guide_style_pos)}")
if self.VIDEO and latent_guide_style_pos_samples.shape[2] == 1:
latent_guide_style_pos_samples = latent_guide_style_pos_samples.repeat(1, 1, x.shape[2], 1, 1)
self.y0_style_pos = latent_guide_style_pos_samples
"""if self.SAMPLE:
self.y0_style_pos = latent_guide_style_pos_samples
elif self.UNSAMPLE: # and self.mask is not None:
if self.mask_style_pos is not None:
mask_style_pos = self.mask_style_pos.to(x.device)
x = (1-mask_style_pos) * x + mask_style_pos * latent_guide_style_pos_samples.to(x.device) #fixed old approach, which was mask, (1-mask) # NECESSARY?
else:
x = latent_guide_style_pos_samples.to(x.device)
else:
x = latent_guide_style_pos_samples.to(x.device) #THIS COULD LEAD TO WEIRD BEHAVIOR! OVERWRITING X WITH LG_ADAIN AFTER SETTING TO LG above!"""
else:
self.y0_style_pos = torch.zeros_like(x, dtype=self.dtype, device=self.device)
if latent_guide_style_neg is not None:
self.HAS_LATENT_GUIDE_STYLE_NEG = True
if type(latent_guide_style_neg) is dict:
if latent_guide_style_neg['samples'].shape[0] > 1:
latent_guide_style_neg['samples'] = latent_guide_style_neg['samples'][batch_num].unsqueeze(0)
latent_guide_style_neg_samples = self.model.inner_model.inner_model.process_latent_in(latent_guide_style_neg['samples']).clone().to(dtype=self.dtype, device=self.device)
elif type(latent_guide_style_neg) is torch.Tensor:
latent_guide_style_neg_samples = latent_guide_style_neg.to(dtype=self.dtype, device=self.device)
else:
raise ValueError(f"Invalid latent type: {type(latent_guide_style_neg)}")
if self.VIDEO and latent_guide_style_neg_samples.shape[2] == 1:
latent_guide_style_neg_samples = latent_guide_style_neg_samples.repeat(1, 1, x.shape[2], 1, 1)
self.y0_style_neg = latent_guide_style_neg_samples
"""if self.SAMPLE:
self.y0_style_neg = latent_guide_style_neg_samples
elif self.UNSAMPLE: # and self.mask is not None:
if self.mask_style_neg is not None:
mask_style_neg = self.mask_style_neg.to(x.device)
x = (1-mask_style_neg) * x + mask_style_neg * latent_guide_style_neg_samples.to(x.device) #fixed old approach, which was mask, (1-mask) # NECESSARY?
else:
x = latent_guide_style_neg_samples.to(x.device)
else:
x = latent_guide_style_neg_samples.to(x.device) #THIS COULD LEAD TO WEIRD BEHAVIOR! OVERWRITING X WITH LG_ADAIN AFTER SETTING TO LG above!"""
else:
self.y0_style_neg = torch.zeros_like(x, dtype=self.dtype, device=self.device)
if self.UNSAMPLE and not self.SAMPLE: #sigma_next > sigma: # TODO: VERIFY APPROACH FOR INVERSION
if guide_inversion_y0 is not None:
self.y0 = guide_inversion_y0
else:
self.y0 = noise_sampler(sigma=self.sigma_max, sigma_next=self.sigma_min).to(dtype=self.dtype, device=self.device)
self.y0 = normalize_zscore(self.y0, channelwise=True, inplace=True)
self.y0 *= self.sigma_max
if guide_inversion_y0_inv is not None:
self.y0_inv = guide_inversion_y0_inv
else:
self.y0_inv = noise_sampler(sigma=self.sigma_max, sigma_next=self.sigma_min).to(dtype=self.dtype, device=self.device)
self.y0_inv = normalize_zscore(self.y0_inv, channelwise=True, inplace=True)
self.y0_inv*= self.sigma_max
if self.VIDEO and self.frame_weights_mgr is not None:
num_frames = x.shape[2]
self.frame_weights = self.frame_weights_mgr.get_frame_weights_by_name('frame_weights', num_frames)
self.frame_weights_inv = self.frame_weights_mgr.get_frame_weights_by_name('frame_weights_inv', num_frames)
x, self.y0, self.y0_inv = self.normalize_inputs(x, self.y0, self.y0_inv) # ???
return x
def prepare_weighted_masks(self, step:int, lgw_type="default") -> Tuple[Tensor, Tensor]:
    """Build the pair of latent-guide weight masks for one step.

    Combines the scheduled guide weight for `step` with the spatial mask of the
    requested guide channel (`lgw_type`: "sync", "drift_x", "drift_y", "lure_x",
    "lure_y", or anything else for the primary guide), honoring the
    LGW_MASK_RESCALE_MIN behavior. Returns (lgw_mask, lgw_mask_inv).
    """
    # per-channel (weight schedule, inverted weight schedule, spatial mask) selectors
    channels = {
        "sync"   : (self.lgw_sync,    self.lgw_sync_inv,    self.mask_sync),
        "drift_x": (self.lgw_drift_x, self.lgw_drift_x_inv, self.mask_drift_x),
        "drift_y": (self.lgw_drift_y, self.lgw_drift_y_inv, self.mask_drift_y),
        "lure_x" : (self.lgw_lure_x,  self.lgw_lure_x_inv,  self.mask_lure_x),
        "lure_y" : (self.lgw_lure_y,  self.lgw_lure_y_inv,  self.mask_lure_y),
    }

    if lgw_type in channels:
        weights, weights_inv, channel_mask = channels[lgw_type]
        lgw_     = weights[step]
        lgw_inv_ = weights_inv[step]
        if channel_mask is None:
            mask     = torch.ones_like(self.y0)
            mask_inv = torch.zeros_like(self.y0)
        else:
            mask     = channel_mask
            mask_inv = 1 - channel_mask
    else:
        # any other lgw_type falls through to the primary guide weights/masks;
        # here the inverted mask is taken from self.mask_inv directly
        lgw_     = self.lgw[step]
        lgw_inv_ = self.lgw_inv[step]
        mask     = torch.ones_like(self.y0)  if self.mask     is None else self.mask
        mask_inv = torch.zeros_like(self.y0) if self.mask_inv is None else self.mask_inv

    if self.LGW_MASK_RESCALE_MIN:
        # interpolate each mask from its guide weight (mask==0) up to 1.0 (mask==1)
        lgw_mask     = mask * (1 - lgw_) + lgw_
        lgw_mask_inv = (1 - mask) * (1 - lgw_inv_) + lgw_inv_
    else:
        lgw_mask = mask * lgw_ if self.HAS_LATENT_GUIDE else torch.zeros_like(mask)
        if self.HAS_LATENT_GUIDE_INV:
            if mask_inv is not None:
                # cap the inverted-guide weight so it never exceeds its own mask
                lgw_mask_inv = torch.minimum(mask_inv, (1 - mask) * lgw_inv_)
            else:
                lgw_mask_inv = (1 - mask) * lgw_inv_
        else:
            lgw_mask_inv = torch.zeros_like(mask)

    return lgw_mask, lgw_mask_inv
def get_masks_for_step(self, step:int, lgw_type="default") -> Tuple[Tensor, Tensor]:
    """Return the weighted guide masks for `step`, applying per-frame
    reweighting for video when a frame-weights manager is configured.
    Returns (lgw_mask, lgw_mask_inv) moved to self.device.
    """
    mask_fg, mask_bg = self.prepare_weighted_masks(step, lgw_type=lgw_type)

    # extra-option flags are read unconditionally (matching prior behavior)
    norm_fg = self.EO("normalize_frame_weights_per_step")
    norm_bg = self.EO("normalize_frame_weights_per_step_inv")

    if self.VIDEO and self.frame_weights_mgr:
        frames = mask_fg.shape[2]
        # apply_frame_weights mutates the mask tensors in place
        for has_guide, mask, weight_name, normalize in (
            (self.HAS_LATENT_GUIDE,     mask_fg, 'frame_weights',     norm_fg),
            (self.HAS_LATENT_GUIDE_INV, mask_bg, 'frame_weights_inv', norm_bg),
        ):
            if has_guide:
                fw = self.frame_weights_mgr.get_frame_weights_by_name(weight_name, frames, step)
                apply_frame_weights(mask, fw, normalize)

    return mask_fg.to(self.device), mask_bg.to(self.device)
def get_cossim_adjusted_lgw_masks(self, data:Tensor, step:int) -> Tuple[Tensor, Tensor, Tensor, Tensor]:
    """Fetch the guide latents and step masks, zeroing each mask once its guide
    is already at least as similar to `data` as the configured cutoff.
    Returns (y0, y0_inv, lgw_mask, lgw_mask_inv).
    """
    # absent guides are represented as zero latents
    y0     = self.y0.clone()     if self.HAS_LATENT_GUIDE     else torch.zeros_like(data)
    y0_inv = self.y0_inv.clone() if self.HAS_LATENT_GUIDE_INV else torch.zeros_like(data)

    # a batched guide changes per step: pick this step's entry (clamped to the last)
    if y0.shape[0] > 1:
        y0 = y0[min(step, y0.shape[0] - 1)].unsqueeze(0)

    lgw_mask, lgw_mask_inv = self.get_masks_for_step(step)

    sim, sim_inv = 1.0, 1.0
    if self.HAS_LATENT_GUIDE:
        sim = get_pearson_similarity(data, y0, mask=lgw_mask)
    if self.HAS_LATENT_GUIDE_INV:
        sim_inv = get_pearson_similarity(data, y0_inv, mask=lgw_mask_inv)

    # similarity at/above the cutoff means the guide has "landed": disable it
    if sim >= self.guide_cossim_cutoff_:
        lgw_mask *= 0
    if sim_inv >= self.guide_bkg_cossim_cutoff_:
        lgw_mask_inv *= 0

    return y0, y0_inv, lgw_mask, lgw_mask_inv
@torch.no_grad
def process_pseudoimplicit_guides_substep(self,
                                          x_0 : Tensor,
                                          x_ : Tensor,
                                          eps_ : Tensor,
                                          eps_prev_ : Tensor,
                                          data_ : Tensor,
                                          denoised_prev : Tensor,
                                          row : int,
                                          step : int,
                                          step_sched : int,
                                          sigmas : Tensor,
                                          NS ,
                                          RK ,
                                          pseudoimplicit_row_weights : Tensor,
                                          pseudoimplicit_step_weights : Tensor,
                                          full_iter : int,
                                          BONGMATH : bool,
                                          ):
    """Apply a (fully-)pseudoimplicit guide correction to one RK substep.

    For "pseudoimplicit*" modes it computes a guide-shifted sub-sigma and a
    corresponding "lying" x row on the fly; for "fully_pseudoimplicit*" modes it
    reuses the precomputed self.x_lying_/self.s_lying_ trajectory (built by
    prepare_fully_pseudoimplicit_guides_substep).

    Returns (x_0, x_, eps_, x_row_pseudoimplicit, sub_sigma_pseudoimplicit);
    the last two are None whenever the guide is inactive for this substep.
    NS is the noise sampler/step state, RK the Runge-Kutta method object —
    both are project types; their exact contracts are defined elsewhere.
    """
    # inactive unless a pseudoimplicit mode is selected and at least one guide
    # weight is nonzero at this point of the schedule
    if "pseudoimplicit" not in self.guide_mode or (self.lgw[step_sched] == 0 and self.lgw_inv[step_sched] == 0):
        return x_0, x_, eps_, None, None

    sigma = sigmas[step]

    # a precomputed lying schedule may have fewer rows than this substep index
    if self.s_lying_ is not None:
        if row >= len(self.s_lying_):
            return x_0, x_, eps_, None, None

    # "fully_*" modes score similarity against the previous denoised estimate;
    # plain modes use this row's data prediction
    if self.guide_mode.startswith("fully_"):
        data_cossim_test = denoised_prev
    else:
        data_cossim_test = data_[row]
    y0, y0_inv, lgw_mask, lgw_mask_inv = self.get_cossim_adjusted_lgw_masks(data_cossim_test, step_sched)

    if not (lgw_mask.any() != 0 or lgw_mask_inv.any() != 0): # cossim score too similar! deactivate guide for this step
        return x_0, x_, eps_, None, None

    # fully-pseudoimplicit: pull this row's lying x and sigma from the
    # precomputed trajectory (bail out if it was never prepared)
    if "fully_pseudoimplicit" in self.guide_mode:
        if self.x_lying_ is None:
            return x_0, x_, eps_, None, None
        else:
            x_row_pseudoimplicit     = self.x_lying_[row]
            sub_sigma_pseudoimplicit = self.s_lying_[row]

    # implicit RK methods: advance/rebound this row before deriving guide epsilons
    if RK.IMPLICIT:
        x_ = RK.update_substep(x_0,
                               x_,
                               eps_,
                               eps_prev_,
                               row,
                               RK.row_offset,
                               NS.h_new,
                               NS.h_new_orig,
                               )
        x_[row] = NS.rebound_overshoot_substep(x_0, x_[row])
        if row > 0:
            x_[row] = NS.swap_noise_substep(x_0, x_[row])
        if BONGMATH and step < sigmas.shape[0]-1 and not self.EO("disable_pseudoimplicit_bongmath"):
            x_0, x_, eps_ = RK.bong_iter(x_0,
                                         x_,
                                         eps_,
                                         eps_prev_,
                                         data_,
                                         sigma,
                                         NS.s_,
                                         row,
                                         RK.row_offset,
                                         NS.h,
                                         step,
                                         step_sched,
                                         )
        else:
            eps_[row] = RK.get_epsilon(x_0, x_[row], denoised_prev, sigma, NS.s_[row])

    # extra option: force epsilon from the previous denoised estimate
    if self.EO("pseudoimplicit_denoised_prev"):
        eps_[row] = RK.get_epsilon(x_0, x_[row], denoised_prev, sigma, NS.s_[row])

    # epsilons pointing from the current row toward each guide latent
    eps_substep_guide = torch.zeros_like(x_0)
    eps_substep_guide_inv = torch.zeros_like(x_0)
    if self.HAS_LATENT_GUIDE:
        eps_substep_guide = RK.get_guide_epsilon(x_0, x_[row], y0, sigma, NS.s_[row], NS.sigma_down, None)
    if self.HAS_LATENT_GUIDE_INV:
        eps_substep_guide_inv = RK.get_guide_epsilon(x_0, x_[row], y0_inv, sigma, NS.s_[row], NS.sigma_down, None)

    if self.guide_mode in {"pseudoimplicit", "pseudoimplicit_cw", "pseudoimplicit_projection", "pseudoimplicit_projection_cw"}:
        # shrink the sub-sigma toward RK.sigma_min in proportion to the guide
        # weights; optional extra options reshape the per-row scaling
        maxmin_ratio = (NS.sub_sigma - RK.sigma_min) / NS.sub_sigma
        if self.EO("guide_pseudoimplicit_power_substep_flip_maxmin_scaling"):
            maxmin_ratio *= (RK.rows-row) / RK.rows
        elif self.EO("guide_pseudoimplicit_power_substep_maxmin_scaling"):
            maxmin_ratio *= row / RK.rows
        sub_sigma_2 = NS.sub_sigma - maxmin_ratio * (NS.sub_sigma * pseudoimplicit_row_weights[row] * pseudoimplicit_step_weights[full_iter] * self.lgw[step_sched])

        # blend the guide epsilons into eps_ only to derive the lying x row,
        # then restore the original eps_ (the clone makes this side-effect free)
        eps_tmp_ = eps_.clone()
        eps_ = self.process_channelwise(x_0,
                                        eps_,
                                        data_,
                                        row,
                                        eps_substep_guide,
                                        eps_substep_guide_inv,
                                        y0,
                                        y0_inv,
                                        lgw_mask,
                                        lgw_mask_inv,
                                        use_projection = self.guide_mode in {"pseudoimplicit_projection", "pseudoimplicit_projection_cw"},
                                        channelwise = self.guide_mode in {"pseudoimplicit_cw", "pseudoimplicit_projection_cw"},
                                        )
        x_row_tmp = x_[row] + RK.h_fn(sub_sigma_2, NS.sub_sigma) * eps_[row]
        eps_ = eps_tmp_
        x_row_pseudoimplicit = x_row_tmp
        sub_sigma_pseudoimplicit = sub_sigma_2

    # optional bongmath refinement from the lying point back onto the substep grid
    if RK.IMPLICIT and BONGMATH and step < sigmas.shape[0]-1 and not self.EO("disable_pseudobongmath"):
        x_[row] = NS.sigma_from_to(x_0, x_row_pseudoimplicit, sigma, sub_sigma_pseudoimplicit, NS.s_[row])
        x_0, x_, eps_ = RK.bong_iter(x_0,
                                     x_,
                                     eps_,
                                     eps_prev_,
                                     data_,
                                     sigma,
                                     NS.s_,
                                     row,
                                     RK.row_offset,
                                     NS.h,
                                     step,
                                     step_sched,
                                     )
    return x_0, x_, eps_, x_row_pseudoimplicit, sub_sigma_pseudoimplicit
@torch.no_grad
def prepare_fully_pseudoimplicit_guides_substep(self,
                                                x_0,
                                                x_,
                                                eps_,
                                                eps_prev_,
                                                data_,
                                                denoised_prev,
                                                row,
                                                step,
                                                step_sched,
                                                sigmas,
                                                eta_substep,
                                                overshoot_substep,
                                                s_noise_substep,
                                                NS,
                                                RK,
                                                pseudoimplicit_row_weights,
                                                pseudoimplicit_step_weights,
                                                full_iter,
                                                BONGMATH,
                                                ):
    """Precompute the "lying" trajectory used by the fully-pseudoimplicit modes.

    For every RK row it derives a guide-shifted sub-sigma and a corresponding
    lying x/epsilon, storing the results in self.x_lying_ and self.s_lying_ so
    process_pseudoimplicit_guides_substep can reuse them per row.

    Returns (x_0, x_, eps_) — possibly advanced by bong_iter/newton_iter when
    the RK method is implicit. No-op unless a "fully_pseudoimplicit*" mode is
    active with a nonzero guide weight at this schedule step.
    """
    if "fully_pseudoimplicit" not in self.guide_mode or (self.lgw[step_sched] == 0 and self.lgw_inv[step_sched] == 0):
        return x_0, x_, eps_

    sigma = sigmas[step]

    y0, y0_inv, lgw_mask, lgw_mask_inv = self.get_cossim_adjusted_lgw_masks(denoised_prev, step_sched)

    if not (lgw_mask.any() != 0 or lgw_mask_inv.any() != 0): # cossim score too similar! deactivate guide for this step
        return x_0, x_, eps_

    # PREPARE FULLY PSEUDOIMPLICIT GUIDES
    if self.guide_mode in {"fully_pseudoimplicit", "fully_pseudoimplicit_cw", "fully_pseudoimplicit_projection", "fully_pseudoimplicit_projection_cw"} and (self.lgw[step_sched] > 0 or self.lgw_inv[step_sched] > 0):
        x_lying_ = x_.clone()
        eps_lying_ = eps_.clone()
        s_lying_ = []

        for r in range(RK.rows):
            NS.set_sde_substep(r, RK.multistep_stages, eta_substep, overshoot_substep, s_noise_substep)

            # shrink this row's sub-sigma toward RK.sigma_min in proportion to
            # the pseudoimplicit row/step weights and the scheduled guide weight
            maxmin_ratio = (NS.sub_sigma - RK.sigma_min) / NS.sub_sigma
            fully_sub_sigma_2 = NS.sub_sigma - maxmin_ratio * (NS.sub_sigma * pseudoimplicit_row_weights[r] * pseudoimplicit_step_weights[full_iter] * self.lgw[step_sched])
            s_lying_.append(fully_sub_sigma_2)

            # implicit RK methods: advance/rebound this row first
            if RK.IMPLICIT:
                x_ = RK.update_substep(x_0,
                                       x_,
                                       eps_,
                                       eps_prev_,
                                       r,
                                       RK.row_offset,
                                       NS.h_new,
                                       NS.h_new_orig,
                                       )
                x_[r] = NS.rebound_overshoot_substep(x_0, x_[r])
                if r > 0:
                    x_[r] = NS.swap_noise_substep(x_0, x_[r])
                if BONGMATH and step < sigmas.shape[0]-1 and not self.EO("disable_fully_pseudoimplicit_bongmath"):
                    x_0, x_, eps_ = RK.bong_iter(x_0,
                                                 x_,
                                                 eps_,
                                                 eps_prev_,
                                                 data_,
                                                 sigma,
                                                 NS.s_,
                                                 r,
                                                 RK.row_offset,
                                                 NS.h,
                                                 step,
                                                 step_sched,
                                                 )

            # extra option: force epsilon from the previous denoised estimate
            if self.EO("fully_pseudoimplicit_denoised_prev"):
                eps_[r] = RK.get_epsilon(x_0, x_[r], denoised_prev, sigma, NS.s_[r])

            # epsilons pointing from this row toward each guide latent
            eps_substep_guide = torch.zeros_like(x_0)
            eps_substep_guide_inv = torch.zeros_like(x_0)
            if self.HAS_LATENT_GUIDE:
                eps_substep_guide = RK.get_guide_epsilon(x_0, x_[r], y0, sigma, NS.s_[r], NS.sigma_down, None)
            if self.HAS_LATENT_GUIDE_INV:
                eps_substep_guide_inv = RK.get_guide_epsilon(x_0, x_[r], y0_inv, sigma, NS.s_[r], NS.sigma_down, None)

            # NOTE(review): the outer `row` argument is passed here while the loop
            # iterates `r` — confirm this is intentional and not a copy/paste slip.
            eps_ = self.process_channelwise(x_0,
                                            eps_,
                                            data_,
                                            row,
                                            eps_substep_guide,
                                            eps_substep_guide_inv,
                                            y0,
                                            y0_inv,
                                            lgw_mask,
                                            lgw_mask_inv,
                                            use_projection = self.guide_mode in {"fully_pseudoimplicit_projection", "fully_pseudoimplicit_projection_cw"},
                                            channelwise = self.guide_mode in {"fully_pseudoimplicit_cw", "fully_pseudoimplicit_projection_cw"},
                                            )

            # lying x at the shrunken sigma, and a lying epsilon recovered from
            # the corresponding data prediction at sigma == 0
            x_lying_[r] = x_[r] + RK.h_fn(fully_sub_sigma_2, NS.sub_sigma) * eps_[r]
            data_lying = x_[r] + RK.h_fn(0, NS.s_[r]) * eps_[r]
            eps_lying_[r] = RK.get_epsilon(x_0, x_[r], data_lying, sigma, NS.s_[r])

        if not self.EO("pseudoimplicit_disable_eps_lying"):
            eps_ = eps_lying_

        if not self.EO("pseudoimplicit_disable_newton_iter"):
            x_, eps_ = RK.newton_iter(x_0,
                                      x_,
                                      eps_,
                                      eps_prev_,
                                      data_,
                                      NS.s_,
                                      0,
                                      NS.h,
                                      sigmas,
                                      step,
                                      "lying",
                                      )

        # stash the trajectory for per-row reuse later in the step
        self.x_lying_ = x_lying_
        self.s_lying_ = s_lying_

    return x_0, x_, eps_
@torch.no_grad
def process_guides_data_substep(self,
                                x_row : Tensor,
                                data_row : Tensor,
                                step : int,
                                sigma_row : Tensor,
                                frame_targets : Optional[Tensor] = None,
                                ):
    """Apply data-space guiding ("data"/"lure" modes) to one substep's x row.

    Resolves per-frame similarity targets (from the frame-weights manager for
    video, otherwise from the "frame_targets" extra option) and delegates the
    actual blending to get_data_substep per temporal slice.
    Returns x_row, adjusted in place slice by slice.
    """
    # nothing to do without at least one latent guide
    if not self.HAS_LATENT_GUIDE and not self.HAS_LATENT_GUIDE_INV:
        return x_row

    y0, y0_inv, lgw_mask, lgw_mask_inv = self.get_cossim_adjusted_lgw_masks(data_row, step)
    if not (lgw_mask.any() != 0 or lgw_mask_inv.any() != 0): # cossim score too similar! deactivate guide for this step
        return x_row

    # video: per-frame targets come from the frame-weights manager when available
    if self.VIDEO and self.frame_weights_mgr is not None and frame_targets is None:
        num_frames = data_row.shape[2]
        frame_targets = self.frame_weights_mgr.get_frame_weights_by_name('frame_targets', num_frames, step)
    if frame_targets is None:
        frame_targets = torch.tensor(self.EO("frame_targets", [1.0]))
    frame_targets = torch.clamp(frame_targets, 0.0, 1.0).to(self.device)

    if self.guide_mode in {"data", "data_projection", "lure", "lure_projection"}:
        # NOTE(review): frame_targets was unconditionally defaulted above, so this
        # branch appears unreachable — confirm before relying on the whole-tensor path.
        if frame_targets is None:
            x_row = self.get_data_substep(x_row, data_row, y0, y0_inv, lgw_mask, lgw_mask_inv, step, sigma_row)
        else:
            # slices dim -3: frames for 5-D video latents; NOTE(review): for 4-D
            # latents this iterates the channel dim — confirm intended.
            t_dim = x_row.shape[-3]
            for t in range(t_dim): #temporal dimension
                # missing targets fall back to the last provided one
                frame_target = float(frame_targets[t] if len(frame_targets) > t else frame_targets[-1])
                x_row[...,t:t+1,:,:] = self.get_data_substep(
                    x_row [...,t:t+1,:,:],
                    data_row [...,t:t+1,:,:],
                    y0 [...,t:t+1,:,:],
                    y0_inv [...,t:t+1,:,:],
                    lgw_mask [...,t:t+1,:,:],
                    lgw_mask_inv[...,t:t+1,:,:],
                    step,
                    sigma_row,
                    frame_target)
    return x_row
@torch.no_grad
def get_data_substep(self,
x_row : Tensor,
data_row : Tensor,
y0 : Tensor,
y0_inv : Tensor,
lgw_mask : Tensor,
lgw_mask_inv : Tensor,
step : int,
sigma_row : Tensor,
frame_target : float = 1.0,
):
if not self.HAS_LATENT_GUIDE and not self.HAS_LATENT_GUIDE_INV:
return x_row
if self.guide_mode in {"data", "data_projection", "lure", "lure_projection"}:
data_targets = self.EO("data_targets", [1.0])
step_target = step if len(data_targets) > step else len(data_targets)-1
cossim_target = frame_target * data_targets[step_target]
if self.HAS_LATENT_GUIDE:
if self.guide_mode.endswith("projection"):
d_collinear_d_lerp = get_collinear(data_row, y0)
d_lerp_ortho_d = get_orthogonal(y0, data_row)
y0 = d_collinear_d_lerp + d_lerp_ortho_d
if cossim_target == 1.0:
d_slerped = y0
elif cossim_target == 0.0:
d_slerped = data_row
else:
y0_pearsim = get_pearson_similarity(data_row, y0, mask=self.mask)
slerp_weight = get_slerp_weight_for_cossim(y0_pearsim.item(), cossim_target)
d_slerped = slerp_tensor(slerp_weight, data_row, y0) # lgw_mask * slerp_weight same as using mask below
"""if self.guide_mode == "data_projection":
d_collinear_d_lerp = get_collinear(data_row, d_slerped)
d_lerp_ortho_d = get_orthogonal(d_slerped, data_row)
d_slerped = d_collinear_d_lerp + d_lerp_ortho_d"""
if self.VE_MODEL:
x_row = x_row + lgw_mask * (d_slerped - data_row)
else:
x_row = x_row + lgw_mask * (self.sigma_max - sigma_row) * (d_slerped - data_row)
if self.HAS_LATENT_GUIDE_INV:
if self.guide_mode.endswith("projection"):
d_collinear_d_lerp = get_collinear(data_row, y0_inv)
d_lerp_ortho_d = get_orthogonal(y0_inv, data_row)
y0_inv = d_collinear_d_lerp + d_lerp_ortho_d
if cossim_target == 1.0:
d_slerped_inv = y0_inv
elif cossim_target == 0.0:
d_slerped_inv = data_row
else:
y0_pearsim = get_pearson_similarity(data_row, y0_inv, mask=self.mask_inv)
slerp_weight = get_slerp_weight_for_cossim(y0_pearsim.item(), cossim_target)
d_slerped_inv = slerp_tensor(slerp_weight, data_row, y0_inv)
"""if self.guide_mode == "data_projection":
d_collinear_d_lerp = get_collinear(data_row, d_slerped_inv)
d_lerp_ortho_d = get_orthogonal(d_slerped_inv, data_row)
d_slerped_inv = d_collinear_d_lerp + d_lerp_ortho_d"""
if self.VE_MODEL:
x_row = x_row + lgw_mask_inv * (d_slerped_inv - data_row)
else:
x_row = x_row + lgw_mask_inv * (self.sigma_max - sigma_row) * (d_slerped_inv - data_row)
return x_row
@torch.no_grad
def swap_data(self,
x : Tensor,
data : Tensor,
y : Tensor,
sigma : Tensor,
mask : Optional[Tensor] = None,
):
mask = 1.0 if mask is None else mask
if self.VE_MODEL:
return x + mask * (y - data)
else:
return x + mask * (self.sigma_max - sigma) * (y - data)
@torch.no_grad
def process_guides_eps_substep(self,
x_0 : Tensor,
x_row : Tensor,
data_row : Tensor,
eps_row : Tensor,
step : int,
sigma : Tensor,
sigma_down : Tensor,
sigma_row : Tensor,
frame_targets : Optional[Tensor] = None,
RK=None,
):
if not self.HAS_LATENT_GUIDE and not self.HAS_LATENT_GUIDE_INV:
return eps_row
y0, y0_inv, lgw_mask, lgw_mask_inv = self.get_cossim_adjusted_lgw_masks(data_row, step)
if not (lgw_mask.any() != 0 or lgw_mask_inv.any() != 0): # cossim score too similar! deactivate guide for this step
return eps_row
if self.VIDEO and data_row.ndim == 5 and frame_targets is None:
num_frames = data_row.shape[2]
frame_targets = self.frame_weights_mgr.get_frame_weights_by_name('frame_targets', num_frames, step)
if frame_targets is None:
frame_targets = self.EO("frame_targets", [1.0])
frame_targets = torch.clamp(frame_targets, 0.0, 1.0)
eps_y0 = torch.zeros_like(x_0)
eps_y0_inv = torch.zeros_like(x_0)
if self.HAS_LATENT_GUIDE:
eps_y0 = RK.get_guide_epsilon(x_0, x_row, y0, sigma, sigma_row, sigma_down, None)
if self.HAS_LATENT_GUIDE_INV:
eps_y0_inv = RK.get_guide_epsilon(x_0, x_row, y0_inv, sigma, sigma_row, sigma_down, None)
if self.guide_mode in {"epsilon", "epsilon_projection"}:
if frame_targets is None:
eps_row = self.get_eps_substep(eps_row, eps_y0, eps_y0_inv, lgw_mask, lgw_mask_inv, step, sigma_row)
else:
t_dim = x_row.shape[-3]
for t in range(t_dim): #temporal dimension
frame_target = float(frame_targets[t] if len(frame_targets) > t else frame_targets[-1])
eps_row[...,t:t+1,:,:] = self.get_eps_substep(
eps_row [...,t:t+1,:,:],
eps_y0 [...,t:t+1,:,:],
eps_y0_inv [...,t:t+1,:,:],
lgw_mask [...,t:t+1,:,:],
lgw_mask_inv[...,t:t+1,:,:],
step,
sigma_row,
frame_target)
return eps_row
@torch.no_grad
def get_eps_substep(self,
                    eps_row      : Tensor,
                    eps_y0       : Tensor,
                    eps_y0_inv   : Tensor,
                    lgw_mask     : Tensor,
                    lgw_mask_inv : Tensor,
                    step         : int,
                    sigma_row    : Tensor,
                    frame_target : float = 1.0,
                    ):
    """Slerp the model epsilon toward the guide epsilon(s) so their cosine
    similarity approaches a per-step target, then lerp the result in under
    the guide-weight masks.

    frame_target scales the per-step target (used by the per-frame video
    path). Returns the blended eps_row.
    """
    if not self.HAS_LATENT_GUIDE and not self.HAS_LATENT_GUIDE_INV:
        return eps_row

    if self.guide_mode in {"epsilon", "epsilon_projection"}:
        # Per-step cosine-similarity targets; the last entry repeats once the
        # step index runs past the end of the list.
        eps_targets   = self.EO("eps_targets", [1.0])
        step_target   = step if len(eps_targets) > step else len(eps_targets)-1
        cossim_target = frame_target * eps_targets[step_target]

        if self.HAS_LATENT_GUIDE:
            if self.guide_mode == "epsilon_projection":
                # Re-express the guide epsilon as (component collinear with
                # eps_row) + (component of the guide orthogonal to eps_row).
                d_collinear_d_lerp = get_collinear(eps_row, eps_y0)
                d_lerp_ortho_d     = get_orthogonal(eps_y0, eps_row)
                eps_y0 = d_collinear_d_lerp + d_lerp_ortho_d
            if cossim_target == 1.0:
                d_slerped = eps_y0          # fully the guide
            elif cossim_target == 0.0:
                d_slerped = eps_row         # fully the model
            else:
                # Slerp just far enough to hit the requested cosine similarity.
                y0_pearsim   = get_pearson_similarity(eps_row, eps_y0, mask=self.mask)
                slerp_weight = get_slerp_weight_for_cossim(y0_pearsim.item(), cossim_target)
                d_slerped    = slerp_tensor(slerp_weight, eps_row, eps_y0) # lgw_mask * slerp_weight same as using mask below
            """if self.guide_mode == "data_projection":
                d_collinear_d_lerp = get_collinear(data_row, d_slerped)
                d_lerp_ortho_d = get_orthogonal(d_slerped, data_row)
                d_slerped = d_collinear_d_lerp + d_lerp_ortho_d"""
            eps_row = eps_row + lgw_mask * (d_slerped - eps_row)

        if self.HAS_LATENT_GUIDE_INV:
            # Mirror of the foreground branch for the inverted/background guide.
            if self.guide_mode == "epsilon_projection":
                d_collinear_d_lerp = get_collinear(eps_row, eps_y0_inv)
                d_lerp_ortho_d     = get_orthogonal(eps_y0_inv, eps_row)
                eps_y0_inv = d_collinear_d_lerp + d_lerp_ortho_d
            if cossim_target == 1.0:
                d_slerped_inv = eps_y0_inv
            elif cossim_target == 0.0:
                d_slerped_inv = eps_row
            else:
                y0_pearsim    = get_pearson_similarity(eps_row, eps_y0_inv, mask=self.mask_inv)
                slerp_weight  = get_slerp_weight_for_cossim(y0_pearsim.item(), cossim_target)
                d_slerped_inv = slerp_tensor(slerp_weight, eps_row, eps_y0_inv)
            """if self.guide_mode == "data_projection":
                d_collinear_d_lerp = get_collinear(data_row, d_slerped_inv)
                d_lerp_ortho_d = get_orthogonal(d_slerped_inv, data_row)
                d_slerped_inv = d_collinear_d_lerp + d_lerp_ortho_d"""
            eps_row = eps_row + lgw_mask_inv * (d_slerped_inv - eps_row)

    return eps_row
@torch.no_grad
def process_guides_substep(self,
                           x_0           : Tensor,
                           x_            : Tensor,
                           eps_          : Tensor,
                           data_         : Tensor,
                           row           : int,
                           step_sched    : int,
                           sigma         : Tensor,
                           sigma_next    : Tensor,
                           sigma_down    : Tensor,
                           s_            : Tensor,
                           epsilon_scale : float,
                           RK,
                           ):
    """Apply latent-guide corrections for a single RK substep.

    Depending on self.guide_mode this either rewrites x_[row+1] directly
    ("data_old" variants) or blends a guide-derived epsilon into eps_[row]
    ("epsilon" variants, including channelwise and projection forms).
    Returns the (possibly modified) eps_ and x_ stacks.
    """
    if not self.HAS_LATENT_GUIDE and not self.HAS_LATENT_GUIDE_INV:
        return eps_, x_

    y0, y0_inv, lgw_mask, lgw_mask_inv = self.get_cossim_adjusted_lgw_masks(data_[row], step_sched)

    if not (lgw_mask.any() != 0 or lgw_mask_inv.any() != 0): # cossim score too similar! deactivate guide for this step
        return eps_, x_

    # Keep a pristine copy for the post-hoc mean/std restoration options below.
    if self.EO(["substep_eps_ch_mean_std", "substep_eps_ch_mean", "substep_eps_ch_std", "substep_eps_mean_std", "substep_eps_mean", "substep_eps_std"]):
        eps_orig = eps_.clone()

    # Optionally re-normalize the guides against the current denoised estimate.
    if self.EO("dynamic_guides_mean_std"):
        y_shift, y_inv_shift = normalize_latent([y0, y0_inv], [data_, data_])
        y0 = y_shift
        if self.EO("dynamic_guides_inv"):
            y0_inv = y_inv_shift

    if self.EO("dynamic_guides_mean"):
        y_shift, y_inv_shift = normalize_latent([y0, y0_inv], [data_, data_], std=False)
        y0 = y_shift
        if self.EO("dynamic_guides_inv"):
            y0_inv = y_inv_shift

    if "data_old" == self.guide_mode:
        # Blend the guides directly into the denoised estimate, then rebuild x.
        y0_tmp = y0.clone()
        if self.HAS_LATENT_GUIDE:
            y0_tmp = (1-lgw_mask) * data_[row] + lgw_mask * y0
        y0_tmp = (1-lgw_mask_inv) * y0_tmp + lgw_mask_inv * y0_inv
        x_[row+1] = y0_tmp + eps_[row]

    if self.guide_mode == "data_old_projection":
        # Project the lerped guide mixture onto the denoised estimate, then rebuild x.
        d_lerp             = data_[row] + lgw_mask * (y0-data_[row]) + lgw_mask_inv * (y0_inv-data_[row])
        d_collinear_d_lerp = get_collinear(data_[row], d_lerp)
        d_lerp_ortho_d     = get_orthogonal(d_lerp, data_[row])
        data_[row]         = d_collinear_d_lerp + d_lerp_ortho_d
        x_[row+1]          = data_[row] + eps_[row] * sigma

    #elif (self.UNSAMPLE or self.guide_mode in {"epsilon", "epsilon_cw", "epsilon_projection", "epsilon_projection_cw"}) and (self.lgw[step] > 0 or self.lgw_inv[step] > 0):
    elif self.guide_mode in {"epsilon", "epsilon_cw", "epsilon_projection", "epsilon_projection_cw"} and (self.lgw[step_sched] > 0 or self.lgw_inv[step_sched] > 0):
        # Epsilon-space guiding: only when there is still noise to remove.
        if sigma_down < sigma or s_[row] < RK.sigma_max:
            eps_substep_guide     = torch.zeros_like(x_0)
            eps_substep_guide_inv = torch.zeros_like(x_0)

            if self.HAS_LATENT_GUIDE:
                eps_substep_guide = RK.get_guide_epsilon(x_0, x_[row], y0, sigma, s_[row], sigma_down, epsilon_scale)

            if self.HAS_LATENT_GUIDE_INV:
                eps_substep_guide_inv = RK.get_guide_epsilon(x_0, x_[row], y0_inv, sigma, s_[row], sigma_down, epsilon_scale)

            tol_value = self.EO("tol", -1.0)
            if tol_value >= 0:
                # Tolerance mode: per-(batch, channel) guide weight shrinks as the
                # denoised estimate approaches the guide within tol_value.
                for b, c in itertools.product(range(x_0.shape[0]), range(x_0.shape[1])):
                    current_diff       = torch.norm(data_[row][b][c] - y0    [b][c])
                    current_diff_inv   = torch.norm(data_[row][b][c] - y0_inv[b][c])
                    lgw_scaled         = torch.nan_to_num(1-(tol_value/current_diff),     0)
                    lgw_scaled_inv     = torch.nan_to_num(1-(tol_value/current_diff_inv), 0)
                    lgw_tmp            = min(self.lgw[step_sched]    , lgw_scaled)
                    lgw_tmp_inv        = min(self.lgw_inv[step_sched], lgw_scaled_inv)
                    lgw_mask_clamp     = torch.clamp(lgw_mask,     max=lgw_tmp)
                    lgw_mask_clamp_inv = torch.clamp(lgw_mask_inv, max=lgw_tmp_inv)
                    eps_[row][b][c] = eps_[row][b][c] + lgw_mask_clamp[b][0] * (eps_substep_guide[b][c] - eps_[row][b][c]) + lgw_mask_clamp_inv[b][0] * (eps_substep_guide_inv[b][c] - eps_[row][b][c])

            elif self.guide_mode in {"epsilon"}:
                #eps_[row] = slerp(lgw_mask.mean().item(), eps_[row], eps_substep_guide)
                if self.EO("slerp_epsilon_guide"):
                    if eps_substep_guide.sum() != 0:
                        eps_[row] = slerp_tensor(lgw_mask, eps_[row], eps_substep_guide)
                    if eps_substep_guide_inv.sum() != 0:
                        eps_[row] = slerp_tensor(lgw_mask_inv, eps_[row], eps_substep_guide_inv)
                else:
                    # Plain masked lerp toward the guide epsilons.
                    eps_[row] = eps_[row] + lgw_mask * (eps_substep_guide - eps_[row]) + lgw_mask_inv * (eps_substep_guide_inv - eps_[row])
                #eps_[row] = slerp_barycentric(eps_[row].norm(), eps_substep_guide.norm(), eps_substep_guide_inv.norm(), 1-lgw_mask-lgw_mask_inv, lgw_mask, lgw_mask_inv)

            elif self.guide_mode in {"epsilon_projection"}:
                if self.EO("slerp_epsilon_guide"):
                    # NOTE(review): if both guide sums are zero, eps_row_slerp is
                    # never bound and the get_collinear call below raises NameError -- verify.
                    if eps_substep_guide.sum() != 0:
                        eps_row_slerp = slerp_tensor(self.mask, eps_[row], eps_substep_guide)
                    if eps_substep_guide_inv.sum() != 0:
                        eps_row_slerp = slerp_tensor((1-self.mask), eps_row_slerp, eps_substep_guide_inv)
                    eps_collinear_eps_slerp = get_collinear(eps_[row], eps_row_slerp)
                    eps_slerp_ortho_eps     = get_orthogonal(eps_row_slerp, eps_[row])
                    eps_sum                 = eps_collinear_eps_slerp + eps_slerp_ortho_eps
                    eps_[row] = slerp_tensor(lgw_mask,     eps_[row], eps_sum)
                    eps_[row] = slerp_tensor(lgw_mask_inv, eps_[row], eps_sum)
                else:
                    # Lerp the fg/bkg guides under the region mask, project onto
                    # the current epsilon's direction, then blend in by weight.
                    eps_row_lerp           = eps_[row] + self.mask * (eps_substep_guide-eps_[row]) + (1-self.mask) * (eps_substep_guide_inv-eps_[row])
                    eps_collinear_eps_lerp = get_collinear(eps_[row], eps_row_lerp)
                    eps_lerp_ortho_eps     = get_orthogonal(eps_row_lerp, eps_[row])
                    eps_sum                = eps_collinear_eps_lerp + eps_lerp_ortho_eps
                    eps_[row] = eps_[row] + lgw_mask * (eps_sum - eps_[row]) + lgw_mask_inv * (eps_sum - eps_[row])
                #eps_row_slerp = eps_[row] + self.mask * (eps_substep_guide-eps_[row]) + (1-self.mask) * (eps_substep_guide_inv-eps_[row])

            elif self.guide_mode in {"epsilon_cw", "epsilon_projection_cw"}:
                # Channelwise variants handled by a dedicated helper.
                eps_ = self.process_channelwise(x_0,
                                                eps_,
                                                data_,
                                                row,
                                                eps_substep_guide,
                                                eps_substep_guide_inv,
                                                y0,
                                                y0_inv,
                                                lgw_mask,
                                                lgw_mask_inv,
                                                use_projection = self.guide_mode == "epsilon_projection_cw",
                                                channelwise    = True
                                                )

    # Optional temporal smoothing of the guided epsilon (video latents).
    temporal_smoothing = self.EO("temporal_smoothing", 0.0)
    if temporal_smoothing > 0:
        eps_[row] = apply_temporal_smoothing(eps_[row], temporal_smoothing)

    # Optionally restore the pre-guide statistics of eps_[row].
    if self.EO("substep_eps_ch_mean_std"):
        eps_[row] = normalize_latent(eps_[row], eps_orig[row])
    if self.EO("substep_eps_ch_mean"):
        eps_[row] = normalize_latent(eps_[row], eps_orig[row], std=False)
    if self.EO("substep_eps_ch_std"):
        eps_[row] = normalize_latent(eps_[row], eps_orig[row], mean=False)
    if self.EO("substep_eps_mean_std"):
        eps_[row] = normalize_latent(eps_[row], eps_orig[row], channelwise=False)
    if self.EO("substep_eps_mean"):
        eps_[row] = normalize_latent(eps_[row], eps_orig[row], std=False, channelwise=False)
    if self.EO("substep_eps_std"):
        eps_[row] = normalize_latent(eps_[row], eps_orig[row], mean=False, channelwise=False)

    return eps_, x_
def process_channelwise(self,
                        x_0                   : Tensor,
                        eps_                  : Tensor,
                        data_                 : Tensor,
                        row                   : int,
                        eps_substep_guide     : Tensor,
                        eps_substep_guide_inv : Tensor,
                        y0                    : Tensor,
                        y0_inv                : Tensor,
                        lgw_mask              : Tensor,
                        lgw_mask_inv          : Tensor,
                        use_projection        : bool = False,
                        channelwise           : bool = False
                        ):
    """Per-(batch, channel) variant of the epsilon guide blend.

    When channelwise=True each channel's guide weight is scaled by how far
    that channel's denoised estimate is from the guide, relative to the
    mean distance. use_projection additionally projects the blended guide
    onto the current epsilon's direction before mixing.
    Returns the modified eps_ stack.
    """
    # Mean masked guide distance; note the sum runs over batch*channels but is
    # divided only by the channel count (batch dim not divided out).
    avg, avg_inv = 0, 0
    for b, c in itertools.product(range(x_0.shape[0]), range(x_0.shape[1])):
        avg     += torch.norm(lgw_mask    [b][0] * data_[row][b][c] - lgw_mask    [b][0] * y0    [b][c])
        avg_inv += torch.norm(lgw_mask_inv[b][0] * data_[row][b][c] - lgw_mask_inv[b][0] * y0_inv[b][c])

    avg     /= x_0.shape[1]
    avg_inv /= x_0.shape[1]

    for b, c in itertools.product(range(x_0.shape[0]), range(x_0.shape[1])):
        if channelwise:
            # This channel's distance relative to the average distance.
            ratio     = torch.nan_to_num(torch.norm(lgw_mask    [b][0] * data_[row][b][c] - lgw_mask    [b][0] * y0    [b][c])   /   avg,     0)
            ratio_inv = torch.nan_to_num(torch.norm(lgw_mask_inv[b][0] * data_[row][b][c] - lgw_mask_inv[b][0] * y0_inv[b][c])   /   avg_inv, 0)
        else:
            ratio     = 1.
            ratio_inv = 1.

        # NOTE(review): this first blend runs unconditionally, and the
        # use_projection else-branch below applies the same lerp again --
        # looks like a double application in the non-projection path; verify.
        if self.EO("slerp_epsilon_guide"):
            if eps_substep_guide[b][c].sum() != 0:
                eps_[row][b][c] = slerp_tensor(ratio * lgw_mask[b][0], eps_[row][b][c], eps_substep_guide[b][c])
            if eps_substep_guide_inv[b][c].sum() != 0:
                eps_[row][b][c] = slerp_tensor(ratio_inv * lgw_mask_inv[b][0], eps_[row][b][c], eps_substep_guide_inv[b][c])
        else:
            eps_[row][b][c] = eps_[row][b][c] + ratio * lgw_mask[b][0] * (eps_substep_guide[b][c] - eps_[row][b][c]) + ratio_inv * lgw_mask_inv[b][0] * (eps_substep_guide_inv[b][c] - eps_[row][b][c])

        if use_projection:
            if self.EO("slerp_epsilon_guide"):
                # NOTE(review): if both guide sums are zero here, eps_row_lerp is
                # never bound and get_collinear below raises NameError -- verify.
                if eps_substep_guide[b][c].sum() != 0:
                    eps_row_lerp = slerp_tensor(self.mask[b][0], eps_[row][b][c], eps_substep_guide[b][c])
                if eps_substep_guide_inv[b][c].sum() != 0:
                    eps_row_lerp = slerp_tensor((1-self.mask[b][0]), eps_[row][b][c], eps_substep_guide_inv[b][c])
            else:
                eps_row_lerp = eps_[row][b][c]   +   self.mask[b][0] * (eps_substep_guide[b][c] - eps_[row][b][c])   +   (1-self.mask[b][0]) * (eps_substep_guide_inv[b][c] - eps_[row][b][c]) # should this ever be self.mask_inv?

            # Project the lerped guide onto the current epsilon's direction.
            eps_collinear_eps_lerp = get_collinear (eps_[row][b][c], eps_row_lerp)
            eps_lerp_ortho_eps     = get_orthogonal(eps_row_lerp   , eps_[row][b][c])
            eps_sum                = eps_collinear_eps_lerp + eps_lerp_ortho_eps

            if self.EO("slerp_epsilon_guide"):
                if eps_substep_guide[b][c].sum() != 0:
                    eps_[row][b][c] = slerp_tensor(ratio * lgw_mask[b][0], eps_[row][b][c], eps_sum)
                if eps_substep_guide_inv[b][c].sum() != 0:
                    eps_[row][b][c] = slerp_tensor(ratio_inv * lgw_mask_inv[b][0], eps_[row][b][c], eps_sum)
            else:
                eps_[row][b][c] = eps_[row][b][c] + ratio * lgw_mask[b][0] * (eps_sum - eps_[row][b][c]) + ratio_inv * lgw_mask_inv[b][0] * (eps_sum - eps_[row][b][c])
        else:
            if self.EO("slerp_epsilon_guide"):
                if eps_substep_guide[b][c].sum() != 0:
                    eps_[row][b][c] = slerp_tensor(ratio * lgw_mask[b][0], eps_[row][b][c], eps_substep_guide[b][c])
                if eps_substep_guide_inv[b][c].sum() != 0:
                    eps_[row][b][c] = slerp_tensor(ratio_inv * lgw_mask_inv[b][0], eps_[row][b][c], eps_substep_guide_inv[b][c])
            else:
                eps_[row][b][c] = eps_[row][b][c] + ratio * lgw_mask[b][0] * (eps_substep_guide[b][c] - eps_[row][b][c]) + ratio_inv * lgw_mask_inv[b][0] * (eps_substep_guide_inv[b][c] - eps_[row][b][c])

    return eps_
def normalize_inputs(self, x:Tensor, y0:Tensor, y0_inv:Tensor):
    """
    Optionally re-normalizes the input latent 'x' (mean/std matching driven
    by extra options) and, for the 'epsilon_guide_mean_std_from_bkg' guide
    mode, re-normalizes y0 against y0_inv.

    Returns:
        - x      (possibly modified)
        - y0     (possibly re-normalized to match y0_inv's statistics)
        - y0_inv (unchanged)
    """
    if self.guide_mode == "epsilon_guide_mean_std_from_bkg":
        y0 = normalize_latent(y0, y0_inv)

    input_norm = self.EO("input_norm", "")
    input_std  = self.EO("input_std", 1.0)

    # Each option name maps to the kwargs that pick which statistics to set.
    norm_variants = {
        "input_ch_mean_set_std_to": dict(),
        "input_ch_set_std_to":      dict(mean=False),
        "input_mean_set_std_to":    dict(channelwise=False),
        "input_std_set_std_to":     dict(mean=False, channelwise=False),
    }
    if input_norm in norm_variants:
        x = normalize_latent(x, set_std=input_std, **norm_variants[input_norm])

    return x, y0, y0_inv
def apply_frame_weights(mask, frame_weights, normalize=False):
    """Scale each temporal frame of `mask` in place by its weight.

    Args:
        mask          : tensor whose dim 2 is the frame axis; frames are
                        addressed as mask[..., f:f+1, :, :]. Modified in place.
        frame_weights : per-frame multipliers, indexable by frame; None is a no-op.
        normalize     : if True, rescale the weighted mask so its overall mean
                        matches the pre-weighting mean.

    Returns:
        None (mask is mutated in place).
    """
    if frame_weights is None:
        return

    original_mask_mean = mask.mean()

    for f in range(mask.shape[2]):
        mask[..., f:f+1, :, :] *= frame_weights[f]

    if normalize:
        mask_mean = mask.mean()
        # Guard against an all-zero weighted mask (division by zero -> inf/nan).
        if mask_mean != 0:
            mask *= (original_mask_mean / mask_mean)
def prepare_mask(x, mask, LGW_MASK_RESCALE_MIN) -> tuple[torch.Tensor, bool]:
    """Resize/broadcast a user mask to match the latent x.

    A missing mask becomes an all-ones single-channel mask (and disables
    rescale-min). Otherwise the mask is interpolated to x's spatial (and,
    for 5D video latents, temporal) size and repeated across the remaining
    dimensions with a single channel.

    Returns (mask, LGW_MASK_RESCALE_MIN).
    """
    if mask is None:
        mask = torch.ones_like(x[:,0:1,...])
        LGW_MASK_RESCALE_MIN = False
        return mask, LGW_MASK_RESCALE_MIN

    target_height = x.shape[-2]
    target_width  = x.shape[-1]

    spatial_mask = None
    if x.ndim == 5 and mask.shape[0] > 1 and mask.ndim < 4:
        # Video latent with a batch of 2D masks: treat the mask batch as a
        # frame axis and interpolate in 3D (frames, height, width).
        target_frames = x.shape[-3]
        spatial_mask  = mask.unsqueeze(0).unsqueeze(0) # [B, H, W] -> [1, 1, B, H, W]
        spatial_mask  = F.interpolate(spatial_mask,
                                      size=(target_frames, target_height, target_width),
                                      mode='trilinear',
                                      align_corners=False) # [1, 1, F, H, W]
        repeat_shape = [1] # batch
        for i in range(1, x.ndim - 3):
            repeat_shape.append(x.shape[i])
        repeat_shape.extend([1, 1, 1]) # frames, height, width
    elif mask.ndim == 4: #temporal mask batch
        # Already 4D: resize spatially, repeat across x's channel-like dim,
        # and add a batch dim. (spatial_mask stays None; mask is final here.)
        mask = F.interpolate(mask, size=(target_height, target_width), mode='bilinear', align_corners=False)
        mask = mask.repeat(x.shape[-4],1,1,1)
        mask.unsqueeze_(0)
    else:
        # Generic 2D/3D mask: resize spatially, then pad dims to x's rank.
        spatial_mask = mask.unsqueeze(1)
        spatial_mask = F.interpolate(spatial_mask, size=(target_height, target_width), mode='bilinear', align_corners=False)
        while spatial_mask.ndim < x.ndim:
            spatial_mask = spatial_mask.unsqueeze(2)
        repeat_shape = [1] # batch
        for i in range(1, x.ndim - 2):
            repeat_shape.append(x.shape[i])
        repeat_shape.extend([1, 1]) # height and width
        repeat_shape[1] = 1 # only need one channel for masks

    if spatial_mask is not None:
        mask = spatial_mask.repeat(*repeat_shape).to(x.dtype)
        del spatial_mask

    return mask, LGW_MASK_RESCALE_MIN
def apply_temporal_smoothing(tensor, temporal_smoothing):
    """Blend each frame of a 5D (b, c, f, h, w) tensor with its neighbors.

    A fixed 5-tap kernel [0.1, 0.2, 0.4, 0.2, 0.1] is scaled by
    temporal_smoothing, the center tap keeps the remaining (1 - s) weight,
    and the kernel is renormalized to sum to 1. Non-5D inputs or a
    non-positive smoothing factor are returned unchanged.
    """
    if temporal_smoothing <= 0 or tensor.ndim != 5:
        return tensor

    kernel_size = 5
    padding = kernel_size // 2

    weights = torch.tensor(
        [0.1, 0.2, 0.4, 0.2, 0.1],
        device=tensor.device, dtype=tensor.dtype
    ) * temporal_smoothing
    weights[padding] += (1 - temporal_smoothing)
    weights = weights / weights.sum()

    # Flatten everything except the frame axis so conv1d runs over frames.
    b, c, f, h, w = tensor.shape
    frames_last = tensor.permute(0, 1, 3, 4, 2).reshape(-1, f)

    # Depth-1 1D convolution along the temporal axis (zero-padded edges).
    smoothed = F.conv1d(
        frames_last.unsqueeze(1),
        weights.view(1, 1, -1),
        padding=padding
    ).squeeze(1)

    return smoothed.view(b, c, h, w, f).permute(0, 1, 4, 2, 3)
def get_guide_epsilon_substep(x_0, x_, y0, y0_inv, s_, row, row_offset, rk_type, b=None, c=None):
    """Epsilon implied by the foreground/background guides at substep `row`,
    optionally restricted to batch index b (and channel index c).

    Returns (eps_row, eps_row_inv).
    """
    s_in = x_0.new_ones([x_0.shape[0]])

    # Build the (possibly empty) slicing index; c is only honored when b is set.
    if b is None:
        index = ()
    elif c is None:
        index = (b,)
    else:
        index = (b, c)

    if RK_Method_Beta.is_exponential(rk_type):
        # Exponential integrators: epsilon is the displacement toward the guide.
        eps_row     = y0    [index] - x_0[index]
        eps_row_inv = y0_inv[index] - x_0[index]
    else:
        # Standard integrators: residual normalized by the substep sigma.
        # (Previously indexed x_ with row+row_offset, which was wrong; also
        # x_[row+1] can exceed RK.rows+2 with gauss-legendre_2s, 1 implicit
        # step / 1 implicit substep.)
        eps_row     = (x_[row][index] - y0    [index]) / (s_[row] * s_in)
        eps_row_inv = (x_[row][index] - y0_inv[index]) / (s_[row] * s_in)

    return eps_row, eps_row_inv
def get_guide_epsilon(x_0, x_, y0, sigma, rk_type, b=None, c=None):
    """Epsilon implied by guide y0 at noise level sigma, optionally restricted
    to batch index b (and channel index c)."""
    s_in = x_0.new_ones([x_0.shape[0]])

    # Build the (possibly empty) slicing index; c is only honored when b is set.
    if b is None:
        index = ()
    elif c is None:
        index = (b,)
    else:
        index = (b, c)

    if RK_Method_Beta.is_exponential(rk_type):
        # Exponential integrators: epsilon is the displacement toward the guide.
        return y0[index] - x_0[index]
    # Standard integrators: residual normalized by sigma.
    return (x_[index] - y0[index]) / (sigma * s_in)
@torch.no_grad
def noise_cossim_guide_tiled(x_list, guide, cossim_mode="forward", tile_size=2, step=0):
    """For each spatial tile, pick the candidate in x_list whose tile best
    matches `guide` under cossim_mode, and reassemble the winners.

    cossim_mode may also be a numeric string: the tile whose cosine
    similarity is closest to that value wins.
    """
    def tile(t):
        return rearrange(t, "b c (h t1) (w t2) -> b (t1 t2) c h w", t1=tile_size, t2=tile_size)

    guide_tiles     = tile(guide)
    candidate_tiles = torch.stack([tile(x)[0] for x in x_list])                           # [n_x, n_tiles, c, h, w]

    guide_flat = guide_tiles[0].view(guide_tiles.shape[1], -1).unsqueeze(0)               # [1, n_tiles, c*h*w]
    cand_flat  = candidate_tiles.view(candidate_tiles.size(0), candidate_tiles.size(1), -1)  # [n_x, n_tiles, c*h*w]
    scores     = F.cosine_similarity(cand_flat, guide_flat, dim=-1)                       # [n_x, n_tiles]

    even = step % 2 == 0
    if cossim_mode == "forward":
        picks = scores.argmax(dim=0)
    elif cossim_mode == "reverse":
        picks = scores.argmin(dim=0)
    elif cossim_mode == "orthogonal":
        picks = scores.abs().argmin(dim=0)
    elif cossim_mode == "forward_reverse":
        picks = scores.argmax(dim=0) if even else scores.argmin(dim=0)
    elif cossim_mode == "reverse_forward":
        picks = scores.argmin(dim=0) if even else scores.argmax(dim=0)
    elif cossim_mode == "orthogonal_reverse":
        picks = scores.abs().argmin(dim=0) if even else scores.argmin(dim=0)
    elif cossim_mode == "reverse_orthogonal":
        picks = scores.argmin(dim=0) if even else scores.abs().argmin(dim=0)
    else:
        # Any other mode string is parsed as a numeric similarity target.
        picks = (scores - float(cossim_mode)).abs().argmin(dim=0)

    chosen = candidate_tiles[picks, torch.arange(picks.size(0))].unsqueeze(0)             # [1, n_tiles, c, h, w]
    return rearrange(chosen, "b (t1 t2) c h w -> b c (h t1) (w t2)", t1=tile_size, t2=tile_size)
@torch.no_grad
def noise_cossim_eps_tiled(x_list, eps, noise_list, cossim_mode="forward", tile_size=2, step=0):
    """For each spatial tile, pick the candidate in x_list whose noise draw
    best matches `eps` under cossim_mode, and reassemble the winners.

    cossim_mode may also be a numeric string: the tile whose cosine
    similarity is closest to that value wins.
    """
    def tile(t):
        return rearrange(t, "b c (h t1) (w t2) -> b (t1 t2) c h w", t1=tile_size, t2=tile_size)

    eps_tiles   = tile(eps)
    x_tiles     = [tile(x) for x in x_list]
    noise_stack = torch.stack([tile(noise)[0] for noise in noise_list])          # [n_x, n_tiles, c, h, w]

    eps_flat   = eps_tiles[0].view(eps_tiles.shape[1], -1).unsqueeze(0)          # [1, n_tiles, c*h*w]
    noise_flat = noise_stack.view(noise_stack.size(0), noise_stack.size(1), -1)  # [n_x, n_tiles, c*h*w]
    scores     = F.cosine_similarity(noise_flat, eps_flat, dim=-1)               # [n_x, n_tiles]

    even = step % 2 == 0

    def smallest_positive():
        # Among strictly positive similarities, the one closest to zero.
        masked = torch.where(scores > 0, scores, torch.full_like(scores, float('inf')))
        return masked.argmin(dim=0)

    def largest_negative():
        # Among strictly negative similarities, the one closest to zero.
        masked = torch.where(scores < 0, scores, torch.full_like(scores, float('-inf')))
        return masked.argmax(dim=0)

    if cossim_mode == "forward":
        picks = scores.argmax(dim=0)
    elif cossim_mode == "reverse":
        picks = scores.argmin(dim=0)
    elif cossim_mode == "orthogonal":
        picks = scores.abs().argmin(dim=0)
    elif cossim_mode == "orthogonal_pos":
        picks = smallest_positive()
    elif cossim_mode == "orthogonal_neg":
        picks = largest_negative()
    elif cossim_mode == "orthogonal_posneg":
        picks = smallest_positive() if even else largest_negative()
    elif cossim_mode == "orthogonal_negpos":
        picks = largest_negative() if even else smallest_positive()
    elif cossim_mode == "forward_reverse":
        picks = scores.argmax(dim=0) if even else scores.argmin(dim=0)
    elif cossim_mode == "reverse_forward":
        picks = scores.argmin(dim=0) if even else scores.argmax(dim=0)
    elif cossim_mode == "orthogonal_reverse":
        picks = scores.abs().argmin(dim=0) if even else scores.argmin(dim=0)
    elif cossim_mode == "reverse_orthogonal":
        picks = scores.argmin(dim=0) if even else scores.abs().argmin(dim=0)
    else:
        # Any other mode string is parsed as a numeric similarity target.
        picks = (scores - float(cossim_mode)).abs().argmin(dim=0)

    x_stack  = torch.stack([xt[0] for xt in x_tiles])                            # [n_x, n_tiles, c, h, w]
    selected = x_stack[picks, torch.arange(picks.size(0))].unsqueeze(0)          # restore batch dim
    return rearrange(selected, "b (t1 t2) c h w -> b c (h t1) (w t2)", t1=tile_size, t2=tile_size)
@torch.no_grad
def noise_cossim_guide_eps_tiled(x_0, x_list, y0, noise_list, cossim_mode="forward", tile_size=2, step=0, sigma=None, rk_type=None):
    """For each spatial tile, score each candidate's noise draw against that
    candidate's guide residual (x - y0) and keep the winner per cossim_mode.

    x_0, sigma and rk_type are accepted for interface compatibility but are
    not used by the scoring itself. cossim_mode may also be a numeric string
    (target similarity value).
    """
    def tile(t):
        return rearrange(t, "b c (h t1) (w t2) -> b (t1 t2) c h w", t1=tile_size, t2=tile_size)

    x_stack         = torch.stack([tile(x)[0] for x in x_list])                  # [n_x, n_tiles, c, h, w]
    eps_guide_stack = torch.stack([tile(x - y0)[0] for x in x_list])             # [n_x, n_tiles, c, h, w]
    del x_list

    noise_stack = torch.stack([tile(noise)[0] for noise in noise_list])          # [n_x, n_tiles, c, h, w]
    del noise_list

    noise_flat = noise_stack.view(noise_stack.size(0), noise_stack.size(1), -1)              # [n_x, n_tiles, c*h*w]
    guide_flat = eps_guide_stack.view(eps_guide_stack.size(0), eps_guide_stack.size(1), -1)  # [n_x, n_tiles, c*h*w]
    scores     = F.cosine_similarity(noise_flat, guide_flat, dim=-1)                         # [n_x, n_tiles]
    del noise_stack, noise_flat, eps_guide_stack, guide_flat

    even = step % 2 == 0

    def smallest_positive():
        # Among strictly positive similarities, the one closest to zero.
        masked = torch.where(scores > 0, scores, torch.full_like(scores, float('inf')))
        return masked.argmin(dim=0)

    def largest_negative():
        # Among strictly negative similarities, the one closest to zero.
        masked = torch.where(scores < 0, scores, torch.full_like(scores, float('-inf')))
        return masked.argmax(dim=0)

    if cossim_mode == "forward":
        picks = scores.argmax(dim=0)
    elif cossim_mode == "reverse":
        picks = scores.argmin(dim=0)
    elif cossim_mode == "orthogonal":
        picks = scores.abs().argmin(dim=0)
    elif cossim_mode == "orthogonal_pos":
        picks = smallest_positive()
    elif cossim_mode == "orthogonal_neg":
        picks = largest_negative()
    elif cossim_mode == "orthogonal_posneg":
        picks = smallest_positive() if even else largest_negative()
    elif cossim_mode == "orthogonal_negpos":
        picks = largest_negative() if even else smallest_positive()
    elif cossim_mode == "forward_reverse":
        picks = scores.argmax(dim=0) if even else scores.argmin(dim=0)
    elif cossim_mode == "reverse_forward":
        picks = scores.argmin(dim=0) if even else scores.argmax(dim=0)
    elif cossim_mode == "orthogonal_reverse":
        picks = scores.abs().argmin(dim=0) if even else scores.argmin(dim=0)
    elif cossim_mode == "reverse_orthogonal":
        picks = scores.argmin(dim=0) if even else scores.abs().argmin(dim=0)
    else:
        # Any other mode string is parsed as a numeric similarity target.
        picks = (scores - float(cossim_mode)).abs().argmin(dim=0)

    selected = x_stack[picks, torch.arange(picks.size(0))]                        # [n_tiles, c, h, w]
    del x_stack
    selected = selected.unsqueeze(0)                                              # restore batch dim
    return rearrange(selected, "b (t1 t2) c h w -> b c (h t1) (w t2)", t1=tile_size, t2=tile_size)
class NoiseStepHandlerOSDE:
    """Orthogonalizes a noise sample against a configurable set of references.

    The reference tensors are captured at construction time; each named
    source maps to a parameter list whose first slot is filled with the
    live noise sample when get_ortho_noise is called.
    """
    def __init__(self, x, eps=None, data=None, x_init=None, guide=None, guide_bkg=None):
        self.noise     = None
        self.x         = x
        self.eps       = eps
        self.data      = data
        self.x_init    = x_init
        self.guide     = guide
        self.guide_bkg = guide_bkg

        self.eps_list = None

        # Source name -> [noise slot, *reference tensors]. Slot 0 is replaced
        # with the live noise in get_ortho_noise.
        self.noise_cossim_map = {
            "eps_orthogonal":              [self.noise, self.eps],
            "eps_data_orthogonal":         [self.noise, self.eps, self.data],
            "data_orthogonal":             [self.noise, self.data],
            "xinit_orthogonal":            [self.noise, self.x_init],
            "x_orthogonal":                [self.noise, self.x],
            "x_data_orthogonal":           [self.noise, self.x, self.data],
            "x_eps_orthogonal":            [self.noise, self.x, self.eps],
            "x_eps_data_orthogonal":       [self.noise, self.x, self.eps, self.data],
            "x_eps_data_xinit_orthogonal": [self.noise, self.x, self.eps, self.data, self.x_init],
            "x_eps_guide_orthogonal":      [self.noise, self.x, self.eps, self.guide],
            "x_eps_guide_bkg_orthogonal":  [self.noise, self.x, self.eps, self.guide_bkg],
            "noise_orthogonal":            [self.noise, self.x_init],
            "guide_orthogonal":            [self.noise, self.guide],
            "guide_bkg_orthogonal":        [self.noise, self.guide_bkg],
        }

    def check_cossim_source(self, source):
        """True when `source` names a configured orthogonalization target."""
        return source in self.noise_cossim_map

    def get_ortho_noise(self, noise, prev_noises=None, max_iter=100, max_score=1e-7, NOISE_COSSIM_SOURCE="eps_orthogonal"):
        """Return `noise` orthogonalized (channelwise) against the references
        named by NOISE_COSSIM_SOURCE. Raises ValueError on an unknown source."""
        if not self.check_cossim_source(NOISE_COSSIM_SOURCE):
            raise ValueError(f"Invalid NOISE_COSSIM_SOURCE: {NOISE_COSSIM_SOURCE}")

        params = self.noise_cossim_map[NOISE_COSSIM_SOURCE]
        params[0] = noise   # install the live noise in slot 0
        return get_orthogonal_noise_from_channelwise(*params, max_iter=max_iter, max_score=max_score)
# NOTE: NS AND SUBSTEP ADDED!
def handle_tiled_etc_noise_steps(
        x_0,
        x,
        x_prenoise,
        x_init,
        eps,
        denoised,
        y0,
        y0_inv,
        step,
        rk_type,
        RK,
        NS,
        SUBSTEP,
        sigma_up,
        sigma,
        sigma_next,
        alpha_ratio,
        s_noise,
        noise_mode,
        SDE_NOISE_EXTERNAL,
        sde_noise_t,
        NOISE_COSSIM_SOURCE,
        NOISE_COSSIM_MODE,
        noise_cossim_tile_size,
        noise_cossim_iterations,
        extra_options):
    """Draw several candidate re-noised latents and keep the one whose cosine
    similarity to the chosen reference best satisfies NOISE_COSSIM_MODE.

    Tiled sources ("eps_tiled", "guide_epsilon_tiled", "guide_bkg_epsilon_tiled",
    "guide_tiled", "guide_bkg_tiled") delegate per-tile selection to the
    noise_cossim_*_tiled helpers; scalar sources score whole latents and the
    best-scoring candidate wins.

    Returns:
        The selected re-noised latent x.
    """
    EO = ExtraOptions(extra_options)

    x_tmp          = []
    cossim_tmp     = []
    noise_tmp_list = []

    # Past the configured end step, switch to the takeover source/mode.
    if step > EO("noise_cossim_end_step", MAX_STEPS):
        NOISE_COSSIM_SOURCE     = EO("noise_cossim_takeover_source",     "eps")
        NOISE_COSSIM_MODE       = EO("noise_cossim_takeover_mode",       "forward")
        noise_cossim_tile_size  = EO("noise_cossim_takeover_tile",       noise_cossim_tile_size)
        noise_cossim_iterations = EO("noise_cossim_takeover_iterations", noise_cossim_iterations)

    for i in range(noise_cossim_iterations):
        #x_tmp.append(NS.swap_noise(x_0, x, sigma, sigma, sigma_next, ))
        x_tmp.append(NS.add_noise_post(x, sigma_up, sigma, sigma_next, alpha_ratio, s_noise, noise_mode, SDE_NOISE_EXTERNAL, sde_noise_t)) #y0, lgw, sigma_down are currently unused
        noise_tmp = x_tmp[i] - x

        # Optional z-score normalization of the candidate noise / reference eps.
        if EO("noise_noise_zscore_norm"):
            noise_tmp = normalize_zscore(noise_tmp, channelwise=False, inplace=True)
        if EO("noise_noise_zscore_norm_cw"):
            noise_tmp = normalize_zscore(noise_tmp, channelwise=True, inplace=True)
        if EO("noise_eps_zscore_norm"):
            eps = normalize_zscore(eps, channelwise=False, inplace=True)
        if EO("noise_eps_zscore_norm_cw"):
            eps = normalize_zscore(eps, channelwise=True, inplace=True)

        # Tiled sources defer scoring until after the loop; just collect noises.
        if NOISE_COSSIM_SOURCE in ("eps_tiled", "guide_epsilon_tiled", "guide_bkg_epsilon_tiled", "iig_tiled"):
            noise_tmp_list.append(noise_tmp)

        # Scalar sources: score this candidate now.
        if NOISE_COSSIM_SOURCE == "eps":
            cossim_tmp.append(get_cosine_similarity(eps, noise_tmp))
        if NOISE_COSSIM_SOURCE == "eps_ch":
            # Per-channel similarity, summed over channels.
            cossim_total = torch.zeros_like(eps[0][0][0][0])
            for ch in range(eps.shape[1]):
                cossim_total += get_cosine_similarity(eps[0][ch], noise_tmp[0][ch])
            cossim_tmp.append(cossim_total)
        elif NOISE_COSSIM_SOURCE == "data":
            cossim_tmp.append(get_cosine_similarity(denoised, noise_tmp))
        elif NOISE_COSSIM_SOURCE == "latent":
            cossim_tmp.append(get_cosine_similarity(x_prenoise, noise_tmp))
        elif NOISE_COSSIM_SOURCE == "x_prenoise":
            cossim_tmp.append(get_cosine_similarity(x_prenoise, x_tmp[i]))
        elif NOISE_COSSIM_SOURCE == "x":
            cossim_tmp.append(get_cosine_similarity(x, x_tmp[i]))
        elif NOISE_COSSIM_SOURCE == "x_data":
            cossim_tmp.append(get_cosine_similarity(denoised, x_tmp[i]))
        elif NOISE_COSSIM_SOURCE == "x_init_vs_noise":
            cossim_tmp.append(get_cosine_similarity(x_init, noise_tmp))
        elif NOISE_COSSIM_SOURCE == "mom":
            cossim_tmp.append(get_cosine_similarity(denoised, x + sigma_next*noise_tmp))
        elif NOISE_COSSIM_SOURCE == "guide":
            cossim_tmp.append(get_cosine_similarity(y0, x_tmp[i]))
        elif NOISE_COSSIM_SOURCE == "guide_bkg":
            cossim_tmp.append(get_cosine_similarity(y0_inv, x_tmp[i]))

    # Before the start step, skip selection entirely and take the first draw.
    if step < EO("noise_cossim_start_step", 0):
        x = x_tmp[0]
    elif (NOISE_COSSIM_SOURCE == "eps_tiled"):
        x = noise_cossim_eps_tiled(x_tmp, eps, noise_tmp_list, cossim_mode=NOISE_COSSIM_MODE, tile_size=noise_cossim_tile_size, step=step)
    elif (NOISE_COSSIM_SOURCE == "guide_epsilon_tiled"):
        x = noise_cossim_guide_eps_tiled(x_0, x_tmp, y0, noise_tmp_list, cossim_mode=NOISE_COSSIM_MODE, tile_size=noise_cossim_tile_size, step=step, sigma=sigma, rk_type=rk_type)
    elif (NOISE_COSSIM_SOURCE == "guide_bkg_epsilon_tiled"):
        x = noise_cossim_guide_eps_tiled(x_0, x_tmp, y0_inv, noise_tmp_list, cossim_mode=NOISE_COSSIM_MODE, tile_size=noise_cossim_tile_size, step=step, sigma=sigma, rk_type=rk_type)
    elif (NOISE_COSSIM_SOURCE == "guide_tiled"):
        x = noise_cossim_guide_tiled(x_tmp, y0, cossim_mode=NOISE_COSSIM_MODE, tile_size=noise_cossim_tile_size, step=step)
    elif (NOISE_COSSIM_SOURCE == "guide_bkg_tiled"):
        # FIX: previously omitted step=step, so step-parity modes (e.g.
        # "forward_reverse") always ran with step=0 for the background guide.
        x = noise_cossim_guide_tiled(x_tmp, y0_inv, cossim_mode=NOISE_COSSIM_MODE, tile_size=noise_cossim_tile_size, step=step)
    else:
        # Scalar sources: pick the candidate whose score fits the mode;
        # unknown modes fall back to the first candidate.
        for i in range(len(x_tmp)):
            if (NOISE_COSSIM_MODE == "forward") and (cossim_tmp[i] == max(cossim_tmp)):
                x = x_tmp[i]
                break
            elif (NOISE_COSSIM_MODE == "reverse") and (cossim_tmp[i] == min(cossim_tmp)):
                x = x_tmp[i]
                break
            elif (NOISE_COSSIM_MODE == "orthogonal") and (abs(cossim_tmp[i]) == min(abs(val) for val in cossim_tmp)):
                x = x_tmp[i]
                break
            elif (NOISE_COSSIM_MODE != "forward") and (NOISE_COSSIM_MODE != "reverse") and (NOISE_COSSIM_MODE != "orthogonal"):
                x = x_tmp[0]
                break
    return x
def get_masked_epsilon_projection(x_0, x_, eps_, y0, y0_inv, s_, row, row_offset, rk_type, LG, step):
    """Build a masked, projected epsilon guide for substep `row`.

    Lerps the foreground/background guide epsilons under LG.mask, projects
    the mixture onto the current epsilon's direction, then blends the result
    in using the per-step guide-weight masks from LG.
    """
    eps_fg, eps_bg = get_guide_epsilon_substep(x_0, x_, y0, y0_inv, s_, row, row_offset, rk_type)

    base = eps_[row]
    # Region-masked mixture of the two guide epsilons.
    eps_lerp = base + LG.mask * (eps_fg - base) + (1 - LG.mask) * (eps_bg - base)
    # Projection decomposition relative to the current epsilon.
    eps_proj = get_collinear(base, eps_lerp) + get_orthogonal(eps_lerp, base)

    lgw_mask, lgw_mask_inv = LG.get_masks_for_step(step)
    return base + lgw_mask * (eps_proj - base) + lgw_mask_inv * (eps_proj - base)
================================================
FILE: beta/rk_method_beta.py
================================================
import torch
from torch import Tensor
from typing import Optional, Callable, Tuple, List, Dict, Any, Union
import comfy.model_patcher
import comfy.supported_models
import itertools
from .phi_functions import Phi
from .rk_coefficients_beta import get_implicit_sampler_name_list, get_rk_methods_beta
from ..helper import ExtraOptions
from ..latents import get_orthogonal, get_collinear, get_cosine_similarity, tile_latent, untile_latent
from ..res4lyf import RESplain
MAX_STEPS = 10000
def get_data_from_step (x:Tensor, x_next:Tensor, sigma:Tensor, sigma_next:Tensor) -> Tensor:
    """Recover the denoised estimate implied by stepping x -> x_next over
    sigma -> sigma_next (linear-in-sigma model of the trajectory)."""
    step_size = sigma_next - sigma
    return (sigma_next * x - sigma * x_next) / step_size
def get_epsilon_from_step(x:Tensor, x_next:Tensor, sigma:Tensor, sigma_next:Tensor) -> Tensor:
    """Epsilon (negative slope of x with respect to sigma) implied by a linear step
    from (x, sigma) to (x_next, sigma_next)."""
    step_size = sigma_next - sigma
    return (x - x_next) / step_size
class RK_Method_Beta:
def __init__(self,
             model,
             rk_type : str,
             VE_MODEL : bool,
             noise_anchor : float,
             noise_boost_normalize : bool = True,
             model_device : str = 'cuda',
             work_device : str = 'cpu',
             dtype : torch.dtype = torch.float64,
             extra_options : str = ""
             ):
    """Shared state for a Runge-Kutta sampler method (base for the exponential and
    linear formulations).

    model                  : wrapped diffusion model (ComfyUI model patcher or inner model).
    rk_type                : sampler name; selects the tableau and implicit/exponential handling.
    VE_MODEL               : variance-exploding parameterization flag.
    noise_anchor           : blend factor between x_0-anchored and substep epsilons (LINEAR_ANCHOR_X_0).
    noise_boost_normalize  : enables channel-mean resync of substeps (SYNC_SUBSTEP_MEAN_CW).
    """
    self.work_device = work_device
    self.model_device = model_device
    self.dtype : torch.dtype = dtype
    self.model = model
    # Locate model_sampling on either wrapper layout.
    # NOTE(review): if the model has neither attribute, model_sampling is unbound and the
    # next line raises NameError — presumably both wrapper types are always one of these.
    if hasattr(model, "model"):
        model_sampling = model.model.model_sampling
    elif hasattr(model, "inner_model"):
        model_sampling = model.inner_model.inner_model.model_sampling
    self.sigma_min : Tensor = model_sampling.sigma_min.to(dtype=dtype, device=work_device)
    self.sigma_max : Tensor = model_sampling.sigma_max.to(dtype=dtype, device=work_device)
    self.rk_type : str = rk_type
    self.IMPLICIT : bool = rk_type in get_implicit_sampler_name_list(nameOnly=True)
    self.EXPONENTIAL : bool = RK_Method_Beta.is_exponential(rk_type)
    self.VE_MODEL : bool = VE_MODEL
    self.SYNC_SUBSTEP_MEAN_CW : bool = noise_boost_normalize
    # Butcher tableau pieces; populated by set_coeff().
    self.A : Optional[Tensor] = None
    self.B : Optional[Tensor] = None
    self.U : Optional[Tensor] = None
    self.V : Optional[Tensor] = None
    self.rows : int = 0
    self.cols : int = 0
    self.denoised : Optional[Tensor] = None
    self.uncond : Optional[Tensor] = None
    self.y0 : Optional[Tensor] = None
    self.y0_inv : Optional[Tensor] = None
    self.multistep_stages : int = 0
    self.row_offset : Optional[int] = None
    self.cfg_cw : float = 1.0
    self.extra_args : Optional[Dict[str, Any]] = None
    self.extra_options : str = extra_options
    self.EO : ExtraOptions = ExtraOptions(extra_options)
    self.reorder_tableau_indices : list[int] = self.EO("reorder_tableau_indices", [-1])
    self.LINEAR_ANCHOR_X_0 : float = noise_anchor
    # Optional pixel-space tile schedule used by model_denoised().
    self.tile_sizes : Optional[List[Tuple[int,int]]] = None
    self.tile_cnt : int = 0
    self.latent_compression_ratio : int = 8
@staticmethod
def is_exponential(rk_type:str) -> bool:
if rk_type.startswith(( "res",
"dpmpp",
"ddim",
"pec",
"etdrk",
"lawson",
"abnorsett",
)):
return True
else:
return False
@staticmethod
def create(model,
           rk_type : str,
           VE_MODEL : bool,
           noise_anchor : float = 1.0,
           noise_boost_normalize : bool = True,
           model_device : str = 'cuda',
           work_device : str = 'cpu',
           dtype : torch.dtype = torch.float64,
           extra_options : str = ""
           ) -> "Union[RK_Method_Exponential, RK_Method_Linear]":
    """Factory: instantiate the exponential or linear integrator subclass for `rk_type`."""
    method_cls = RK_Method_Exponential if RK_Method_Beta.is_exponential(rk_type) else RK_Method_Linear
    return method_cls(model, rk_type, VE_MODEL, noise_anchor, noise_boost_normalize, model_device, work_device, dtype, extra_options)
def __call__(self):
    # Abstract: RK_Method_Exponential / RK_Method_Linear provide the real implementation.
    raise NotImplementedError("This method got clownsharked!")
def model_epsilon(self, x:Tensor, sigma:Tensor, **extra_args) -> Tuple[Tensor, Tensor]:
    """Single model call returning (eps, denoised) with channelwise CFG applied.

    Assumes 4D (B,C,H,W) latents for the broadcast `view` below.
    """
    s_in = x.new_ones([x.shape[0]])
    denoised = self.model(x, sigma * s_in, **extra_args)
    denoised = self.calc_cfg_channelwise(denoised)
    eps = (x - denoised) / (sigma * s_in).view(x.shape[0], 1, 1, 1) #return x0 ###################################THIS WORKS ONLY WITH THE MODEL SAMPLING PATCH
    return eps, denoised
def model_denoised(self, x:Tensor, sigma:Tensor, **extra_args) -> Tensor:
    """Run the wrapped model and return the denoised prediction, with optional tiling.

    Three paths:
      * extra option "tile_model_calls": fixed-size tiling with no controlnet/style handling;
      * self.tile_sizes set: rotating tile schedule with controlnet hint pre-scaling/tiling
        and per-tile style-guide (y0_style_pos/neg) slicing;
      * otherwise: a single whole-latent model call.
    Always finishes with channelwise CFG rescaling.
    """
    s_in = x.new_ones([x.shape[0]])
    control_tiles = None
    y0_style_pos = self.extra_args['model_options']['transformer_options'].get("y0_style_pos")
    y0_style_neg = self.extra_args['model_options']['transformer_options'].get("y0_style_neg")
    # NOTE(review): these two names look like typos (y0_style_pos_tile / sy0_style_neg_tiles);
    # the names actually read later (y0_style_pos_tiles / y0_style_neg_tiles) are only assigned
    # inside the tile branch under the same guards, so this initialization is dead code.
    y0_style_pos_tile, sy0_style_neg_tiles = None, None
    if self.EO("tile_model_calls"):
        # Simple fixed-size tiling (latent-space tile size from extra options).
        tile_h = self.EO("tile_h", 128)
        tile_w = self.EO("tile_w", 128)
        denoised_tiles = []
        tiles, orig_shape, grid, strides = tile_latent(x, tile_size=(tile_h,tile_w))
        for i in range(tiles.shape[0]):
            tile = tiles[i].unsqueeze(0)
            denoised_tile = self.model(tile, sigma * s_in, **extra_args)
            denoised_tiles.append(denoised_tile)
        denoised_tiles = torch.cat(denoised_tiles, dim=0)
        denoised = untile_latent(denoised_tiles, orig_shape, grid, strides)
    elif self.tile_sizes is not None:
        # tile_sizes holds pixel-space sizes; -1 means "full extent of the latent".
        tile_h_full = self.tile_sizes[self.tile_cnt % len(self.tile_sizes)][0]
        tile_w_full = self.tile_sizes[self.tile_cnt % len(self.tile_sizes)][1]
        if tile_h_full == -1:
            tile_h = x.shape[-2]
            tile_h_full = tile_h * self.latent_compression_ratio
        else:
            tile_h = tile_h_full // self.latent_compression_ratio
        if tile_w_full == -1:
            tile_w = x.shape[-1]
            tile_w_full = tile_w * self.latent_compression_ratio
        else:
            tile_w = tile_w_full // self.latent_compression_ratio
        #tile_h = tile_h_full // self.latent_compression_ratio
        #tile_w = tile_w_full // self.latent_compression_ratio
        self.tile_cnt += 1
        #if len(self.tile_sizes) == 1 and self.tile_cnt % 2 == 1:
        #    tile_h, tile_w = tile_w, tile_h
        #    tile_h_full, tile_w_full = tile_w_full, tile_h_full
        # Swap H/W on every other full pass through the schedule when autorotation is on.
        if (self.tile_cnt // len(self.tile_sizes)) % 2 == 1 and self.EO("tiles_autorotate"):
            tile_h, tile_w = tile_w, tile_h
            tile_h_full, tile_w_full = tile_w_full, tile_h_full
        xt_negative = self.model.inner_model.conds.get('xt_negative', self.model.inner_model.conds.get('negative'))
        negative_control = xt_negative[0].get('control')
        if negative_control is not None and hasattr(negative_control, 'cond_hint_original'):
            negative_cond_hint_init = negative_control.cond_hint.clone() if negative_control.cond_hint is not None else None
        xt_positive = self.model.inner_model.conds.get('xt_positive', self.model.inner_model.conds.get('positive'))
        positive_control = xt_positive[0].get('control')
        if positive_control is not None and hasattr(positive_control, 'cond_hint_original'):
            positive_cond_hint_init = positive_control.cond_hint.clone() if positive_control.cond_hint is not None else None
            # Rescale the controlnet hint to the latent's full pixel size before tiling it.
            # NOTE(review): the comparisons pair hint shape[-1] with latent shape[-2] and vice
            # versa — possibly intentional (transposed hint?) — confirm against tile_latent.
            if positive_control.cond_hint_original.shape[-1] != x.shape[-2] * self.latent_compression_ratio or positive_control.cond_hint_original.shape[-2] != x.shape[-1] * self.latent_compression_ratio:
                positive_control_pretile = comfy.utils.bislerp(positive_control.cond_hint_original.clone().to(torch.float16).to('cuda'), x.shape[-1] * self.latent_compression_ratio, x.shape[-2] * self.latent_compression_ratio)
                positive_control.cond_hint_original = positive_control_pretile.to(positive_control.cond_hint_original)
            positive_control_pretile = positive_control.cond_hint_original.clone().to(torch.float16).to('cuda')
            control_tiles, control_orig_shape, control_grid, control_strides = tile_latent(positive_control_pretile, tile_size=(tile_h_full,tile_w_full))
            control_tiles = control_tiles  # NOTE(review): no-op self-assignment, likely refactor leftover
        denoised_tiles = []
        tiles, orig_shape, grid, strides = tile_latent(x, tile_size=(tile_h,tile_w))
        if y0_style_pos is not None:
            y0_style_pos_tiles, _, _, _ = tile_latent(y0_style_pos, tile_size=(tile_h,tile_w))
        if y0_style_neg is not None:
            y0_style_neg_tiles, _, _, _ = tile_latent(y0_style_neg, tile_size=(tile_h,tile_w))
        for i in range(tiles.shape[0]):
            tile = tiles[i].unsqueeze(0)
            self.extra_args['model_options']['transformer_options']['x_tmp'] = tile
            if control_tiles is not None:
                positive_control.cond_hint = control_tiles[i].unsqueeze(0).to(positive_control.cond_hint)
                if negative_control is not None:
                    negative_control.cond_hint = control_tiles[i].unsqueeze(0).to(positive_control.cond_hint)
            if y0_style_pos is not None:
                self.extra_args['model_options']['transformer_options']['y0_style_pos'] = y0_style_pos_tiles[i].unsqueeze(0)
            if y0_style_neg is not None:
                self.extra_args['model_options']['transformer_options']['y0_style_neg'] = y0_style_neg_tiles[i].unsqueeze(0)
            denoised_tile = self.model(tile, sigma * s_in, **extra_args)
            denoised_tiles.append(denoised_tile)
        denoised_tiles = torch.cat(denoised_tiles, dim=0)
        denoised = untile_latent(denoised_tiles, orig_shape, grid, strides)
    else:
        denoised = self.model(x, sigma * s_in, **extra_args)
    # Restore controlnet hints and full-size style guides after any tiled pass.
    if control_tiles is not None:
        positive_control.cond_hint = positive_cond_hint_init
        if negative_control is not None:
            negative_control.cond_hint = negative_cond_hint_init
    if y0_style_pos is not None:
        self.extra_args['model_options']['transformer_options']['y0_style_pos'] = y0_style_pos
    if y0_style_neg is not None:
        self.extra_args['model_options']['transformer_options']['y0_style_neg'] = y0_style_neg
    denoised = self.calc_cfg_channelwise(denoised)
    return denoised
def update_transformer_options(self,
transformer_options : Optional[dict] = None,
):
self.extra_args.setdefault("model_options", {}).setdefault("transformer_options", {}).update(transformer_options)
return
def set_coeff(self,
              rk_type : str,
              h : Tensor,
              c1 : float = 0.0,
              c2 : float = 0.5,
              c3 : float = 1.0,
              step : int = 0,
              sigmas : Optional[Tensor] = None,
              sigma_down : Optional[Tensor] = None,
              ) -> None:
    """Build the Butcher tableau (A, B, C, plus optional multistep U, V) for `rk_type`
    at the given step, storing them as tensors on h's dtype/device.

    Also refreshes the IMPLICIT/EXPONENTIAL flags since rk_type can change mid-run
    (see swap_rk_type_at_step_or_threshold).
    """
    self.rk_type = rk_type
    self.IMPLICIT = rk_type in get_implicit_sampler_name_list(nameOnly=True)
    self.EXPONENTIAL = RK_Method_Beta.is_exponential(rk_type)
    sigma = sigmas[step]
    sigma_next = sigmas[step+1]
    h_prev = []
    a, b, u, v, ci, multistep_stages, hybrid_stages, FSAL = get_rk_methods_beta(rk_type,
                                                                                h,
                                                                                c1,
                                                                                c2,
                                                                                c3,
                                                                                h_prev,
                                                                                step,
                                                                                sigmas,
                                                                                sigma,
                                                                                sigma_next,
                                                                                sigma_down,
                                                                                self.extra_options,
                                                                                )
    self.multistep_stages = multistep_stages
    self.hybrid_stages = hybrid_stages
    self.A = torch.tensor(a, dtype=h.dtype, device=h.device)
    self.B = torch.tensor(b, dtype=h.dtype, device=h.device)
    self.C = torch.tensor(ci, dtype=h.dtype, device=h.device)
    self.U = torch.tensor(u, dtype=h.dtype, device=h.device) if u is not None else None
    self.V = torch.tensor(v, dtype=h.dtype, device=h.device) if v is not None else None
    self.rows = self.A.shape[0]
    self.cols = self.A.shape[1]
    # Explicit tableaus whose first row is all zeros skip that stage (offset 1).
    self.row_offset = 1 if not self.IMPLICIT and self.A[0].sum() == 0 else 0
    if self.IMPLICIT and self.reorder_tableau_indices[0] != -1:
        self.reorder_tableau(self.reorder_tableau_indices)
def reorder_tableau(self, indices:list[int]) -> None:
#if indices[0]:
self.A = self.A [indices]
self.B[0] = self.B[0][indices]
self.C = self.C [indices]
self.C = torch.cat((self.C, self.C[-1:]))
return
def update_substep(self,
                   x_0 : Tensor,
                   x_ : Tensor,
                   eps_ : Tensor,
                   eps_prev_ : Tensor,
                   row : int,
                   row_offset : int,
                   h_new : Tensor,
                   h_new_orig : Tensor,
                   lying_eps_row_factor : float = 1.0,
                   sigma : Optional[Tensor] = None,
                   ) -> Tensor:
    """Advance the next stage state x_[row_tmp_offset] from x_0 using the tableau-weighted
    epsilon sum, optionally scaling the current row epsilon ("lying") and resyncing
    channel means against the unscaled / un-noise-boosted trajectory.

    Mutates x_ (and, when lying_eps_row_factor != 1, eps_/eps_prev_) in place; returns x_.
    """
    # Target stage index: next tableau row, or the B row once past the last A row.
    if row < self.rows - row_offset and self.multistep_stages == 0:
        row_tmp_offset = row + row_offset
    else:
        row_tmp_offset = row + 1
    #zr_base = self.zum(row+row_offset+self.multistep_stages, eps_, eps_prev_) # TODO: why unused?
    if self.SYNC_SUBSTEP_MEAN_CW and lying_eps_row_factor != 1.0:
        # Snapshot the un-lied trajectory so its channel means can be restored below.
        zr_orig = self.zum(row+row_offset+self.multistep_stages, eps_, eps_prev_)
        x_orig_row = x_0 + h_new * zr_orig
        #eps_row = eps_ [row].clone()
        #eps_prev_row = eps_prev_[row].clone()
        # NOTE(review): this scaling persists in eps_/eps_prev_ after the call (the
        # restore lines are commented out) — presumably intentional.
        eps_ [row] *= lying_eps_row_factor
        eps_prev_[row] *= lying_eps_row_factor
    if self.EO("exp2lin_override"):
        # zum2 folds h and -sigma into the einsum itself.
        zr = self.zum2(row+row_offset+self.multistep_stages, eps_, eps_prev_, h_new, sigma)
        x_[row_tmp_offset] = x_0 + zr
    else:
        zr = self.zum(row+row_offset+self.multistep_stages, eps_, eps_prev_)
        x_[row_tmp_offset] = x_0 + h_new * zr
    if self.SYNC_SUBSTEP_MEAN_CW and lying_eps_row_factor != 1.0:
        # Keep the lied stage's channel means equal to the un-lied stage's.
        x_[row_tmp_offset] = x_[row_tmp_offset] - x_[row_tmp_offset].mean(dim=(-2,-1), keepdim=True) + x_orig_row.mean(dim=(-2,-1), keepdim=True)
        #eps_ [row] = eps_row
        #eps_prev_[row] = eps_prev_row
    if (self.SYNC_SUBSTEP_MEAN_CW and h_new != h_new_orig) or self.EO("sync_mean_noise"):
        if not self.EO("disable_sync_mean_noise"):
            # Resync channel means against the step taken with the original (pre-noise-boost) h.
            x_row_down = x_0 + h_new_orig * zr
            x_[row_tmp_offset] = x_[row_tmp_offset] - x_[row_tmp_offset].mean(dim=(-2,-1), keepdim=True) + x_row_down.mean(dim=(-2,-1), keepdim=True)
    return x_
def zum2(self, row:int, k:Tensor, k_prev:Tensor=None, h_new:Tensor=None, sigma:Tensor=None) -> Tensor:
if row < self.rows:
return self.a_k_einsum2(row, k, h_new, sigma)
else:
row = row - self.rows
return self.b_k_einsum2(row, k, h_new, sigma)
def a_k_einsum2(self, row:int, k:Tensor, h:Tensor, sigma:Tensor) -> Tensor:
    # sum_i A[row,i] * h * (-sigma) * k[i]; h and sigma enter as length-1 axes.
    return torch.einsum('i,j,k,i... -> ...', self.A[row], h.unsqueeze(0), -sigma.unsqueeze(0), k[:self.cols])
def b_k_einsum2(self, row:int, k:Tensor, h:Tensor, sigma:Tensor) -> Tensor:
    # Output-row variant of a_k_einsum2: sum_i B[row,i] * h * (-sigma) * k[i].
    return torch.einsum('i,j,k,i... -> ...', self.B[row], h.unsqueeze(0), -sigma.unsqueeze(0), k[:self.cols])
def a_k_einsum(self, row:int, k :Tensor) -> Tensor:
    # Weighted sum of the first self.cols stage tensors with tableau row A[row].
    return torch.einsum('i, i... -> ...', self.A[row], k[:self.cols])
def b_k_einsum(self, row:int, k :Tensor) -> Tensor:
    # Weighted sum of stage tensors with output-weight row B[row].
    return torch.einsum('i, i... -> ...', self.B[row], k[:self.cols])
def u_k_einsum(self, row:int, k_prev:Tensor) -> Tensor:
    # Multistep history contribution via U[row]; 0 when no history weights/tensors exist.
    return torch.einsum('i, i... -> ...', self.U[row], k_prev[:self.cols]) if (self.U is not None and k_prev is not None) else 0
def v_k_einsum(self, row:int, k_prev:Tensor) -> Tensor:
    # Multistep history contribution for the output row via V[row]; 0 when absent.
    return torch.einsum('i, i... -> ...', self.V[row], k_prev[:self.cols]) if (self.V is not None and k_prev is not None) else 0
def zum(self, row:int, k:Tensor, k_prev:Tensor=None,) -> Tensor:
if row < self.rows:
return self.a_k_einsum(row, k) + self.u_k_einsum(row, k_prev)
else:
row = row - self.rows
return self.b_k_einsum(row, k) + self.v_k_einsum(row, k_prev)
def zum_tableau(self, k:Tensor, k_prev:Tensor=None,) -> Tensor:
    # All A-rows at once: returns one weighted stage sum per tableau row, plus the
    # optional multistep (U) history contribution.
    a_k_sum = torch.einsum('ij, j... -> i...', self.A, k[:self.cols])
    u_k_sum = torch.einsum('ij, j... -> i...', self.U, k_prev[:self.cols]) if (self.U is not None and k_prev is not None) else 0
    return a_k_sum + u_k_sum
def get_x(self, data:Tensor, noise:Tensor, sigma:Tensor):
if self.VE_MODEL:
return data + sigma * noise
else:
return (self.sigma_max - sigma) * data + sigma * noise
def init_cfg_channelwise(self, x:Tensor, cfg_cw:float=1.0, **extra_args) -> Dict[str, Any]:
    """Arm channelwise CFG: store cfg_cw and, when active, register a post-CFG hook
    that captures the uncond prediction into self.uncond[0] for calc_cfg_channelwise.

    Returns the (possibly modified) extra_args to pass to subsequent model calls.
    """
    # Single-element list so the closure below can overwrite it in place.
    self.uncond = [torch.full_like(x, 0.0)]
    self.cfg_cw = cfg_cw
    if cfg_cw != 1.0:
        def post_cfg_function(args):
            self.uncond[0] = args["uncond_denoised"]
            return args["denoised"]
        model_options = extra_args.get("model_options", {}).copy()
        extra_args["model_options"] = comfy.model_patcher.set_model_options_post_cfg_function(model_options, post_cfg_function, disable_cfg1_optimization=True)
    return extra_args
def calc_cfg_channelwise(self, denoised:Tensor) -> Tensor:
if self.cfg_cw != 1.0:
avg = 0
for b, c in itertools.product(range(denoised.shape[0]), range(denoised.shape[1])):
avg += torch.norm(denoised[b][c] - self.uncond[0][b][c])
avg /= denoised.shape[1]
for b, c in itertools.product(range(denoised.shape[0]), range(denoised.shape[1])):
ratio = torch.nan_to_num(torch.norm(denoised[b][c] - self.uncond[0][b][c]) / avg, 0)
denoised_new = self.uncond[0] + ratio * self.cfg_cw * (denoised - self.uncond[0])
return denoised_new
else:
return denoised
@staticmethod
def calculate_res_2m_step(
                          x_0 : Tensor,
                          denoised_ : Tensor,
                          sigma_down : Tensor,
                          sigmas : Tensor,
                          step : int,
                          ) -> Tuple[Tensor, Tensor]:
    """Reconstruct a res_2m (2nd-order exponential multistep) step from the denoised
    history buffer. Returns (x, denoised), or (None, None) while the history is not
    yet populated (denoised_[2] still all zeros).
    """
    if denoised_[2].sum() == 0:
        return None, None
    sigma = sigmas[step]
    sigma_prev = sigmas[step-1]
    h_prev = -torch.log(sigma/sigma_prev)
    h = -torch.log(sigma_down/sigma)
    c1 = 0
    # Previous step re-expressed as a (negative) node of the current step.
    c2 = (-h_prev / h).item()
    ci = [c1,c2]
    φ = Phi(h, ci, analytic_solution=True)
    b2 = φ(2)/c2
    b1 = φ(1) - b2
    eps_2 = denoised_[1] - x_0
    eps_1 = denoised_[0] - x_0
    h_a_k_sum = h * (b1 * eps_1 + b2 * eps_2)
    x = torch.exp(-h) * x_0 + h_a_k_sum
    # Invert the step relation to recover the implied denoised estimate.
    denoised = x_0 + (sigma / (sigma - sigma_down)) * h_a_k_sum
    return x, denoised
@staticmethod
def calculate_res_3m_step(
                          x_0 : Tensor,
                          denoised_ : Tensor,
                          sigma_down : Tensor,
                          sigmas : Tensor,
                          step : int,
                          ) -> Tuple[Tensor, Tensor]:
    """Reconstruct a res_3m (3rd-order exponential multistep) step from the denoised
    history buffer. Returns (x, denoised), or (None, None) while the history is not
    yet populated.

    NOTE(review): the guard checks denoised_[3] while the stages use denoised_[0..2];
    and b3/b1 use φ(2, -h)/φ(1, -h) where the 2m variant uses φ(2)/φ(1) — confirm
    Phi's call convention for whether these are equivalent.
    """
    if denoised_[3].sum() == 0:
        return None, None
    sigma = sigmas[step]
    sigma_prev = sigmas[step-1]
    sigma_prev2 = sigmas[step-2]
    h = -torch.log(sigma_down/sigma)
    h_prev = -torch.log(sigma/sigma_prev)
    h_prev2 = -torch.log(sigma/sigma_prev2)
    c1 = 0
    # Two previous steps re-expressed as (negative) nodes of the current step.
    c2 = (-h_prev / h).item()
    c3 = (-h_prev2 / h).item()
    ci = [c1,c2,c3]
    φ = Phi(h, ci, analytic_solution=True)
    gamma = (3*(c3**3) - 2*c3) / (c2*(2 - 3*c2))
    b3 = (1 / (gamma * c2 + c3)) * φ(2, -h)
    b2 = gamma * b3
    b1 = φ(1, -h) - b2 - b3
    eps_3 = denoised_[2] - x_0
    eps_2 = denoised_[1] - x_0
    eps_1 = denoised_[0] - x_0
    h_a_k_sum = h * (b1 * eps_1 + b2 * eps_2 + b3 * eps_3)
    x = torch.exp(-h) * x_0 + h_a_k_sum
    # Invert the step relation to recover the implied denoised estimate.
    denoised = x_0 + (sigma / (sigma - sigma_down)) * h_a_k_sum
    return x, denoised
def _apply_rk_swap(self, rk_swap_type:str, NS) -> None:
    """Switch this instance (and its noise sampler NS) to `rk_swap_type` in place:
    rebind the concrete subclass, refresh IMPLICIT/row_offset, and hand NS the new
    h/t/sigma conversion functions. (Extracted from the two formerly duplicated
    switch bodies below.)"""
    self.rk_type = rk_swap_type
    if RK_Method_Beta.is_exponential(rk_swap_type):
        self.__class__ = RK_Method_Exponential
    else:
        self.__class__ = RK_Method_Linear
    if rk_swap_type in get_implicit_sampler_name_list(nameOnly=True):
        self.IMPLICIT = True
        self.row_offset = 0
        NS.row_offset = 0
    else:
        self.IMPLICIT = False
        self.row_offset = 1
        NS.row_offset = 1
    NS.h_fn = self.h_fn
    NS.t_fn = self.t_fn
    NS.sigma_fn = self.sigma_fn

def swap_rk_type_at_step_or_threshold(self,
                                      x_0 : Tensor,
                                      data_prev_ : Tensor,
                                      NS,
                                      sigmas : Tensor,
                                      step : Tensor,
                                      rk_swap_step : int,
                                      rk_swap_threshold : float,
                                      rk_swap_type : str,
                                      rk_swap_print : bool,
                                      ) -> str:
    """Swap to a cheaper multistep sampler either after a fixed step count, or once
    the res_2m/res_3m reconstructions of the current trajectory agree to within
    rk_swap_threshold. Returns the (possibly updated) rk_type.
    """
    if rk_swap_type == "":
        # Default swap target matches the current formulation.
        rk_swap_type = "res_3m" if self.EXPONENTIAL else "deis_3m"
    if step > rk_swap_step and self.rk_type != rk_swap_type:
        RESplain("Switching rk_type to:", rk_swap_type)
        self._apply_rk_swap(rk_swap_type, NS)
    if step > 2 and sigmas[step+1] > 0 and self.rk_type != rk_swap_type and rk_swap_threshold > 0:
        x_res_2m, denoised_res_2m = self.calculate_res_2m_step(x_0, data_prev_, NS.sigma_down, sigmas, step)
        x_res_3m, denoised_res_3m = self.calculate_res_3m_step(x_0, data_prev_, NS.sigma_down, sigmas, step)
        if denoised_res_2m is not None:
            if rk_swap_print:
                RESplain("res_3m - res_2m:", torch.norm(denoised_res_3m - denoised_res_2m).item())
            # 2nd- and 3rd-order reconstructions agreeing implies the multistep method is safe.
            if rk_swap_threshold > torch.norm(denoised_res_2m - denoised_res_3m):
                RESplain("Switching rk_type to:", rk_swap_type, "at step:", step)
                self._apply_rk_swap(rk_swap_type, NS)
    return self.rk_type
def bong_iter(self,
              x_0 : Tensor,
              x_ : Tensor,
              eps_ : Tensor,
              eps_prev_ : Tensor,
              data_ : Tensor,
              sigma : Tensor,
              s_ : Tensor,
              row : int,
              row_offset: int,
              h : Tensor,
              step : int,
              step_sched: int,
              BONGMATH_Y : bool = False,
              y0_bongflow : Optional[Tensor] = None,
              noise_sync: Optional[Tensor] = None,
              eps_x_ : Optional[Tensor] = None,
              eps_y_ : Optional[Tensor] = None,
              #eps_x2y_ : Optional[Tensor] = None,
              data_x_ : Optional[Tensor] = None,
              data_y_ : Optional[Tensor] = None,
              #yt_ : Optional[Tensor] = None,
              #yt_0 : Optional[Tensor] = None,
              LG = None,
              ) -> Tuple[Tensor, Tensor, Tensor]:
    """Fixed-point ("bongmath") refinement: iteratively re-derive x_0 from the current
    stage, rebuild all earlier stage states from it, and recompute their epsilons,
    100 times. Optionally (BONGMATH_Y) blends guide-synchronized epsilons under LG's
    masks. Returns the refined (x_0, x_, eps_).
    """
    # Channel-mean dims: 4D (B,C,H,W) vs 5D latents.
    # NOTE(review): norm_dim is unbound for any other ndim — presumably only 4D/5D occur.
    if x_0.ndim == 4:
        norm_dim = (-2,-1)
    elif x_0.ndim == 5:
        norm_dim = (-4,-2,-1)
    if BONGMATH_Y:
        lgw_mask_, lgw_mask_inv_ = LG.get_masks_for_step(step_sched)
        lgw_mask_sync_, lgw_mask_sync_inv_ = LG.get_masks_for_step(step_sched, lgw_type="sync")
        weight_mask = lgw_mask_+lgw_mask_inv_
        if LG.SYNC_SEPARATE:
            sync_mask = lgw_mask_sync_+lgw_mask_sync_inv_
        else:
            sync_mask = 1.
    # Skip outside the configured step window, or while unsampling (s_ ascending) if disabled.
    # NOTE(review): self.unsample_bongmath is not set in __init__ — assigned elsewhere; confirm.
    if self.EO("bong_start_step", 0) > step or step > self.EO("bong_stop_step", 10000) or (self.unsample_bongmath == False and s_[-1] > s_[0]):
        return x_0, x_, eps_
    bong_iter_max_row = self.rows - row_offset
    if self.EO("bong_iter_max_row_full"):
        bong_iter_max_row = self.rows
    if self.EO("bong_iter_lock_x_0_ch_means"):
        x_0_ch_means = x_0.mean(dim=norm_dim, keepdim=True)
    if self.EO("bong_iter_lock_x_row_ch_means"):
        x_row_means = []
        for rr in range(row+row_offset):
            x_row_mean = x_[rr].mean(dim=norm_dim, keepdim=True)
            x_row_means.append(x_row_mean)
    if row < bong_iter_max_row and self.multistep_stages == 0:
        bong_strength = self.EO("bong_strength", 1.0)
        if bong_strength != 1.0:
            # Snapshots for the final strength-blend below.
            x_0_tmp = x_0 .clone()
            x_tmp_ = x_ .clone()
            eps_tmp_ = eps_.clone()
        for i in range(100): #bongmath for eps_prev_ not implemented?
            # Invert the stage update to re-derive x_0 from the current stage state.
            x_0 = x_[row+row_offset] - h * self.zum(row+row_offset, eps_, eps_prev_)
            if self.EO("bong_iter_lock_x_0_ch_means"):
                x_0 = x_0 - x_0.mean(dim=norm_dim, keepdim=True) + x_0_ch_means
            # Rebuild every earlier stage from the refined x_0.
            for rr in range(row+row_offset):
                x_[rr] = x_0 + h * self.zum(rr, eps_, eps_prev_)
            if self.EO("bong_iter_lock_x_row_ch_means"):
                for rr in range(row+row_offset):
                    x_[rr] = x_[rr] - x_[rr].mean(dim=norm_dim, keepdim=True) + x_row_means[rr]
            # Recompute the stage epsilons against the refined states.
            for rr in range(row+row_offset):
                if self.EO("zonkytar"):
                    #eps_[rr] = self.get_unsample_epsilon(x_[rr], x_0, data_[rr], sigma, s_[rr])
                    # NOTE(review): x_[rr] and x_0 are swapped relative to the default branch — confirm intended.
                    eps_[rr] = self.get_epsilon(x_[rr], x_0, data_[rr], sigma, s_[rr])
                else:
                    if BONGMATH_Y and not self.EO("disable_bongmath_y"):
                        # NOTE(review): these branches assign the ENTIRE eps_ tensor (not eps_[rr]),
                        # so each rr iteration overwrites the previous result — confirm intended.
                        if self.EXPONENTIAL:
                            eps_x_ = data_x_ - x_0
                            eps_x2y_ = data_y_ - x_0
                            if self.VE_MODEL:
                                eps_ = sync_mask * eps_x_ + (1-sync_mask) * eps_x2y_ + weight_mask * (-eps_y_+sigma*(-noise_sync))
                                if self.EO("sync_x2y"):
                                    eps_ = sync_mask * eps_x_ + (1-sync_mask) * eps_x2y_ + weight_mask * (-eps_x2y_+sigma*(-noise_sync))
                            else:
                                eps_ = sync_mask * eps_x_ + (1-sync_mask) * eps_x2y_ + weight_mask * (-eps_y_+sigma*(y0_bongflow-noise_sync))
                                if self.EO("sync_x2y"):
                                    eps_ = sync_mask * eps_x_ + (1-sync_mask) * eps_x2y_ + weight_mask * (-eps_x2y_+sigma*(y0_bongflow-noise_sync))
                        else:
                            eps_x_ [:s_.shape[0]] = (x_[:s_.shape[0]] - data_x_[:s_.shape[0]]) / s_.view(-1,1,1,1,1) # or should it be vs x_0???
                            eps_x2y_ = torch.zeros_like(eps_x_)
                            eps_x2y_[:s_.shape[0]] = (x_[:s_.shape[0]] - data_y_[:s_.shape[0]]) / s_.view(-1,1,1,1,1) # or should it be vs x_0???
                            if self.VE_MODEL:
                                eps_ = sync_mask * eps_x_ + (1-sync_mask) * eps_x2y_ + weight_mask * (noise_sync-eps_y_)
                                if self.EO("sync_x2y"):
                                    eps_ = sync_mask * eps_x_ + (1-sync_mask) * eps_x2y_ + weight_mask * (noise_sync-eps_x2y_)
                            else:
                                eps_ = sync_mask * eps_x_ + (1-sync_mask) * eps_x2y_ + weight_mask * (noise_sync-eps_y_-y0_bongflow)
                                if self.EO("sync_x2y"):
                                    eps_ = sync_mask * eps_x_ + (1-sync_mask) * eps_x2y_ + weight_mask * (noise_sync-eps_x2y_-y0_bongflow)
                    else:
                        eps_[rr] = self.get_epsilon(x_0, x_[rr], data_[rr], sigma, s_[rr])
        if bong_strength != 1.0:
            # Blend the refined result back toward the pre-iteration state.
            x_0 = x_0_tmp + bong_strength * (x_0 - x_0_tmp)
            x_ = x_tmp_ + bong_strength * (x_ - x_tmp_)
            eps_ = eps_tmp_ + bong_strength * (eps_ - eps_tmp_)
    return x_0, x_, eps_ #, yt_0, yt_
def newton_iter(self,
                x_0 : Tensor,
                x_ : Tensor,
                eps_ : Tensor,
                eps_prev_ : Tensor,
                data_ : Tensor,
                s_ : Tensor,
                row : int,
                h : Tensor,
                sigmas : Tensor,
                step : int,
                newton_name: str,
                SYNC_GUIDE_ACTIVE: bool,
                ) -> Tuple[Tensor, Tensor]:
    """Newton-style fixed-point refinement of the implicit stage states/epsilons.

    Only runs for implicit samplers, away from the final steps, and when sync
    guides are inactive. All knobs are read from extra options under the prefix
    "newton_iter_<newton_name>_". `newton_name` also selects which rows are
    refined ("pre": from `row` on; "post": after `row`; "lying": all, with
    x-anchoring defaulted on). Mutates x_/eps_ (and possibly data_) in place.
    """
    if SYNC_GUIDE_ACTIVE:
        return x_, eps_
    newton_iter_name = "newton_iter_" + newton_name
    default_anchor_x_all = False
    if newton_name == "lying":
        default_anchor_x_all = True
    # NOTE: local `newton_iter` (the iteration count) shadows the method name here.
    newton_iter = self.EO(newton_iter_name, 100)
    newton_iter_skip_last_steps = self.EO(newton_iter_name + "_skip_last_steps", 0)
    newton_iter_mixing_rate = self.EO(newton_iter_name + "_mixing_rate", 1.0)
    newton_iter_anchor = self.EO(newton_iter_name + "_anchor", 0)
    newton_iter_anchor_x_all = self.EO(newton_iter_name + "_anchor_x_all", default_anchor_x_all)
    newton_iter_type = self.EO(newton_iter_name + "_type", "from_epsilon")
    newton_iter_sequence = self.EO(newton_iter_name + "_sequence", "double")
    row_b_offset = 0
    if self.EO(newton_iter_name + "_include_row_b"):
        row_b_offset = 1
    if step >= len(sigmas)-1-newton_iter_skip_last_steps or sigmas[step+1] == 0 or not self.IMPLICIT:
        return x_, eps_
    sigma = sigmas[step]
    start, stop = 0, self.rows+row_b_offset
    if newton_name == "pre":
        start = row
    elif newton_name == "post":
        start = row + 1
    if newton_iter_anchor >= 0:
        # Pin one row's epsilon so the iteration cannot drift it.
        eps_anchor = eps_[newton_iter_anchor].clone()
    if newton_iter_anchor_x_all:
        x_orig_ = x_.clone()
    for n_iter in range(newton_iter):
        for r in range(start, stop):
            if newton_iter_anchor >= 0:
                eps_[newton_iter_anchor] = eps_anchor.clone()
            if newton_iter_anchor_x_all:
                # Restart every pass from the original stage states.
                x_ = x_orig_.clone()
            x_tmp, eps_tmp = x_[r].clone(), eps_[r].clone()
            # "double" refines all rows each pass; otherwise only row r.
            seq_start, seq_stop = r, r+1
            if newton_iter_sequence == "double":
                seq_start, seq_stop = start, stop
            for r_ in range(seq_start, seq_stop):
                x_[r_] = x_0 + h * self.zum(r_, eps_, eps_prev_)
            for r_ in range(seq_start, seq_stop):
                # Re-derive the row epsilon by the configured rule.
                if newton_iter_type == "from_data":
                    data_[r_] = get_data_from_step(x_0, x_[r_], sigma, s_[r_])
                    eps_ [r_] = self.get_epsilon(x_0, x_[r_], data_[r_], sigma, s_[r_])
                elif newton_iter_type == "from_step":
                    eps_ [r_] = get_epsilon_from_step(x_0, x_[r_], sigma, s_[r_])
                elif newton_iter_type == "from_alt":
                    eps_ [r_] = x_0/sigma - x_[r_]/s_[r_]
                elif newton_iter_type == "from_epsilon":
                    eps_ [r_] = self.get_epsilon(x_0, x_[r_], data_[r_], sigma, s_[r_])
                if self.EO(newton_iter_name + "_opt"):
                    # Optional cross-row projection of the refined epsilon.
                    opt_timing, opt_type, opt_subtype = self.EO(newton_iter_name+"_opt", [str])
                    opt_start, opt_stop = 0, self.rows+row_b_offset
                    if opt_timing == "early":
                        opt_stop = row + 1
                    elif opt_timing == "late":
                        opt_start = row + 1
                    for r2 in range(opt_start, opt_stop):
                        if r_ != r2:
                            if opt_subtype == "a":
                                eps_a = eps_[r2]
                                eps_b = eps_[r_]
                            elif opt_subtype == "b":
                                eps_a = eps_[r_]
                                eps_b = eps_[r2]
                            if opt_type == "ortho":
                                eps_ [r_] = get_orthogonal(eps_a, eps_b)
                            elif opt_type == "collin":
                                eps_ [r_] = get_collinear (eps_a, eps_b)
                            elif opt_type == "proj":
                                eps_ [r_] = get_collinear (eps_a, eps_b) + get_orthogonal(eps_b, eps_a)
                # Under-relaxation toward the pre-refinement values.
                x_ [r_] = x_tmp + newton_iter_mixing_rate * (x_ [r_] - x_tmp)
                eps_[r_] = eps_tmp + newton_iter_mixing_rate * (eps_[r_] - eps_tmp)
            if newton_iter_sequence == "double":
                # One "double" pass already covered every row.
                break
    return x_, eps_
class RK_Method_Exponential(RK_Method_Beta):
    """Exponential-integrator formulation: epsilon lives in data space
    (eps = denoised - x_0), matching phi-function tableaus (res/dpmpp/etdrk/...).
    """
    def __init__(self,
                 model,
                 rk_type : str,
                 VE_MODEL : bool,
                 noise_anchor : float,
                 noise_boost_normalize : bool,
                 model_device : str = 'cuda',
                 work_device : str = 'cpu',
                 dtype : torch.dtype = torch.float64,
                 extra_options : str = "",
                 ):
        super().__init__(model,
                         rk_type,
                         VE_MODEL,
                         noise_anchor,
                         noise_boost_normalize,
                         model_device = model_device,
                         work_device = work_device,
                         dtype = dtype,
                         extra_options = extra_options,
                         )

    @staticmethod
    def alpha_fn(neg_h:Tensor) -> Tensor:
        """Decay factor exp(-h) (argument is already negated)."""
        return torch.exp(neg_h)

    @staticmethod
    def sigma_fn(t:Tensor) -> Tensor:
        """sigma = exp(-t)."""
        #return 1/(torch.exp(-t)+1)
        return t.neg().exp()

    @staticmethod
    def t_fn(sigma:Tensor) -> Tensor:
        """t = -log(sigma)."""
        #return -torch.log((1.-sigma)/sigma)
        return sigma.log().neg()

    @staticmethod
    def h_fn(sigma_down:Tensor, sigma:Tensor) -> Tensor:
        """Log-space step size h = -log(sigma_down/sigma)."""
        #return (-torch.log((1.-sigma_down)/sigma_down)) - (-torch.log((1.-sigma)/sigma))
        return -torch.log(sigma_down/sigma)

    def __call__(self,
                 x : Tensor,
                 sub_sigma : Tensor,
                 x_0 : Optional[Tensor] = None,
                 sigma : Optional[Tensor] = None,
                 transformer_options : Optional[dict] = None,
                 ) -> Tuple[Tensor, Tensor]:
        """Run the model at (x, sub_sigma); return (epsilon, denoised) in exponential form."""
        x_0 = x if x_0 is None else x_0
        sigma = sub_sigma if sigma is None else sigma
        if transformer_options is not None:
            self.extra_args.setdefault("model_options", {}).setdefault("transformer_options", {}).update(transformer_options)
        denoised = self.model_denoised(x.to(self.model_device), sub_sigma.to(self.model_device), **self.extra_args).to(sigma.device)
        # Blend the x_0-anchored epsilon with the substep epsilon (noise_anchor knob),
        # then re-derive the effective denoised point from the blended epsilon.
        eps_anchored = (x_0 - denoised) / sigma
        eps_unmoored = (x - denoised) / sub_sigma
        eps = eps_unmoored + self.LINEAR_ANCHOR_X_0 * (eps_anchored - eps_unmoored)
        denoised = x_0 - sigma * eps
        epsilon = denoised - x_0
        #epsilon = denoised - x
        if self.EO("exp2lin_override"):
            epsilon = (x_0 - denoised) / sigma
        return epsilon, denoised

    def get_eps(self, *args):
        """3-arg form (x, denoised, sigma): plain data-space epsilon, denoised - x.
        5-arg form (x_0, x, denoised, sigma, sub_sigma): anchored/unmoored blend."""
        if len(args) == 3:
            x, denoised, sigma = args
            return denoised - x
        elif len(args) == 5:
            x_0, x, denoised, sigma, sub_sigma = args
            # Identical math was previously duplicated here; delegate so the two paths cannot drift.
            return self.get_epsilon(x_0, x, denoised, sigma, sub_sigma)
        else:
            raise ValueError(f"get_eps expected 3 or 5 arguments, got {len(args)}")

    def get_epsilon(self,
                    x_0 : Tensor,
                    x : Tensor,
                    denoised : Tensor,
                    sigma : Tensor,
                    sub_sigma : Tensor,
                    ) -> Tensor:
        """Data-space epsilon after blending the x_0-anchored and substep epsilons."""
        eps_anchored = (x_0 - denoised) / sigma
        eps_unmoored = (x - denoised) / sub_sigma
        eps = eps_unmoored + self.LINEAR_ANCHOR_X_0 * (eps_anchored - eps_unmoored)
        denoised = x_0 - sigma * eps
        if self.EO("exp2lin_override"):
            return (x_0 - denoised) / sigma
        else:
            return denoised - x_0

    def get_epsilon_anchored(self, x_0:Tensor, denoised:Tensor, sigma:Tensor) -> Tensor:
        """Pure x_0-anchored data-space epsilon (no substep blending)."""
        return denoised - x_0

    def get_guide_epsilon(self,
                          x_0 : Tensor,
                          x : Tensor,
                          y : Tensor,
                          sigma : Tensor,
                          sigma_cur : Tensor,
                          sigma_down : Optional[Tensor] = None,
                          epsilon_scale : Optional[Tensor] = None,
                          ) -> Tensor:
        """Epsilon pulling x toward the guide y, in exponential form.

        Bug fix: a None sigma_down (the declared default) previously hit
        `sigma_down > sigma` and raised TypeError; None is now treated as ordinary
        (descending) sampling.
        """
        sigma_cur = epsilon_scale if epsilon_scale is not None else sigma_cur
        # Unsampling runs sigma upward (sigma_down above sigma).
        UNSAMPLING = sigma_down is not None and sigma_down > sigma
        if UNSAMPLING:
            eps_unmoored = (sigma_cur/(self.sigma_max - sigma_cur)) * (x - y)
        else:
            eps_unmoored = y - x
        if self.EO("manually_anchor_unsampler"):
            if UNSAMPLING:
                eps_anchored = (sigma /(self.sigma_max - sigma)) * (x_0 - y)
            else:
                eps_anchored = y - x_0
            eps_guide = eps_unmoored + self.LINEAR_ANCHOR_X_0 * (eps_anchored - eps_unmoored)
        else:
            eps_guide = eps_unmoored
        return eps_guide
class RK_Method_Linear(RK_Method_Beta):
    """Linear (noise-prediction) formulation: epsilon is the slope (x - denoised) / sigma,
    matching classical ODE tableaus (deis/ralston/etc.)."""
    def __init__(self,
                 model,
                 rk_type : str,
                 VE_MODEL : bool,
                 noise_anchor : float,
                 noise_boost_normalize : bool,
                 model_device : str = 'cuda',
                 work_device : str = 'cpu',
                 dtype : torch.dtype = torch.float64,
                 extra_options : str = "",
                 ):
        super().__init__(model,
                         rk_type,
                         VE_MODEL,
                         noise_anchor,
                         noise_boost_normalize,
                         model_device = model_device,
                         work_device = work_device,
                         dtype = dtype,
                         extra_options = extra_options,
                         )

    @staticmethod
    def alpha_fn(neg_h:Tensor) -> Tensor:
        """No exponential decay in the linear formulation (identity factor)."""
        return torch.ones_like(neg_h)

    @staticmethod
    def sigma_fn(t:Tensor) -> Tensor:
        """Identity: t IS sigma in the linear formulation."""
        return t

    @staticmethod
    def t_fn(sigma:Tensor) -> Tensor:
        """Identity: sigma IS t in the linear formulation."""
        return sigma

    @staticmethod
    def h_fn(sigma_down:Tensor, sigma:Tensor) -> Tensor:
        """Plain step size h = sigma_down - sigma."""
        return sigma_down - sigma

    def __call__(self,
                 x : Tensor,
                 sub_sigma : Tensor,
                 x_0 : Optional[Tensor] = None,
                 sigma : Optional[Tensor] = None,
                 transformer_options : Optional[dict] = None,
                 ) -> Tuple[Tensor, Tensor]:
        """Run the model at (x, sub_sigma); return (epsilon, denoised) in linear form."""
        x_0 = x if x_0 is None else x_0
        sigma = sub_sigma if sigma is None else sigma
        if transformer_options is not None:
            self.extra_args.setdefault("model_options", {}).setdefault("transformer_options", {}).update(transformer_options)
        denoised = self.model_denoised(x.to(self.model_device), sub_sigma.to(self.model_device), **self.extra_args).to(sigma.device)
        # Blend the x_0-anchored slope with the substep slope (noise_anchor knob).
        epsilon_anchor = (x_0 - denoised) / sigma
        epsilon_unmoored = (x - denoised) / sub_sigma
        epsilon = epsilon_unmoored + self.LINEAR_ANCHOR_X_0 * (epsilon_anchor - epsilon_unmoored)
        return epsilon, denoised

    def get_eps(self, *args):
        """3-arg form (x, denoised, sigma): plain slope (x - denoised) / sigma.
        5-arg form (x_0, x, denoised, sigma, sub_sigma): anchored/unmoored blend.

        Bug fix: the 5-arg branch was written `elif len(args == 5):` (compares the
        tuple to the int, then calls len() on the bool), raising TypeError for every
        5-argument call.
        """
        if len(args) == 3:
            x, denoised, sigma = args
            return (x - denoised) / sigma
        elif len(args) == 5:
            x_0, x, denoised, sigma, sub_sigma = args
            return self.get_epsilon(x_0, x, denoised, sigma, sub_sigma)
        else:
            raise ValueError(f"get_eps expected 3 or 5 arguments, got {len(args)}")

    def get_epsilon(self,
                    x_0 : Tensor,
                    x : Tensor,
                    denoised : Tensor,
                    sigma : Tensor,
                    sub_sigma : Tensor,
                    ) -> Tensor:
        """Slope epsilon blending the x_0-anchored and substep slopes."""
        eps_anchor = (x_0 - denoised) / sigma
        eps_unmoored = (x - denoised) / sub_sigma
        return eps_unmoored + self.LINEAR_ANCHOR_X_0 * (eps_anchor - eps_unmoored)

    def get_epsilon_anchored(self, x_0:Tensor, denoised:Tensor, sigma:Tensor) -> Tensor:
        """Pure x_0-anchored slope epsilon (no substep blending)."""
        return (x_0 - denoised) / sigma

    def get_guide_epsilon(self,
                          x_0 : Tensor,
                          x : Tensor,
                          y : Tensor,
                          sigma : Tensor,
                          sigma_cur : Tensor,
                          sigma_down : Optional[Tensor] = None,
                          epsilon_scale : Optional[Tensor] = None,
                          ) -> Tensor:
        """Epsilon pulling x toward the guide y, in linear (slope) form.

        Bug fix: the None check for sigma_down came AFTER a `sigma_down > sigma`
        comparison, so the declared default raised TypeError; None is now treated as
        ordinary (descending) sampling up front.
        """
        # Unsampling runs sigma upward (sigma_down above sigma).
        UNSAMPLING = sigma_down is not None and sigma_down > sigma
        if UNSAMPLING:
            sigma_ratio = self.sigma_max - sigma_cur.clone()
        else:
            sigma_ratio = sigma_cur.clone()
        sigma_ratio = epsilon_scale if epsilon_scale is not None else sigma_ratio
        if UNSAMPLING:
            return (y - x) / sigma_ratio
        else:
            return (x - y) / sigma_ratio
"""
if EO("bong2m") and RK.multistep_stages > 0 and step < len(sigmas)-4:
h_no_eta = -torch.log(sigmas[step+1]/sigmas[step])
h_prev1_no_eta = -torch.log(sigmas[step] /sigmas[step-1])
c2_prev = (-h_prev1_no_eta / h_no_eta).item()
eps_prev = denoised_data_prev - x_0
φ = Phi(h_prev, [0.,c2_prev])
a2_1 = c2_prev * φ(1,2)
for i in range(100):
x_prev = x_0 - h_prev * (a2_1 * eps_prev)
eps_prev = denoised_data_prev - x_prev
eps_[1] = eps_prev
if EO("bong3m") and RK.multistep_stages > 0 and step < len(sigmas)-10:
h_no_eta = -torch.log(sigmas[step+1]/sigmas[step])
h_prev1_no_eta = -torch.log(sigmas[step] /sigmas[step-1])
h_prev2_no_eta = -torch.log(sigmas[step] /sigmas[step-2])
c2_prev = (-h_prev1_no_eta / h_no_eta).item()
c3_prev = (-h_prev2_no_eta / h_no_eta).item()
eps_prev2 = denoised_data_prev2 - x_0
eps_prev = denoised_data_prev - x_0
φ = Phi(h_prev1_no_eta, [0.,c2_prev, c3_prev])
a2_1 = c2_prev * φ(1,2)
for i in range(100):
x_prev = x_0 - h_prev1_no_eta * (a2_1 * eps_prev)
eps_prev = denoised_data_prev2 - x_prev
eps_[1] = eps_prev
φ = Phi(h_prev2_no_eta, [0.,c3_prev, c3_prev])
def calculate_gamma(c2_prev, c3_prev):
return (3*(c3_prev**3) - 2*c3_prev) / (c2_prev*(2 - 3*c2_prev))
gamma = calculate_gamma(c2_prev, c3_prev)
a2_1 = c2_prev * φ(1,2)
a3_2 = gamma * c2_prev * φ(2,2) + (c3_prev ** 2 / c2_prev) * φ(2, 3)
a3_1 = c3_prev * φ(1,3) - a3_2
for i in range(100):
x_prev2 = x_0 - h_prev2_no_eta * (a3_1 * eps_prev + a3_2 * eps_prev2)
x_prev = x_prev2 + h_prev2_no_eta * (a2_1 * eps_prev)
eps_prev2 = denoised_data_prev - x_prev2
eps_prev = denoised_data_prev2 - x_prev
eps_[2] = eps_prev2
"""
================================================
FILE: beta/rk_noise_sampler_beta.py
================================================
import torch
from torch import Tensor
from typing import Optional, Callable, Tuple, Dict, Any, Union, TYPE_CHECKING, TypeVar
if TYPE_CHECKING:
from .rk_method_beta import RK_Method_Exponential, RK_Method_Linear
import comfy.model_patcher
import comfy.supported_models
from .noise_classes import NOISE_GENERATOR_CLASSES, NOISE_GENERATOR_CLASSES_SIMPLE
from .constants import MAX_STEPS
from ..helper import ExtraOptions, has_nested_attr
from ..latents import normalize_zscore, get_orthogonal, get_collinear
from ..res4lyf import RESplain
# Display/menu order of the supported SDE noise-scaling modes. Each name maps
# to a `case` in RK_NoiseSampler.get_sde_step(); "hard_sq" is implemented
# there but intentionally hidden from this list.
NOISE_MODE_NAMES = ["none",
                    #"hard_sq",
                    "hard",
                    "lorentzian",
                    "soft",
                    "soft-linear",
                    "softer",
                    "eps",
                    "sinusoidal",
                    "exp",
                    "vpsde",
                    "er4",
                    "hard_var",
                    ]
def get_data_from_step(x, x_next, sigma, sigma_next):
    """Recover the denoised latent implied by one step, assuming the
    trajectory between (sigma, x) and (sigma_next, x_next) is perfectly
    linear in sigma."""
    step_size = sigma_next - sigma
    weighted_diff = sigma_next * x - sigma * x_next
    return weighted_diff / step_size
def get_epsilon_from_step(x, x_next, sigma, sigma_next):
    """Recover the epsilon implied by one linear step. Note the denominator
    is (sigma_next - sigma), i.e. the sign convention is the negative of
    (x - x_next) / (sigma - sigma_next)."""
    step_size = sigma_next - sigma
    return (x - x_next) / step_size
class RK_NoiseSampler:
def __init__(self,
RK : Union["RK_Method_Exponential", "RK_Method_Linear"],
model,
step : int=0,
device : str='cuda',
dtype : torch.dtype=torch.float64,
extra_options : str=""
):
self.device = device
self.dtype = dtype
self.model = model
if has_nested_attr(model, "inner_model.inner_model.model_sampling"):
model_sampling = model.inner_model.inner_model.model_sampling
elif has_nested_attr(model, "model.model_sampling"):
model_sampling = model.model.model_sampling
self.sigma_max = model_sampling.sigma_max.to(dtype=self.dtype, device=self.device)
self.sigma_min = model_sampling.sigma_min.to(dtype=self.dtype, device=self.device)
self.sigma_fn = RK.sigma_fn
self.t_fn = RK.t_fn
self.h_fn = RK.h_fn
self.row_offset = 1 if not RK.IMPLICIT else 0
self.step = step
self.noise_sampler = None
self.noise_sampler2 = None
self.noise_mode_sde = None
self.noise_mode_sde_substep = None
self.LOCK_H_SCALE = True
self.CONST = isinstance(model_sampling, comfy.model_sampling.CONST)
self.VARIANCE_PRESERVING = isinstance(model_sampling, comfy.model_sampling.CONST)
self.extra_options = extra_options
self.EO = ExtraOptions(extra_options)
self.DOWN_SUBSTEP = self.EO("down_substep")
self.DOWN_STEP = self.EO("down_step")
self.init_noise = None
def init_noise_samplers(self,
x : Tensor,
noise_seed : int,
noise_seed_substep : int,
noise_sampler_type : str,
noise_sampler_type2 : str,
noise_mode_sde : str,
noise_mode_sde_substep : str,
overshoot_mode : str,
overshoot_mode_substep : str,
noise_boost_step : float,
noise_boost_substep : float,
alpha : float,
alpha2 : float,
k : float = 1.0,
k2 : float = 1.0,
scale : float = 0.1,
scale2 : float = 0.1,
last_rng = None,
last_rng_substep = None,
) -> None:
self.noise_sampler_type = noise_sampler_type
self.noise_sampler_type2 = noise_sampler_type2
self.noise_mode_sde = noise_mode_sde
self.noise_mode_sde_substep = noise_mode_sde_substep
self.overshoot_mode = overshoot_mode
self.overshoot_mode_substep = overshoot_mode_substep
self.noise_boost_step = noise_boost_step
self.noise_boost_substep = noise_boost_substep
self.s_in = x.new_ones([1], dtype=self.dtype, device=self.device)
if noise_seed < 0 and last_rng is None:
seed = torch.initial_seed()+1
RESplain("SDE noise seed: ", seed, " (set via torch.initial_seed()+1)", debug=True)
if noise_seed < 0 and last_rng is not None:
seed = torch.initial_seed()
RESplain("SDE noise seed: ", seed, " (set via torch.initial_seed())", debug=True)
else:
seed = noise_seed
RESplain("SDE noise seed: ", seed, debug=True)
#seed2 = seed + MAX_STEPS #for substep noise generation. offset needed to ensure seeds are not reused
if noise_sampler_type == "fractal":
self.noise_sampler = NOISE_GENERATOR_CLASSES.get(noise_sampler_type )(x=x, seed=seed, sigma_min=self.sigma_min, sigma_max=self.sigma_max)
self.noise_sampler.alpha = alpha
self.noise_sampler.k = k
self.noise_sampler.scale = scale
if noise_sampler_type2 == "fractal":
self.noise_sampler2 = NOISE_GENERATOR_CLASSES.get(noise_sampler_type2)(x=x, seed=noise_seed_substep, sigma_min=self.sigma_min, sigma_max=self.sigma_max)
self.noise_sampler2.alpha = alpha2
self.noise_sampler2.k = k2
self.noise_sampler2.scale = scale2
else:
self.noise_sampler = NOISE_GENERATOR_CLASSES_SIMPLE.get(noise_sampler_type )(x=x, seed=seed, sigma_min=self.sigma_min, sigma_max=self.sigma_max)
self.noise_sampler2 = NOISE_GENERATOR_CLASSES_SIMPLE.get(noise_sampler_type2)(x=x, seed=noise_seed_substep, sigma_min=self.sigma_min, sigma_max=self.sigma_max)
if last_rng is not None:
self.noise_sampler .generator.set_state(last_rng)
self.noise_sampler2.generator.set_state(last_rng_substep)
def set_substep_list(self, RK:Union["RK_Method_Exponential", "RK_Method_Linear"]) -> None:
self.multistep_stages = RK.multistep_stages
self.rows = RK.rows
self.C = RK.C
self.s_ = self.sigma_fn(self.t_fn(self.sigma) + self.h * self.C)
def get_substep_list(self, RK:Union["RK_Method_Exponential", "RK_Method_Linear"], sigma, h) -> None:
s_ = RK.sigma_fn(RK.t_fn(sigma) + h * RK.C)
return s_
def get_sde_coeff(self, sigma_next:Tensor, sigma_down:Tensor=None, sigma_up:Tensor=None, eta:float=0.0, VP_OVERRIDE=None) -> Tuple[Tensor,Tensor,Tensor]:
VARIANCE_PRESERVING = VP_OVERRIDE if VP_OVERRIDE is not None else self.VARIANCE_PRESERVING
if VARIANCE_PRESERVING:
if sigma_down is not None:
alpha_ratio = (1 - sigma_next) / (1 - sigma_down)
sigma_up = (sigma_next ** 2 - sigma_down ** 2 * alpha_ratio ** 2) ** 0.5
elif sigma_up is not None:
if sigma_up >= sigma_next:
RESplain("Maximum VPSDE noise level exceeded: falling back to hard noise mode.", debug=True)
if eta >= 1:
sigma_up = sigma_next * 0.9999 #avoid sqrt(neg_num) later
else:
sigma_up = sigma_next * eta
if VP_OVERRIDE is not None:
sigma_signal = 1 - sigma_next
else:
sigma_signal = self.sigma_max - sigma_next
sigma_residual = (sigma_next ** 2 - sigma_up ** 2) ** .5
alpha_ratio = sigma_signal + sigma_residual
sigma_down = sigma_residual / alpha_ratio
else:
alpha_ratio = torch.ones_like(sigma_next)
if sigma_down is not None:
sigma_up = (sigma_next ** 2 - sigma_down ** 2) ** .5 # not sure this is correct #TODO: CHECK THIS
elif sigma_up is not None:
sigma_down = (sigma_next ** 2 - sigma_up ** 2) ** .5
return alpha_ratio, sigma_down, sigma_up
def set_sde_step(self, sigma:Tensor, sigma_next:Tensor, eta:float, overshoot:float, s_noise:float) -> None:
self.sigma_0 = sigma
self.sigma_next = sigma_next
self.s_noise = s_noise
self.eta = eta
self.overshoot = overshoot
self.sigma_up_eta, self.sigma_eta, self.sigma_down_eta, self.alpha_ratio_eta \
= self.get_sde_step(sigma, sigma_next, eta, self.noise_mode_sde, self.DOWN_STEP, SUBSTEP=False)
self.sigma_up, self.sigma, self.sigma_down, self.alpha_ratio \
= self.get_sde_step(sigma, sigma_next, overshoot, self.overshoot_mode, self.DOWN_STEP, SUBSTEP=False)
self.h = self.h_fn(self.sigma_down, self.sigma)
self.h_no_eta = self.h_fn(self.sigma_next, self.sigma)
self.h = self.h + self.noise_boost_step * (self.h_no_eta - self.h)
def set_sde_substep(self,
row : int,
multistep_stages : int,
eta_substep : float,
overshoot_substep : float,
s_noise_substep : float,
full_iter : int = 0,
diag_iter : int = 0,
implicit_steps_full : int = 0,
implicit_steps_diag : int = 0
) -> None:
# start with stepsizes for no overshoot/noise addition/noise swapping
self.sub_sigma_up_eta = self.sub_sigma_up = 0.0
self.sub_sigma_eta = self.sub_sigma = self.s_[row]
self.sub_sigma_down_eta = self.sub_sigma_down = self.sub_sigma_next = self.s_[row+self.row_offset+multistep_stages]
self.sub_alpha_ratio_eta = self.sub_alpha_ratio = 1.0
self.s_noise_substep = s_noise_substep
self.eta_substep = eta_substep
self.overshoot_substep = overshoot_substep
if row < self.rows and self.s_[row+self.row_offset+multistep_stages] > 0:
if diag_iter > 0 and diag_iter == implicit_steps_diag and self.EO("implicit_substep_skip_final_eta"):
pass
elif diag_iter > 0 and self.EO("implicit_substep_only_first_eta"):
pass
elif full_iter > 0 and full_iter == implicit_steps_full and self.EO("implicit_step_skip_final_eta"):
pass
elif full_iter > 0 and self.EO("implicit_step_only_first_eta"):
pass
elif (full_iter > 0 or diag_iter > 0) and self.noise_sampler_type2 == "brownian":
pass # brownian noise does not increment its seed when generated, deactivate on implicit repeats to avoid burn
elif full_iter > 0 and self.EO("implicit_step_only_first_all_eta"):
self.sigma_down_eta = self.sigma_next
self.sigma_up_eta *= 0
self.alpha_ratio_eta /= self.alpha_ratio_eta
self.sigma_down = self.sigma_next
self.sigma_up *= 0
self.alpha_ratio /= self.alpha_ratio
self.h_new = self.h = self.h_no_eta
elif (row < self.rows-self.row_offset-multistep_stages or diag_iter < implicit_steps_diag) or self.EO("substep_eta_use_final"):
self.sub_sigma_up, self.sub_sigma, self.sub_sigma_down, self.sub_alpha_ratio = self.get_sde_substep(sigma = self.s_[row],
sigma_next = self.s_[row+self.row_offset+multistep_stages],
eta = overshoot_substep,
noise_mode_override = self.overshoot_mode_substep,
DOWN = self.DOWN_SUBSTEP)
self.sub_sigma_up_eta, self.sub_sigma_eta, self.sub_sigma_down_eta, self.sub_alpha_ratio_eta = self.get_sde_substep(sigma = self.s_[row],
sigma_next = self.s_[row+self.row_offset+multistep_stages],
eta = eta_substep,
noise_mode_override = self.noise_mode_sde_substep,
DOWN = self.DOWN_SUBSTEP)
if self.h_fn(self.sub_sigma_next, self.sigma) != 0:
self.h_new = self.h * self.h_fn(self.sub_sigma_down, self.sigma) / self.h_fn(self.sub_sigma_next, self.sigma)
self.h_eta = self.h * self.h_fn(self.sub_sigma_down_eta, self.sigma) / self.h_fn(self.sub_sigma_next, self.sigma)
self.h_new_orig = self.h_new.clone()
self.h_new = self.h_new + self.noise_boost_substep * (self.h - self.h_eta)
else:
self.h_new = self.h_eta = self.h
self.h_new_orig = self.h_new.clone()
def get_sde_substep(self,
sigma :Tensor,
sigma_next :Tensor,
eta :float = 0.0 ,
noise_mode_override :Optional[str] = None ,
DOWN :bool = False,
) -> Tuple[Tensor,Tensor,Tensor,Tensor]:
return self.get_sde_step(sigma=sigma, sigma_next=sigma_next, eta=eta, noise_mode_override=noise_mode_override, DOWN=DOWN, SUBSTEP=True,)
def get_sde_step(self,
sigma :Tensor,
sigma_next :Tensor,
eta :float = 0.0 ,
noise_mode_override :Optional[str] = None ,
DOWN :bool = False,
SUBSTEP :bool = False,
VP_OVERRIDE = None,
) -> Tuple[Tensor,Tensor,Tensor,Tensor]:
VARIANCE_PRESERVING = VP_OVERRIDE if VP_OVERRIDE is not None else self.VARIANCE_PRESERVING
if noise_mode_override is not None:
noise_mode = noise_mode_override
elif SUBSTEP:
noise_mode = self.noise_mode_sde_substep
else:
noise_mode = self.noise_mode_sde
if DOWN: #calculates noise level by first scaling sigma_down from sigma_next, instead of sigma_up from sigma_next
eta_fn = lambda eta_scale: 1-eta_scale
sud_fn = lambda sd: (sd, None)
else:
eta_fn = lambda eta_scale: eta_scale
sud_fn = lambda su: (None, su)
su, sd, sud = None, None, None
eta_ratio = None
sigma_base = sigma_next
sigmax = self.sigma_max if VP_OVERRIDE is None else 1
match noise_mode:
case "hard":
eta_ratio = eta
case "exp":
h = -(sigma_next/sigma).log()
eta_ratio = (1 - (-2*eta*h).exp())**.5
case "soft":
eta_ratio = 1-(1 - eta) + eta * ((sigma_next) / sigma)
case "softer":
eta_ratio = 1-torch.sqrt(1 - (eta**2 * (sigma**2 - sigma_next**2)) / sigma**2)
case "soft-linear":
eta_ratio = 1-eta * (sigma_next - sigma)
case "sinusoidal":
eta_ratio = eta * torch.sin(torch.pi * (sigma_next / sigmax)) ** 2
case "eps":
eta_ratio = eta * torch.sqrt((sigma_next/sigma) ** 2 * (sigma ** 2 - sigma_next ** 2) )
case "lorentzian":
eta_ratio = eta
alpha = 1 / ((sigma_next.to(sigma.dtype))**2 + 1)
sigma_base = ((1 - alpha) ** 0.5).to(sigma.dtype)
case "hard_var":
sigma_var = (-1 + torch.sqrt(1 + 4 * sigma)) / 2
if sigma_next > sigma_var:
eta_ratio = 0
sigma_base = sigma_next
else:
eta_ratio = eta
sigma_base = torch.sqrt((sigma - sigma_next).abs() + 1e-10)
case "hard_sq":
sigma_hat = sigma * (1 + eta)
su = (sigma_hat ** 2 - sigma ** 2) ** .5 #su
if VARIANCE_PRESERVING:
alpha_ratio, sd, su = self.get_sde_coeff(sigma_next, None, su, eta, VARIANCE_PRESERVING)
else:
sd = sigma_next
sigma = sigma_hat
alpha_ratio = torch.ones_like(sigma)
case "vpsde":
alpha_ratio, sd, su = self.get_vpsde_step_RF(sigma, sigma_next, eta)
case "er4":
#def noise_scaler(sigma):
# return sigma * ((sigma ** 0.3).exp() + 10.0)
noise_scaler = lambda sigma: sigma * ((sigma ** eta).exp() + 10.0)
alpha_ratio = noise_scaler(sigma_next) / noise_scaler(sigma)
sigma_up = (sigma_next ** 2 - sigma ** 2 * alpha_ratio ** 2) ** 0.5
eta_ratio = sigma_up / sigma_next
if eta_ratio is not None:
sud = sigma_base * eta_fn(eta_ratio)
alpha_ratio, sd, su = self.get_sde_coeff(sigma_next, *sud_fn(sud), eta, VARIANCE_PRESERVING)
su = torch.nan_to_num(su, 0.0)
sd = torch.nan_to_num(sd, float(sigma_next))
alpha_ratio = torch.nan_to_num(alpha_ratio, 1.0)
return su, sigma, sd, alpha_ratio
def get_vpsde_step_RF(self, sigma:Tensor, sigma_next:Tensor, eta:float) -> Tuple[Tensor,Tensor,Tensor]:
dt = sigma - sigma_next
sigma_up = eta * sigma * dt**0.5
alpha_ratio = 1 - dt * (eta**2/4) * (1 + sigma)
sigma_down = sigma_next - (eta/4)*sigma*(1-sigma)*(sigma - sigma_next)
return sigma_up, sigma_down, alpha_ratio
def linear_noise_init(self, y:Tensor, sigma_curr:Tensor, x_base:Optional[Tensor]=None, x_curr:Optional[Tensor]=None, mask:Optional[Tensor]=None) -> Tensor:
y_noised = (self.sigma_max - sigma_curr) * y + sigma_curr * self.init_noise
if x_curr is not None:
x_curr = x_curr + sigma_curr * (self.init_noise - y)
x_base = x_base + self.sigma * (self.init_noise - y)
return y_noised, x_base, x_curr
if mask is not None:
y_noised = mask * y_noised + (1-mask) * y
return y_noised
def linear_noise_step(self, y:Tensor, sigma_curr:Optional[Tensor]=None, x_base:Optional[Tensor]=None, x_curr:Optional[Tensor]=None, brownian_sigma:Optional[Tensor]=None, brownian_sigma_next:Optional[Tensor]=None, mask:Optional[Tensor]=None) -> Tensor:
if self.sigma_up_eta == 0 or self.sigma_next == 0:
return y, x_base, x_curr
sigma_curr = self.sub_sigma if sigma_curr is None else sigma_curr
brownian_sigma = sigma_curr if brownian_sigma is None else brownian_sigma
brownian_sigma_next = self.sigma_next.clone() if brownian_sigma_next is None else brownian_sigma_next
if brownian_sigma == brownian_sigma_next:
brownian_sigma_next *= 0.999
if brownian_sigma_next > brownian_sigma and not self.EO("disable_brownian_swap"): # should this really be done?
brownian_sigma, brownian_sigma_next = brownian_sigma_next, brownian_sigma
noise = self.noise_sampler(sigma=brownian_sigma, sigma_next=brownian_sigma_next)
noise = normalize_zscore(noise, channelwise=True, inplace=True)
y_noised = (self.sigma_max - sigma_curr) * y + sigma_curr * noise
if x_curr is not None:
x_curr = x_curr + sigma_curr * (noise - y)
x_base = x_base + self.sigma * (noise - y)
return y_noised, x_base, x_curr
if mask is not None:
y_noised = mask * y_noised + (1-mask) * y
return y_noised
def linear_noise_substep(self, y:Tensor, sigma_curr:Optional[Tensor]=None, x_base:Optional[Tensor]=None, x_curr:Optional[Tensor]=None, brownian_sigma:Optional[Tensor]=None, brownian_sigma_next:Optional[Tensor]=None, mask:Optional[Tensor]=None) -> Tensor:
if self.sub_sigma_up_eta == 0 or self.sub_sigma_next == 0:
return y, x_base, x_curr
sigma_curr = self.sub_sigma if sigma_curr is None else sigma_curr
brownian_sigma = sigma_curr if brownian_sigma is None else brownian_sigma
brownian_sigma_next = self.sub_sigma_next.clone() if brownian_sigma_next is None else brownian_sigma_next
if brownian_sigma == brownian_sigma_next:
brownian_sigma_next *= 0.999
if brownian_sigma_next > brownian_sigma and not self.EO("disable_brownian_swap"): # should this really be done?
brownian_sigma, brownian_sigma_next = brownian_sigma_next, brownian_sigma
noise = self.noise_sampler2(sigma=brownian_sigma, sigma_next=brownian_sigma_next)
noise = normalize_zscore(noise, channelwise=True, inplace=True)
y_noised = (self.sigma_max - sigma_curr) * y + sigma_curr * noise
if x_curr is not None:
x_curr = x_curr + sigma_curr * (noise - y)
x_base = x_base + self.sigma * (noise - y)
return y_noised, x_base, x_curr
if mask is not None:
y_noised = mask * y_noised + (1-mask) * y
return y_noised
def swap_noise_step(self, x_0:Tensor, x_next:Tensor, brownian_sigma:Optional[Tensor]=None, brownian_sigma_next:Optional[Tensor]=None, mask:Optional[Tensor]=None) -> Tensor:
if self.sigma_up_eta == 0 or self.sigma_next == 0:
return x_next
brownian_sigma = self.sigma.clone() if brownian_sigma is None else brownian_sigma
brownian_sigma_next = self.sigma_next.clone() if brownian_sigma_next is None else brownian_sigma_next
if brownian_sigma == brownian_sigma_next:
brownian_sigma_next *= 0.999
eps_next = (x_0 - x_next) / (self.sigma - self.sigma_next)
denoised_next = x_0 - self.sigma * eps_next
if brownian_sigma_next > brownian_sigma and not self.EO("disable_brownian_swap"): # should this really be done?
brownian_sigma, brownian_sigma_next = brownian_sigma_next, brownian_sigma
noise = self.noise_sampler(sigma=brownian_sigma, sigma_next=brownian_sigma_next)
noise = normalize_zscore(noise, channelwise=True, inplace=True)
x_noised = self.alpha_ratio_eta * (denoised_next + self.sigma_down_eta * eps_next) + self.sigma_up_eta * noise * self.s_noise
if mask is not None:
x = mask * x_noised + (1-mask) * x_next
else:
x = x_noised
return x
def swap_noise_substep(self, x_0:Tensor, x_next:Tensor, brownian_sigma:Optional[Tensor]=None, brownian_sigma_next:Optional[Tensor]=None, mask:Optional[Tensor]=None, guide:Optional[Tensor]=None) -> Tensor:
if self.sub_sigma_up_eta == 0 or self.sub_sigma_next == 0:
return x_next
brownian_sigma = self.sub_sigma.clone() if brownian_sigma is None else brownian_sigma
brownian_sigma_next = self.sub_sigma_next.clone() if brownian_sigma_next is None else brownian_sigma_next
if brownian_sigma == brownian_sigma_next:
brownian_sigma_next *= 0.999
eps_next = (x_0 - x_next) / (self.sigma - self.sub_sigma_next)
denoised_next = x_0 - self.sigma * eps_next
if brownian_sigma_next > brownian_sigma and not self.EO("disable_brownian_swap"): # should this really be done?
brownian_sigma, brownian_sigma_next = brownian_sigma_next, brownian_sigma
noise = self.noise_sampler2(sigma=brownian_sigma, sigma_next=brownian_sigma_next)
noise = normalize_zscore(noise, channelwise=True, inplace=True)
x_noised = self.sub_alpha_ratio_eta * (denoised_next + self.sub_sigma_down_eta * eps_next) + self.sub_sigma_up_eta * noise * self.s_noise_substep
if mask is not None:
x = mask * x_noised + (1-mask) * x_next
else:
x = x_noised
return x
def swap_noise_inv_substep(self, x_0:Tensor, x_next:Tensor, eta_substep:float, row:int, row_offset_multistep_stages:int, brownian_sigma:Optional[Tensor]=None, brownian_sigma_next:Optional[Tensor]=None, mask:Optional[Tensor]=None, guide:Optional[Tensor]=None) -> Tensor:
if self.sub_sigma_up_eta == 0 or self.sub_sigma_next == 0:
return x_next
brownian_sigma = self.sub_sigma.clone() if brownian_sigma is None else brownian_sigma
brownian_sigma_next = self.sub_sigma_next.clone() if brownian_sigma_next is None else brownian_sigma_next
if brownian_sigma == brownian_sigma_next:
brownian_sigma_next *= 0.999
eps_next = (x_0 - x_next) / ((1-self.sigma) - (1-self.sub_sigma_next))
denoised_next = x_0 - (1-self.sigma) * eps_next
if brownian_sigma_next > brownian_sigma and not self.EO("disable_brownian_swap"): # should this really be done?
brownian_sigma, brownian_sigma_next = brownian_sigma_next, brownian_sigma
noise = self.noise_sampler2(sigma=brownian_sigma, sigma_next=brownian_sigma_next)
noise = normalize_zscore(noise, channelwise=True, inplace=True)
sub_sigma_up, sub_sigma, sub_sigma_down, sub_alpha_ratio = self.get_sde_substep(sigma = 1-self.s_[row],
sigma_next = 1-self.s_[row_offset_multistep_stages],
eta = eta_substep,
noise_mode_override = self.noise_mode_sde_substep,
DOWN = self.DOWN_SUBSTEP)
x_noised = sub_alpha_ratio * (denoised_next + sub_sigma_down * eps_next) + sub_sigma_up * noise * self.s_noise_substep
if mask is not None:
x = mask * x_noised + (1-mask) * x_next
else:
x = x_noised
return x
def swap_noise(self,
x_0 :Tensor,
x_next :Tensor,
sigma_0 :Tensor,
sigma :Tensor,
sigma_next :Tensor,
sigma_down :Tensor,
sigma_up :Tensor,
alpha_ratio :Tensor,
s_noise :float,
SUBSTEP :bool = False,
brownian_sigma :Optional[Tensor] = None,
brownian_sigma_next :Optional[Tensor] = None,
) -> Tensor:
if sigma_up == 0:
return x_next
if brownian_sigma is None:
brownian_sigma = sigma.clone()
if brownian_sigma_next is None:
brownian_sigma_next = sigma_next.clone()
if sigma_next == 0:
return x_next
if brownian_sigma == brownian_sigma_next:
brownian_sigma_next *= 0.999
eps_next = (x_0 - x_next) / (sigma_0 - sigma_next)
denoised_next = x_0 - sigma_0 * eps_next
if brownian_sigma_next > brownian_sigma:
s_tmp = brownian_sigma
brownian_sigma = brownian_sigma_next
brownian_sigma_next = s_tmp
if not SUBSTEP:
noise = self.noise_sampler(sigma=brownian_sigma, sigma_next=brownian_sigma_next)
else:
noise = self.noise_sampler2(sigma=brownian_sigma, sigma_next=brownian_sigma_next)
noise = normalize_zscore(noise, channelwise=True, inplace=True)
x = alpha_ratio * (denoised_next + sigma_down * eps_next) + sigma_up * noise * s_noise
return x
# not used. WARNING: some parameters have a different order than swap_noise!
def add_noise_pre(self,
x_0 :Tensor,
x :Tensor,
sigma_up :Tensor,
sigma_0 :Tensor,
sigma :Tensor,
sigma_next :Tensor,
real_sigma_down :Tensor,
alpha_ratio :Tensor,
s_noise :float,
noise_mode :str,
SDE_NOISE_EXTERNAL :bool = False,
sde_noise_t :Optional[Tensor] = None,
SUBSTEP :bool = False,
) -> Tensor:
if not self.CONST and noise_mode == "hard_sq":
if self.LOCK_H_SCALE:
x = self.swap_noise(x_0 = x_0,
x = x,
sigma = sigma,
sigma_0 = sigma_0,
sigma_next = sigma_next,
real_sigma_down = real_sigma_down,
sigma_up = sigma_up,
alpha_ratio = alpha_ratio,
s_noise = s_noise,
SUBSTEP = SUBSTEP,
)
else:
x = self.add_noise( x = x,
sigma_up = sigma_up,
sigma = sigma,
sigma_next = sigma_next,
alpha_ratio = alpha_ratio,
s_noise = s_noise,
SDE_NOISE_EXTERNAL = SDE_NOISE_EXTERNAL,
sde_noise_t = sde_noise_t,
SUBSTEP = SUBSTEP,
)
return x
# only used for handle_tiled_etc_noise_steps() in rk_guide_func_beta.py
def add_noise_post(self,
x_0 :Tensor,
x :Tensor,
sigma_up :Tensor,
sigma_0 :Tensor,
sigma :Tensor,
sigma_next :Tensor,
real_sigma_down :Tensor,
alpha_ratio :Tensor,
s_noise :float,
noise_mode :str,
SDE_NOISE_EXTERNAL :bool = False,
sde_noise_t :Optional[Tensor] = None,
SUBSTEP :bool = False,
) -> Tensor:
if self.CONST or (not self.CONST and noise_mode != "hard_sq"):
if self.LOCK_H_SCALE:
x = self.swap_noise(x_0 = x_0,
x = x,
sigma = sigma,
sigma_0 = sigma_0,
sigma_next = sigma_next,
real_sigma_down = real_sigma_down,
sigma_up = sigma_up,
alpha_ratio = alpha_ratio,
s_noise = s_noise,
SUBSTEP = SUBSTEP,
)
else:
x = self.add_noise( x = x,
sigma_up = sigma_up,
sigma = sigma,
sigma_next = sigma_next,
alpha_ratio = alpha_ratio,
s_noise = s_noise,
SDE_NOISE_EXTERNAL = SDE_NOISE_EXTERNAL,
sde_noise_t = sde_noise_t,
SUBSTEP = SUBSTEP,
)
return x
def add_noise(self,
x :Tensor,
sigma_up :Tensor,
sigma :Tensor,
sigma_next :Tensor,
alpha_ratio :Tensor,
s_noise :float,
SDE_NOISE_EXTERNAL :bool = False,
sde_noise_t :Optional[Tensor] = None,
SUBSTEP :bool = False,
) -> Tensor:
if sigma_next > 0.0 and sigma_up > 0.0:
if sigma_next > sigma:
sigma, sigma_next = sigma_next, sigma
if sigma == sigma_next:
sigma_next = sigma * 0.9999
if not SUBSTEP:
noise = self.noise_sampler (sigma=sigma, sigma_next=sigma_next)
else:
noise = self.noise_sampler2(sigma=sigma, sigma_next=sigma_next)
#noise_ortho = get_orthogonal(noise, x)
#noise_ortho = noise_ortho / noise_ortho.std()model,
noise = normalize_zscore(noise, channelwise=True, inplace=True)
if SDE_NOISE_EXTERNAL:
noise = (1-s_noise) * noise + s_noise * sde_noise_t
x_next = alpha_ratio * x + noise * sigma_up * s_noise
return x_next
else:
return x
def sigma_from_to(self,
x_0 : Tensor,
x_down : Tensor,
sigma : Tensor,
sigma_down : Tensor,
sigma_next : Tensor) -> Tensor: #sigma, sigma_from, sigma_to
eps = (x_0 - x_down) / (sigma - sigma_down)
denoised = x_0 - sigma * eps
x_next = denoised + sigma_next * eps # VESDE vs VPSDE equiv.?
return x_next
def rebound_overshoot_step(self, x_0:Tensor, x:Tensor) -> Tensor:
eps = (x_0 - x) / (self.sigma - self.sigma_down)
denoised = x_0 - self.sigma * eps
x = denoised + self.sigma_next * eps
return x
def rebound_overshoot_substep(self, x_0:Tensor, x:Tensor) -> Tensor:
if self.sigma - self.sub_sigma_down > 0:
sub_eps = (x_0 - x) / (self.sigma - self.sub_sigma_down)
sub_denoised = x_0 - self.sigma * sub_eps
x = sub_denoised + self.sub_sigma_next * sub_eps
return x
def prepare_sigmas(self,
sigmas : Tensor,
sigmas_override : Tensor,
d_noise : float,
d_noise_start_step : int,
sampler_mode : str) -> Tuple[Tensor,bool]:
#SIGMA_MIN = torch.full_like(self.sigma_min, 0.00227896) if self.sigma_min < 0.00227896 else self.sigma_min # prevent black image with unsampling flux, which has a sigma_min of 0.0002
SIGMA_MIN = self.sigma_min #torch.full_like(self.sigma_min, max(0.01, self.sigma_min.item()))
if sigmas_override is not None:
sigmas = sigmas_override.clone().to(sigmas.device).to(sigmas.dtype)
if d_noise_start_step == 0:
sigmas = sigmas.clone() * d_noise
UNSAMPLE_FROM_ZERO = False
if sigmas[0] == 0.0: #remove padding used to prevent comfy from adding noise to the latent (for unsampling, etc.)
UNSAMPLE = True
if sigmas[-1] == 0.0:
UNSAMPLE_FROM_ZERO = True
#sigmas = sigmas[1:-1] # was cleaving off 1.0 at the end when restart looping
sigmas = sigmas[1:]
if sigmas[-1] == 0.0:
sigmas = sigmas[:-1]
else:
UNSAMPLE = False
if hasattr(self.model, "sigmas"):
self.model.sigmas = sigmas
if sampler_mode == "standard":
UNSAMPLE = False
consecutive_duplicate_mask = torch.cat((torch.tensor([True], device=sigmas.device), torch.diff(sigmas) != 0))
sigmas = sigmas[consecutive_duplicate_mask]
if sigmas[-1] == 0:
if sigmas[-2] < SIGMA_MIN:
sigmas[-2] = SIGMA_MIN
elif (sigmas[-2] - SIGMA_MIN).abs() > 1e-4:
sigmas = torch.cat((sigmas[:-1], SIGMA_MIN.unsqueeze(0), sigmas[-1:]))
elif UNSAMPLE_FROM_ZERO and not torch.isclose(sigmas[0], SIGMA_MIN):
sigmas = torch.cat([SIGMA_MIN.unsqueeze(0), sigmas])
self.sigmas = sigmas
self.UNSAMPLE = UNSAMPLE
self.d_noise = d_noise
self.sampler_mode = sampler_mode
return sigmas, UNSAMPLE
def extract_latent_swap_noise(self, x:Tensor, x_noise_swapped:Tensor, sigma:Tensor, old_noise:Tensor) -> Tensor:
return (x - x_noise_swapped) / sigma + old_noise
def update_latent_swap_noise(self, x:Tensor, sigma:Tensor, old_noise:Tensor, new_noise:Tensor) -> Tensor:
return x + sigma * (new_noise - old_noise)
================================================
FILE: beta/rk_sampler_beta.py
================================================
import torch
from torch import Tensor
import torch.nn.functional as F
from tqdm.auto import trange
import gc
from typing import Optional, Callable, Tuple, List, Dict, Any, Union
import math
import copy
from comfy.model_sampling import EPS
import comfy
from ..res4lyf import RESplain
from ..helper import ExtraOptions, FrameWeightsManager
from ..latents import lagrange_interpolation, get_collinear, get_orthogonal, get_cosine_similarity, get_pearson_similarity, get_slerp_weight_for_cossim, get_slerp_ratio, slerp_tensor, get_edge_mask, normalize_zscore, compute_slerp_ratio_for_target, find_slerp_ratio_grid
from ..style_transfer import apply_scattersort_spatial, apply_adain_spatial
from .rk_method_beta import RK_Method_Beta
from .rk_noise_sampler_beta import RK_NoiseSampler
from .rk_guide_func_beta import LatentGuide
from .phi_functions import Phi
from .constants import MAX_STEPS, GUIDE_MODE_NAMES_PSEUDOIMPLICIT
def init_implicit_sampling(
        RK                : RK_Method_Beta,
        x_0               : Tensor,
        x_                : Tensor,
        eps_              : Tensor,
        eps_prev_         : Tensor,
        data_             : Tensor,
        eps               : Tensor,
        denoised          : Tensor,
        denoised_prev2    : Tensor,
        step              : int,
        sigmas            : Tensor,
        h                 : Tensor,
        s_                : Tensor,
        EO                : ExtraOptions,
        SYNC_GUIDE_ACTIVE,
        ):
    """Seed the per-row epsilon (eps_) and denoised (data_) buffers before an
    implicit solve, then run an initial Newton iteration.

    By default (no relevant extra options set) the model is called once at
    row 0 and its result is propagated to the remaining rows, scaled by the
    sigma ratio. The EO("implicit_*") options replace that model call with
    cheaper guesses extrapolated from the previous step's outputs
    (denoised / denoised_prev2), or a Lagrange interpolation across the
    previous step's substeps. The `denoised.sum() + eps.sum() != 0` guard
    disables those shortcuts on the first step, when no history exists yet.
    """
    sigma = sigmas[step]

    if EO("implicit_skip_model_call_at_start") and denoised.sum() + eps.sum() != 0:
        if denoised_prev2.sum() == 0:
            # no second-order history: reuse last step's output at row 0.
            # NOTE(review): the first eps_[0] assignment is immediately
            # overwritten by the anchored recomputation below.
            eps_ [0] = eps.clone()
            data_[0] = denoised.clone()
            eps_ [0] = RK.get_epsilon_anchored(x_0, denoised, sigma)
        else:
            # linear extrapolation from the two previous denoised outputs.
            # NOTE(review): eps_[0] is left untouched on this path — confirm
            # downstream recomputes it before use.
            sratio   = sigma - s_[0]
            data_[0] = denoised + sratio * (denoised - denoised_prev2)

    elif EO("implicit_full_skip_model_call_at_start") and denoised.sum() + eps.sum() != 0:
        if denoised_prev2.sum() == 0:
            eps_ [0] = eps.clone()
            data_[0] = denoised.clone()
            eps_ [0] = RK.get_epsilon_anchored(x_0, denoised, sigma)
        else:
            # extrapolate every row, then re-anchor each epsilon at its own substep sigma
            for r in range(RK.rows):
                sratio   = sigma - s_[r]
                data_[r] = denoised + sratio * (denoised - denoised_prev2)
                eps_ [r] = RK.get_epsilon_anchored(x_0, data_[r], s_[r])

    elif EO("implicit_lagrange_skip_model_call_at_start") and denoised.sum() + eps.sum() != 0:
        if denoised_prev2.sum() == 0:
            eps_ [0] = eps.clone()
            data_[0] = denoised.clone()
            eps_ [0] = RK.get_epsilon_anchored(x_0, denoised, sigma)
        else:
            # Lagrange guess: interpolate between the previous two denoised
            # outputs at each row's normalized position 1 + w*C[r]
            sigma_prev    = sigmas[step-1]
            h_prev        = sigma - sigma_prev
            w             = h / h_prev
            substeps_prev = len(RK.C[:-1])
            for r in range(RK.rows):
                sratio   = sigma - s_[r]
                data_[r] = lagrange_interpolation([0,1], [denoised_prev2, denoised], 1 + w*RK.C[r]).squeeze(0) + denoised_prev2 - denoised
                eps_ [r] = RK.get_epsilon_anchored(x_0, data_[r], s_[r])

        # optional collapse: reuse row 0's epsilon (rescaled) for all rows
        if EO("implicit_lagrange_skip_model_call_at_start_0_only"):
            for r in range(RK.rows):
                eps_ [r] = eps_ [0].clone() * s_[0] / s_[r]
                data_[r] = denoised.clone()

    elif EO("implicit_lagrange_init") and denoised.sum() + eps.sum() != 0:
        sigma_prev    = sigmas[step-1]
        h_prev        = sigma - sigma_prev
        w             = h / h_prev
        substeps_prev = len(RK.C[:-1])

        # interpolate the previous step's accumulated increments instead of the data
        z_prev_ = eps_.clone()
        for r in range (substeps_prev):
            z_prev_[r] = h * RK.zum(r, eps_) # u,v not implemented for lagrange guess for implicit
        zi_1 = lagrange_interpolation(RK.C[:-1], z_prev_[:substeps_prev], RK.C[0]).squeeze(0) # + x_prev - x_0
        x_[0] = x_0 + zi_1

    else:
        # default path: one real model call at row 0
        eps_[0], data_[0] = RK(x_[0], sigma, x_0, sigma)

        # propagate row 0 to all rows (epsilon rescaled by sigma ratio) unless
        # an option that fills the rows itself is active
        if not EO(("implicit_lagrange_init", "radaucycle", "implicit_full_skip_model_call_at_start", "implicit_lagrange_skip_model_call_at_start")):
            for r in range(RK.rows):
                eps_ [r] = eps_ [0].clone() * sigma / s_[r]
                data_[r] = data_[0].clone()

    x_, eps_ = RK.newton_iter(x_0, x_, eps_, eps_prev_, data_, s_, 0, h, sigmas, step, "init", SYNC_GUIDE_ACTIVE)

    return x_, eps_, data_
@torch.no_grad()
def sample_rk_beta(
    model,
    x                             : Tensor,
    sigmas                        : Tensor,
    sigmas_override               : Optional[Tensor] = None,
    extra_args                    : Optional[Tensor] = None,
    callback                      : Optional[Callable] = None,
    disable                       : bool = None,
    sampler_mode                  : str = "standard",
    rk_type                       : str = "res_2m",
    implicit_sampler_name         : str = "use_explicit",
    c1                            : float = 0.0,
    c2                            : float = 0.5,
    c3                            : float = 1.0,
    noise_sampler_type            : str = "gaussian",
    noise_sampler_type_substep    : str = "gaussian",
    noise_mode_sde                : str = "hard",
    noise_mode_sde_substep        : str = "hard",
    eta                           : float = 0.5,
    eta_substep                   : float = 0.5,
    noise_scaling_weight          : float = 0.0,
    noise_scaling_type            : str = "sampler",
    noise_scaling_mode            : str = "linear",
    noise_scaling_eta             : float = 0.0,
    noise_scaling_cycles          : int = 1,
    noise_scaling_weights         : Optional[Tensor] = None,
    noise_scaling_etas            : Optional[Tensor] = None,
    noise_boost_step              : float = 0.0,
    noise_boost_substep           : float = 0.0,
    noise_boost_normalize         : bool = True,
    noise_anchor                  : float = 1.0,
    s_noise                       : float = 1.0,
    s_noise_substep               : float = 1.0,
    d_noise                       : float = 1.0,
    d_noise_start_step            : int = 0,
    d_noise_inv                   : float = 1.0,
    d_noise_inv_start_step        : int = 0,
    alpha                         : float = -1.0,
    alpha_substep                 : float = -1.0,
    k                             : float = 1.0,
    k_substep                     : float = 1.0,
    momentum                      : float = 0.0,
    overshoot_mode                : str = "hard",
    overshoot_mode_substep        : str = "hard",
    overshoot                     : float = 0.0,
    overshoot_substep             : float = 0.0,
    implicit_type                 : str = "predictor-corrector",
    implicit_type_substeps        : str = "predictor-corrector",
    implicit_steps_diag           : int = 0,
    implicit_steps_full           : int = 0,
    etas                          : Optional[Tensor] = None,
    etas_substep                  : Optional[Tensor] = None,
    s_noises                      : Optional[Tensor] = None,
    s_noises_substep              : Optional[Tensor] = None,
    momentums                     : Optional[Tensor] = None,
    regional_conditioning_weights : Optional[Tensor] = None,
    regional_conditioning_floors  : Optional[Tensor] = None,
    narcissism_start_step         : int = 0,
    narcissism_end_step           : int = 5,
    LGW_MASK_RESCALE_MIN          : bool = True,
    guides                        : Optional[Tuple[Any, ...]] = None,
    epsilon_scales                : Optional[Tensor] = None,
    frame_weights_mgr             : Optional[FrameWeightsManager] = None,
    sde_noise                     : list [Tensor] = [],
    noise_seed                    : int = -1,
    noise_initial                 : Optional[Tensor] = None,
    image_initial                 : Optional[Tensor] = None,
    cfgpp                         : float = 0.0,
    cfg_cw                        : float = 1.0,
    BONGMATH                      : bool = True,
    unsample_bongmath             = None,
    state_info                    : Optional[dict[str, Any]] = None,
    state_info_out                : Optional[dict[str, Any]] = None,
    rk_swap_type                  : str = "",
    rk_swap_step                  : int = MAX_STEPS,
    rk_swap_threshold             : float = 0.0,
    rk_swap_print                 : bool = False,
    steps_to_run                  : int = -1,
    start_at_step                 : int = -1,
    tile_sizes                    : Optional[List[Tuple[int,int]]] = None,
    flow_sync_eps                 : float = 0.0,
    sde_mask                      : Optional[Tensor] = None,
    batch_num                     : int = 0,
    extra_options                 : str = "",
    AttnMask                      = None,
    RegContext                    = None,
    RegParam                      = None,
    AttnMask_neg                  = None,
    RegContext_neg                = None,
    RegParam_neg                  = None,
):
    """Main entry point for the "beta" Runge-Kutta sampler family.

    Runs an (optionally implicit) Runge-Kutta sampling, unsampling, or
    resampling loop over `sigmas`, with support for SDE-style noise injection,
    latent guides (`guides` / LatentGuide), regional conditioning and attention
    masks, style transfer (StyleMMDiT), and resuming a partially completed run
    via `state_info`.

    Behavior visible in this function:
      - `sampler_mode == "NULL"` returns `x` unchanged immediately.
      - `sampler_mode` in {"unsample", "resample"} restores the raw latent,
        sigma schedule, RNG state, and multistep history from `state_info`.
      - Work happens in `default_dtype` (float64 unless overridden via
        extra options) on the model's device, or on CPU if the
        "work_device_cpu" extra option is set.
      - `state_info_out` is presumably filled with continuation state later in
        the function (past this excerpt) -- confirm against the full source.

    Many parameters are per-step schedule tensors (e.g. `etas`, `s_noises`)
    that override their scalar counterparts when provided. `extra_options` is
    a free-form string parsed by ExtraOptions (EO) to toggle experimental
    behavior throughout.
    """
    # No-op mode: return the input latent untouched.
    if sampler_mode == "NULL":
        return x
EO = ExtraOptions(extra_options)
default_dtype = EO("default_dtype", torch.float64)
extra_args = {} if extra_args is None else extra_args
model_device = model.inner_model.inner_model.device #x.device
work_device = 'cpu' if EO("work_device_cpu") else model_device
state_info = {} if state_info is None else state_info
state_info_out = {} if state_info_out is None else state_info_out
VE_MODEL = isinstance(model.inner_model.inner_model.model_sampling, EPS)
RENOISE = False
if 'raw_x' in state_info and sampler_mode in {"resample", "unsample"}:
if x.shape == state_info['raw_x'].shape:
x = state_info['raw_x'].to(work_device) #clone()
else:
denoised = comfy.utils.bislerp(state_info['denoised'], x.shape[-1], x.shape[-2])
x = denoised.to(x)
RENOISE = True
RESplain("Continuing from raw latent from previous sampler.", debug=False)
start_step = 0
if 'end_step' in state_info and (sampler_mode == "resample" or sampler_mode == "unsample"):
if state_info['completed'] != True and state_info['end_step'] != 0 and state_info['end_step'] != -1 and state_info['end_step'] < len(state_info['sigmas'])-1 : #incomplete run in previous sampler node
if state_info['sampler_mode'] in {"standard","resample"} and sampler_mode == "unsample" and sigmas[2] < sigmas[1]:
sigmas = torch.flip(state_info['sigmas'], dims=[0])
start_step = (len(sigmas)-1) - (state_info['end_step']) #-1) #removed -1 at the end here. correct?
if state_info['sampler_mode'] == "unsample" and sampler_mode == "resample" and sigmas[2] > sigmas[1]:
sigmas = torch.flip(state_info['sigmas'], dims=[0])
start_step = (len(sigmas)-1) - state_info['end_step'] #-1)
elif state_info['sampler_mode'] == "unsample" and sampler_mode == "resample":
start_step = 0
if state_info['sampler_mode'] in {"standard", "resample"} and sampler_mode == "resample":
start_step = state_info['end_step'] if state_info['end_step'] != -1 else 0
if start_step > 0:
sigmas = state_info['sigmas'].clone()
if sde_mask is not None:
from .rk_guide_func_beta import prepare_mask
sde_mask, _ = prepare_mask(x, sde_mask, LGW_MASK_RESCALE_MIN)
sde_mask = sde_mask.to(x.device).to(x.dtype)
x = x .to(dtype=default_dtype, device=work_device)
sigmas = sigmas.to(dtype=default_dtype, device=work_device)
c1 = EO("c1" , c1)
c2 = EO("c2" , c2)
c3 = EO("c3" , c3)
cfg_cw = EO("cfg_cw" , cfg_cw)
noise_seed = EO("noise_seed" , noise_seed)
noise_seed_substep = EO("noise_seed_substep" , noise_seed + MAX_STEPS)
pseudoimplicit_row_weights = EO("pseudoimplicit_row_weights" , [1. for _ in range(100)])
pseudoimplicit_step_weights = EO("pseudoimplicit_step_weights", [1. for _ in range(max(implicit_steps_diag, implicit_steps_full)+1)])
noise_scaling_cycles = EO("noise_scaling_cycles", 1)
noise_boost_step = EO("noise_boost_step", 0.0)
noise_boost_substep = EO("noise_boost_substep", 0.0)
# SETUP SAMPLER
if implicit_sampler_name not in ("use_explicit", "none"):
rk_type = implicit_sampler_name
RESplain("rk_type:", rk_type)
if implicit_sampler_name == "none":
implicit_steps_diag = implicit_steps_full = 0
RK = RK_Method_Beta.create(model, rk_type, VE_MODEL, noise_anchor, noise_boost_normalize, model_device=model_device, work_device=work_device, dtype=default_dtype, extra_options=extra_options)
RK.extra_args = RK.init_cfg_channelwise(x, cfg_cw, **extra_args)
RK.tile_sizes = tile_sizes
RK.extra_args['model_options']['transformer_options']['regional_conditioning_weight'] = 0.0
RK.extra_args['model_options']['transformer_options']['regional_conditioning_floor'] = 0.0
RK.unsample_bongmath = BONGMATH if unsample_bongmath is None else unsample_bongmath # allow turning off bongmath for unsampling with cycles
# SETUP SIGMAS
sigmas_orig = sigmas.clone()
NS = RK_NoiseSampler(RK, model, device=work_device, dtype=default_dtype, extra_options=extra_options)
sigmas, UNSAMPLE = NS.prepare_sigmas(sigmas, sigmas_override, d_noise, d_noise_start_step, sampler_mode)
if UNSAMPLE and sigmas_orig[0] == 0.0 and sigmas_orig[0] != sigmas[0] and sigmas[1] < sigmas[2]:
sigmas = torch.cat([torch.full_like(sigmas[0], 0.0).unsqueeze(0), sigmas])
if start_step == 0:
start_step = 1
else:
start_step -= 1
if sampler_mode in {"resample", "unsample"}:
state_info_sigma_next = state_info.get('sigma_next', -1)
state_info_start_step = (sigmas == state_info_sigma_next).nonzero().flatten()
if state_info_start_step.shape[0] > 0:
start_step = state_info_start_step.item()
start_step = start_at_step if start_at_step >= 0 else start_step
    # External SDE noise: a user-supplied list of noise tensors, consumed one per step.
    SDE_NOISE_EXTERNAL = False
    if sde_noise is not None:
        # Only enabled when sigmas are descending (forward sampling, not unsampling).
        if len(sde_noise) > 0 and sigmas[1] > sigmas[2]:
            SDE_NOISE_EXTERNAL = True
            sigma_up_total = torch.zeros_like(sigmas[0])
            for i in range(len(sde_noise)-1):
                sigma_up_total += sigmas[i+1]
            # NOTE(review): eta is divided by the *sum* of the remaining sigmas and
            # broadcast to every step -- confirm this normalization is intended.
            etas = torch.full_like(sigmas, eta / sigma_up_total)
if 'last_rng' in state_info and sampler_mode in {"resample", "unsample"}:
last_rng = state_info['last_rng'].clone()
last_rng_substep = state_info['last_rng_substep'].clone()
else:
last_rng = None
last_rng_substep = None
NS.init_noise_samplers(x, noise_seed, noise_seed_substep, noise_sampler_type, noise_sampler_type_substep, noise_mode_sde, noise_mode_sde_substep, \
overshoot_mode, overshoot_mode_substep, noise_boost_step, noise_boost_substep, alpha, alpha_substep, k, k_substep, \
last_rng=last_rng, last_rng_substep=last_rng_substep,)
data_ = None
eps_ = None
eps = torch.zeros_like(x, dtype=default_dtype, device=work_device)
denoised = torch.zeros_like(x, dtype=default_dtype, device=work_device)
denoised_prev = torch.zeros_like(x, dtype=default_dtype, device=work_device)
denoised_prev2 = torch.zeros_like(x, dtype=default_dtype, device=work_device)
x_ = None
eps_prev_ = None
denoised_data_prev = None
denoised_data_prev2 = None
h_prev = None
eps_y2x_ = None
eps_x2y_ = None
eps_y_ = None
eps_prev_y_ = None
data_y_ = None
yt_ = None
yt_0 = None
eps_yt_ = None
eps_x_ = None
data_y_ = None
data_x_ = None
z_ = None # for tracking residual noise for model scattersort/synchronized diffusion
y0_bongflow = state_info.get('y0_bongflow')
y0_bongflow_orig = state_info.get('y0_bongflow_orig')
noise_bongflow = state_info.get('noise_bongflow')
y0_standard_guide = state_info.get('y0_standard_guide')
y0_inv_standard_guide = state_info.get('y0_inv_standard_guide')
data_prev_y_ = state_info.get('data_prev_y_')
data_prev_x_ = state_info.get('data_prev_x_')
data_prev_x2y_ = state_info.get('data_prev_x2y_')
# BEGIN SAMPLING LOOP
num_steps = len(sigmas[start_step:])-2 if sigmas[-1] == 0 else len(sigmas[start_step:])-1
if steps_to_run >= 0:
current_steps = min(num_steps, steps_to_run)
num_steps = start_step + min(num_steps, steps_to_run)
else:
current_steps = num_steps
num_steps = start_step + num_steps
#current_steps = current_steps + 1 if sigmas[-1] == 0 and steps_to_run < 0 and UNSAMPLE else current_steps
INIT_SAMPLE_LOOP = True
step = start_step
sigma, sigma_next, data_prev_ = None, None, None
if (num_steps-1) == len(sigmas)-2 and sigmas[-1] == 0 and sigmas[-2] == NS.sigma_min:
progress_bar = trange(current_steps+1, disable=disable)
else:
progress_bar = trange(current_steps, disable=disable)
# SETUP GUIDES
LG = LatentGuide(model, sigmas, UNSAMPLE, VE_MODEL, LGW_MASK_RESCALE_MIN, extra_options, device=work_device, dtype=default_dtype, frame_weights_mgr=frame_weights_mgr)
guide_inversion_y0 = state_info.get('guide_inversion_y0')
guide_inversion_y0_inv = state_info.get('guide_inversion_y0_inv')
x = LG.init_guides(x, RK.IMPLICIT, guides, NS.noise_sampler, batch_num, sigmas[step], guide_inversion_y0, guide_inversion_y0_inv)
LG.y0 = y0_standard_guide if y0_standard_guide is not None else LG.y0
LG.y0_inv = y0_inv_standard_guide if y0_inv_standard_guide is not None else LG.y0_inv
if (LG.mask != 1.0).any() and ((LG.y0 == 0).all() or (LG.y0_inv == 0).all()) : # and not LG.guide_mode.startswith("flow"): # (LG.y0.sum() == 0 or LG.y0_inv.sum() == 0):
SKIP_PSEUDO = True
RESplain("skipping pseudo...")
if LG.y0 .sum() == 0:
SKIP_PSEUDO_Y = "y0"
elif LG.y0_inv.sum() == 0:
SKIP_PSEUDO_Y = "y0_inv"
else:
SKIP_PSEUDO = False
if guides is not None and guides.get('guide_mode', '') != "inversion" or sampler_mode != "unsample": #do not set denoised_prev to noise guide with inversion!
if LG.y0.sum() != 0 and LG.y0_inv.sum() != 0:
denoised_prev = LG.mask * LG.y0 + (1-LG.mask) * LG.y0_inv
elif LG.y0.sum() != 0:
denoised_prev = LG.y0
elif LG.y0_inv.sum() != 0:
denoised_prev = LG.y0_inv
data_cached = None
if EO("pseudo_mix_strength"):
orig_y0 = LG.y0.clone()
orig_y0_inv = LG.y0_inv.clone()
#gc.collect()
BASE_STARTED = False
INV_STARTED = False
FLOW_STARTED = False
FLOW_STOPPED = False
noise_xt, noise_yt = None, None
FLOW_RESUMED = False
if state_info.get('FLOW_STARTED', False) and not state_info.get('FLOW_STOPPED', False):
FLOW_RESUMED = True
y0 = state_info['y0'].to(work_device)
data_cached = state_info['data_cached'].to(work_device)
data_x_prev_ = state_info['data_x_prev_'].to(work_device)
if noise_initial is not None:
x_init = noise_initial.to(x)
RK.update_transformer_options({'x_init': x_init._copy() if hasattr(x_init, 'is_nested') and x_init.is_nested else x_init.clone()})
#progress_bar = trange(len(sigmas)-1-start_step, disable=disable)
#if EO("eps_adain") or EO("x_init_to_model"):
if AttnMask is not None:
RK.update_transformer_options({'AttnMask' : AttnMask})
RK.update_transformer_options({'RegContext': RegContext})
if AttnMask_neg is not None:
RK.update_transformer_options({'AttnMask_neg' : AttnMask_neg})
RK.update_transformer_options({'RegContext_neg': RegContext_neg})
if EO("y0_to_transformer_options"):
RK.update_transformer_options({'y0': LG.y0.clone()})
if EO("y0_inv_to_transformer_options"):
RK.update_transformer_options({'y0_inv': LG.y0_inv.clone()})
for block in model.inner_model.inner_model.diffusion_model.double_stream_blocks:
for attr in ["txt_q_cache", "txt_k_cache", "txt_v_cache", "img_q_cache", "img_k_cache", "img_v_cache"]:
if hasattr(block.block.attn1, attr):
delattr(block.block.attn1, attr)
for block in model.inner_model.inner_model.diffusion_model.single_stream_blocks:
block.block.attn1.EO = EO
for attr in ["txt_q_cache", "txt_k_cache", "txt_v_cache", "img_q_cache", "img_k_cache", "img_v_cache"]:
if hasattr(block.block.attn1, attr):
delattr(block.block.attn1, attr)
RK.update_transformer_options({'ExtraOptions': copy.deepcopy(EO)})
if EO("update_cross_attn"):
update_cross_attn = {
'src_llama_start': EO('src_llama_start', 0),
'src_llama_end': EO('src_llama_end', 0),
'src_t5_start': EO('src_t5_start', 0),
'src_t5_end': EO('src_t5_end', 0),
'tgt_llama_start': EO('tgt_llama_start', 0),
'tgt_llama_end': EO('tgt_llama_end', 0),
'tgt_t5_start': EO('tgt_t5_start', 0),
'tgt_t5_end': EO('tgt_t5_end', 0),
'skip_cross_attn': EO('skip_cross_attn', False),
'update_q': EO('update_q', False),
'update_k': EO('update_k', True),
'update_v': EO('update_v', True),
'lamb': EO('lamb', 0.01),
'erase': EO('erase', 10.0),
}
RK.update_transformer_options({'update_cross_attn': update_cross_attn})
else:
RK.update_transformer_options({'update_cross_attn': None})
if LG.HAS_LATENT_GUIDE_ADAIN:
RK.update_transformer_options({'blocks_adain_cache': []})
if LG.HAS_LATENT_GUIDE_ATTNINJ:
RK.update_transformer_options({'blocks_attninj_cache': []})
if LG.HAS_LATENT_GUIDE_STYLE_POS:
if LG.HAS_LATENT_GUIDE and y0_standard_guide is None:
y0_cache = LG.y0.clone().cpu()
RK.update_transformer_options({'y0_standard_guide': LG.y0})
sigmas_scheduled = sigmas.clone() # store for return in state_info_out
if EO("sigma_restarts"):
sigma_restarts = 1 + EO("sigma_restarts", 0)
sigmas = sigmas[step:num_steps+1].repeat(sigma_restarts)
step = 0
num_steps = 2 * sigma_restarts - 1
if RENOISE: # TODO: adapt for noise inversion somehow
if VE_MODEL:
x = x + sigmas[step] * NS.noise_sampler(sigma=sigmas[step], sigma_next=sigmas[step+1])
else:
x = (1 - sigmas[step]) * x + sigmas[step] * NS.noise_sampler(sigma=sigmas[step], sigma_next=sigmas[step+1])
LG.ADAIN_NOISE_MODE = ""
StyleMMDiT = None
if guides is not None:
RK.update_transformer_options({"freqsep_lowpass_method": guides.get("freqsep_lowpass_method")})
RK.update_transformer_options({"freqsep_sigma": guides.get("freqsep_sigma")})
RK.update_transformer_options({"freqsep_kernel_size": guides.get("freqsep_kernel_size")})
RK.update_transformer_options({"freqsep_inner_kernel_size": guides.get("freqsep_inner_kernel_size")})
RK.update_transformer_options({"freqsep_stride": guides.get("freqsep_stride")})
RK.update_transformer_options({"freqsep_lowpass_weight": guides.get("freqsep_lowpass_weight")})
RK.update_transformer_options({"freqsep_highpass_weight":guides.get("freqsep_highpass_weight")})
RK.update_transformer_options({"freqsep_mask": guides.get("freqsep_mask")})
StyleMMDiT = guides.get('StyleMMDiT')
if StyleMMDiT is not None:
StyleMMDiT.init_guides(model)
LG.ADAIN_NOISE_MODE = StyleMMDiT.noise_mode
if EO("mycoshock"):
StyleMMDiT.Retrojector = model.inner_model.inner_model.diffusion_model.Retrojector
image_initial_shock = StyleMMDiT.apply_data_shock(image_initial.to(x))
if VE_MODEL:
x = image_initial_shock.to(x) + sigmas[0] * noise_initial.to(x)
else:
x = (1 - sigmas[0]) * image_initial_shock.to(x) + sigmas[0] * noise_initial.to(x)
RK.update_transformer_options({"model_sampling": model.inner_model.inner_model.model_sampling})
# BEGIN SAMPLING LOOP
while step < num_steps:
sigma, sigma_next = sigmas[step], sigmas[step+1]
if sigma_next > sigma:
step_sched = torch.where(torch.flip(sigmas, dims=[0]) == sigma)[0][0].item()
else:
step_sched = step
SYNC_GUIDE_ACTIVE = LG.guide_mode.startswith("sync") and (LG.lgw[step_sched] != 0 or LG.lgw_inv[step_sched] != 0 or LG.lgw_sync[step_sched] != 0 or LG.lgw_sync_inv[step_sched] != 0)
if StyleMMDiT is not None:
RK.update_transformer_options({'StyleMMDiT': StyleMMDiT})
else:
if LG.HAS_LATENT_GUIDE_ADAIN:
if LG.lgw_adain[step_sched] == 0.0:
RK.update_transformer_options({'y0_adain': None})
RK.update_transformer_options({'blocks_adain': {}})
RK.update_transformer_options({'sort_and_scatter': {}})
else:
RK.update_transformer_options({'y0_adain': LG.y0_adain.clone()})
if 'blocks_adain_mmdit' in guides:
blocks_adain = {
"double_weights": [val * LG.lgw_adain[step_sched] for val in guides['blocks_adain_mmdit']['double_weights']],
"single_weights": [val * LG.lgw_adain[step_sched] for val in guides['blocks_adain_mmdit']['single_weights']],
"double_blocks" : guides['blocks_adain_mmdit']['double_blocks'],
"single_blocks" : guides['blocks_adain_mmdit']['single_blocks'],
}
RK.update_transformer_options({'blocks_adain': blocks_adain})
RK.update_transformer_options({'sort_and_scatter': guides['sort_and_scatter']})
RK.update_transformer_options({'noise_mode_adain': guides['sort_and_scatter']['noise_mode']})
if LG.HAS_LATENT_GUIDE_ATTNINJ:
if LG.lgw_attninj[step_sched] == 0.0:
RK.update_transformer_options({'y0_attninj': None})
RK.update_transformer_options({'blocks_attninj' : {}})
RK.update_transformer_options({'blocks_attninj_qkv': {}})
else:
RK.update_transformer_options({'y0_attninj': LG.y0_attninj.clone()})
if 'blocks_attninj_mmdit' in guides:
blocks_attninj = {
"double_weights": [val * LG.lgw_attninj[step_sched] for val in guides['blocks_attninj_mmdit']['double_weights']],
"single_weights": [val * LG.lgw_attninj[step_sched] for val in guides['blocks_attninj_mmdit']['single_weights']],
"double_blocks" : guides['blocks_attninj_mmdit']['double_blocks'],
"single_blocks" : guides['blocks_attninj_mmdit']['single_blocks'],
}
RK.update_transformer_options({'blocks_attninj' : blocks_attninj})
RK.update_transformer_options({'blocks_attninj_qkv': guides['blocks_attninj_qkv']})
if LG.HAS_LATENT_GUIDE_STYLE_POS:
if LG.lgw_style_pos[step_sched] == 0.0:
RK.update_transformer_options({'y0_style_pos': None})
RK.update_transformer_options({'y0_style_pos_weight': 0.0})
RK.update_transformer_options({'y0_style_pos_synweight': 0.0})
RK.update_transformer_options({'y0_style_pos_mask': None})
else:
RK.update_transformer_options({'y0_style_pos': LG.y0_style_pos.clone()})
RK.update_transformer_options({'y0_style_pos_weight': LG.lgw_style_pos[step_sched]})
RK.update_transformer_options({'y0_style_pos_synweight': guides['synweight_style_pos']})
RK.update_transformer_options({'y0_style_pos_mask': LG.mask_style_pos})
RK.update_transformer_options({'y0_style_pos_mask_edge': guides.get('mask_edge_style_pos')})
RK.update_transformer_options({'y0_style_method': guides['style_method']})
RK.update_transformer_options({'y0_style_tile_height': guides.get('style_tile_height')})
RK.update_transformer_options({'y0_style_tile_width': guides.get('style_tile_width')})
RK.update_transformer_options({'y0_style_tile_padding': guides.get('style_tile_padding')})
if EO("style_edge_width"):
RK.update_transformer
#if LG.HAS_LATENT_GUIDE:
# y0_cache = LG.y0.clone().cpu()
# RK.update_transformer_options({'y0_standard_guide': LG.y0})
if LG.HAS_LATENT_GUIDE_INV and y0_inv_standard_guide is None:
y0_inv_cache = LG.y0_inv.clone().cpu()
RK.update_transformer_options({'y0_inv_standard_guide': LG.y0_inv})
if LG.HAS_LATENT_GUIDE_STYLE_NEG:
if LG.lgw_style_neg[step_sched] == 0.0:
RK.update_transformer_options({'y0_style_neg': None})
RK.update_transformer_options({'y0_style_neg_weight': 0.0})
RK.update_transformer_options({'y0_style_neg_synweight': 0.0})
RK.update_transformer_options({'y0_style_neg_mask': None})
else:
RK.update_transformer_options({'y0_style_neg': LG.y0_style_neg.clone()})
RK.update_transformer_options({'y0_style_neg_weight': LG.lgw_style_neg[step_sched]})
RK.update_transformer_options({'y0_style_neg_synweight': guides['synweight_style_neg']})
RK.update_transformer_options({'y0_style_neg_mask': LG.mask_style_neg})
RK.update_transformer_options({'y0_style_neg_mask_edge': guides.get('mask_edge_style_neg')})
RK.update_transformer_options({'y0_style_method': guides['style_method']})
RK.update_transformer_options({'y0_style_tile_height': guides.get('style_tile_height')})
RK.update_transformer_options({'y0_style_tile_width': guides.get('style_tile_width')})
RK.update_transformer_options({'y0_style_tile_padding': guides.get('style_tile_padding')})
if AttnMask_neg is not None:
RK.update_transformer_options({'regional_conditioning_weight_neg': RegParam_neg.weights[step_sched]})
RK.update_transformer_options({'regional_conditioning_floor_neg': RegParam_neg.floors[step_sched]})
if AttnMask is not None:
RK.update_transformer_options({'regional_conditioning_weight': RegParam.weights[step_sched]})
RK.update_transformer_options({'regional_conditioning_floor': RegParam.floors[step_sched]})
elif regional_conditioning_weights is not None:
RK.extra_args['model_options']['transformer_options']['regional_conditioning_weight'] = regional_conditioning_weights[step_sched]
RK.extra_args['model_options']['transformer_options']['regional_conditioning_floor'] = regional_conditioning_floors [step_sched]
epsilon_scale = float(epsilon_scales [step_sched]) if epsilon_scales is not None else None
eta = etas [step_sched].to(x) if etas is not None else eta
eta_substep = etas_substep [step_sched].to(x) if etas_substep is not None else eta_substep
s_noise = s_noises [step_sched].to(x) if s_noises is not None else s_noise
s_noise_substep = s_noises_substep [step_sched].to(x) if s_noises_substep is not None else s_noise_substep
noise_scaling_eta = noise_scaling_etas [step_sched].to(x) if noise_scaling_etas is not None else noise_scaling_eta
noise_scaling_weight = noise_scaling_weights[step_sched].to(x) if noise_scaling_weights is not None else noise_scaling_weight
NS.set_sde_step(sigma, sigma_next, eta, overshoot, s_noise)
RK.set_coeff(rk_type, NS.h, c1, c2, c3, step, sigmas, NS.sigma_down)
NS.set_substep_list(RK)
if (noise_scaling_eta > 0 or noise_scaling_weight != 0) and noise_scaling_type != "model_d":
if noise_scaling_type == "model_alpha":
VP_OVERRIDE=True
else:
VP_OVERRIDE=None
if noise_scaling_type in {"sampler", "model", "model_alpha"}:
if noise_scaling_type == "model_alpha":
sigma_divisor = NS.sigma_max
else:
sigma_divisor = 1.0
if RK.multistep_stages > 0: # hardcoded s_[1] for multistep samplers, which are never multistage
lying_su, lying_sigma, lying_sd, lying_alpha_ratio = NS.get_sde_step(NS.s_[1]/sigma_divisor, NS.s_[0]/sigma_divisor, noise_scaling_eta, noise_scaling_mode, VP_OVERRIDE=VP_OVERRIDE)
else:
lying_su, lying_sigma, lying_sd, lying_alpha_ratio = NS.get_sde_step(sigma/sigma_divisor, NS.sigma_down/sigma_divisor, noise_scaling_eta, noise_scaling_mode, VP_OVERRIDE=VP_OVERRIDE)
for _ in range(noise_scaling_cycles-1):
lying_su, lying_sigma, lying_sd, lying_alpha_ratio = NS.get_sde_step(sigma/sigma_divisor, lying_sd/sigma_divisor, noise_scaling_eta, noise_scaling_mode, VP_OVERRIDE=VP_OVERRIDE)
lying_s_ = NS.get_substep_list(RK, sigma, RK.h_fn(lying_sd, lying_sigma))
lying_s_ = NS.s_ + noise_scaling_weight * (lying_s_ - NS.s_)
else:
lying_s_ = NS.s_.clone()
rk_swap_stages = 3 if rk_swap_type != "" else 0
data_prev_len = len(data_prev_)-1 if data_prev_ is not None else 3
recycled_stages = max(rk_swap_stages, RK.multistep_stages, RK.hybrid_stages, data_prev_len)
if INIT_SAMPLE_LOOP:
INIT_SAMPLE_LOOP = False
x_, data_, eps_, eps_prev_ = (torch.zeros(RK.rows+2, *x.shape, dtype=default_dtype, device=work_device) for _ in range(4))
if LG.ADAIN_NOISE_MODE == "smart":
z_ = torch.zeros(RK.rows+2, *x.shape, dtype=default_dtype, device=work_device)
z_[0] = noise_initial.clone()
RK.update_transformer_options({'z_' : z_})
if sampler_mode in {"unsample", "resample"}:
data_prev_ = state_info.get('data_prev_')
if data_prev_ is not None:
if x.shape == state_info['raw_x'].shape:
data_prev_ = state_info['data_prev_'].clone().to(dtype=default_dtype, device=work_device)
else:
data_prev_ = torch.stack([comfy.utils.bislerp(data_prev_item, x.shape[-1], x.shape[-2]) for data_prev_item in state_info['data_prev_']])
data_prev_ = data_prev_.to(x)
else:
data_prev_ = torch.zeros(4, *x.shape, dtype=default_dtype, device=work_device) # multistep max is 4m... so 4 needed
else:
data_prev_ = torch.zeros(4, *x.shape, dtype=default_dtype, device=work_device) # multistep max is 4m... so 4 needed
recycled_stages = len(data_prev_)-1
if RK.rows+2 > x_.shape[0]:
row_gap = RK.rows+2 - x_.shape[0]
x_gap_, data_gap_, eps_gap_, eps_prev_gap_ = (torch.zeros(row_gap, *x.shape, dtype=default_dtype, device=work_device) for _ in range(4))
x_ = torch.cat((x_ ,x_gap_) , dim=0)
data_ = torch.cat((data_ ,data_gap_) , dim=0)
eps_ = torch.cat((eps_ ,eps_gap_) , dim=0)
eps_prev_ = torch.cat((eps_prev_,eps_prev_gap_), dim=0)
if LG.ADAIN_NOISE_MODE == "smart":
z_gap_ = torch.zeros(row_gap, *x.shape, dtype=default_dtype, device=work_device)
z_ = torch.cat((z_ ,z_gap_) , dim=0)
RK.update_transformer_options({'z_' : z_})
sde_noise_t = None
if SDE_NOISE_EXTERNAL:
if step >= len(sde_noise):
SDE_NOISE_EXTERNAL=False
else:
sde_noise_t = sde_noise[step]
x_[0] = x.clone()
# PRENOISE METHOD HERE!
x_0 = x_[0].clone()
if EO("guide_step_cutoff") or EO("guide_step_min"):
x_0_orig = x_0.clone()
        # RECYCLE STAGES FOR MULTISTEP
        # Rebuild the epsilon history for multistep/hybrid tableaus from the cached
        # denoised outputs, re-anchored at the current sigma.
        if RK.multistep_stages > 0 or RK.hybrid_stages > 0:
            if SYNC_GUIDE_ACTIVE:
                # Sync-guide path: blend epsilons from the real latent trajectory (x)
                # and the noised guide trajectory (yt), weighted by the guide masks.
                lgw_mask_, lgw_mask_inv_ = LG.get_masks_for_step(step)
                lgw_mask_sync_, lgw_mask_sync_inv_ = LG.get_masks_for_step(step, lgw_type="sync")
                weight_mask = lgw_mask_+lgw_mask_inv_
                if LG.SYNC_SEPARATE:
                    sync_mask = lgw_mask_sync_+lgw_mask_sync_inv_
                else:
                    sync_mask = 1.
                # Noised guide latent at the current sigma (VE: additive noise;
                # otherwise flow-style interpolation between guide and noise).
                if VE_MODEL:
                    yt_0 = y0_bongflow + sigma * noise_bongflow
                else:
                    yt_0 = (1-sigma) * y0_bongflow + sigma * noise_bongflow
                for ms in range(min(len(data_prev_), len(eps_))):
                    eps_x = RK.get_epsilon_anchored(x_0, data_prev_x_[ms], sigma)
                    eps_y = RK.get_epsilon_anchored(yt_0, data_prev_y_[ms], sigma)
                    # NOTE(review): eps_x2y is computed identically to eps_y (same
                    # anchor yt_0 and same data_prev_y_), while data_prev_x2y_ is
                    # loaded from state_info but never used here -- possible
                    # copy/paste error; was data_prev_x2y_[ms] intended?
                    eps_x2y = RK.get_epsilon_anchored(yt_0, data_prev_y_[ms], sigma)
                    # Four variants: exponential vs. standard tableau x VE vs. flow
                    # parameterization; the "sync_x2y" extra option swaps the guide
                    # epsilon term for the cross-trajectory one.
                    if RK.EXPONENTIAL:
                        if VE_MODEL:
                            eps_[ms] = sync_mask * eps_x + (1-sync_mask) * eps_x2y + weight_mask * (-eps_y + sigma*(-noise_bongflow))
                            if EO("sync_x2y"):
                                eps_[ms] = sync_mask * eps_x + (1-sync_mask) * eps_x2y + weight_mask * (-eps_x2y + sigma*(-noise_bongflow))
                        else:
                            eps_[ms] = sync_mask * eps_x + (1-sync_mask) * eps_x2y + weight_mask * (-eps_y + sigma*(y0_bongflow-noise_bongflow))
                            if EO("sync_x2y"):
                                eps_[ms] = sync_mask * eps_x + (1-sync_mask) * eps_x2y + weight_mask * (-eps_x2y + sigma*(y0_bongflow-noise_bongflow))
                    else:
                        if VE_MODEL:
                            eps_[ms] = sync_mask * eps_x + (1-sync_mask) * eps_x2y + weight_mask * (-eps_y + (noise_bongflow))
                            if EO("sync_x2y"):
                                eps_[ms] = sync_mask * eps_x + (1-sync_mask) * eps_x2y + weight_mask * (-eps_x2y + (noise_bongflow))
                        else:
                            eps_[ms] = sync_mask * eps_x + (1-sync_mask) * eps_x2y + weight_mask * (-eps_y + (noise_bongflow-y0_bongflow))
                            if EO("sync_x2y"):
                                eps_[ms] = sync_mask * eps_x + (1-sync_mask) * eps_x2y + weight_mask * (-eps_x2y + (noise_bongflow-y0_bongflow))

                    #if RK.EXPONENTIAL:
                    #    if VE_MODEL:
                    #        eps_[ms] = sync_mask * weight_mask_inv * (eps_x - weight_mask * eps_y)   +   weight_mask * sigma*(-noise_bongflow)
                    #    else:
                    #        #eps_[ms] = (lgw_mask_sync_+lgw_mask_sync_inv_) * (1-(lgw_mask_+lgw_mask_inv_)) * (eps_x - (lgw_mask_+lgw_mask_inv_) * eps_y)   +   (lgw_mask_+lgw_mask_inv_) * sigma*(y0_bongflow-noise_bongflow)
                    #        eps_[ms] = sync_mask * weight_mask_inv * (eps_x - weight_mask * eps_y)   +   weight_mask * sigma*(y0_bongflow-noise_bongflow)
                    #else:
                    #    if VE_MODEL:
                    #        eps_[ms] = sync_mask * weight_mask_inv * (eps_x - weight_mask * eps_y)   +   weight_mask * (noise_bongflow)
                    #    else:
                    #        #eps_[ms] = (lgw_mask_sync_+lgw_mask_sync_inv_) * (1-(lgw_mask_+lgw_mask_inv_)) * (eps_x - (lgw_mask_+lgw_mask_inv_) * eps_y)   +   (lgw_mask_+lgw_mask_inv_) * (noise_bongflow-y0_bongflow)
                    #        eps_[ms] = sync_mask * weight_mask_inv * (eps_x - weight_mask * eps_y)   +   weight_mask * (noise_bongflow-y0_bongflow)

                eps_prev_ = eps_.clone()
            else:
                # Standard path: re-anchor each cached denoised output at the current sigma.
                for ms in range(min(len(data_prev_), len(eps_))):
                    eps_[ms] = RK.get_epsilon_anchored(x_0, data_prev_[ms], sigma)
                eps_prev_ = eps_.clone()
# INITIALIZE IMPLICIT SAMPLING
if RK.IMPLICIT:
# Seed the stage buffers (x_, eps_, data_) for the implicit tableau.
x_, eps_, data_ = init_implicit_sampling(RK, x_0, x_, eps_, eps_prev_, data_, eps, denoised, denoised_prev2, step, sigmas, NS.h, NS.s_, EO, SYNC_GUIDE_ACTIVE)
# Total implicit iterations = (full iterations) x (diagonal iterations), both counted from 1.
implicit_steps_total = (implicit_steps_full + 1) * (implicit_steps_diag + 1)
# BEGIN FULLY IMPLICIT LOOP
cossim_counter = 0
adaptive_lgw = LG.lgw.clone()
full_iter = 0
while full_iter < implicit_steps_full+1:
if RK.IMPLICIT:
# Newton refinement of the stage values before this full iteration ("init" phase).
x_, eps_ = RK.newton_iter(x_0, x_, eps_, eps_prev_, data_, NS.s_, 0, NS.h, sigmas, step, "init", SYNC_GUIDE_ACTIVE)
# PREPARE FULLY PSEUDOIMPLICIT GUIDES
if step > 0 or not SKIP_PSEUDO:
if full_iter > 0 and EO("fully_implicit_reupdate_x"):
# Re-noise x_0/x_[0] back to their sigmas from the current step result.
x_[0] = NS.sigma_from_to(x_0, x, sigma, sigma_next, NS.s_[0])
x_0 = NS.sigma_from_to(x_0, x, sigma, sigma_next, sigma)
if EO("fully_pseudo_init") and full_iter == 0:
# Temporarily promote the guide mode to its "fully_" variant for the first iteration.
guide_mode_tmp = LG.guide_mode
LG.guide_mode = "fully_" + LG.guide_mode
x_0, x_, eps_ = LG.prepare_fully_pseudoimplicit_guides_substep(x_0, x_, eps_, eps_prev_, data_, denoised_prev, 0, step, step_sched, sigmas, eta_substep, overshoot_substep, s_noise_substep, \
NS, RK, pseudoimplicit_row_weights, pseudoimplicit_step_weights, full_iter, BONGMATH)
if EO("fully_pseudo_init") and full_iter == 0:
# Restore the original guide mode after prep.
LG.guide_mode = guide_mode_tmp
# TABLEAU LOOP
# Iterate the Runge-Kutta rows; each row may run multiple diagonally-implicit iterations.
for row in range(RK.rows - RK.multistep_stages - RK.row_offset + 1):
diag_iter = 0
while diag_iter < implicit_steps_diag+1:
if noise_sampler_type_substep == "brownian" and (full_iter > 0 or diag_iter > 0):
# Brownian substep noise is only injected once per substep across implicit iterations.
eta_substep = 0.
NS.set_sde_substep(row, RK.multistep_stages, eta_substep, overshoot_substep, s_noise_substep, full_iter, diag_iter, implicit_steps_full, implicit_steps_diag)
# PRENOISE METHOD HERE!
# A-TABLEAU
if row < RK.rows:
# PREPARE PSEUDOIMPLICIT GUIDES
if step > 0 or not SKIP_PSEUDO:
x_0, x_, eps_, x_row_pseudoimplicit, sub_sigma_pseudoimplicit = LG.process_pseudoimplicit_guides_substep(x_0, x_, eps_, eps_prev_, data_, denoised_prev, row, step, step_sched, sigmas, NS, RK, \
pseudoimplicit_row_weights, pseudoimplicit_step_weights, full_iter, BONGMATH)
# PREPARE MODEL CALL
# The branches below select the latent (x_tmp) and sigma (s_tmp) the model is called at.
if LG.guide_mode in GUIDE_MODE_NAMES_PSEUDOIMPLICIT and (step > 0 or not SKIP_PSEUDO) and (LG.lgw[step_sched] > 0 or LG.lgw_inv[step_sched] > 0) and x_row_pseudoimplicit is not None:
x_tmp = x_row_pseudoimplicit
s_tmp = sub_sigma_pseudoimplicit
# Fully implicit iteration (explicit only)                   # or... Fully implicit iteration (implicit only... not standard)
elif (full_iter > 0 and RK.row_offset == 1 and row == 0) or (full_iter > 0 and RK.row_offset == 0 and row == 0 and EO("fully_implicit_update_x")):
if EO("fully_explicit_pogostick_eta"):
# "Pogostick": renoise x from sigma_next back up, then call at sigma.
super_alpha_ratio, super_sigma_down, super_sigma_up = NS.get_sde_coeff(sigma, sigma_next, None, eta)
x = super_alpha_ratio * x + super_sigma_up * NS.noise_sampler(sigma=sigma_next, sigma_next=sigma)
x_tmp = x
s_tmp = sigma
elif EO("enable_fully_explicit_lagrange_rebound1"):
# Lagrange-interpolate prior substeps back to the first node (variant 1).
substeps_prev = len(RK.C[:-1])
x_tmp = lagrange_interpolation(RK.C[1:-1], x_[1:substeps_prev], RK.C[0]).squeeze(0)
elif EO("enable_fully_explicit_lagrange_rebound2"):
# Variant 2: include the final node in the interpolation stencil.
substeps_prev = len(RK.C[:-1])
x_tmp = lagrange_interpolation(RK.C[1:], x_[1:substeps_prev+1], RK.C[0]).squeeze(0)
elif EO("enable_fully_explicit_rebound1"): # 17630, faded dots, just crap
eps_tmp, denoised_tmp = RK(x, sigma_next, x, sigma_next)
eps_tmp = (x - denoised_tmp) / sigma_next
x_[0] = denoised_tmp + sigma * eps_tmp
x_0 = x_[0]
x_tmp = x_[0]
s_tmp = sigma
elif implicit_type == "rebound": # TODO: ADAPT REBOUND IMPLICIT TO WORK WITH FLOW GUIDE MODE
# Rebound: denoise at sigma_next, then renoise back up to sigma with the same eps.
eps_tmp, denoised_tmp = RK(x, sigma_next, x_0, sigma)
eps_tmp = (x - denoised_tmp) / sigma_next
x = denoised_tmp + sigma * eps_tmp
x_tmp = x
s_tmp = sigma
elif implicit_type == "retro-eta" and (NS.sub_sigma_up > 0 or NS.sub_sigma_up_eta > 0):
x_tmp = NS.sigma_from_to(x_0, x, sigma, sigma_next, sigma)
s_tmp = sigma
elif implicit_type == "bongmath" and (NS.sub_sigma_up > 0 or NS.sub_sigma_up_eta > 0):
if BONGMATH:
x_tmp = x_[row]
s_tmp = NS.s_[row]
else:
x_tmp = NS.sigma_from_to(x_0, x, sigma, sigma_next, sigma)
s_tmp = sigma
else:
x_tmp = x
s_tmp = sigma_next
# All others
else:
# three potential toggle options: force rebound/model call, force PC style, force pogostick style
if diag_iter > 0: # Diagonally implicit iteration (explicit or implicit)
if EO("diag_explicit_pogostick_eta"):
super_alpha_ratio, super_sigma_down, super_sigma_up = NS.get_sde_coeff(NS.s_[row], NS.s_[row+RK.row_offset+RK.multistep_stages], None, eta)
x_[row+RK.row_offset] = super_alpha_ratio * x_[row+RK.row_offset] + super_sigma_up * NS.noise_sampler(sigma=NS.s_[row+RK.row_offset+RK.multistep_stages], sigma_next=NS.s_[row])
x_tmp = x_[row+RK.row_offset]
s_tmp = sigma
elif implicit_type_substeps == "rebound":
# Substep rebound: extra model call, substep update, overshoot rebound, then renoise to s_[row].
eps_[row], data_[row] = RK(x_[row+RK.row_offset], NS.s_[row+RK.row_offset+RK.multistep_stages], x_0, sigma)
x_ = RK.update_substep(x_0, x_, eps_, eps_prev_, row, RK.row_offset, NS.h_new, NS.h_new_orig)
x_[row+RK.row_offset] = NS.rebound_overshoot_substep(x_0, x_[row+RK.row_offset])
x_[row+RK.row_offset] = NS.sigma_from_to(x_0, x_[row+RK.row_offset], sigma, NS.s_[row+RK.row_offset+RK.multistep_stages], NS.s_[row])
x_tmp = x_[row+RK.row_offset]
s_tmp = NS.s_[row]
elif implicit_type_substeps == "retro-eta" and (NS.sub_sigma_up > 0 or NS.sub_sigma_up_eta > 0):
x_tmp = NS.sigma_from_to(x_0, x_[row+RK.row_offset], sigma, NS.s_[row+RK.row_offset+RK.multistep_stages], NS.s_[row])
s_tmp = NS.s_[row]
elif implicit_type_substeps == "bongmath" and (NS.sub_sigma_up > 0 or NS.sub_sigma_up_eta > 0) and not EO("disable_diag_explicit_bongmath_rebound"):
if BONGMATH:
x_tmp = x_[row]
s_tmp = NS.s_[row]
else:
x_tmp = NS.sigma_from_to(x_0, x_[row+RK.row_offset], sigma, NS.s_[row+RK.row_offset+RK.multistep_stages], NS.s_[row])
s_tmp = NS.s_[row]
else:
x_tmp = x_[row+RK.row_offset]
s_tmp = NS.s_[row+RK.row_offset+RK.multistep_stages]
else:
# First diagonal iteration: call at the row's own node.
x_tmp = x_[row]
s_tmp = NS.sub_sigma
if RK.IMPLICIT:
# Implicit path: optionally preprocess guides, advance the substep, and
# (if sync guide is active) rebuild the stage epsilons against the guide trajectory.
if not EO("disable_implicit_guide_preproc"):
eps_, x_ = LG.process_guides_substep(x_0, x_, eps_, data_, row, step_sched, sigma, sigma_next, NS.sigma_down, NS.s_, epsilon_scale, RK)
eps_prev_, x_ = LG.process_guides_substep(x_0, x_, eps_prev_, data_, row, step_sched, sigma, sigma_next, NS.sigma_down, NS.s_, epsilon_scale, RK)
if row == 0 and (EO("implicit_lagrange_init") or EO("radaucycle")):
pass
else:
# Standard implicit substep update: weighted sum of stage epsilons (zum).
x_[row+RK.row_offset] = x_0 + NS.h_new * RK.zum(row+RK.row_offset, eps_, eps_prev_)
x_[row+RK.row_offset] = NS.rebound_overshoot_substep(x_0, x_[row+RK.row_offset])
if row > 0:
# NOTE(review): LG.lgw[step+1] / LG.lgw_inv[step+1] may index one past the
# schedule on the final step — confirm the lgw tensors are padded.
if not LG.guide_mode.startswith("flow") or (LG.lgw[step_sched] == 0 and LG.lgw[step+1] == 0 and LG.lgw_inv[step_sched] == 0 and LG.lgw_inv[step+1] == 0):
# Swap in fresh noise for the substep (masked), keeping the guide region.
x_row_tmp = NS.swap_noise_substep(x_0, x_[row+RK.row_offset], mask=sde_mask, guide=LG.y0)
if LG.ADAIN_NOISE_MODE == "smart": #_smartnoise_implicit"):
# Estimate the implied noise z_ for the transformer's AdaIN-style injection.
data_next = denoised + NS.h_new * RK.zum(row+RK.row_offset+RK.multistep_stages, data_, data_prev_)
if VE_MODEL:
z_[row+RK.row_offset] = (x_row_tmp - data_next) / s_tmp
else:
z_[row+RK.row_offset] = (x_row_tmp - (NS.sigma_max-s_tmp)*data_next) / s_tmp
RK.update_transformer_options({'z_' : z_})
if SYNC_GUIDE_ACTIVE:
# Propagate the noise swap into the shared bongflow noise so x/y stay in sync.
noise_bongflow_new = (x_row_tmp - x_[row+RK.row_offset]) / s_tmp + noise_bongflow
yt_[row+RK.row_offset] += s_tmp * (noise_bongflow_new - noise_bongflow)
x_0 += sigma * (noise_bongflow_new - noise_bongflow)
if not EO("disable_i_bong"):
for i_bong in range(len(NS.s_)):
x_[i_bong] += NS.s_[i_bong] * (noise_bongflow_new - noise_bongflow)
noise_bongflow = noise_bongflow_new
x_[row+RK.row_offset] = x_row_tmp
if SYNC_GUIDE_ACTIVE:
# Rebuild the full guide trajectory yt_ at all stage sigmas from y0 + noise.
if VE_MODEL:
yt_[:NS.s_.shape[0], 0] = y0_bongflow + NS.s_.view(-1, *[1]*(x.ndim-1)) * (noise_bongflow)
yt_0 = y0_bongflow + sigma * (noise_bongflow)
else:
yt_[:NS.s_.shape[0], 0] = y0_bongflow + NS.s_.view(-1, *[1]*(x.ndim-1)) * (noise_bongflow - y0_bongflow)
yt_0 = y0_bongflow + sigma * (noise_bongflow - y0_bongflow)
if RK.EXPONENTIAL:
eps_y_ = data_y_ - yt_0 # yt_       # watch out for fuckery with size of tableau being smaller later in a chained sampler
else:
if BONGMATH:
eps_y_[:NS.s_.shape[0]] = (yt_[:NS.s_.shape[0]] - data_y_[:NS.s_.shape[0]]) / NS.s_.view(-1,*[1]*(x_.ndim-1))
else:
eps_y_[:NS.s_.shape[0]] = (yt_0.repeat(NS.s_.shape[0], *[1]*(x_.ndim-1)) - data_y_[:NS.s_.shape[0]]) / sigma # calc exact to c0 node
if not BONGMATH:
# Recompute x-trajectory epsilons anchored at x_0 (bongmath handles this itself).
if RK.EXPONENTIAL:
eps_x_ = data_x_ - x_0
else:
eps_x_ = (x_0 - data_x_) / sigma
weight_mask = lgw_mask_+lgw_mask_inv_
if LG.SYNC_SEPARATE:
sync_mask = lgw_mask_sync_+lgw_mask_sync_inv_
else:
sync_mask = 1.
# Same blending as the step-level sync recombination above, per stage.
# The EO("sync_x2y") assignment overwrites the one just computed.
for ms in range(len(eps_)):
if RK.EXPONENTIAL:
if VE_MODEL: # ZERO IS THIS # ONE IS THIS
eps_[ms] = sync_mask * eps_x_[ms] + (1-sync_mask) * eps_x2y_[ms] + weight_mask * (-eps_y_[ms] + sigma*(-noise_bongflow))
if EO("sync_x2y"):
eps_[ms] = sync_mask * eps_x_[ms] + (1-sync_mask) * eps_x2y_[ms] + weight_mask * (-eps_x2y_[ms] + sigma*(-noise_bongflow))
else:
eps_[ms] = sync_mask * eps_x_[ms] + (1-sync_mask) * eps_x2y_[ms] + weight_mask * (-eps_y_[ms] + sigma*(y0_bongflow-noise_bongflow))
if EO("sync_x2y"):
eps_[ms] = sync_mask * eps_x_[ms] + (1-sync_mask) * eps_x2y_[ms] + weight_mask * (-eps_x2y_[ms] + sigma*(y0_bongflow-noise_bongflow))
else:
if VE_MODEL:
eps_[ms] = sync_mask * eps_x_[ms] + (1-sync_mask) * eps_x2y_[ms] + weight_mask * (-eps_y_[ms] + (noise_bongflow))
if EO("sync_x2y"):
eps_[ms] = sync_mask * eps_x_[ms] + (1-sync_mask) * eps_x2y_[ms] + weight_mask * (-eps_x2y_[ms] + (noise_bongflow))
else:
eps_[ms] = sync_mask * eps_x_[ms] + (1-sync_mask) * eps_x2y_[ms] + weight_mask * (-eps_y_[ms] + (noise_bongflow-y0_bongflow))
if EO("sync_x2y"):
eps_[ms] = sync_mask * eps_x_[ms] + (1-sync_mask) * eps_x2y_[ms] + weight_mask * (-eps_x2y_[ms] + (noise_bongflow-y0_bongflow))
# Pre-model-call bongmath refinement (skipped near sigma=0 where it is unstable).
if BONGMATH and step < sigmas.shape[0]-1 and sigma > 0.03 and not EO("disable_implicit_prebong"):
BONGMATH_Y = SYNC_GUIDE_ACTIVE
x_0, x_, eps_ = RK.bong_iter(x_0, x_, eps_, eps_prev_, data_, sigma, NS.s_, row, RK.row_offset, NS.h, step, step_sched,
BONGMATH_Y, y0_bongflow, noise_bongflow, eps_x_, eps_y_, data_x_, data_y_, LG)  # TRY WITH h_new ??
# BONGMATH_Y, y0_bongflow, noise_bongflow, eps_x_, eps_y_, eps_x2y_, data_x_, LG)  # TRY WITH h_new ??
#if EO("eps_adain_smartnoise_bongmath"):
if LG.ADAIN_NOISE_MODE == "smart":
# Refresh z_ for all stages after the bong iteration.
if VE_MODEL:
z_[:NS.s_.shape[0], ...] = (x_ - data_)[:NS.s_.shape[0], ...] / NS.s_.view(-1,*[1]*(x_.ndim-1))
else:
z_[:NS.s_.shape[0], ...] = (x_[:NS.s_.shape[0], ...] - (NS.sigma_max - NS.s_.view(-1,*[1]*(x_.ndim-1)))*data_[:NS.s_.shape[0], ...])[:NS.s_.shape[0], ...] / NS.s_.view(-1,*[1]*(x_.ndim-1))
RK.update_transformer_options({'z_' : z_})
x_tmp = x_[row+RK.row_offset]
lying_eps_row_factor = 1.0
# MODEL CALL MODEL CALL MODEL CALL MODEL CALL MODEL CALL MODEL CALL MODEL CALL MODEL CALL MODEL CALL MODEL CALL MODEL CALL MODEL CALL MODEL CALL MODEL CALL MODEL CALL MODEL CALL MODEL CALL MODEL CALL
if RK.IMPLICIT and row == 0 and (EO("implicit_lazy_recycle_first_model_call_at_start") or EO("radaucycle") or RK.C[0] == 0.0):
# First node at c=0 can reuse the previous step's model output.
pass
else:
if s_tmp == 0:
break
x_, eps_ = RK.newton_iter(x_0, x_, eps_, eps_prev_, data_, NS.s_, row, NS.h, sigmas, step, "pre", SYNC_GUIDE_ACTIVE) # will this do anything? not x_tmp
# DETAIL BOOST
# "Lying" sigmas: call the model at a slightly altered sigma to boost detail.
if noise_scaling_type == "model_alpha" and noise_scaling_weight != 0 and noise_scaling_eta > 0:
s_tmp = s_tmp + noise_scaling_weight * (s_tmp * lying_alpha_ratio - s_tmp)
if noise_scaling_type == "model" and noise_scaling_weight != 0 and noise_scaling_eta > 0:
s_tmp = lying_s_[row]
if RK.multistep_stages > 0:
s_tmp = lying_sd
# SYNC GUIDE ---------------------------
# Runs the model on both the sampling latent (xt) and a parallel guide latent (yt),
# then blends their epsilons/predictions with the guide weight masks.
if LG.guide_mode.startswith("sync") and (LG.lgw[step_sched] == 0 and LG.lgw_inv[step_sched] == 0 and LG.lgw_sync[step_sched] == 0 and LG.lgw_sync_inv[step_sched] == 0):
# All sync weights zero this step: nothing to cache.
data_cached = None
elif SYNC_GUIDE_ACTIVE:
# Fetch all per-step weight masks (main, sync, drift, lure; each with inverse).
lgw_mask_, lgw_mask_inv_ = LG.get_masks_for_step(step_sched)
lgw_mask_sync_, lgw_mask_sync_inv_ = LG.get_masks_for_step(step_sched, lgw_type="sync")
lgw_mask_drift_x_, lgw_mask_drift_x_inv_ = LG.get_masks_for_step(step_sched, lgw_type="drift_x")
lgw_mask_drift_y_, lgw_mask_drift_y_inv_ = LG.get_masks_for_step(step_sched, lgw_type="drift_y")
lgw_mask_lure_x_, lgw_mask_lure_x_inv_ = LG.get_masks_for_step(step_sched, lgw_type="lure_x")
lgw_mask_lure_y_, lgw_mask_lure_y_inv_ = LG.get_masks_for_step(step_sched, lgw_type="lure_y")
weight_mask = lgw_mask_ + lgw_mask_inv_
sync_mask = lgw_mask_sync_ + lgw_mask_sync_inv_
drift_x_mask = lgw_mask_drift_x_ + lgw_mask_drift_x_inv_
drift_y_mask = lgw_mask_drift_y_ + lgw_mask_drift_y_inv_
lure_x_mask = lgw_mask_lure_x_ + lgw_mask_lure_x_inv_
lure_y_mask = lgw_mask_lure_y_ + lgw_mask_lure_y_inv_
if eps_x_ is None:
# Lazy allocation of the per-stage x/y epsilon and prediction buffers
# (RK.rows+2 to leave room past the tableau rows).
eps_x_      = torch.zeros(RK.rows+2, *x.shape, dtype=default_dtype, device=work_device)
data_x_     = torch.zeros(RK.rows+2, *x.shape, dtype=default_dtype, device=work_device)
eps_y2x_    = torch.zeros(RK.rows+2, *x.shape, dtype=default_dtype, device=work_device)
eps_x2y_    = torch.zeros(RK.rows+2, *x.shape, dtype=default_dtype, device=work_device)
eps_yt_     = torch.zeros(RK.rows+2, *x.shape, dtype=default_dtype, device=work_device)
eps_y_      = torch.zeros(RK.rows+2, *x.shape, dtype=default_dtype, device=work_device)
eps_prev_y_ = torch.zeros(RK.rows+2, *x.shape, dtype=default_dtype, device=work_device)
data_y_     = torch.zeros(RK.rows+2, *x.shape, dtype=default_dtype, device=work_device)
yt_         = torch.zeros(RK.rows+2, *x.shape, dtype=default_dtype, device=work_device)
RUN_X_0_COPY = False
if noise_bongflow is None:
# First activation: initialize the shared bongflow noise and guide latents.
RUN_X_0_COPY = True
data_prev_x_ = torch.zeros(4, *x.shape, dtype=default_dtype, device=work_device)
data_prev_y_ = torch.zeros(4, *x.shape, dtype=default_dtype, device=work_device)
noise_bongflow = normalize_zscore(NS.noise_sampler(sigma=sigma, sigma_next=NS.sigma_min), channelwise=True, inplace=True)
# Warm-up model call at unit sigma (s/s == 1); output discarded, but it lets the
# model stash style guides on the diffusion_model — presumably intentional, confirm.
_, _ = RK(noise_bongflow, s_tmp/s_tmp, noise_bongflow, sigma/sigma, transformer_options={'latent_type': 'xt'})
if RK.extra_args['model_options']['transformer_options'].get('y0_standard_guide') is not None:
if hasattr(model.inner_model.inner_model.diffusion_model, "y0_standard_guide"):
# Pull the guide latent computed inside the model, then clear it.
LG.y0 = y0_standard_guide = model.inner_model.inner_model.diffusion_model.y0_standard_guide.clone()
del model.inner_model.inner_model.diffusion_model.y0_standard_guide
RK.extra_args['model_options']['transformer_options']['y0_standard_guide'] = None
if RK.extra_args['model_options']['transformer_options'].get('y0_inv_standard_guide') is not None:
if hasattr(model.inner_model.inner_model.diffusion_model, "y0_inv_standard_guide"):
LG.y0_inv = y0_inv_standard_guide = model.inner_model.inner_model.diffusion_model.y0_inv_standard_guide.clone()   #  RK.extra_args['model_options']['transformer_options'].get('y0_standard_guide')
del model.inner_model.inner_model.diffusion_model.y0_inv_standard_guide
RK.extra_args['model_options']['transformer_options']['y0_inv_standard_guide'] = None
# Composite guide: masked blend of the main and inverse guide latents.
y0_bongflow = LG.HAS_LATENT_GUIDE * LG.mask * LG.y0 + LG.HAS_LATENT_GUIDE_INV * LG.mask_inv * LG.y0_inv #LG.y0.clone()
if VE_MODEL:
yt_0 = y0_bongflow + sigma * noise_bongflow
yt = y0_bongflow + s_tmp * noise_bongflow
else:
yt_0 = (1-sigma) * y0_bongflow + sigma * noise_bongflow
yt = (1-s_tmp) * y0_bongflow + s_tmp * noise_bongflow
yt_[row] = yt
if RUN_X_0_COPY:
# Start the sampling trajectory from the noised guide.
x_0 = yt_0.clone()
x_tmp = x_[row] = yt.clone()
else:
# Subsequent substeps: drift the guide toward recent predictions.
y0_bongflow_orig = y0_bongflow.clone() if y0_bongflow_orig is None else y0_bongflow_orig
y0_bongflow = y0_bongflow   +   LG.drift_x_data  * drift_x_mask * (data_x          - y0_bongflow)  \
+   LG.drift_x_sync  * drift_x_mask * (data_barf       - y0_bongflow)  \
+   LG.drift_y_data  * drift_y_mask * (data_y          - y0_bongflow)  \
+   LG.drift_y_sync  * drift_y_mask * (data_barf_y     - y0_bongflow)  \
+   LG.drift_y_guide * drift_y_mask * (y0_bongflow_orig - y0_bongflow)
if torch.norm(y0_bongflow_orig - y0_bongflow) != 0 and EO("enable_y0_bongflow_update"):
RK.update_transformer_options({'y0_style_pos': y0_bongflow.clone()})
if not EO("skip_yt"):
# Re-noise the (possibly drifted) guide to the current sigmas.
yt_0 = RK.get_x(y0_bongflow, noise_bongflow, sigma)
yt = RK.get_x(y0_bongflow, noise_bongflow, s_tmp)
yt_[row] = yt
# Speed mode: when the weights are pure 0/1 complements, the y model call is a
# no-op and eps_y can be computed analytically from the guide itself.
if ((LG.lgw[step_sched].item() in {1,0} and LG.lgw_inv[step_sched].item() in {1,0} and LG.lgw[step_sched] == 1-LG.lgw_sync[step_sched] and LG.lgw_inv[step_sched] == 1-LG.lgw_sync_inv[step_sched]) or EO("sync_speed_mode")) and not EO("disable_sync_speed_mode"):
data_y = y0_bongflow.clone()
eps_y = RK.get_eps(yt_0, yt_[row], data_y, sigma, s_tmp)
else:
eps_y, data_y = RK(yt_[row], s_tmp, yt_0, sigma, transformer_options={'latent_type': 'yt'})
eps_x, data_x = RK(x_tmp, s_tmp, x_0, sigma, transformer_options={'latent_type': 'xt', 'row': row, "x_tmp": x_tmp})
#if hasattr(model.inner_model.inner_model.diffusion_model, "eps_out"):
# LURE: swap each trajectory's prediction toward the other's and re-call the
# model, pulling x and y toward one another. Order is configurable.
for sync_lure_iter in range(LG.sync_lure_iter):
if LG.sync_lure_sequence == "x -> y":
if lure_x_mask.abs().sum() > 0:
x_tmp = LG.swap_data(x_tmp, data_x, data_y, s_tmp, lure_x_mask)
eps_x_lure, data_x_lure = RK(x_tmp, s_tmp, x_0, sigma, transformer_options={'latent_type': 'xt'})
eps_x  = eps_x  + lure_x_mask * (eps_x_lure  - eps_x)
data_x = data_x + lure_x_mask * (data_x_lure - data_x)
if lure_y_mask.abs().sum() > 0:
y_tmp = yt_[row].clone()
y_tmp = LG.swap_data(y_tmp, data_y, data_x, s_tmp, lure_y_mask)
eps_y_lure, data_y_lure = RK(y_tmp, s_tmp, yt_0, sigma, transformer_options={'latent_type': 'yt'})
eps_y  = eps_y  + lure_y_mask * (eps_y_lure  - eps_y)
data_y = data_y + lure_y_mask * (data_y_lure - data_y)
elif LG.sync_lure_sequence == "y -> x":
if lure_y_mask.abs().sum() > 0:
y_tmp = yt_[row].clone()
y_tmp = LG.swap_data(y_tmp, data_y, data_x, s_tmp, lure_y_mask)
eps_y_lure, data_y_lure = RK(y_tmp, s_tmp, yt_0, sigma, transformer_options={'latent_type': 'yt'})
eps_y  = eps_y  + lure_y_mask * (eps_y_lure  - eps_y)
data_y = data_y + lure_y_mask * (data_y_lure - data_y)
if lure_x_mask.abs().sum() > 0:
x_tmp = LG.swap_data(x_tmp, data_x, data_y, s_tmp, lure_x_mask)
eps_x_lure, data_x_lure = RK(x_tmp, s_tmp, x_0, sigma, transformer_options={'latent_type': 'xt'})
eps_x  = eps_x  + lure_x_mask * (eps_x_lure  - eps_x)
data_x = data_x + lure_x_mask * (data_x_lure - data_x)
elif LG.sync_lure_sequence == "xy -> xy":
# Symmetric variant: both swaps use the pre-lure predictions.
data_x_orig, data_y_orig = data_x.clone(), data_y.clone()
if lure_x_mask.abs().sum() > 0:
x_tmp = LG.swap_data(x_tmp, data_x_orig, data_y_orig, s_tmp, lure_x_mask)
eps_x_lure, data_x_lure = RK(x_tmp, s_tmp, x_0, sigma, transformer_options={'latent_type': 'xt'})
eps_x  = eps_x  + lure_x_mask * (eps_x_lure  - eps_x)
data_x = data_x + lure_x_mask * (data_x_lure - data_x)
if lure_y_mask.abs().sum() > 0:
y_tmp = yt_[row].clone()
y_tmp = LG.swap_data(y_tmp, data_y_orig, data_x_orig, s_tmp, lure_y_mask)
eps_y_lure, data_y_lure = RK(y_tmp, s_tmp, yt_0, sigma, transformer_options={'latent_type': 'yt'})
eps_y  = eps_y  + lure_y_mask * (eps_y_lure  - eps_y)
data_y = data_y + lure_y_mask * (data_y_lure - data_y)
# Optional collinear/orthogonal projections between eps_x and eps_y.
if EO("sync_proj_y"):
d_collinear_d_lerp = get_collinear(eps_x, eps_y)
d_lerp_ortho_d     = get_orthogonal(eps_y, eps_x)
eps_y = d_collinear_d_lerp + d_lerp_ortho_d
if EO("sync_proj_y2"):
d_collinear_d_lerp = get_collinear(eps_y, eps_x)
d_lerp_ortho_d     = get_orthogonal(eps_x, eps_y)
eps_y = d_collinear_d_lerp + d_lerp_ortho_d
if EO("sync_proj_x"):
d_collinear_d_lerp = get_collinear(eps_y, eps_x)
d_lerp_ortho_d     = get_orthogonal(eps_x, eps_y)
eps_x = d_collinear_d_lerp + d_lerp_ortho_d
if EO("sync_proj_x2"):
d_collinear_d_lerp = get_collinear(eps_x, eps_y)
d_lerp_ortho_d     = get_orthogonal(eps_y, eps_x)
eps_x = d_collinear_d_lerp + d_lerp_ortho_d
# Cross epsilons: x-trajectory anchor with y prediction.
eps_x2y = RK.get_eps(x_0, x_[row], data_y, sigma, s_tmp)
eps_x2y_[row] = eps_x2y
# NOTE(review): eps_y2x is computed identically to eps_x2y (same x_0/x_[row]/data_y
# arguments). By the "y2x" naming it may have been intended to use the y trajectory
# with data_x (e.g. RK.get_eps(yt_0, yt_[row], data_x, sigma, s_tmp)) — confirm.
eps_y2x = RK.get_eps(x_0, x_[row], data_y, sigma, s_tmp)
eps_y2x_[row] = eps_y2x
# Blend into the stage epsilon/prediction. EO("sync_x2y") overwrites the default.
if RK.EXPONENTIAL:
if VE_MODEL: # ZERO IS THIS # ONE IS THIS
eps_[row] = sync_mask * eps_x + (1-sync_mask) * eps_x2y + weight_mask * (-eps_y + sigma*(-noise_bongflow))
if EO("sync_x2y"):
eps_[row] = sync_mask * eps_x + (1-sync_mask) * eps_x2y + weight_mask * (-eps_x2y + sigma*(-noise_bongflow))
else:
eps_[row] = sync_mask * eps_x + (1-sync_mask) * eps_x2y + weight_mask * (-eps_y + sigma*(y0_bongflow-noise_bongflow))    #+ lure_x_mask * sigma*(data_y - data_x)
if EO("sync_x2y"):
# NOTE(review): this is the only branch using "- (1-sync_mask) * eps_x2y"
# (minus) where all siblings use "+" — possible typo, confirm.
eps_[row] = sync_mask * eps_x - (1-sync_mask) * eps_x2y + weight_mask * (-eps_x2y + sigma*(y0_bongflow-noise_bongflow))
eps_yt_[row] = sync_mask * eps_y + (1-sync_mask) * eps_y2x + weight_mask * (-eps_x + sigma*(y0_bongflow-noise_bongflow)) # differentiate guide as well toward the x pred?
else:
if VE_MODEL:
eps_[row] = sync_mask * eps_x + (1-sync_mask) * eps_x2y + weight_mask * (noise_bongflow - eps_y)
if EO("sync_x2y"):
eps_[row] = sync_mask * eps_x + (1-sync_mask) * eps_x2y + weight_mask * (noise_bongflow - eps_x2y)
else:
eps_[row] = sync_mask * eps_x + (1-sync_mask) * eps_x2y + weight_mask * (noise_bongflow - eps_y - y0_bongflow)
if EO("sync_x2y"):
eps_[row] = sync_mask * eps_x + (1-sync_mask) * eps_x2y + weight_mask * (noise_bongflow - eps_x2y - y0_bongflow)
eps_yt_[row] = sync_mask * eps_y + (1-sync_mask) * eps_y2x + weight_mask * (noise_bongflow - eps_x - y0_bongflow) # differentiate guide as well toward the x pred?
if VE_MODEL:
data_[row] = x_0 + sync_mask * NS.h * eps_x + (1-sync_mask) * NS.h * eps_x2y - weight_mask * (sigma*(eps_y + noise_bongflow)) # - lure_x_mask * (sigma*(eps_y + eps_x))
data_barf_y = yt_0 + sync_mask * NS.h * eps_y + (1-sync_mask) * NS.h * eps_y2x - weight_mask * (sigma*(eps_x + noise_bongflow))
if EO("sync_x2y"):
data_[row] = x_0 + sync_mask * NS.h * eps_x + (1-sync_mask) * NS.h * eps_x2y - weight_mask * (sigma*(eps_x2y + noise_bongflow))
else:
data_[row] = x_0 + sync_mask * NS.h * eps_x + (1-sync_mask) * NS.h * eps_x2y - weight_mask * (NS.h * eps_y + sigma*(noise_bongflow-y0_bongflow))
data_barf_y = yt_0 + sync_mask * NS.h * eps_y + (1-sync_mask) * NS.h * eps_y2x - weight_mask * (NS.h * eps_x + sigma*(noise_bongflow-y0_bongflow))
if EO("sync_x2y"):
data_[row] = x_0 + sync_mask * NS.h * eps_x + (1-sync_mask) * NS.h * eps_x2y - weight_mask * (NS.h * eps_x2y + sigma*(noise_bongflow-y0_bongflow))
if EO("data_is_y0_with_lure_x_mask"):
data_[row] = data_[row] + lure_x_mask * (y0_bongflow - data_[row])
if EO("eps_is_y0_with_lure_x_mask"):
if RK.EXPONENTIAL:
eps_[row] = eps_[row] + lure_x_mask * ((y0_bongflow - x_0) - eps_[row])
else:
eps_[row] = eps_[row] + lure_x_mask * (((x_0 - y0_bongflow) / sigma) - eps_[row])
# Cache this substep's outputs for the drift terms and the next iteration.
data_barf = data_[row]
data_cached = data_x
eps_x_ [row] = eps_x
data_x_[row] = data_x
eps_y_ [row] = eps_y
data_y_[row] = data_y
if EO("sync_use_fake_eps_y"):
# Replace the measured guide epsilon with the analytic one from y0 + noise.
if RK.EXPONENTIAL:
if VE_MODEL:
eps_y_ [row] = sigma * (            - noise_bongflow)
else:
eps_y_ [row] = sigma * (y0_bongflow - noise_bongflow)
else:
if VE_MODEL:
eps_y_ [row] = noise_bongflow
else:
eps_y_ [row] = noise_bongflow - y0_bongflow
if EO("sync_use_fake_data_y"):
data_y_[row] = y0_bongflow
# FLOW GUIDE ---------------------------
# Differentiates between a guide-anchored trajectory (y) and the sampling
# trajectory (x): the effective epsilon is their difference, steering x toward y.
elif LG.guide_mode.startswith("flow") and (LG.lgw[step_sched] > 0 or LG.lgw_inv[step_sched] > 0) and not FLOW_STOPPED   and not EO("flow_sync") :
lgw_mask_, lgw_mask_inv_ = LG.get_masks_for_step(step)
if not FLOW_STARTED and not FLOW_RESUMED:
# First activation: seed yx0 (the evolving blend latent) from the guides.
FLOW_STARTED = True
data_x_prev_ = torch.zeros_like(data_prev_)
y0  = LG.HAS_LATENT_GUIDE * LG.mask * LG.y0 + LG.HAS_LATENT_GUIDE_INV * LG.mask_inv * LG.y0_inv
yx0 = y0.clone()
if EO("flow_slerp"):
# Spherical interpolation between the two guides instead of a masked blend.
y0_inv  = LG.HAS_LATENT_GUIDE * LG.mask * LG.y0_inv + LG.HAS_LATENT_GUIDE_INV * LG.mask_inv * LG.y0
y0     = LG.y0.clone()
y0_inv = LG.y0_inv.clone()
flow_slerp_guide_ratio = EO("flow_slerp_guide_ratio", 0.5)
y_slerp = slerp_tensor(flow_slerp_guide_ratio, y0, y0_inv)
yx0 = y_slerp.clone()
x_[row], x_0 = yx0.clone(), yx0.clone()
if EO("guide_step_cutoff") or EO("guide_step_min"):
x_0_orig = yx0.clone()
if EO("flow_yx0_init_y0_inv"):
yx0 = LG.HAS_LATENT_GUIDE * LG.mask * LG.y0_inv + LG.HAS_LATENT_GUIDE_INV * LG.mask_inv * LG.y0
if step > 0:
# Later steps: fold the current denoised estimate into y0 / yx0.
if EO("flow_manual_masks"):
y0 = (1 - (LG.HAS_LATENT_GUIDE * LG.lgw[step_sched] * LG.mask + LG.HAS_LATENT_GUIDE_INV * LG.lgw_inv[step_sched] * LG.mask_inv)) * denoised   +   LG.HAS_LATENT_GUIDE * LG.lgw[step_sched] * LG.mask * LG.y0 + LG.HAS_LATENT_GUIDE_INV * LG.lgw_inv[step_sched] * LG.mask_inv * LG.y0_inv
else:
y0 = (1 - (lgw_mask_ + lgw_mask_inv_)) * denoised   +   lgw_mask_ * LG.y0 + lgw_mask_inv_ * LG.y0_inv
yx0 = y0.clone()
if EO("flow_slerp"):
if EO("flow_manual_masks"):
y0_inv = (1 - (LG.HAS_LATENT_GUIDE * LG.lgw[step_sched] * LG.mask + LG.HAS_LATENT_GUIDE_INV * LG.lgw_inv[step_sched] * LG.mask_inv)) * denoised   +   LG.HAS_LATENT_GUIDE * LG.lgw[step_sched] * LG.mask * LG.y0_inv + LG.HAS_LATENT_GUIDE_INV * LG.lgw_inv[step_sched] * LG.mask_inv * LG.y0
else:
y0_inv = (1 - (lgw_mask_ + lgw_mask_inv_)) * denoised   +   lgw_mask_ * LG.y0_inv + lgw_mask_inv_ * LG.y0
flow_slerp_guide_ratio = EO("flow_slerp_guide_ratio", 0.5)
y_slerp = slerp_tensor(flow_slerp_guide_ratio, y0, y0_inv)
yx0 = y_slerp.clone()
else:
# Flow already running: advance yx0 from the previous cached prediction.
yx0_prev = data_cached
if EO("flow_manual_masks"):
yx0 = (1 - (LG.HAS_LATENT_GUIDE * LG.lgw[step_sched] * LG.mask + LG.HAS_LATENT_GUIDE_INV * LG.lgw_inv[step_sched] * LG.mask_inv)) * yx0_prev   +   LG.HAS_LATENT_GUIDE * LG.lgw[step_sched] * LG.mask * x_tmp + LG.HAS_LATENT_GUIDE_INV * LG.lgw_inv[step_sched] * LG.mask_inv * x_tmp
else:
yx0 = (1 - (lgw_mask_ + lgw_mask_inv_)) * yx0_prev   +   (lgw_mask_ + lgw_mask_inv_) * x_tmp
if not EO("flow_static_guides"):
if EO("flow_manual_masks"):
y0 = (1 - (LG.HAS_LATENT_GUIDE * LG.lgw[step_sched] * LG.mask + LG.HAS_LATENT_GUIDE_INV * LG.lgw_inv[step_sched] * LG.mask_inv)) * yx0_prev   +   LG.HAS_LATENT_GUIDE * LG.lgw[step_sched] * LG.mask * LG.y0 + LG.HAS_LATENT_GUIDE_INV * LG.lgw_inv[step_sched] * LG.mask_inv * LG.y0_inv
else:
y0 = (1 - (lgw_mask_ + lgw_mask_inv_)) * yx0_prev   +   lgw_mask_ * LG.y0 + lgw_mask_inv_ * LG.y0_inv
if EO("flow_slerp"):
if EO("flow_manual_masks"):
y0_inv = (1 - (LG.HAS_LATENT_GUIDE * LG.lgw[step_sched] * LG.mask + LG.HAS_LATENT_GUIDE_INV * LG.lgw_inv[step_sched] * LG.mask_inv)) * yx0_prev   +   LG.HAS_LATENT_GUIDE * LG.lgw[step_sched] * LG.mask * LG.y0_inv + LG.HAS_LATENT_GUIDE_INV * LG.lgw_inv[step_sched] * LG.mask_inv * LG.y0
else:
y0_inv = (1 - (lgw_mask_ + lgw_mask_inv_)) * yx0_prev   +   lgw_mask_ * LG.y0_inv + lgw_mask_inv_ * LG.y0
# Optional collinear/orthogonal projections between yx0 and the guides.
y0_orig = y0.clone()
if EO("flow_proj_xy"):
d_collinear_d_lerp = get_collinear(yx0, y0_orig)
d_lerp_ortho_d     = get_orthogonal(y0_orig, yx0)
y0 = d_collinear_d_lerp + d_lerp_ortho_d
if EO("flow_proj_yx"):
d_collinear_d_lerp = get_collinear(y0_orig, yx0)
d_lerp_ortho_d     = get_orthogonal(yx0, y0_orig)
yx0 = d_collinear_d_lerp + d_lerp_ortho_d
y0_inv_orig = None
if EO("flow_proj_xy_inv"):
y0_inv_orig = y0_inv.clone()
d_collinear_d_lerp = get_collinear(yx0, y0_inv)
d_lerp_ortho_d     = get_orthogonal(y0_inv, yx0)
y0_inv = d_collinear_d_lerp + d_lerp_ortho_d
if EO("flow_proj_yx_inv"):
y0_inv_orig = y0_inv if y0_inv_orig is None else y0_inv_orig
d_collinear_d_lerp = get_collinear(y0_inv_orig, yx0)
d_lerp_ortho_d     = get_orthogonal(yx0, y0_inv_orig)
yx0 = d_collinear_d_lerp + d_lerp_ortho_d
del y0_orig
# Build noised versions of y (guide) and x (blend) for the two model calls.
flow_cossim_iter = EO("flow_cossim_iter", 1)
if step == 0:
noise_yt = noise_fn(y0, sigma, sigma_next, NS.noise_sampler, flow_cossim_iter) # normalize_zscore(NS.noise_sampler(sigma=sigma, sigma_next=sigma_next), channelwise=True, inplace=True)
if not EO("flow_disable_renoise_y0"):
if noise_yt is None:
noise_yt = noise_fn(x_0, sigma, sigma_next, NS.noise_sampler, flow_cossim_iter)
else:
# eta-blend fresh noise into the running noise.
noise_yt = (1-eta) * noise_yt + eta * noise_fn(x_0, sigma, sigma_next, NS.noise_sampler, flow_cossim_iter)
if VE_MODEL:
yt = y0 + s_tmp * noise_yt
else:
yt = (NS.sigma_max-s_tmp) * y0 + (s_tmp/NS.sigma_max) * noise_yt
if not EO("flow_disable_doublenoise_y0"):
if noise_yt is None:
noise_yt = noise_fn(x_0, sigma, sigma_next, NS.noise_sampler, flow_cossim_iter)
else:
noise_yt = (1-eta) * noise_yt + eta * noise_fn(x_0, sigma, sigma_next, NS.noise_sampler, flow_cossim_iter)
if VE_MODEL:
y0_noised = y0 + sigma * noise_yt
else:
y0_noised = (NS.sigma_max-sigma) * y0 + sigma * noise_yt
if EO("flow_slerp"):
noise = noise_fn(y0_inv, sigma, sigma_next, NS.noise_sampler, flow_cossim_iter)
yt_inv = (NS.sigma_max-s_tmp) * y0_inv + (s_tmp/NS.sigma_max) * noise
if not EO("flow_disable_doublenoise_y0_inv"):
noise = noise_fn(y0_inv, sigma, sigma_next, NS.noise_sampler, flow_cossim_iter)
y0_noised_inv = (NS.sigma_max-sigma) * y0_inv + sigma * noise
if step == 0:
noise_xt = noise_fn(yx0, sigma, sigma_next, NS.noise_sampler, flow_cossim_iter)
if EO("flow_slerp"):
xt = yx0 + (s_tmp/NS.sigma_max) * (noise - y_slerp)
if not EO("flow_disable_doublenoise_x_0"):
noise = noise_fn(x_0, sigma, sigma_next, NS.noise_sampler, flow_cossim_iter)
x_0_noised = x_0 + sigma * (noise - y_slerp)
else:
if not EO("flow_disable_renoise_x_0"):
if noise_xt is None:
noise_xt = noise_fn(x_0, sigma, sigma_next, NS.noise_sampler, flow_cossim_iter)
else:
noise_xt = (1-eta_substep) * noise_xt + eta_substep * noise_fn(x_0, sigma, sigma_next, NS.noise_sampler, flow_cossim_iter)
if VE_MODEL:
# NOTE(review): this adds yx0 twice ("yx0 + s_tmp*yx0 + ...") unlike the
# non-VE branch below — possibly intended as yx0 + s_tmp*(noise_xt - y0);
# mirrors the same pattern in the x_0_noised VE branch. Confirm.
xt = yx0 + (s_tmp) * yx0 + (s_tmp) * (noise_xt - y0)
else:
xt = yx0 + (s_tmp/NS.sigma_max) * (noise_xt - y0)
if not EO("flow_disable_doublenoise_x_0"):
if noise_xt is None:
noise_xt = noise_fn(x_0, sigma, sigma_next, NS.noise_sampler, flow_cossim_iter)
else:
noise_xt = (1-eta_substep) * noise_xt + eta_substep * noise_fn(x_0, sigma, sigma_next, NS.noise_sampler, flow_cossim_iter)
if VE_MODEL:
x_0_noised = x_0 + (sigma) * x_0 + (sigma) * (noise_xt - y0)
else:
x_0_noised = x_0 + (sigma/NS.sigma_max) * (noise_xt - y0) # just lerp noise add, (1-sigma)*y0 + sigma*noise assuming x_0 == y0, which is true initially...
# Model calls on the guide and blend trajectories.
eps_y, data_y = RK(yt, s_tmp, y0_noised, sigma, transformer_options={'latent_type': 'yt'})
eps_x, data_x = RK(xt, s_tmp, x_0_noised, sigma, transformer_options={'latent_type': 'xt'})
if EO("flow_slerp"):
eps_y_inv, data_y_inv = RK(yt_inv, s_tmp, y0_noised_inv, sigma, transformer_options={'latent_type': 'yt_inv'})
# NOTE(review): LG.lgw[step+1] may index one past the schedule on the final
# step — confirm the lgw tensors are padded.
if LG.lgw[step+1] == 0 and LG.lgw_inv[step+1] == 0: # break out of differentiating x0 and return to differentiating eps/velocity field
# Guide weight ends next step: hand off to plain sampling and stop the flow.
if EO("flow_shit_out_yx0"):
eps_ [row] = eps_x - eps_y
data_[row] = yx0
if row == 0:
x_[row] = x_0 = xt
else:
x_[row] = xt
if not EO("flow_shit_out_new"):
eps_ [row] = eps_x
data_[row] = data_x
if row == 0:
x_[row] = x_0 = xt
else:
x_[row] = xt
else:
eps_ [row] = (1 - (lgw_mask_ + lgw_mask_inv_)) * eps_x  + (lgw_mask_ + lgw_mask_inv_) * eps_y
data_[row] = (1 - (lgw_mask_ + lgw_mask_inv_)) * data_x + (lgw_mask_ + lgw_mask_inv_) * data_y
if row == 0:
x_[row] = x_0 = (1 - (lgw_mask_ + lgw_mask_inv_)) * xt + (lgw_mask_ + lgw_mask_inv_) * yt
else:
x_[row] = (1 - (lgw_mask_ + lgw_mask_inv_)) * xt + (lgw_mask_ + lgw_mask_inv_) * yt
FLOW_STOPPED = True
else:
# Flow continues: the stage epsilon is the y-x epsilon difference.
if not EO("flow_slerp"):
if RK.EXPONENTIAL:
eps_y_alt = data_y - x_0
eps_x_alt = data_x - x_0
else:
eps_y_alt = (x_0 - data_y) / sigma
eps_x_alt = (x_0 - data_x) / sigma
if EO("flow_y_zero"):
eps_y_alt *= LG.mask
eps_[row] = eps_yx = (eps_y_alt - eps_x_alt)
eps_y_lin  = (x_0 - data_y) / sigma
if EO("flow_y_zero"):
eps_y_lin *= LG.mask
eps_x_lin  = (x_0 - data_x) / sigma
eps_yx_lin = (eps_y_lin - eps_x_lin)
data_[row] = (1 - (lgw_mask_ + lgw_mask_inv_)) * data_x + (lgw_mask_ + lgw_mask_inv_) * data_y
if EO("flow_reverse_data_masks"):
data_[row] = (1 - (lgw_mask_ + lgw_mask_inv_)) * data_y + (lgw_mask_ + lgw_mask_inv_) * data_x
if flow_sync_eps != 0.0:
# Blend the flow epsilon back toward the conventional epsilon from data_[row].
if RK.EXPONENTIAL:
eps_[row] = (1-flow_sync_eps) * eps_[row] + flow_sync_eps * (data_[row] - x_0)
else:
eps_[row] = (1-flow_sync_eps) * eps_[row] + flow_sync_eps * (x_0 - data_[row]) / sigma
if EO("flow_sync_eps_mask"):
flow_sync_eps = EO("flow_sync_eps_mask", 1.0)
if RK.EXPONENTIAL:
eps_[row] = (lgw_mask_ + lgw_mask_inv_) * (1-flow_sync_eps) * eps_[row] + (1 - (lgw_mask_ + lgw_mask_inv_)) * flow_sync_eps * (data_[row] - x_0)
else:
eps_[row] = (lgw_mask_ + lgw_mask_inv_) * (1-flow_sync_eps) * eps_[row] + (1 - (lgw_mask_ + lgw_mask_inv_)) * flow_sync_eps * (x_0 - data_[row]) / sigma
if EO("flow_sync_eps_revmask"):
flow_sync_eps = EO("flow_sync_eps_revmask", 1.0)
if RK.EXPONENTIAL:
eps_[row] = (1 - (lgw_mask_ + lgw_mask_inv_)) * (1-flow_sync_eps) * eps_[row] + (lgw_mask_ + lgw_mask_inv_) * flow_sync_eps * (data_[row] - x_0)
else:
eps_[row] = (1 - (lgw_mask_ + lgw_mask_inv_)) * (1-flow_sync_eps) * eps_[row] + (lgw_mask_ + lgw_mask_inv_) * flow_sync_eps * (x_0 - data_[row]) / sigma
if EO("flow_sync_eps_maskonly"):
flow_sync_eps = EO("flow_sync_eps_maskonly", 1.0)
if RK.EXPONENTIAL:
eps_[row] = (lgw_mask_ + lgw_mask_inv_) * eps_[row] + (1 - (lgw_mask_ + lgw_mask_inv_)) * (data_[row] - x_0)
else:
eps_[row] = (lgw_mask_ + lgw_mask_inv_) * eps_[row] + (1 - (lgw_mask_ + lgw_mask_inv_)) * (x_0 - data_[row]) / sigma
if EO("flow_sync_eps_revmaskonly"):
flow_sync_eps = EO("flow_sync_eps_revmaskonly", 1.0)
if RK.EXPONENTIAL:
eps_[row] = (1 - (lgw_mask_ + lgw_mask_inv_)) * eps_[row] + (lgw_mask_ + lgw_mask_inv_) * (data_[row] - x_0)
else:
eps_[row] = (1 - (lgw_mask_ + lgw_mask_inv_)) * eps_[row] + (lgw_mask_ + lgw_mask_inv_) * (x_0 - data_[row]) / sigma
if EO("flow_slerp"):
# Slerp variant: compute both the main and inverse y-x differences.
if RK.EXPONENTIAL:
eps_y_alt     = data_y     - x_0
eps_y_alt_inv = data_y_inv - x_0
eps_x_alt     = data_x     - x_0
else:
eps_y_alt     = (x_0 - data_y)     / sigma
eps_y_alt_inv = (x_0 - data_y_inv) / sigma
eps_x_alt     = (x_0 - data_x)     / sigma
flow_slerp_ratio2 = EO("flow_slerp_ratio2", 0.5)
eps_yx = (eps_y_alt - eps_x_alt)
eps_y_lin  = (x_0 - data_y) / sigma
eps_x_lin  = (x_0 - data_x) / sigma
eps_yx_lin = (eps_y_lin - eps_x_lin)
eps_yx_inv = (eps_y_alt_inv - eps_x_alt)
eps_y_lin_inv  = (x_0 - data_y_inv) / sigma
eps_x_lin  = (x_0 - data_x) / sigma
eps_yx_lin_inv = (eps_y_lin_inv - eps_x_lin)
data_row     = x_0 - sigma * eps_yx_lin
data_row_inv = x_0 - sigma * eps_yx_lin_inv
if EO("flow_slerp_similarity_ratio"):
flow_slerp_similarity_ratio = EO("flow_slerp_similarity_ratio", 1.0)
flow_slerp_ratio2 = find_slerp_ratio_grid(data_row, data_row_inv, LG.y0.clone(), LG.y0_inv.clone(), flow_slerp_similarity_ratio)
eps_ [row] = slerp_tensor(flow_slerp_ratio2, eps_yx, eps_yx_inv)
data_[row] = slerp_tensor(flow_slerp_ratio2, data_row, data_row_inv)
if EO("flow_slerp_autoalter"):
data_row_slerp = slerp_tensor(0.5, data_row, data_row_inv)
y0_pearsim = get_pearson_similarity(data_row_slerp, y0)
y0_pearsim_inv = get_pearson_similarity(data_row_slerp, y0_inv)
if y0_pearsim > y0_pearsim_inv:
data_[row] = data_row_inv
eps_ [row] = (eps_y_alt_inv - eps_x_alt)
else:
data_[row] = data_row
eps_ [row] = (eps_y_alt - eps_x_alt)
if EO("flow_slerp_recalc_eps_row"):
if RK.EXPONENTIAL:
eps_[row] = data_[row] - x_0
else:
eps_[row] = (x_0 - data_[row]) / sigma
if EO("flow_slerp_recalc_data_row"):
if RK.EXPONENTIAL:
data_[row] = x_0 + eps_[row]
else:
data_[row] = x_0 - sigma * eps_[row]
data_cached = data_x
if step < EO("direct_pre_pseudo_guide", 0) and step > 0:
for i_pseudo in range(EO("direct_pre_pseudo_guide_iter", 1)):
x_tmp += LG.lgw[step_sched] * LG.mask * (NS.sigma_max - s_tmp) * (LG.y0 - denoised) + LG.lgw_inv[step_sched] * LG.mask_inv * (NS.sigma_max - s_tmp) * (LG.y0_inv - denoised)
eps_[row], data_[row] = RK(x_tmp, s_tmp, x_0, sigma)
# MODEL CALL MODEL CALL MODEL CALL MODEL CALL MODEL CALL MODEL CALL MODEL CALL MODEL CALL MODEL CALL MODEL CALL MODEL CALL MODEL CALL MODEL CALL MODEL CALL MODEL CALL MODEL CALL MODEL CALL
if SYNC_GUIDE_ACTIVE:
pass
elif not ((not LG.guide_mode.startswith("flow")) or FLOW_STOPPED or (LG.guide_mode.startswith("flow") and LG.lgw[step_sched] == 0 and LG.lgw_inv[step_sched] == 0)): #(LG.guide_mode.startswith("flow") and (LG.lgw[step_sched] != 0 or LG.lgw_inv[step_sched] != 0)) or FLOW_STOPPED:
pass
elif LG.guide_mode.startswith("lure") and (LG.lgw[step_sched] > 0 or LG.lgw_inv[step_sched] > 0):
eps_[row], data_[row] = RK(x_tmp, s_tmp, x_0, sigma, transformer_options={'latent_type': 'yt'})
else:
if EO("protoshock") and StyleMMDiT is not None and StyleMMDiT.data_shock_start_step <= step_sched < StyleMMDiT.data_shock_end_step:
eps_[row], data_[row] = RK(x_tmp, s_tmp, x_0, sigma, transformer_options={'row': row, 'x_tmp': x_tmp, 'sigma_next': sigma_next})
data_wct = StyleMMDiT.apply_data_shock(data_[row])
if VE_MODEL:
x_tmp = x_tmp + (data_wct - data_[row])
else:
x_tmp = x_tmp + (NS.sigma_max-NS.s_[row]) * (data_wct - data_[row])
#x_[row+RK.row_offset] = x_tmp
x_[row] = x_tmp
if row == 0:
x_0 = x_tmp
if EO("preshock"):
eps_[row], data_[row] = RK(x_tmp, s_tmp, x_0, sigma, transformer_options={'row': row, 'x_tmp': x_tmp, 'sigma_next': sigma_next})
if VE_MODEL:
x_tmp = x_tmp + (data_wct - data_[row])
else:
x_tmp = x_tmp + (NS.sigma_max-NS.s_[row]) * (data_wct - data_[row])
x_[row] = x_tmp
if row == 0:
x_0 = x_tmp
eps_[row], data_[row] = RK(x_tmp, s_tmp, x_0, sigma, transformer_options={'row': row, 'x_tmp': x_tmp, 'sigma_next': sigma_next})
#if EO("yoloshock") and StyleMMDiT is not None and StyleMMDiT.data_shock_start_step <= step_sched < StyleMMDiT.data_shock_end_step:
if not EO("disable_yoloshock") and StyleMMDiT is not None and StyleMMDiT.data_shock_start_step <= step_sched < StyleMMDiT.data_shock_end_step:
data_wct = StyleMMDiT.apply_data_shock(data_[row])
if VE_MODEL:
x_tmp = x_tmp + (data_wct - data_[row])
else:
x_tmp = x_tmp + (NS.sigma_max-NS.s_[row]) * (data_wct - data_[row])
#x_[row+RK.row_offset] = x_tmp
x_[row] = x_tmp
if row == 0:
x_0 = x_tmp
data_[row] = data_wct
if RK.EXPONENTIAL:
eps_[row] = data_[row] - x_0
else:
eps_[row] = (x_0 - data_[row]) / sigma
if hasattr(model.inner_model.inner_model.diffusion_model, "eps_out"): # fp64 model out override, for testing only
eps_out = model.inner_model.inner_model.diffusion_model.eps_out
del model.inner_model.inner_model.diffusion_model.eps_out
if eps_out.shape[0] == 2:
data_cond = x_0 - sigma * eps_out[1]
data_uncond = x_0 - sigma * eps_out[0]
data_row = data_uncond + model.inner_model.cfg * (data_cond - data_uncond)
eps_row = (x_0 - data_row) / sigma
else:
data_row = x_0 - sigma * eps_out
if RK.EXPONENTIAL:
eps_row = data_row - x_0
else:
eps_row = eps_out
if torch.norm(eps_row - eps_[row]) < 0.01 and torch.norm(data_row - data_[row]) < 0.01: # if some other cfg/post-cfg func was used, detect and ignore this
eps_[row] = eps_row
data_[row] = data_row
if RK.extra_args['model_options']['transformer_options'].get('y0_standard_guide') is not None:
if hasattr(model.inner_model.inner_model.diffusion_model, "y0_standard_guide"):
LG.y0 = model.inner_model.inner_model.diffusion_model.y0_standard_guide.clone()
del model.inner_model.inner_model.diffusion_model.y0_standard_guide
RK.extra_args['model_options']['transformer_options']['y0_standard_guide'] = None
if RK.extra_args['model_options']['transformer_options'].get('y0_inv_standard_guide') is not None:
if hasattr(model.inner_model.inner_model.diffusion_model, "y0_inv_standard_guide"):
LG.y0_inv = model.inner_model.inner_model.diffusion_model.y0_inv_standard_guide.clone() # RK.extra_args['model_options']['transformer_options'].get('y0_standard_guide')
del model.inner_model.inner_model.diffusion_model.y0_inv_standard_guide
RK.extra_args['model_options']['transformer_options']['y0_inv_standard_guide'] = None
if LG.guide_mode.startswith("lure") and (LG.lgw[step_sched] > 0 or LG.lgw_inv[step_sched] > 0):
x_tmp = LG.process_guides_data_substep(x_tmp, data_[row], step_sched, s_tmp)
eps_[row], data_[row] = RK(x_tmp, s_tmp, x_0, sigma, transformer_options={'latent_type': 'xt'})
if momentum != 0.0:
data_[row] = data_[row] - momentum * (data_prev_[0] - data_[row]) #negative!
eps_[row] = RK.get_epsilon(x_0, x_tmp, data_[row], sigma, s_tmp) # ... why was this here??? for momentum maybe?
if row < RK.rows and noise_scaling_weight != 0 and noise_scaling_type in {"sampler", "sampler_substep"}:
if noise_scaling_type == "sampler_substep":
sub_lying_su, sub_lying_sigma, sub_lying_sd, sub_lying_alpha_ratio = NS.get_sde_substep(NS.s_[row], NS.s_[row+RK.row_offset+RK.multistep_stages], noise_scaling_eta, noise_scaling_mode)
for _ in range(noise_scaling_cycles-1):
sub_lying_su, sub_lying_sigma, sub_lying_sd, sub_lying_alpha_ratio = NS.get_sde_substep(NS.s_[row], sub_lying_sd, noise_scaling_eta, noise_scaling_mode)
lying_s_[row+1] = sub_lying_sd
substep_noise_scaling_ratio = NS.s_[row+1]/lying_s_[row+1]
if RK.multistep_stages > 0:
substep_noise_scaling_ratio = sigma_next/lying_sd #fails with resample?
lying_eps_row_factor = (1 - noise_scaling_weight*(substep_noise_scaling_ratio-1))
# GUIDE
if not EO("disable_guides_eps_substep"):
eps_, x_ = LG.process_guides_substep(x_0, x_, eps_, data_, row, step_sched, NS.sigma, NS.sigma_next, NS.sigma_down, NS.s_, epsilon_scale, RK)
if not EO("disable_guides_eps_prev_substep"):
eps_prev_, x_ = LG.process_guides_substep(x_0, x_, eps_prev_, data_, row, step_sched, NS.sigma, NS.sigma_next, NS.sigma_down, NS.s_, epsilon_scale, RK)
if LG.y0_mean is not None and LG.y0_mean.sum() != 0.0:
if EO("guide_mean_scattersort"):
data_row_mean = apply_scattersort_spatial(data_[row], LG.y0_mean)
eps_row_mean = RK.get_eps(x_0, data_row_mean, s_tmp)
else:
eps_row_mean = eps_[row] - eps_[row].mean(dim=(-2,-1), keepdim=True) + (LG.y0_mean - x_0).mean(dim=(-2,-1), keepdim=True)
if LG.mask_mean is not None:
eps_row_mean = LG.mask_mean * eps_row_mean + (1-LG.mask_mean) * eps_[row]
eps_[row] = eps_[row] + LG.lgw_mean[step_sched] * (eps_row_mean - eps_[row])
if (full_iter == 0 and diag_iter == 0) or EO("newton_iter_post_use_on_implicit_steps"):
x_, eps_ = RK.newton_iter(x_0, x_, eps_, eps_prev_, data_, NS.s_, row, NS.h, sigmas, step, "post", SYNC_GUIDE_ACTIVE)
# UPDATE #for row in range(RK.rows - RK.multistep_stages - RK.row_offset + 1):
if EO("exp2lin_override") and RK.EXPONENTIAL:
x_ = RK.update_substep(x_0, x_, eps_, eps_prev_, row, RK.row_offset, NS.h_new, NS.h_new_orig, lying_eps_row_factor=lying_eps_row_factor, sigma=sigma) #modifies eps_[row] if lying_eps_row_factor != 1.0
#x_ = RK.update_substep(x_0, x_, eps_, eps_prev_, row, RK.row_offset, -sigma*NS.h_new, -sigma*NS.h_new_orig, lying_eps_row_factor=lying_eps_row_factor) #modifies eps_[row] if lying_eps_row_factor != 1.0
else:
x_ = RK.update_substep(x_0, x_, eps_, eps_prev_, row, RK.row_offset, NS.h_new, NS.h_new_orig, lying_eps_row_factor=lying_eps_row_factor) #modifies eps_[row] if lying_eps_row_factor != 1.0
x_[row+RK.row_offset] = NS.rebound_overshoot_substep(x_0, x_[row+RK.row_offset])
if SYNC_GUIDE_ACTIVE: #yt_ is not None:
#yt_ = RK.update_substep(yt_0, yt_, eps_y_, eps_prev_y_, row, RK.row_offset, NS.h_new, NS.h_new_orig, lying_eps_row_factor=lying_eps_row_factor) #modifies eps_[row] if lying_eps_row_factor != 1.0
yt_ = RK.update_substep(yt_0, yt_, eps_yt_, eps_prev_y_, row, RK.row_offset, NS.h_new, NS.h_new_orig, lying_eps_row_factor=lying_eps_row_factor, sigma=sigma) #modifies eps_[row] if lying_eps_row_factor != 1.0
yt_[row+RK.row_offset] = NS.rebound_overshoot_substep(yt_0, yt_[row+RK.row_offset])
if not RK.IMPLICIT and NS.noise_mode_sde_substep != "hard_sq":
x_means_per_substep = x_[row+RK.row_offset].mean(dim=(-2,-1), keepdim=True)
if not LG.guide_mode.startswith("flow") or (LG.lgw[step_sched] == 0 and LG.lgw[step+1] == 0 and LG.lgw_inv[step_sched] == 0 and LG.lgw_inv[step+1] == 0):
#if LG.guide_mode.startswith("sync") and (LG.lgw[step_sched] != 0.0 or LG.lgw_inv[step_sched] != 0.0):
# x_row_tmp = x_[row+RK.row_offset].clone()
#x_[row+RK.row_offset] = NS.swap_noise_substep(x_0, x_[row+RK.row_offset], mask=sde_mask, guide=LG.y0)
x_row_tmp = NS.swap_noise_substep(x_0, x_[row+RK.row_offset], mask=sde_mask, guide=LG.y0)
#if EO("eps_adain_smartnoise_substep"):
if LG.ADAIN_NOISE_MODE == "smart":
#eps_row_next = (x_0 - x_[row+RK.row_offset]) / (sigma - NS.s_[row+RK.row_offset])
#denoised_row_next = x_0 - sigma * eps_row_next
#
#eps_swapped = (x_row_tmp - denoised_row_next) / NS.s_[row+RK.row_offset]
#
#noise_row_next = eps_swapped + denoised_row_next
#z_[row+RK.row_offset] = noise_row_next
#RK.update_transformer_options({'z_' : z_})
data_next = denoised + NS.h_new * RK.zum(row+RK.row_offset+RK.multistep_stages, data_, data_prev_)
if VE_MODEL:
z_[row+RK.row_offset] = (x_row_tmp - data_next) / NS.s_[row+RK.row_offset]
else:
z_[row+RK.row_offset] = (x_row_tmp - (NS.sigma_max-NS.s_[row+RK.row_offset])*data_next) / NS.s_[row+RK.row_offset]
RK.update_transformer_options({'z_' : z_})
elif LG.ADAIN_NOISE_MODE == "update": #EO("eps_adain"):
x_init_new = (x_row_tmp - x_[row+RK.row_offset]) / s_tmp + x_init
x_0 += sigma * (x_init_new - x_init)
x_init = x_init_new
RK.update_transformer_options({'x_init' : x_init.clone()})
if SYNC_GUIDE_ACTIVE:
noise_bongflow_new = (x_row_tmp - x_[row+RK.row_offset]) / s_tmp + noise_bongflow
yt_[row+RK.row_offset] += s_tmp * (noise_bongflow_new - noise_bongflow)
x_0 += sigma * (noise_bongflow_new - noise_bongflow)
noise_bongflow = noise_bongflow_new
x_[row+RK.row_offset] = x_row_tmp
elif LG.guide_mode.startswith("flow"):
pass
if not LG.guide_mode.startswith("lure"):
x_[row+RK.row_offset] = LG.process_guides_data_substep(x_[row+RK.row_offset], data_[row], step_sched, NS.s_[row])
if ((not EO("protoshock") and not EO("yoloshock")) or EO("fuckitshock")) and StyleMMDiT is not None and StyleMMDiT.data_shock_start_step <= step_sched < StyleMMDiT.data_shock_end_step:
data_wct = StyleMMDiT.apply_data_shock(data_[row])
if VE_MODEL:
x_[row+RK.row_offset] = x_[row+RK.row_offset] + (data_wct - data_[row])
else:
x_[row+RK.row_offset] = x_[row+RK.row_offset] + (NS.sigma_max-NS.s_[row]) * (data_wct - data_[row])
if SYNC_GUIDE_ACTIVE: # # # # ## # # ## # YIIIIKES ---------------------------------------------------------------------------------------------------------
if VE_MODEL:
yt_[:NS.s_.shape[0], 0] = y0_bongflow + NS.s_.view(-1, *[1]*(x.ndim-1)) * (noise_bongflow)
yt_0 = y0_bongflow + sigma * (noise_bongflow)
else:
yt_[:NS.s_.shape[0], 0] = y0_bongflow + NS.s_.view(-1, *[1]*(x.ndim-1)) * (noise_bongflow - y0_bongflow)
yt_0 = y0_bongflow + sigma * (noise_bongflow - y0_bongflow)
if RK.EXPONENTIAL:
eps_y_ = data_y_ - yt_0 # yt_ # watch out for fuckery with size of tableau being smaller later in a chained sampler
else:
if BONGMATH:
eps_y_[:NS.s_.shape[0]] = (yt_[:NS.s_.shape[0]] - data_y_[:NS.s_.shape[0]]) / NS.s_.view(-1,*[1]*(x_.ndim-1))
else:
eps_y_[:NS.s_.shape[0]] = (yt_0.repeat(NS.s_.shape[0], *[1]*(x_.ndim-1)) - data_y_[:NS.s_.shape[0]]) / sigma # calc exact to c0 node
if not BONGMATH and (eta != 0 or eta_substep != 0):
if RK.EXPONENTIAL:
eps_x_ = data_x_ - x_0
else:
eps_x_ = (x_0 - data_x_) / sigma
weight_mask = lgw_mask_+lgw_mask_inv_
if LG.SYNC_SEPARATE:
sync_mask = lgw_mask_sync_+lgw_mask_sync_inv_
else:
sync_mask = 1.
for ms in range(len(eps_)):
if RK.EXPONENTIAL:
if VE_MODEL:
eps_[ms] = sync_mask * eps_x_[ms] + (1-sync_mask) * eps_x2y_[ms] + weight_mask * (-eps_y_[ms] + sigma*(-noise_bongflow))
if EO("sync_x2y"):
eps_[ms] = sync_mask * eps_x_[ms] + (1-sync_mask) * eps_x2y_[ms] + weight_mask * (-eps_x2y_[ms] + sigma*(-noise_bongflow))
else:
eps_[ms] = sync_mask * eps_x_[ms] + (1-sync_mask) * eps_x2y_[ms] + weight_mask * (-eps_y_[ms] + sigma*(y0_bongflow-noise_bongflow))
if EO("sync_x2y"):
eps_[ms] = sync_mask * eps_x_[ms] + (1-sync_mask) * eps_x2y_[ms] + weight_mask * (-eps_x2y_[ms] + sigma*(y0_bongflow-noise_bongflow))
else:
if VE_MODEL:
eps_[ms] = sync_mask * eps_x_[ms] + (1-sync_mask) * eps_x2y_[ms] + weight_mask * (-eps_y_[ms] + (noise_bongflow))
if EO("sync_x2y"):
eps_[ms] = sync_mask * eps_x_[ms] + (1-sync_mask) * eps_x2y_[ms] + weight_mask * (-eps_x2y_[ms] + (noise_bongflow))
else:
eps_[ms] = sync_mask * eps_x_[ms] + (1-sync_mask) * eps_x2y_[ms] + weight_mask * (-eps_y_[ms] + (noise_bongflow-y0_bongflow))
if EO("sync_x2y"):
eps_[ms] = sync_mask * eps_x_[ms] + (1-sync_mask) * eps_x2y_[ms] + weight_mask * (-eps_x2y_[ms] + (noise_bongflow-y0_bongflow))
if BONGMATH and NS.s_[row] > RK.sigma_min and NS.h < RK.sigma_max/2 and (diag_iter == implicit_steps_diag or EO("enable_diag_explicit_bongmath_all")) and not EO("disable_terminal_bongmath"):
if step == 0 and UNSAMPLE:
pass
elif full_iter == implicit_steps_full or not EO("disable_fully_explicit_bongmath_except_final"):
if sigma > 0.03:
BONGMATH_Y = SYNC_GUIDE_ACTIVE
x_0, x_, eps_ = RK.bong_iter(x_0, x_, eps_, eps_prev_, data_, sigma, NS.s_, row, RK.row_offset, NS.h, step, step_sched,
BONGMATH_Y, y0_bongflow, noise_bongflow, eps_x_, eps_y_, data_x_, data_y_, LG)
# BONGMATH_Y, y0_bongflow, noise_bongflow, eps_x_, eps_y_, eps_x2y_, data_x_, LG)
#if EO("eps_adain_smartnoise_bongmath"):
if LG.ADAIN_NOISE_MODE == "smart":
if VE_MODEL:
z_[:NS.s_.shape[0], ...] = (x_ - data_)[:NS.s_.shape[0], ...] / NS.s_.view(-1,*[1]*(x_.ndim-1))
else:
z_[:NS.s_.shape[0], ...] = (x_[:NS.s_.shape[0], ...] - (NS.sigma_max - NS.s_.view(-1,*[1]*(x_.ndim-1)))*data_[:NS.s_.shape[0], ...])[:NS.s_.shape[0], ...] / NS.s_.view(-1,*[1]*(x_.ndim-1))
RK.update_transformer_options({'z_' : z_})
diag_iter += 1
#progress_bar.update( round(1 / implicit_steps_total, 2) )
#step_update = round(1 / implicit_steps_total, 2)
#progress_bar.update(float(f"{step_update:.2f}"))
x_next = x_[RK.rows - RK.multistep_stages - RK.row_offset + 1]
x_next = NS.rebound_overshoot_step(x_0, x_next)
if SYNC_GUIDE_ACTIVE: # YT_NEXT UPDATE STEP --------------------------------------
yt_next = yt_[RK.rows - RK.multistep_stages - RK.row_offset + 1]
yt_next = NS.rebound_overshoot_step(yt_0, yt_next)
eps = (x_0 - x_next) / (sigma - sigma_next)
denoised = x_0 - sigma * eps
if EO("postshock") and step < EO("postshock", 10):
eps_row, data_row = RK(x_next, sigma_next, x_next, sigma_next, transformer_options={'row': row, 'x_tmp': x_next, 'sigma_next': sigma_next})
if VE_MODEL:
x_next = x_next + (data_row - denoised)
else:
x_next = x_next + (NS.sigma_max-sigma_next) * (data_row - denoised)
eps = (x_0 - x_next) / (sigma - sigma_next)
denoised = x_0 - sigma * eps
if EO("data_sampler") and step > EO("data_sampler_start_step", 0) and step < EO("data_sampler_end_step", 5):
data_sampler_weight = EO("data_sampler_weight", 1.0)
denoised_step = RK.zum(row+RK.row_offset+RK.multistep_stages, data_, data_prev_)
x_next = LG.swap_data(x_next, denoised, denoised_step, data_sampler_weight * sigma_next)
eps = (x_0 - x_next) / (sigma - sigma_next)
denoised = x_0 - sigma * eps
x_0_prev = x_0.clone()
x_means_per_step = x_next.mean(dim=(-2,-1), keepdim=True)
if eta == 0.0:
x = x_next
if SYNC_GUIDE_ACTIVE:
yt_0 = yt_[0] = yt_next
#elif LG.guide_mode.startswith("sync") and (LG.lgw[step_sched] != 0.0 or LG.lgw_inv[step_sched] != 0.0):
# noise_sync_new = NS.noise_sampler(sigma=sigma, sigma_next=sigma_next)
# x = x_next + sigma * eta * (noise_sync_new - noise_bongflow)
# noise_bongflow += eta * (noise_sync_new - noise_bongflow)
elif not LG.guide_mode.startswith("flow") or (LG.lgw[step_sched] == 0 and LG.lgw[step+1] == 0 and LG.lgw_inv[step_sched] == 0 and LG.lgw_inv[step+1] == 0):
x = NS.swap_noise_step(x_0, x_next, mask=sde_mask)
#if EO("eps_adain_smartnoise"):
if LG.ADAIN_NOISE_MODE == "smart":
#noise_next = eps + denoised
#eps_swapped = (x - denoised) / sigma_next
#
#noise_next = eps_swapped + denoised
#z_[0] = noise_next
#RK.update_transformer_options({'z_' : z_})
if full_iter+1 < implicit_steps_full+1: # are we to loop for full iter after this?
if VE_MODEL:
#z_[row+RK.row_offset] = (x - denoised) / sigma_next
z_[0] = (x_0 - denoised) / sigma
else:
#z_[row+RK.row_offset] = (x - (NS.sigma_max-sigma_next) * denoised) / sigma_next
z_[0] = (x_0 - (NS.sigma_max-sigma) * denoised) / sigma
else: #we're advancing to next step, x is x_next
if VE_MODEL:
#z_[row+RK.row_offset] = (x - denoised) / sigma_next
z_[0] = (x - denoised) / sigma_next
else:
#z_[row+RK.row_offset] = (x - (NS.sigma_max-sigma_next) * denoised) / sigma_next
z_[0] = (x - (NS.sigma_max-sigma_next) * denoised) / sigma_next
RK.update_transformer_options({'z_' : z_})
elif LG.ADAIN_NOISE_MODE == "update": #EO("eps_adain"):
x_init_new = (x - x_next) / sigma_next + x_init
x_0 += sigma * (x_init_new - x_init)
x_init = x_init_new
RK.update_transformer_options({'x_init' : x_init.clone()})
if SYNC_GUIDE_ACTIVE:
noise_bongflow_new = (x - x_next) / sigma_next + noise_bongflow
yt_next += sigma_next * (noise_bongflow_new - noise_bongflow)
x_0 += sigma * (noise_bongflow_new - noise_bongflow)
if not EO("disable_i_bong"):
for i_bong in range(len(NS.s_)):
x_[i_bong] += NS.s_[i_bong] * (noise_bongflow_new - noise_bongflow)
#x_[0] += sigma * (noise_bongflow_new - noise_bongflow)
yt_0 = yt_[0] = yt_next
noise_bongflow = noise_bongflow_new
else:
x = x_next
if EO("keep_step_means"):
x = x - x.mean(dim=(-2,-1), keepdim=True) + x_means_per_step
callback_step = len(sigmas)-1 - step if sampler_mode == "unsample" else step
preview_callback(x, eps, denoised, x_, eps_, data_, callback_step, sigma, sigma_next, callback, EO, preview_override=data_cached, FLOW_STOPPED=FLOW_STOPPED)
h_prev = NS.h
x_prev = x_0
denoised_prev2 = denoised_prev
denoised_prev = denoised
full_iter += 1
if LG.lgw[step_sched] > 0 and step >= EO("guide_cutoff_start_step", 0) and cossim_counter < EO("guide_cutoff_max_iter", 10) and (EO("guide_cutoff") or EO("guide_min")):
guide_cutoff = EO("guide_cutoff", 1.0)
denoised_norm = data_[0] - data_[0].mean(dim=(-2,-1), keepdim=True)
y0_norm = LG.y0 - LG.y0 .mean(dim=(-2,-1), keepdim=True)
y0_cossim = get_cosine_similarity(denoised_norm, y0_norm)
if y0_cossim > guide_cutoff and LG.lgw[step_sched] > EO("guide_cutoff_floor", 0.0):
if not EO("guide_cutoff_fast"):
LG.lgw[step_sched] *= EO("guide_cutoff_factor", 0.9)
else:
LG.lgw *= EO("guide_cutoff_factor", 0.9)
full_iter -= 1
if y0_cossim < EO("guide_min", 0.0) and LG.lgw[step_sched] < EO("guide_min_ceiling", 1.0):
if not EO("guide_cutoff_fast"):
LG.lgw[step_sched] *= EO("guide_min_factor", 1.1)
else:
LG.lgw *= EO("guide_min_factor", 1.1)
full_iter -= 1
#if EO("smartnoise"): #TODO: determine if this was useful
# z_[0] = z_next
if FLOW_STARTED and FLOW_STOPPED:
data_prev_ = data_x_prev_
if FLOW_STARTED and not FLOW_STOPPED:
data_x_prev_[0] = data_cached # data_cached is data_x from flow mode. this allows multistep to resume seamlessly.
for ms in range(recycled_stages):
data_x_prev_[recycled_stages - ms] = data_x_prev_[recycled_stages - ms - 1]
#if LG.guide_mode.startswith("sync") and (LG.lgw[step_sched] != 0.0 or LG.lgw_inv[step_sched] != 0.0):
# data_prev_[0] = x_0 - sigma * eps_[0]
#else:
data_prev_[0] = data_[0] # with flow mode, this will be the differentiated guide/"denoised"
for ms in range(recycled_stages):
data_prev_[recycled_stages - ms] = data_prev_[recycled_stages - ms - 1] # TODO: verify that this does not run on every substep...
if SYNC_GUIDE_ACTIVE:
data_prev_x_[0] = data_x
for ms in range(recycled_stages):
data_prev_x_[recycled_stages - ms] = data_prev_x_[recycled_stages - ms - 1]
data_prev_y_[0] = data_y
for ms in range(recycled_stages):
data_prev_y_[recycled_stages - ms] = data_prev_y_[recycled_stages - ms - 1]
rk_type = RK.swap_rk_type_at_step_or_threshold(x_0, data_prev_, NS, sigmas, step, rk_swap_step, rk_swap_threshold, rk_swap_type, rk_swap_print)
if step > rk_swap_step:
implicit_steps_full = 0
implicit_steps_diag = 0
if EO("bong2m") or EO("bong3m"):
denoised_data_prev2 = denoised_data_prev
denoised_data_prev = data_[0]
if SKIP_PSEUDO and not LG.guide_mode.startswith("flow"):
if SKIP_PSEUDO_Y == "y0":
LG.y0 = denoised
LG.HAS_LATENT_GUIDE = True
else:
LG.y0_inv = denoised
LG.HAS_LATENT_GUIDE_INV = True
if EO("pseudo_mix_strength"):
pseudo_mix_strength = EO("pseudo_mix_strength", 0.0)
LG.y0 = orig_y0 + pseudo_mix_strength * (denoised - orig_y0)
LG.y0_inv = orig_y0_inv + pseudo_mix_strength * (denoised - orig_y0_inv)
#if sampler_mode == "unsample":
# progress_bar.n -= 1
# progress_bar.refresh()
#else:
# progress_bar.update(1)
progress_bar.update(1) #THIS WAS HERE
step += 1
if EO("skip_step", -1) == step:
step += 1
if d_noise_start_step == step:
sigmas = sigmas.clone() * d_noise
if sigmas.max() > NS.sigma_max:
sigmas = sigmas / NS.sigma_max
if d_noise_inv_start_step == step:
sigmas = sigmas.clone() / d_noise_inv
if sigmas.max() > NS.sigma_max:
sigmas = sigmas / NS.sigma_max
if LG.lgw[step_sched] > 0 and step >= EO("guide_step_cutoff_start_step", 0) and cossim_counter < EO("guide_step_cutoff_max_iter", 10) and (EO("guide_step_cutoff") or EO("guide_step_min")):
guide_cutoff = EO("guide_step_cutoff", 1.0)
eps_trash, data_trash = RK(x, sigma_next, x_0, sigma)
denoised_norm = data_trash - data_trash.mean(dim=(-2,-1), keepdim=True)
y0_norm = LG.y0 - LG.y0 .mean(dim=(-2,-1), keepdim=True)
y0_cossim = get_cosine_similarity(denoised_norm, y0_norm)
if y0_cossim > guide_cutoff and LG.lgw[step_sched] > EO("guide_step_cutoff_floor", 0.0):
if not EO("guide_step_cutoff_fast"):
LG.lgw[step_sched] *= EO("guide_step_cutoff_factor", 0.9)
else:
LG.lgw *= EO("guide_step_cutoff_factor", 0.9)
step -= 1
x_0 = x = x_[0] = x_0_orig.clone()
if y0_cossim < EO("guide_step_min", 0.0) and LG.lgw[step_sched] < EO("guide_step_min_ceiling", 1.0):
if not EO("guide_step_cutoff_fast"):
LG.lgw[step_sched] *= EO("guide_step_min_factor", 1.1)
else:
LG.lgw *= EO("guide_step_min_factor", 1.1)
step -= 1
x_0 = x = x_[0] = x_0_orig.clone()
# END SAMPLING LOOP ---------------------------------------------------------------------------------------------------
#progress_bar.close()
RK.update_transformer_options({'update_cross_attn': None})
if step == len(sigmas)-2 and sigmas[-1] == 0 and sigmas[-2] == NS.sigma_min and not INIT_SAMPLE_LOOP:
if EO("skip_final_model_call"):
sigma_min = NS.sigma_min.view((1,) * x.ndim).to(x)
denoised = model.inner_model.inner_model.model_sampling.calculate_denoised(sigma_min, eps, x)
x = denoised
else:
eps, denoised = RK(x, NS.sigma_min, x, NS.sigma_min)
x = denoised
#progress_bar.update(1)
eps = eps .to(model_device)
denoised = denoised.to(model_device)
x = x .to(model_device)
progress_bar.close()
if not (UNSAMPLE and sigmas[1] > sigmas[0]) and not EO("preview_last_step_always") and sigma is not None and not (FLOW_STARTED and not FLOW_STOPPED):
callback_step = len(sigmas)-1 - step if sampler_mode == "unsample" else step
preview_callback(x, eps, denoised, x_, eps_, data_, callback_step, sigma, sigma_next, callback, EO, preview_override=data_cached, FLOW_STOPPED=FLOW_STOPPED)
if INIT_SAMPLE_LOOP:
state_info_out = state_info
else:
if guides is not None and guides.get('guide_mode', "") == 'inversion':
guide_inversion_y0 = state_info.get('guide_inversion_y0')
guide_inversion_y0_inv = state_info.get('guide_inversion_y0_inv')
if sampler_mode == "unsample" and guide_inversion_y0 is None:
guide_inversion_y0 = LG.y0.clone()
if sampler_mode == "unsample" and guide_inversion_y0_inv is None:
guide_inversion_y0_inv = LG.y0_inv.clone()
if sampler_mode in {"standard", "resample"} and guide_inversion_y0 is None:
guide_inversion_y0 = NS.noise_sampler(sigma=NS.sigma_max, sigma_next=NS.sigma_min).to(x)
guide_inversion_y0 = normalize_zscore(guide_inversion_y0, channelwise=True, inplace=True)
if sampler_mode in {"standard", "resample"} and guide_inversion_y0_inv is None:
guide_inversion_y0_inv = NS.noise_sampler(sigma=NS.sigma_max, sigma_next=NS.sigma_min).to(x)
guide_inversion_y0_inv = normalize_zscore(guide_inversion_y0_inv, channelwise=True, inplace=True)
state_info_out['guide_inversion_y0'] = guide_inversion_y0
state_info_out['guide_inversion_y0_inv'] = guide_inversion_y0_inv
state_info_out['raw_x'] = x.to('cpu')
state_info_out['denoised'] = denoised.to('cpu')
state_info_out['data_prev_'] = data_prev_.to('cpu')
state_info_out['end_step'] = step
state_info_out['sigma_next'] = sigma_next.clone()
state_info_out['sigmas'] = sigmas_scheduled.clone()
state_info_out['sampler_mode'] = sampler_mode
state_info_out['last_rng'] = NS.noise_sampler .generator.get_state().clone()
state_info_out['last_rng_substep'] = NS.noise_sampler2.generator.get_state().clone()
state_info_out['completed'] = step == len(sigmas)-2 and sigmas[-1] == 0 and sigmas[-2] == NS.sigma_min
state_info_out['FLOW_STARTED'] = FLOW_STARTED
state_info_out['FLOW_STOPPED'] = FLOW_STOPPED
state_info_out['noise_bongflow'] = noise_bongflow
state_info_out['y0_bongflow'] = y0_bongflow
state_info_out['y0_bongflow_orig'] = y0_bongflow_orig
state_info_out['y0_standard_guide'] = y0_standard_guide
state_info_out['y0_inv_standard_guide'] = y0_inv_standard_guide
state_info_out['data_prev_y_'] = data_prev_y_
state_info_out['data_prev_x_'] = data_prev_x_
if noise_initial is not None:
state_info_out['noise_initial'] = noise_initial.to('cpu')
if image_initial is not None:
state_info_out['image_initial'] = image_initial.to('cpu')
if FLOW_STARTED and not FLOW_STOPPED:
state_info_out['y0'] = y0.to('cpu')
#state_info_out['y0_inv'] = y0_inv.to('cpu') # TODO: implement this?
state_info_out['data_cached'] = data_cached.to('cpu')
state_info_out['data_x_prev_'] = data_x_prev_.to('cpu')
return x
def noise_fn(x, sigma, sigma_next, noise_sampler, cossim_iter=1):
    """Draw 1 + cossim_iter z-score-normalized noise samples and return the one
    with the highest Pearson similarity to x.

    Args:
        x:             reference latent the candidates are scored against.
        sigma:         current sigma passed through to the noise sampler.
        sigma_next:    next sigma passed through to the noise sampler.
        noise_sampler: callable producing a noise tensor for (sigma, sigma_next).
        cossim_iter:   number of extra candidates to draw beyond the first.
    """
    def _draw():
        # fresh sample, z-score normalized channelwise in place
        return normalize_zscore(noise_sampler(sigma=sigma, sigma_next=sigma_next), channelwise=True, inplace=True)

    best_noise = _draw()
    best_score = get_pearson_similarity(x, best_noise)
    for _ in range(cossim_iter):
        candidate = _draw()
        score     = get_pearson_similarity(x, candidate)
        if score > best_score:
            best_noise, best_score = candidate, score
    return best_noise
def preview_callback(
        x                : Tensor,
        eps              : Tensor,
        denoised         : Tensor,
        x_               : Tensor,
        eps_             : Tensor,
        data_            : Tensor,
        step             : int,
        sigma            : Tensor,
        sigma_next       : Tensor,
        callback         : Callable,
        EO               : ExtraOptions,
        preview_override : Optional[Tensor] = None,
        FLOW_STOPPED     : bool = False):
    """Pick which latent to show in the UI preview (driven by extra options),
    then forward it to the sampler callback as float32. No-op if callback is None."""
    UNSET    = object()  # sentinel: distinguishes "not chosen yet" from a None tensor
    selected = UNSET

    # substep previews index into the per-row stacks (eps_/data_/x_)
    for flag, stack in (("eps_substep_preview",      eps_),
                        ("denoised_substep_preview", data_),
                        ("x_substep_preview",        x_)):
        if EO(flag):
            selected = stack[EO(flag, 0)]
            break

    # whole-step previews use the step-level tensors directly
    if selected is UNSET:
        for flag, tensor in (("eps_preview",      eps),
                             ("denoised_preview", denoised),
                             ("x_preview",        x)):
            if EO(flag):
                selected = tensor
                break

    if selected is UNSET:
        if preview_override is not None and FLOW_STOPPED == False:
            selected = preview_override
        else:
            selected = data_[0]

    if callback is not None:
        callback({'x': x, 'i': step, 'sigma': sigma, 'sigma_next': sigma_next, 'denoised': selected.to(torch.float32)})
    return
================================================
FILE: beta/samplers.py
================================================
import torch
import torch.nn.functional as F
from torch import Tensor
from typing import Optional, Callable, Tuple, Dict, Any, Union
import copy
import gc
import comfy.samplers
import comfy.sample
import comfy.sampler_helpers
import comfy.model_sampling
import comfy.latent_formats
import comfy.sd
import comfy.supported_models
import comfy.utils
import comfy.nested_tensor
from comfy.samplers import CFGGuider, sampling_function
import latent_preview
from ..helper import initialize_or_scale, get_res4lyf_scheduler_list, OptionsManager, ExtraOptions
from ..res4lyf import RESplain
from ..latents import normalize_zscore, get_orthogonal
from ..sigmas import get_sigmas
#import ..models # import ReFluxPatcher
from .constants import MAX_STEPS, IMPLICIT_TYPE_NAMES
from .noise_classes import NOISE_GENERATOR_CLASSES_SIMPLE, NOISE_GENERATOR_NAMES_SIMPLE, NOISE_GENERATOR_NAMES
from .rk_noise_sampler_beta import NOISE_MODE_NAMES
from .rk_coefficients_beta import get_default_sampler_name, get_sampler_name_list, process_sampler_name
def copy_cond(conditioning):
    """Deep-copy a conditioning list, cloning tensor values but keeping plain
    references for everything else (so huge attachments like controlnets are
    not duplicated).

    Accepts either a flat conditioning list ``[[embedding, cond_dict], ...]``
    or a list of such lists (regional/multi-cond); the nested form is detected
    by ``conditioning[0][0]`` being a list. Returns a new structure of the same
    shape; the input is never mutated. An empty input returns an empty list
    (previously raised IndexError).
    """
    def _copy_entry(embedding, cond):
        # clone tensors, pass everything else through by reference
        cond_copy = {k: (v.clone() if isinstance(v, torch.Tensor) else v)
                     for k, v in cond.items()}
        return [embedding.clone(), cond_copy]

    if not conditioning:
        return []
    # isinstance instead of type(...) == list: same behavior for lists,
    # also accepts list subclasses
    if isinstance(conditioning[0][0], list):
        return [[_copy_entry(embedding, cond) for embedding, cond in group]
                for group in conditioning]
    return [_copy_entry(embedding, cond) for embedding, cond in conditioning]
def generate_init_noise(x, seed, noise_type_init, noise_stdev, noise_mean, noise_normalize,
                        sigma_max, sigma_min, alpha_init=None, k_init=None, EO=None):
    """Build the initial latent noise for sampling.

    Returns a tensor shaped like ``x``: all zeros when noise is disabled
    (type "none" or zero stdev), otherwise noise drawn from the selected
    generator, optionally z-score normalized, scaled by ``noise_stdev``,
    and recentered on ``noise_mean``.
    """
    # Noise disabled entirely: hand back a zero latent of the right shape.
    if noise_type_init == "none" or noise_stdev == 0.0:
        return torch.zeros_like(x)

    sampler = NOISE_GENERATOR_CLASSES_SIMPLE.get(noise_type_init)(
        x=x, seed=seed, sigma_max=sigma_max, sigma_min=sigma_min
    )

    # Fractal noise takes extra shaping parameters.
    if noise_type_init == "fractal":
        sampler.alpha = alpha_init
        sampler.k     = k_init
        sampler.scale = 0.1

    noise = sampler(sigma=sigma_max * noise_stdev, sigma_next=sigma_min)

    if noise_normalize and noise.std() > 0:
        # Extra-options flag picks per-channel vs. global normalization;
        # defaults to channelwise when no ExtraOptions object is supplied.
        channelwise_flag = EO("init_noise_normalize_channelwise", "true") if EO else "true"
        noise = normalize_zscore(noise, channelwise=(channelwise_flag == "true"), inplace=True)

    noise *= noise_stdev
    return (noise - noise.mean()) + noise_mean
class SharkGuider(CFGGuider):
    """CFGGuider variant with a per-latent-type CFG table.

    Conds are stored under keys like 'xt_positive' / 'yt_negative'; at
    prediction time the active latent type (read from transformer_options)
    selects which cond pair and CFG value to use, falling back to 'xt'.
    """

    def __init__(self, model_patcher):
        super().__init__(model_patcher)
        self.cfgs = {}

    def set_conds(self, **kwargs):
        # Forward keyword conds ('xt_positive', 'yt_negative', ...) unchanged.
        self.inner_set_conds(kwargs)

    def set_cfgs(self, **kwargs):
        # Store the CFG table; the 'xt' entry (if present) becomes the base cfg.
        self.cfgs = dict(kwargs)
        self.cfg = self.cfgs.get('xt', self.cfg)

    def predict_noise(self, x, timestep, model_options={}, seed=None):
        t_opts = model_options['transformer_options']
        latent_type = t_opts.get('latent_type', 'xt')

        # Pick conds for the active latent type; fall back to 'xt' when the
        # typed key is missing or holds None.
        positive = self.conds.get(f'{latent_type}_positive')
        negative = self.conds.get(f'{latent_type}_negative')
        if positive is None:
            positive = self.conds.get('xt_positive')
        if negative is None:
            negative = self.conds.get('xt_negative')

        cfg = self.cfgs.get(latent_type, self.cfg)

        # Expose the flow ('yt') conds to the model via transformer_options.
        t_opts['yt_positive'] = self.conds.get('yt_positive')
        t_opts['yt_negative'] = self.conds.get('yt_negative')

        return sampling_function(self.inner_model, x, timestep, negative, positive, cfg,
                                 model_options=model_options, seed=seed)
class SharkSampler:
@classmethod
def INPUT_TYPES(cls):
return {
"required": {
"noise_type_init": (NOISE_GENERATOR_NAMES_SIMPLE, {"default": "gaussian"}),
"noise_stdev": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step":0.01, "round": False, }),
"noise_seed": ("INT", {"default": 0, "min": -1, "max": 0xffffffffffffffff}),
"sampler_mode": (['unsample', 'standard', 'resample'], {"default": "standard"}),
"scheduler": (get_res4lyf_scheduler_list(), {"default": "beta57"},),
"steps": ("INT", {"default": 30, "min": 1, "max": 10000.0}),
"denoise": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step":0.01}),
"denoise_alt": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step":0.01}),
"cfg": ("FLOAT", {"default": 5.5, "min": -10000.0, "max": 10000.0, "step":0.01, "round": False, "tooltip": "Negative values use channelwise CFG." }),
},
"optional": {
"model": ("MODEL",),
"positive": ("CONDITIONING", ),
"negative": ("CONDITIONING", ),
"sampler": ("SAMPLER", ),
"sigmas": ("SIGMAS", ),
"latent_image": ("LATENT", ),
"extra_options": ("STRING", {"default": "", "multiline": True}),
"options": ("OPTIONS", ),
}
}
RETURN_TYPES = ("LATENT",
"LATENT",
"LATENT",)
RETURN_NAMES = ("output",
"denoised",
"sde_noise",)
FUNCTION = "main"
CATEGORY = "RES4LYF/samplers"
EXPERIMENTAL = True
def main(self,
model = None,
cfg : float = 5.5,
scheduler : str = "beta57",
steps : int = 30,
steps_to_run : int = -1,
sampler_mode : str = "standard",
denoise : float = 1.0,
denoise_alt : float = 1.0,
noise_type_init : str = "gaussian",
latent_image : Optional[dict[Tensor]] = None,
positive = None,
negative = None,
sampler = None,
sigmas : Optional[Tensor] = None,
noise_stdev : float = 1.0,
noise_mean : float = 0.0,
noise_normalize : bool = True,
d_noise : float = 1.0,
alpha_init : float = -1.0,
k_init : float = 1.0,
cfgpp : float = 0.0,
noise_seed : int = -1,
options = None,
sde_noise = None,
sde_noise_steps : int = 1,
rebounds : int = 0,
unsample_cfg : float = 1.0,
unsample_eta : float = 0.5,
unsampler_name : str = "none",
unsample_steps_to_run : int = -1,
eta_decay_scale : float = 1.0,
#ultracascade_stage : str = "stage_UP",
ultracascade_latent_image : Optional[dict[str,Any]] = None,
ultracascade_guide_weights: Optional[Tuple] = None,
ultracascade_latent_width : int = 0,
ultracascade_latent_height: int = 0,
extra_options : str = "",
**kwargs,
):
disable_pbar = not comfy.utils.PROGRESS_BAR_ENABLED
# INIT EXTENDABLE OPTIONS INPUTS
options_mgr = OptionsManager(options, **kwargs)
extra_options += "\n" + options_mgr.get('extra_options', "")
EO = ExtraOptions(extra_options)
default_dtype = EO("default_dtype", torch.float64)
default_device = EO("work_device", "cuda" if torch.cuda.is_available() else "cpu")
noise_stdev = options_mgr.get('noise_init_stdev', noise_stdev)
noise_mean = options_mgr.get('noise_init_mean', noise_mean)
noise_type_init = options_mgr.get('noise_type_init', noise_type_init)
d_noise = options_mgr.get('d_noise', d_noise)
alpha_init = options_mgr.get('alpha_init', alpha_init)
k_init = options_mgr.get('k_init', k_init)
sde_noise = options_mgr.get('sde_noise', sde_noise)
sde_noise_steps = options_mgr.get('sde_noise_steps', sde_noise_steps)
rebounds = options_mgr.get('rebounds', rebounds)
unsample_cfg = options_mgr.get('unsample_cfg', unsample_cfg)
unsample_eta = options_mgr.get('unsample_eta', unsample_eta)
unsampler_name = options_mgr.get('unsampler_name', unsampler_name)
unsample_steps_to_run = options_mgr.get('unsample_steps_to_run', unsample_steps_to_run)
eta_decay_scale = options_mgr.get('eta_decay_scale', eta_decay_scale)
start_at_step = options_mgr.get('start_at_step', -1)
tile_sizes = options_mgr.get('tile_sizes', None)
flow_sync_eps = options_mgr.get('flow_sync_eps', 0.0)
unsampler_name, _ = process_sampler_name(unsampler_name)
#ultracascade_stage = options_mgr.get('ultracascade_stage', ultracascade_stage)
ultracascade_latent_image = options_mgr.get('ultracascade_latent_image', ultracascade_latent_image)
ultracascade_latent_width = options_mgr.get('ultracascade_latent_width', ultracascade_latent_width)
ultracascade_latent_height = options_mgr.get('ultracascade_latent_height', ultracascade_latent_height)
if 'BONGMATH' in sampler.extra_options:
sampler.extra_options['start_at_step'] = start_at_step
sampler.extra_options['tile_sizes'] = tile_sizes
sampler.extra_options['unsample_bongmath'] = options_mgr.get('unsample_bongmath', sampler.extra_options['BONGMATH']) # allow turning off bongmath for unsampling with cycles
sampler.extra_options['flow_sync_eps'] = flow_sync_eps
is_chained = False
if latent_image is not None:
if 'positive' in latent_image and positive is None:
positive = copy_cond(latent_image['positive'])
if positive is not None and 'control' in positive[0][1]:
for i in range(len(positive)):
positive[i][1]['control'] = latent_image['positive'][i][1]['control']
if hasattr(latent_image['positive'][i][1]['control'], 'base'):
positive[i][1]['control'].base = latent_image['positive'][i][1]['control'].base
is_chained = True
if 'negative' in latent_image and negative is None:
negative = copy_cond(latent_image['negative'])
if negative is not None and 'control' in negative[0][1]:
for i in range(len(negative)):
negative[i][1]['control'] = latent_image['negative'][i][1]['control']
if hasattr(latent_image['negative'][i][1]['control'], 'base'):
negative[i][1]['control'].base = latent_image['negative'][i][1]['control'].base
is_chained = True
if 'sampler' in latent_image and sampler is None:
sampler = copy_cond(latent_image['sampler']) #.clone()
is_chained = True
if 'steps_to_run' in sampler.extra_options:
sampler.extra_options['steps_to_run'] = steps_to_run
guider_input = options_mgr.get('guider', None)
if guider_input is not None and is_chained is False:
guider = guider_input
work_model = guider.model_patcher
RESplain("Shark: Using model from ClownOptions_GuiderInput: ", guider.model_patcher.model.diffusion_model.__class__.__name__)
RESplain("SharkWarning: \"flow\" guide mode does not work with ClownOptions_GuiderInput")
if hasattr(guider, 'cfg') and guider.cfg is not None:
cfg = guider.cfg
RESplain("Shark: Using cfg from ClownOptions_GuiderInput: ", cfg)
if hasattr(guider, 'original_conds') and guider.original_conds is not None:
if 'positive' in guider.original_conds:
first_ = guider.original_conds['positive'][0]['cross_attn']
second_ = {k: v for k, v in guider.original_conds['positive'][0].items() if k != 'cross_attn'}
positive = [[first_, second_],]
RESplain("Shark: Using positive cond from ClownOptions_GuiderInput")
if 'negative' in guider.original_conds:
first_ = guider.original_conds['negative'][0]['cross_attn']
second_ = {k: v for k, v in guider.original_conds['negative'][0].items() if k != 'cross_attn'}
negative = [[first_, second_],]
RESplain("Shark: Using negative cond from ClownOptions_GuiderInput")
else:
guider = None
work_model = model#.clone()
if latent_image is not None:
latent_image['samples'] = comfy.sample.fix_empty_latent_channels(work_model, latent_image['samples'])
if positive is None or negative is None:
from ..conditioning import EmptyConditioningGenerator
EmptyCondGen = EmptyConditioningGenerator(work_model)
positive, negative = EmptyCondGen.zero_none_conditionings_([positive, negative])
if cfg < 0:
sampler.extra_options['cfg_cw'] = -cfg
cfg = 1.0
else:
sampler.extra_options.pop("cfg_cw", None)
is_nested_input = latent_image is not None and 'samples' in latent_image and isinstance(latent_image['samples'], comfy.nested_tensor.NestedTensor)
if not EO("disable_dummy_sampler_init") and not is_nested_input:
sampler_null = comfy.samplers.ksampler("rk_beta",
{
"sampler_mode": "NULL",
})
if latent_image is not None and 'samples' in latent_image:
latent_vram_factor = EO("latent_vram_factor", 3)
x_null = torch.zeros_like(latent_image['samples']).repeat_interleave(latent_vram_factor, dim=-1)
elif ultracascade_latent_height * ultracascade_latent_width > 0:
x_null = comfy.sample.fix_empty_latent_channels(model, torch.zeros((1,16,ultracascade_latent_height,ultracascade_latent_width)))
else:
print("Fallback: spawning dummy 1,16,256,256 latent.")
x_null = comfy.sample.fix_empty_latent_channels(model, torch.zeros((1,16,256,256)))
_ = comfy.sample.sample_custom(work_model, x_null, cfg, sampler_null, torch.linspace(1, 0, 10).to(x_null.dtype).to(x_null.device), negative, negative, x_null, noise_mask=None, callback=None, disable_pbar=disable_pbar, seed=noise_seed)
sigma_min = work_model.get_model_object('model_sampling').sigma_min
sigma_max = work_model.get_model_object('model_sampling').sigma_max
if sampler is None:
raise ValueError("sampler is required")
else:
sampler = copy.deepcopy(sampler)
# INIT SIGMAS
if sigmas is not None:
sigmas = sigmas.clone().to(dtype=default_dtype, device=default_device) # does this type carry into clown after passing through comfy?
sigmas *= denoise # ... otherwise we have to interpolate and that might not be ideal for tiny custom schedules...
else:
sigmas = get_sigmas(work_model, scheduler, steps, abs(denoise)).to(dtype=default_dtype, device=default_device)
sigmas *= denoise_alt
# USE NULL FLOATS AS "FLAGS" TO PREVENT COMFY NOISE ADDITION
if sampler_mode.startswith("unsample"):
null = torch.tensor([0.0], device=sigmas.device, dtype=sigmas.dtype)
sigmas = torch.flip(sigmas, dims=[0])
sigmas = torch.cat([sigmas, null])
elif sampler_mode.startswith("resample"):
null = torch.tensor([0.0], device=sigmas.device, dtype=sigmas.dtype)
sigmas = torch.cat([null, sigmas])
sigmas = torch.cat([sigmas, null])
latent_x = {}
# INIT STATE INFO FOR CONTINUING GENERATION ACROSS MULTIPLE SAMPLER NODES
if latent_image is not None:
samples = latent_image['samples']
latent_x['samples'] = samples._copy() if isinstance(samples, comfy.nested_tensor.NestedTensor) else samples.clone()
if 'noise_mask' in latent_image:
noise_mask = latent_image['noise_mask']
latent_x['noise_mask'] = noise_mask._copy() if isinstance(noise_mask, comfy.nested_tensor.NestedTensor) else noise_mask.clone()
state_info = copy.deepcopy(latent_image['state_info']) if 'state_info' in latent_image else {}
else:
state_info = {}
state_info_out = {}
# SETUP CONDITIONING EMBEDS
pos_cond = copy_cond(positive)
neg_cond = copy_cond(negative)
# SETUP FOR ULTRACASCADE IF DETECTED
if work_model.model.model_config.unet_config.get('stable_cascade_stage') == 'up':
ultracascade_guide_weight = EO("ultracascade_guide_weight", 0.0)
ultracascade_guide_type = EO("ultracascade_guide_type", "residual")
x_lr = None
if ultracascade_latent_height * ultracascade_latent_width > 0:
x_lr = latent_image['samples'].clone() if latent_image is not None else None
x_lr_bs = 1 if x_lr is None else x_lr.shape[-4]
x_lr_dtype = default_dtype if x_lr is None else x_lr.dtype
x_lr_device = 'cuda' if x_lr is None else x_lr.device
ultracascade_stage_up_upscale_align_corners = EO("ultracascade_stage_up_upscale_align_corners", False)
ultracascade_stage_up_upscale_mode = EO("ultracascade_stage_up_upscale_mode", "bicubic")
latent_x['samples'] = torch.zeros([x_lr_bs, 16, ultracascade_latent_height, ultracascade_latent_width], dtype=x_lr_dtype, device=x_lr_device)
data_prev_ = state_info.get('data_prev_')
if EO("ultracascade_stage_up_preserve_data_prev") and data_prev_ is not None:
data_prev_ = data_prev_.squeeze(1)
if data_prev_.dim() == 4:
data_prev_ = F.interpolate(
data_prev_,
size=latent_x['samples'].shape[-2:],
mode=ultracascade_stage_up_upscale_mode,
align_corners=ultracascade_stage_up_upscale_align_corners
)
else:
print("data_prev_ upscale failed.")
state_info['data_prev_'] = data_prev_.unsqueeze(1)
else:
state_info['data_prev_'] = data_prev_ #None # = None was leading to errors even with sampler_mode=standard due to below with = state_info['data_prev_'][batch_num]
if x_lr is not None:
if x_lr.shape[-2:] != latent_image['samples'].shape[-2:]:
x_height, x_width = latent_image['samples'].shape[-2:]
ultracascade_stage_up_upscale_align_corners = EO("ultracascade_stage_up_upscale_align_corners", False)
ultracascade_stage_up_upscale_mode = EO("ultracascade_stage_up_upscale_mode", "bicubic")
x_lr = F.interpolate(x_lr, size=(x_height, x_width), mode=ultracascade_stage_up_upscale_mode, align_corners=ultracascade_stage_up_upscale_align_corners)
ultracascade_guide_weights = initialize_or_scale(ultracascade_guide_weights, ultracascade_guide_weight, MAX_STEPS)
patch = work_model.model_options.get("transformer_options", {}).get("patches_replace", {}).get("ultracascade", {}).get("main")
if patch is not None:
patch.update(x_lr=x_lr, guide_weights=ultracascade_guide_weights, guide_type=ultracascade_guide_type)
else:
work_model.model.diffusion_model.set_sigmas_schedule(sigmas_schedule = sigmas)
work_model.model.diffusion_model.set_sigmas_prev (sigmas_prev = sigmas[:1])
work_model.model.diffusion_model.set_guide_weights (guide_weights = ultracascade_guide_weights)
work_model.model.diffusion_model.set_guide_type (guide_type = ultracascade_guide_type)
work_model.model.diffusion_model.set_x_lr (x_lr = x_lr)
elif work_model.model.model_config.unet_config.get('stable_cascade_stage') == 'b':
#if sampler_mode != "resample":
# state_info['data_prev_'] = None #commented out as it was throwing an error below with = state_info['data_prev_'][batch_num]
c_pos, c_neg = [], []
for t in pos_cond:
d_pos = t[1].copy()
d_neg = t[1].copy()
x_lr = None
if ultracascade_latent_height * ultracascade_latent_width > 0:
x_lr = latent_image['samples'].clone()
latent_x['samples'] = torch.zeros([x_lr.shape[-4], 4, ultracascade_latent_height // 4, ultracascade_latent_width // 4], dtype=x_lr.dtype, device=x_lr.device)
d_pos['stable_cascade_prior'] = x_lr
pooled_output = d_neg.get("pooled_output", None)
if pooled_output is not None:
d_neg["pooled_output"] = torch.zeros_like(pooled_output)
c_pos.append( [t[0], d_pos])
c_neg.append([torch.zeros_like(t[0]), d_neg])
pos_cond = c_pos
neg_cond = c_neg
elif ultracascade_latent_height * ultracascade_latent_width > 0:
latent_x['samples'] = torch.zeros([1, 16, ultracascade_latent_height, ultracascade_latent_width], dtype=default_dtype, device=sigmas.device)
# NOISE, ORTHOGONALIZE, OR ZERO EMBEDS
if pos_cond is None or neg_cond is None:
from ..conditioning import EmptyConditioningGenerator
EmptyCondGen = EmptyConditioningGenerator(work_model)
pos_cond, neg_cond = EmptyCondGen.zero_none_conditionings_([pos_cond, neg_cond])
if EO(("cond_noise", "uncond_noise")):
if noise_seed == -1:
cond_seed = torch.initial_seed() + 1
else:
cond_seed = noise_seed
t5_seed = EO("t5_seed" , cond_seed)
clip_seed = EO("clip_seed" , cond_seed+1)
t5_noise_type = EO("t5_noise_type" , "gaussian")
clip_noise_type = EO("clip_noise_type" , "gaussian")
t5_noise_sigma_max = EO("t5_noise_sigma_max" , "gaussian")
t5_noise_sigma_min = EO("t5_noise_sigma_min" , "gaussian")
clip_noise_sigma_max = EO("clip_noise_sigma_max", "gaussian")
clip_noise_sigma_min = EO("clip_noise_sigma_min", "gaussian")
noise_sampler_t5 = NOISE_GENERATOR_CLASSES_SIMPLE.get( t5_noise_type)(x=pos_cond[0][0], seed= t5_seed, sigma_max= t5_noise_sigma_max, sigma_min= t5_noise_sigma_min, )
noise_sampler_clip = NOISE_GENERATOR_CLASSES_SIMPLE.get(clip_noise_type)(x=pos_cond[0][1]['pooled_output'], seed=clip_seed, sigma_max=clip_noise_sigma_max, sigma_min=clip_noise_sigma_min, )
t5_noise_scale = EO("t5_noise_scale", 1.0)
clip_noise_scale = EO("clip_noise_scale", 1.0)
if EO("cond_noise"):
t5_noise = noise_sampler_t5 (sigma= t5_noise_sigma_max, sigma_next= t5_noise_sigma_min)
clip_noise = noise_sampler_clip(sigma=clip_noise_sigma_max, sigma_next=clip_noise_sigma_min)
pos_cond[0][0] = pos_cond[0][0] + t5_noise_scale * (t5_noise - pos_cond[0][0])
pos_cond[0][1]['pooled_output'] = pos_cond[0][1]['pooled_output'] + clip_noise_scale * (clip_noise - pos_cond[0][1]['pooled_output'])
if EO("uncond_noise"):
t5_noise = noise_sampler_t5 (sigma= t5_noise_sigma_max, sigma_next= t5_noise_sigma_min)
clip_noise = noise_sampler_clip(sigma=clip_noise_sigma_max, sigma_next=clip_noise_sigma_min)
neg_cond[0][0] = neg_cond[0][0] + t5_noise_scale * (t5_noise - neg_cond[0][0])
neg_cond[0][1]['pooled_output'] = neg_cond[0][1]['pooled_output'] + clip_noise_scale * (clip_noise - neg_cond[0][1]['pooled_output'])
if EO("uncond_ortho"):
neg_cond[0][0] = get_orthogonal(neg_cond[0][0], pos_cond[0][0])
neg_cond[0][1]['pooled_output'] = get_orthogonal(neg_cond[0][1]['pooled_output'], pos_cond[0][1]['pooled_output'])
if "noise_seed" in sampler.extra_options:
if sampler.extra_options['noise_seed'] == -1 and noise_seed != -1:
sampler.extra_options['noise_seed'] = noise_seed + 1
RESplain("Shark: setting clown noise seed to: ", sampler.extra_options['noise_seed'], debug=True)
if "sampler_mode" in sampler.extra_options:
sampler.extra_options['sampler_mode'] = sampler_mode
if "extra_options" in sampler.extra_options:
extra_options += "\n"
extra_options += sampler.extra_options['extra_options']
sampler.extra_options['extra_options'] = extra_options
samples = latent_x['samples']
latent_image_batch = {"samples": samples._copy() if isinstance(samples, comfy.nested_tensor.NestedTensor) else samples.clone()}
if 'noise_mask' in latent_x and latent_x['noise_mask'] is not None:
noise_mask = latent_x['noise_mask']
latent_image_batch['noise_mask'] = noise_mask._copy() if isinstance(noise_mask, comfy.nested_tensor.NestedTensor) else noise_mask.clone()
if EO("no_batch_loop"):
x = latent_image_batch['samples'].to(default_dtype)
if isinstance(x, comfy.nested_tensor.NestedTensor):
noise = comfy.nested_tensor.NestedTensor([
generate_init_noise(
x=t.clone(), seed=noise_seed + idx,
noise_type_init=noise_type_init, noise_stdev=noise_stdev,
noise_mean=noise_mean, noise_normalize=noise_normalize,
sigma_max=sigma_max, sigma_min=sigma_min,
alpha_init=alpha_init, k_init=k_init, EO=EO
)
for idx, t in enumerate(x.unbind())
])
else:
noise = generate_init_noise(
x=x.clone(), seed=noise_seed,
noise_type_init=noise_type_init, noise_stdev=noise_stdev,
noise_mean=noise_mean, noise_normalize=noise_normalize,
sigma_max=sigma_max, sigma_min=sigma_min,
alpha_init=alpha_init, k_init=k_init, EO=EO
)
if guider is None:
guider = SharkGuider(work_model)
flow_cond = options_mgr.get('flow_cond', {})
if flow_cond and 'yt_positive' in flow_cond:
if 'yt_inv_positive' not in flow_cond:
guider.set_conds(yt_positive=flow_cond.get('yt_positive'),
yt_negative=flow_cond.get('yt_negative'))
guider.set_cfgs(yt=flow_cond.get('yt_cfg'), xt=cfg)
else:
guider.set_conds(yt_positive=flow_cond.get('yt_positive'),
yt_negative=flow_cond.get('yt_negative'),
yt_inv_positive=flow_cond.get('yt_inv_positive'),
yt_inv_negative=flow_cond.get('yt_inv_negative'))
guider.set_cfgs(yt=flow_cond.get('yt_cfg'),
yt_inv=flow_cond.get('yt_inv_cfg'), xt=cfg)
else:
guider.set_cfgs(xt=cfg)
guider.set_conds(xt_positive=pos_cond, xt_negative=neg_cond)
elif type(guider) == SharkGuider:
guider.set_cfgs(xt=cfg)
guider.set_conds(xt_positive=pos_cond, xt_negative=neg_cond)
else:
try:
guider.set_cfg(cfg)
guider.set_conds(pos_cond, neg_cond)
except:
pass
if latent_image is not None and 'state_info' in latent_image and 'sigmas' in latent_image['state_info']:
steps_len = max(sigmas.shape[-1] - 1, latent_image['state_info']['sigmas'].shape[-1] - 1)
else:
steps_len = sigmas.shape[-1] - 1
x0_output = {}
try:
callback = latent_preview.prepare_callback(work_model, steps_len, x0_output,
shape=x.shape if hasattr(x, 'is_nested') and x.is_nested else None)
except TypeError:
callback = latent_preview.prepare_callback(work_model, steps_len, x0_output)
noise_mask = latent_image_batch.get("noise_mask", None)
if noise_mask is not None:
stored_image = state_info.get('image_initial')
x_initial = stored_image if stored_image is not None else x
stored_noise = state_info.get('noise_initial')
noise_initial = stored_noise if stored_noise is not None else noise
else:
x_initial = x
noise_initial = noise
state_info_out = {}
if 'BONGMATH' in sampler.extra_options:
sampler.extra_options['state_info'] = state_info
sampler.extra_options['state_info_out'] = state_info_out
sampler.extra_options['image_initial'] = x_initial
sampler.extra_options['noise_initial'] = noise_initial
if rebounds > 0:
cfgs_cached = guider.cfgs
steps_to_run_cached = sampler.extra_options['steps_to_run']
eta_cached = sampler.extra_options['eta']
eta_substep_cached = sampler.extra_options['eta_substep']
etas_cached = sampler.extra_options['etas'].clone()
etas_substep_cached = sampler.extra_options['etas_substep'].clone()
unsample_etas = torch.full_like(etas_cached, unsample_eta)
rk_type_cached = sampler.extra_options['rk_type']
if sampler.extra_options['sampler_mode'] == "unsample":
guider.cfgs = {
'xt': unsample_cfg,
'yt': unsample_cfg,
}
if unsample_eta != -1.0:
sampler.extra_options['eta_substep'] = unsample_eta
sampler.extra_options['eta'] = unsample_eta
sampler.extra_options['etas_substep'] = unsample_etas
sampler.extra_options['etas'] = unsample_etas
if unsampler_name != "none":
sampler.extra_options['rk_type'] = unsampler_name
if unsample_steps_to_run > -1:
sampler.extra_options['steps_to_run'] = unsample_steps_to_run
else:
guider.cfgs = cfgs_cached
guider.cfgs = cfgs_cached
sampler.extra_options['steps_to_run'] = steps_to_run_cached
eta_decay = eta_cached
eta_substep_decay = eta_substep_cached
unsample_eta_decay = unsample_eta
etas_decay = etas_cached
etas_substep_decay = etas_substep_cached
unsample_etas_decay = unsample_etas
if isinstance(x, comfy.nested_tensor.NestedTensor):
samples = guider.sample(noise, x._copy(), sampler, sigmas, denoise_mask=noise_mask, callback=callback, disable_pbar=disable_pbar, seed=noise_seed)
else:
samples = guider.sample(noise, x.clone(), sampler, sigmas, denoise_mask=noise_mask, callback=callback, disable_pbar=disable_pbar, seed=noise_seed)
if rebounds > 0:
noise_seed_cached = sampler.extra_options['noise_seed']
cfgs_cached = guider.cfgs
sampler_mode_cached = sampler.extra_options['sampler_mode']
for restarts_iter in range(rebounds):
sampler.extra_options['state_info'] = sampler.extra_options['state_info_out']
sigmas = sampler.extra_options['state_info_out']['sigmas'] if sigmas is None else sigmas
if sampler.extra_options['sampler_mode'] == "standard":
sampler.extra_options['sampler_mode'] = "unsample"
elif sampler.extra_options['sampler_mode'] == "unsample":
sampler.extra_options['sampler_mode'] = "resample"
elif sampler.extra_options['sampler_mode'] == "resample":
sampler.extra_options['sampler_mode'] = "unsample"
sampler.extra_options['noise_seed'] = -1
if sampler.extra_options['sampler_mode'] == "unsample":
guider.cfgs = {
'xt': unsample_cfg,
'yt': unsample_cfg,
}
if unsample_eta != -1.0:
sampler.extra_options['eta_substep'] = unsample_eta_decay
sampler.extra_options['eta'] = unsample_eta_decay
sampler.extra_options['etas_substep'] = unsample_etas
sampler.extra_options['etas'] = unsample_etas
else:
sampler.extra_options['eta_substep'] = eta_substep_decay
sampler.extra_options['eta'] = eta_decay
sampler.extra_options['etas_substep'] = etas_substep_decay
sampler.extra_options['etas'] = etas_decay
if unsampler_name != "none":
sampler.extra_options['rk_type'] = unsampler_name
if unsample_steps_to_run > -1:
sampler.extra_options['steps_to_run'] = unsample_steps_to_run
else:
guider.cfgs = cfgs_cached
sampler.extra_options['eta_substep'] = eta_substep_decay
sampler.extra_options['eta'] = eta_decay
sampler.extra_options['etas_substep'] = etas_substep_decay
sampler.extra_options['etas'] = etas_decay
sampler.extra_options['rk_type'] = rk_type_cached
sampler.extra_options['steps_to_run'] = steps_to_run_cached
samples = guider.sample(noise, samples.clone(), sampler, sigmas, denoise_mask=noise_mask, callback=callback, disable_pbar=disable_pbar, seed=-1)
eta_substep_decay *= eta_decay_scale
eta_decay *= eta_decay_scale
unsample_eta_decay *= eta_decay_scale
etas_substep_decay *= eta_decay_scale
etas_decay *= eta_decay_scale
unsample_etas_decay *= eta_decay_scale
sampler.extra_options['noise_seed'] = noise_seed_cached
guider.cfgs = cfgs_cached
sampler.extra_options['sampler_mode'] = sampler_mode_cached
sampler.extra_options['eta_substep'] = eta_substep_cached
sampler.extra_options['eta'] = eta_cached
sampler.extra_options['etas_substep'] = etas_substep_cached
sampler.extra_options['etas'] = etas_cached
if noise_mask is not None:
if hasattr(samples, 'is_nested') and samples.is_nested:
blended = []
x_initial_list = x_initial.unbind() if hasattr(x_initial, 'is_nested') and x_initial.is_nested else [x_initial]
if hasattr(noise_mask, 'is_nested') and noise_mask.is_nested:
mask_list = noise_mask.unbind()
else:
mask_list = [noise_mask]
for idx, s in enumerate(samples.unbind()):
xi = x_initial_list[idx] if idx < len(x_initial_list) else x_initial_list[0]
m = mask_list[idx] if idx < len(mask_list) else mask_list[0]
if s.ndim == m.ndim:
reshaped_mask = comfy.utils.reshape_mask(m, s.shape).to(s.device)
blended.append(s * reshaped_mask + xi.to(s.device) * (1.0 - reshaped_mask))
else:
blended.append(s)
samples = comfy.nested_tensor.NestedTensor(blended)
else:
if hasattr(noise_mask, 'is_nested') and noise_mask.is_nested:
noise_mask = noise_mask.unbind()[0]
reshaped_mask = comfy.utils.reshape_mask(noise_mask, samples.shape).to(samples.device)
samples = samples * reshaped_mask + x_initial.to(samples.device) * (1.0 - reshaped_mask)
samples = samples.to(comfy.model_management.intermediate_device())
out = latent_x.copy()
out["samples"] = samples
if "x0" in x0_output:
x0_out = work_model.model.process_latent_out(x0_output["x0"].cpu())
if hasattr(samples, 'is_nested') and samples.is_nested:
latent_shapes = [t.shape for t in samples.unbind()]
x0_out = comfy.nested_tensor.NestedTensor(
comfy.utils.unpack_latents(x0_out, latent_shapes)
)
out_denoised = latent_x.copy()
out_denoised["samples"] = x0_out
else:
out_denoised = out
out['positive'] = positive
out['negative'] = negative
out['model'] = work_model
out['sampler'] = sampler
if noise_mask is not None:
state_info_out['image_initial'] = x_initial
state_info_out['noise_initial'] = noise_initial
out['state_info'] = state_info_out
return (out, out_denoised, None)
out_samples = []
out_denoised_samples = []
out_state_info = []
for batch_num in range(latent_image_batch['samples'].shape[0]):
latent_unbatch = copy.deepcopy(latent_x)
if isinstance(latent_image_batch['samples'][batch_num], comfy.nested_tensor.NestedTensor):
latent_unbatch['samples'] = latent_image_batch['samples'][batch_num]._copy()
else:
latent_unbatch['samples'] = latent_image_batch['samples'][batch_num].clone().unsqueeze(0)
if 'BONGMATH' in sampler.extra_options:
sampler.extra_options['batch_num'] = batch_num
if noise_seed == -1 and sampler_mode in {"unsample", "resample"}:
if latent_image.get('state_info', {}).get('last_rng', None) is not None:
seed = torch.initial_seed() + batch_num
else:
seed = torch.initial_seed() + 1 + batch_num
else:
if EO("lock_batch_seed"):
seed = noise_seed
else:
seed = noise_seed + batch_num
torch .manual_seed(seed)
torch.cuda.manual_seed(seed)
if hasattr(latent_unbatch["samples"], 'is_nested') and latent_unbatch["samples"].is_nested:
x = latent_unbatch["samples"]._copy().to(default_dtype)
else:
x = latent_unbatch["samples"].clone().to(default_dtype) # does this type carry into clown after passing through comfy?
if sde_noise is None and sampler_mode.startswith("unsample"):
sde_noise = []
else:
sde_noise_steps = 1
for total_steps_iter in range (sde_noise_steps):
if noise_type_init != "none" and noise_stdev != 0.0:
RESplain("Initial latent noise seed: ", seed, debug=True)
noise = generate_init_noise(
x=x, seed=seed,
noise_type_init=noise_type_init, noise_stdev=noise_stdev,
noise_mean=noise_mean, noise_normalize=noise_normalize,
sigma_max=sigma_max, sigma_min=sigma_min,
alpha_init=alpha_init, k_init=k_init, EO=EO
)
noise_mask = latent_unbatch["noise_mask"] if "noise_mask" in latent_unbatch else None
x_input = x
if noise_mask is not None and 'noise_initial' in state_info:
stored_noise = state_info.get('noise_initial')
if stored_noise is not None:
if stored_noise.dim() > 3 and stored_noise.shape[0] > batch_num:
stored_noise = stored_noise[batch_num]
if stored_noise.shape == noise.shape:
noise = stored_noise.to(noise.device, dtype=noise.dtype)
RESplain("Using stored noise_initial from previous sampler", debug=True)
stored_image = state_info.get('image_initial')
if stored_image is not None:
if stored_image.dim() > 3 and stored_image.shape[0] > batch_num:
stored_image = stored_image[batch_num]
if stored_image.shape == x.shape:
x_input = stored_image.to(x.device, dtype=x.dtype)
RESplain("Using stored image_initial from previous sampler", debug=True)
if 'BONGMATH' in sampler.extra_options:
sampler.extra_options['noise_initial'] = noise
sampler.extra_options['image_initial'] = x_input
x0_output = {}
if latent_image is not None and 'state_info' in latent_image and 'sigmas' in latent_image['state_info']:
steps_len = max(sigmas.shape[-1] - 1, latent_image['state_info']['sigmas'].shape[-1]-1)
else:
steps_len = sigmas.shape[-1]-1
callback = latent_preview.prepare_callback(work_model, steps_len, x0_output)
if 'BONGMATH' in sampler.extra_options: # verify the sampler is rk_sampler_beta()
sampler.extra_options['state_info'] = copy.deepcopy(state_info) ##############################
if state_info != {} and state_info != {'data_prev_': None}: #second condition is for ultracascade
sampler.extra_options['state_info']['raw_x'] = state_info['raw_x'] [batch_num]
sampler.extra_options['state_info']['data_prev_'] = state_info['data_prev_'] [batch_num]
sampler.extra_options['state_info']['last_rng'] = state_info['last_rng'] [batch_num]
sampler.extra_options['state_info']['last_rng_substep'] = state_info['last_rng_substep'][batch_num]
if 'image_initial' in state_info and state_info['image_initial'].dim() > 3:
sampler.extra_options['state_info']['image_initial'] = state_info['image_initial'][batch_num]
if 'noise_initial' in state_info and state_info['noise_initial'].dim() > 3:
sampler.extra_options['state_info']['noise_initial'] = state_info['noise_initial'][batch_num]
#state_info = copy.deepcopy(latent_image['state_info']) if 'state_info' in latent_image else {}
state_info_out = {}
sampler.extra_options['state_info_out'] = state_info_out
if type(pos_cond[0][0]) == list:
pos_cond_tmp = pos_cond[batch_num]
positive_tmp = positive[batch_num]
else:
pos_cond_tmp = pos_cond
positive_tmp = positive
for i in range(len(neg_cond)): # crude fix for copy.deepcopy converting superclass into real object
if 'control' in neg_cond[i][1]:
neg_cond[i][1]['control'] = negative[i][1]['control']
if hasattr(negative[i][1]['control'], 'base'):
neg_cond[i][1]['control'].base = negative[i][1]['control'].base
for i in range(len(pos_cond_tmp)): # crude fix for copy.deepcopy converting superclass into real object
if 'control' in pos_cond_tmp[i][1]:
pos_cond_tmp[i][1]['control'] = positive_tmp[i][1]['control']
if hasattr(positive[i][1]['control'], 'base'):
pos_cond_tmp[i][1]['control'].base = positive_tmp[i][1]['control'].base
# SETUP REGIONAL COND
if pos_cond_tmp[0][1] is not None:
if 'callback_regional' in pos_cond_tmp[0][1]:
pos_cond_tmp = pos_cond_tmp[0][1]['callback_regional'](work_model)
if 'AttnMask' in pos_cond_tmp[0][1]:
sampler.extra_options['AttnMask'] = pos_cond_tmp[0][1]['AttnMask']
sampler.extra_options['RegContext'] = pos_cond_tmp[0][1]['RegContext']
sampler.extra_options['RegParam'] = pos_cond_tmp[0][1]['RegParam']
if isinstance(model.model.model_config, (comfy.supported_models.SDXL, comfy.supported_models.SD15)):
latent_up_dummy = F.interpolate(latent_image['samples'].to(torch.float16), size=(latent_image['samples'].shape[-2] * 2, latent_image['samples'].shape[-1] * 2), mode="nearest")
sampler.extra_options['AttnMask'].set_latent(latent_up_dummy)
sampler.extra_options['AttnMask'].generate()
sampler.extra_options['AttnMask'].mask_up = sampler.extra_options['AttnMask'].attn_mask.mask
latent_down_dummy = F.interpolate(latent_image['samples'].to(torch.float16), size=(latent_image['samples'].shape[-2] // 2, latent_image['samples'].shape[-1] // 2), mode="nearest")
sampler.extra_options['AttnMask'].set_latent(latent_down_dummy)
sampler.extra_options['AttnMask'].generate()
sampler.extra_options['AttnMask'].mask_down = sampler.extra_options['AttnMask'].attn_mask.mask
if isinstance(model.model.model_config, comfy.supported_models.SD15):
latent_down_dummy = F.interpolate(latent_image['samples'].to(torch.float16), size=(latent_image['samples'].shape[-2] // 4, latent_image['samples'].shape[-1] // 4), mode="nearest")
sampler.extra_options['AttnMask'].set_latent(latent_down_dummy)
sampler.extra_options['AttnMask'].generate()
sampler.extra_options['AttnMask'].mask_down2 = sampler.extra_options['AttnMask'].attn_mask.mask
if isinstance(model.model.model_config, (comfy.supported_models.Stable_Cascade_C)):
latent_up_dummy = F.interpolate(latent_image['samples'].to(torch.float16), size=(latent_image['samples'].shape[-2] * 2, latent_image['samples'].shape[-1] * 2), mode="nearest")
sampler.extra_options['AttnMask'].set_latent(latent_up_dummy)
# cascade concats 4 + 4 tokens (clip_text_pooled, clip_img)
sampler.extra_options['AttnMask'].context_lens = [context_len + 8 for context_len in sampler.extra_options['AttnMask'].context_lens]
sampler.extra_options['AttnMask'].text_len = sum(sampler.extra_options['AttnMask'].context_lens)
else:
sampler.extra_options['AttnMask'].set_latent(latent_image['samples'])
sampler.extra_options['AttnMask'].generate()
if neg_cond[0][1] is not None:
if 'callback_regional' in neg_cond[0][1]:
neg_cond = neg_cond[0][1]['callback_regional'](work_model)
if 'AttnMask' in neg_cond[0][1]:
sampler.extra_options['AttnMask_neg'] = neg_cond[0][1]['AttnMask']
sampler.extra_options['RegContext_neg'] = neg_cond[0][1]['RegContext']
sampler.extra_options['RegParam_neg'] = neg_cond[0][1]['RegParam']
if isinstance(model.model.model_config, (comfy.supported_models.SDXL, comfy.supported_models.SD15)):
latent_up_dummy = F.interpolate(latent_image['samples'].to(torch.float16), size=(latent_image['samples'].shape[-2] * 2, latent_image['samples'].shape[-1] * 2), mode="nearest")
sampler.extra_options['AttnMask_neg'].set_latent(latent_up_dummy)
sampler.extra_options['AttnMask_neg'].generate()
sampler.extra_options['AttnMask_neg'].mask_up = sampler.extra_options['AttnMask_neg'].attn_mask.mask
latent_down_dummy = F.interpolate(latent_image['samples'].to(torch.float16), size=(latent_image['samples'].shape[-2] // 2, latent_image['samples'].shape[-1] // 2), mode="nearest")
sampler.extra_options['AttnMask_neg'].set_latent(latent_down_dummy)
sampler.extra_options['AttnMask_neg'].generate()
sampler.extra_options['AttnMask_neg'].mask_down = sampler.extra_options['AttnMask_neg'].attn_mask.mask
if isinstance(model.model.model_config, comfy.supported_models.SD15):
latent_down_dummy = F.interpolate(latent_image['samples'].to(torch.float16), size=(latent_image['samples'].shape[-2] // 4, latent_image['samples'].shape[-1] // 4), mode="nearest")
sampler.extra_options['AttnMask_neg'].set_latent(latent_down_dummy)
sampler.extra_options['AttnMask_neg'].generate()
sampler.extra_options['AttnMask_neg'].mask_down2 = sampler.extra_options['AttnMask_neg'].attn_mask.mask
if isinstance(model.model.model_config, (comfy.supported_models.Stable_Cascade_C)):
latent_up_dummy = F.interpolate(latent_image['samples'].to(torch.float16), size=(latent_image['samples'].shape[-2] * 2, latent_image['samples'].shape[-1] * 2), mode="nearest")
sampler.extra_options['AttnMask'].set_latent(latent_up_dummy)
# cascade concats 4 + 4 tokens (clip_text_pooled, clip_img)
sampler.extra_options['AttnMask'].context_lens = [context_len + 8 for context_len in sampler.extra_options['AttnMask'].context_lens]
sampler.extra_options['AttnMask'].text_len = sum(sampler.extra_options['AttnMask'].context_lens)
else:
sampler.extra_options['AttnMask_neg'].set_latent(latent_image['samples'])
sampler.extra_options['AttnMask_neg'].generate()
if guider is None:
guider = SharkGuider(work_model)
flow_cond = options_mgr.get('flow_cond', {})
if flow_cond != {} and 'yt_positive' in flow_cond and not 'yt_inv_positive' in flow_cond: #and not 'yt_inv;_positive' in flow_cond: # typo???
guider.set_conds(yt_positive=flow_cond.get('yt_positive'), yt_negative=flow_cond.get('yt_negative'),)
guider.set_cfgs(yt=flow_cond.get('yt_cfg'), xt=cfg)
elif flow_cond != {} and 'yt_positive' in flow_cond and 'yt_inv_positive' in flow_cond:
guider.set_conds(yt_positive=flow_cond.get('yt_positive'), yt_negative=flow_cond.get('yt_negative'), yt_inv_positive=flow_cond.get('yt_inv_positive'), yt_inv_negative=flow_cond.get('yt_inv_negative'),)
guider.set_cfgs(yt=flow_cond.get('yt_cfg'), yt_inv=flow_cond.get('yt_inv_cfg'), xt=cfg)
else:
guider.set_cfgs(xt=cfg)
guider.set_conds(xt_positive=pos_cond_tmp, xt_negative=neg_cond)
elif type(guider) == SharkGuider:
guider.set_cfgs(xt=cfg)
guider.set_conds(xt_positive=pos_cond_tmp, xt_negative=neg_cond)
else:
try:
guider.set_cfg(cfg)
except:
RESplain("SharkWarning: guider.set_cfg failed but assuming cfg already set correctly.")
try:
guider.set_conds(pos_cond_tmp, neg_cond)
except:
RESplain("SharkWarning: guider.set_conds failed but assuming conds already set correctly.")
if rebounds > 0:
cfgs_cached = guider.cfgs
steps_to_run_cached = sampler.extra_options['steps_to_run']
eta_cached = sampler.extra_options['eta']
eta_substep_cached = sampler.extra_options['eta_substep']
etas_cached = sampler.extra_options['etas'].clone()
etas_substep_cached = sampler.extra_options['etas_substep'].clone()
unsample_etas = torch.full_like(etas_cached, unsample_eta)
rk_type_cached = sampler.extra_options['rk_type']
if sampler.extra_options['sampler_mode'] == "unsample":
guider.cfgs = {
'xt': unsample_cfg,
'yt': unsample_cfg,
}
if unsample_eta != -1.0:
sampler.extra_options['eta_substep'] = unsample_eta
sampler.extra_options['eta'] = unsample_eta
sampler.extra_options['etas_substep'] = unsample_etas
sampler.extra_options['etas'] = unsample_etas
if unsampler_name != "none":
sampler.extra_options['rk_type'] = unsampler_name
if unsample_steps_to_run > -1:
sampler.extra_options['steps_to_run'] = unsample_steps_to_run
else:
guider.cfgs = cfgs_cached
guider.cfgs = cfgs_cached
sampler.extra_options['steps_to_run'] = steps_to_run_cached
eta_decay = eta_cached
eta_substep_decay = eta_substep_cached
unsample_eta_decay = unsample_eta
etas_decay = etas_cached
etas_substep_decay = etas_substep_cached
unsample_etas_decay = unsample_etas
if isinstance(x_input, comfy.nested_tensor.NestedTensor):
samples = guider.sample(noise, x_input._copy(), sampler, sigmas, denoise_mask=noise_mask, callback=callback, disable_pbar=disable_pbar, seed=noise_seed)
else:
samples = guider.sample(noise, x_input.clone(), sampler, sigmas, denoise_mask=noise_mask, callback=callback, disable_pbar=disable_pbar, seed=noise_seed)
if rebounds > 0:
noise_seed_cached = sampler.extra_options['noise_seed']
cfgs_cached = guider.cfgs
sampler_mode_cached = sampler.extra_options['sampler_mode']
for restarts_iter in range(rebounds):
sampler.extra_options['state_info'] = sampler.extra_options['state_info_out']
#steps = sampler.extra_options['state_info_out']['sigmas'].shape[-1] - 3
sigmas = sampler.extra_options['state_info_out']['sigmas'] if sigmas is None else sigmas
#if len(sigmas) > 2 and sigmas[1] < sigmas[2] and sampler.extra_options['state_info_out']['sampler_mode'] == "unsample": # and sampler_mode == "resample":
# sigmas = torch.flip(sigmas, dims=[0])
if sampler.extra_options['sampler_mode'] == "standard":
sampler.extra_options['sampler_mode'] = "unsample"
elif sampler.extra_options['sampler_mode'] == "unsample":
sampler.extra_options['sampler_mode'] = "resample"
elif sampler.extra_options['sampler_mode'] == "resample":
sampler.extra_options['sampler_mode'] = "unsample"
sampler.extra_options['noise_seed'] = -1
if sampler.extra_options['sampler_mode'] == "unsample":
guider.cfgs = {
'xt': unsample_cfg,
'yt': unsample_cfg,
}
if unsample_eta != -1.0:
sampler.extra_options['eta_substep'] = unsample_eta_decay
sampler.extra_options['eta'] = unsample_eta_decay
sampler.extra_options['etas_substep'] = unsample_etas
sampler.extra_options['etas'] = unsample_etas
else:
sampler.extra_options['eta_substep'] = eta_substep_decay
sampler.extra_options['eta'] = eta_decay
sampler.extra_options['etas_substep'] = etas_substep_decay
sampler.extra_options['etas'] = etas_decay
if unsampler_name != "none":
sampler.extra_options['rk_type'] = unsampler_name
if unsample_steps_to_run > -1:
sampler.extra_options['steps_to_run'] = unsample_steps_to_run
else:
guider.cfgs = cfgs_cached
sampler.extra_options['eta_substep'] = eta_substep_decay
sampler.extra_options['eta'] = eta_decay
sampler.extra_options['etas_substep'] = etas_substep_decay
sampler.extra_options['etas'] = etas_decay
sampler.extra_options['rk_type'] = rk_type_cached
sampler.extra_options['steps_to_run'] = steps_to_run_cached
samples = guider.sample(noise, samples.clone(), sampler, sigmas, denoise_mask=noise_mask, callback=callback, disable_pbar=disable_pbar, seed=-1)
eta_substep_decay *= eta_decay_scale
eta_decay *= eta_decay_scale
unsample_eta_decay *= eta_decay_scale
etas_substep_decay *= eta_decay_scale
etas_decay *= eta_decay_scale
unsample_etas_decay *= eta_decay_scale
sampler.extra_options['noise_seed'] = noise_seed_cached
guider.cfgs = cfgs_cached
sampler.extra_options['sampler_mode'] = sampler_mode_cached
sampler.extra_options['eta_substep'] = eta_substep_cached
sampler.extra_options['eta'] = eta_cached
sampler.extra_options['etas_substep'] = etas_substep_cached
sampler.extra_options['etas'] = etas_cached
sampler.extra_options['rk_type'] = rk_type_cached
sampler.extra_options['steps_to_run'] = steps_to_run_cached # TODO: verify this is carried on
if noise_mask is not None:
if 'BONGMATH' in sampler.extra_options:
batch_state_info = sampler.extra_options.get('state_info', {})
latent_for_mask = batch_state_info.get('image_initial', x)
else:
stored_image = state_info.get('image_initial')
if stored_image is not None and stored_image.dim() > 3:
latent_for_mask = stored_image[batch_num]
elif stored_image is not None:
latent_for_mask = stored_image
else:
latent_for_mask = x
reshaped_mask = comfy.utils.reshape_mask(noise_mask, samples.shape).to(samples.device)
samples = samples * reshaped_mask + latent_for_mask.to(samples.device) * (1.0 - reshaped_mask)
out = latent_unbatch.copy()
out["samples"] = samples
if "x0" in x0_output:
out_denoised = latent_unbatch.copy()
out_denoised["samples"] = work_model.model.process_latent_out(x0_output["x0"].cpu())
else:
out_denoised = out
out_samples .append(out ["samples"])
out_denoised_samples.append(out_denoised["samples"])
# ACCUMULATE UNSAMPLED SDE NOISE
if total_steps_iter > 1:
if 'raw_x' in state_info_out:
sde_noise_out = state_info_out['raw_x']
else:
sde_noise_out = out["samples"]
sde_noise.append(normalize_zscore(sde_noise_out, channelwise=True, inplace=True))
out_state_info.append(state_info_out)
# INCREMENT BATCH LOOP
if not EO("lock_batch_seed"):
seed += 1
if latent_image is not None: #needed for ultracascade, where latent_image input is not really used for stage C/first stage
if latent_image.get('state_info', {}).get('last_rng', None) is None:
torch.manual_seed(seed)
gc.collect()
# STACK SDE NOISES, SAVE STATE INFO
state_info_out = out_state_info[0]
if 'raw_x' in out_state_info[0]:
state_info_out['raw_x'] = torch.stack([out_state_info[_]['raw_x'] for _ in range(len(out_state_info))])
state_info_out['data_prev_'] = torch.stack([out_state_info[_]['data_prev_'] for _ in range(len(out_state_info))])
state_info_out['last_rng'] = torch.stack([out_state_info[_]['last_rng'] for _ in range(len(out_state_info))])
state_info_out['last_rng_substep'] = torch.stack([out_state_info[_]['last_rng_substep'] for _ in range(len(out_state_info))])
if 'image_initial' in out_state_info[0]:
state_info_out['image_initial'] = torch.stack([out_state_info[_]['image_initial'] for _ in range(len(out_state_info))])
if 'noise_initial' in out_state_info[0]:
state_info_out['noise_initial'] = torch.stack([out_state_info[_]['noise_initial'] for _ in range(len(out_state_info))])
elif 'raw_x' in state_info:
state_info_out = state_info
out_samples = [tensor.squeeze(0) for tensor in out_samples]
out_denoised_samples = [tensor.squeeze(0) for tensor in out_denoised_samples]
out ['samples'] = torch.stack(out_samples, dim=0)
out_denoised['samples'] = torch.stack(out_denoised_samples, dim=0)
out['state_info'] = copy.deepcopy(state_info_out)
state_info = {}
out['positive'] = positive
out['negative'] = negative
out['model'] = work_model#.clone()
out['sampler'] = sampler
return (out, out_denoised, sde_noise,)
class SharkSampler_Beta:
    """ComfyUI node: simplified front-end for SharkSampler.

    Exposes a reduced set of inputs, pulls any missing inputs
    (positive/negative conds, sampler, model) out of the incoming latent
    dict when an upstream sampler node stashed them there, and delegates
    all real work to SharkSampler().main().
    """

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "scheduler":    (get_res4lyf_scheduler_list(), {"default": "beta57"},),
                "steps":        ("INT",   {"default": 30,  "min": 1,        "max": 10000.0}),
                "steps_to_run": ("INT",   {"default": -1,  "min": -1,       "max": MAX_STEPS}),
                "denoise":      ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.01}),
                "cfg":          ("FLOAT", {"default": 5.5, "min": -10000.0, "max": 10000.0, "step": 0.01, "round": False, "tooltip": "Negative values use channelwise CFG." }),
                "seed":         ("INT",   {"default": 0,   "min": -1,       "max": 0xffffffffffffffff}),
                "sampler_mode": (['unsample', 'standard', 'resample'], {"default": "standard"}),
            },
            "optional": {
                "model":        ("MODEL",),
                "positive":     ("CONDITIONING", ),
                "negative":     ("CONDITIONING", ),
                "sampler":      ("SAMPLER", ),
                "sigmas":       ("SIGMAS", ),
                "latent_image": ("LATENT", ),
                "options":      ("OPTIONS", ),
            }
        }

    RETURN_TYPES = ("LATENT",
                    "LATENT",
                    "OPTIONS",)
    RETURN_NAMES = ("output",
                    "denoised",
                    "options",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/samplers"

    def main(self,
            model                              = None,
            cfg             : float            = 5.5,
            scheduler       : str              = "beta57",
            steps           : int              = 30,
            steps_to_run    : int              = -1,
            sampler_mode    : str              = "standard",
            denoise         : float            = 1.0,
            denoise_alt     : float            = 1.0,
            noise_type_init : str              = "gaussian",
            latent_image    : Optional[dict[Tensor]] = None,
            positive                           = None,
            negative                           = None,
            sampler                            = None,
            sigmas          : Optional[Tensor] = None,
            noise_stdev     : float            = 1.0,
            noise_mean      : float            = 0.0,
            noise_normalize : bool             = True,
            d_noise         : float            = 1.0,
            alpha_init      : float            = -1.0,
            k_init          : float            = 1.0,
            cfgpp           : float            = 0.0,
            seed            : int              = -1,
            options                            = None,
            sde_noise                          = None,
            sde_noise_steps : int              = 1,
            extra_options   : str              = "",
            **kwargs,
            ):
        """Resolve inputs and forward everything to SharkSampler().main().

        Returns:
            (output_latent, denoised_latent, options_dict)
        """
        options_mgr = OptionsManager(options, **kwargs)

        # A negative denoise value selects the "alt" denoise path: its
        # magnitude becomes denoise_alt and denoise itself resets to 1.0.
        if denoise < 0:
            denoise_alt = -denoise
            denoise     = 1.0

        # Fall back to inputs stashed in the latent dict by an upstream
        # sampler node. Guard against latent_image being None (its default):
        # 'key' in None raises "argument of type 'NoneType' is not iterable".
        if latent_image is not None:
            if 'positive' in latent_image and positive is None:
                positive = latent_image['positive']
            if 'negative' in latent_image and negative is None:
                negative = latent_image['negative']
            if 'sampler' in latent_image and sampler is None:
                sampler = latent_image['sampler']
            if 'model' in latent_image and model is None:
                model = latent_image['model']

        output, denoised, sde_noise = SharkSampler().main(
            model           = model,
            cfg             = cfg,
            scheduler       = scheduler,
            steps           = steps,
            steps_to_run    = steps_to_run,
            denoise         = denoise,
            latent_image    = latent_image,
            positive        = positive,
            negative        = negative,
            sampler         = sampler,
            cfgpp           = cfgpp,
            noise_seed      = seed,
            options         = options,
            sde_noise       = sde_noise,
            sde_noise_steps = sde_noise_steps,
            noise_type_init = noise_type_init,
            noise_stdev     = noise_stdev,
            sampler_mode    = sampler_mode,
            denoise_alt     = denoise_alt,
            sigmas          = sigmas,
            extra_options   = extra_options)

        return (output, denoised, options_mgr.as_dict())
class SharkChainsampler_Beta(SharkSampler_Beta):
    """Chain-sampling variant of SharkSampler_Beta.

    Continues a previous run using the sigma schedule recorded in the
    incoming latent's state_info, flipping an ascending schedule when
    transitioning from an unsample pass to a resample pass.
    """

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "steps_to_run": ("INT",   {"default": -1,  "min": -1,       "max": MAX_STEPS}),
                "cfg":          ("FLOAT", {"default": 5.5, "min": -10000.0, "max": 10000.0, "step": 0.01, "round": False, "tooltip": "Negative values use channelwise CFG." }),
                "sampler_mode": (['unsample', 'resample'], {"default": "resample"}),
            },
            "optional": {
                "model":        ("MODEL",),
                "positive":     ("CONDITIONING", ),
                "negative":     ("CONDITIONING", ),
                "sampler":      ("SAMPLER", ),
                "sigmas":       ("SIGMAS", ),
                "latent_image": ("LATENT", ),
                "options":      ("OPTIONS", ),
            }
        }

    def main(self,
            model        = None,
            steps_to_run = -1,
            cfg          = 5.5,
            latent_image = None,
            sigmas       = None,
            sampler_mode = "",
            seed : int   = -1,
            **kwargs):
        """Resume sampling from the stored state and delegate to the parent."""
        state_info = latent_image['state_info']

        # Step count is derived from the stored schedule, not user input.
        steps = state_info['sigmas'].shape[-1] - 3

        if sigmas is None:
            sigmas = state_info['sigmas']

        # An unsample pass leaves the schedule ascending; resampling needs
        # it descending again, so flip it in that one transition.
        schedule_ascending = len(sigmas) > 2 and sigmas[1] < sigmas[2]
        if schedule_ascending and state_info['sampler_mode'] == "unsample" and sampler_mode == "resample":
            sigmas = torch.flip(sigmas, dims=[0])

        return super().main(model=model, sampler_mode=sampler_mode, steps_to_run=steps_to_run, sigmas=sigmas, steps=steps, cfg=cfg, seed=seed, latent_image=latent_image, **kwargs)
class ClownSamplerAdvanced_Beta:
    """ComfyUI node: builds a fully-parameterized "rk_beta" SAMPLER object.

    Collects SDE-noise, overshoot, implicit-step, and automation settings
    (with an OptionsManager overlay), pads the per-step automation tensors
    out to MAX_STEPS, and packages everything into the options dict passed
    to comfy.samplers.ksampler("rk_beta", ...).
    """

    @classmethod
    def INPUT_TYPES(cls):
        return {"required":
                    {
                    "noise_type_sde":         (NOISE_GENERATOR_NAMES_SIMPLE, {"default": "gaussian"}),
                    "noise_type_sde_substep": (NOISE_GENERATOR_NAMES_SIMPLE, {"default": "gaussian"}),
                    "noise_mode_sde":         (NOISE_MODE_NAMES,             {"default": 'hard', "tooltip": "How noise scales with the sigma schedule. Hard is the most aggressive, the others start strong and drop rapidly."}),
                    "noise_mode_sde_substep": (NOISE_MODE_NAMES,             {"default": 'hard', "tooltip": "How noise scales with the sigma schedule. Hard is the most aggressive, the others start strong and drop rapidly."}),
                    "overshoot_mode":         (NOISE_MODE_NAMES,             {"default": 'hard', "tooltip": "How step size overshoot scales with the sigma schedule. Hard is the most aggressive, the others start strong and drop rapidly."}),
                    "overshoot_mode_substep": (NOISE_MODE_NAMES,             {"default": 'hard', "tooltip": "How substep size overshoot scales with the sigma schedule. Hard is the most aggressive, the others start strong and drop rapidly."}),
                    "eta":                    ("FLOAT",                      {"default": 0.5, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Calculated noise amount to be added, then removed, after each step."}),
                    "eta_substep":            ("FLOAT",                      {"default": 0.5, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Calculated noise amount to be added, then removed, after each step."}),
                    "overshoot":              ("FLOAT",                      {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Boost the size of each denoising step, then rescale to match the original. Has a softening effect."}),
                    "overshoot_substep":      ("FLOAT",                      {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Boost the size of each denoising substep, then rescale to match the original. Has a softening effect."}),
                    "noise_scaling_weight":   ("FLOAT",                      {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set to positive values to create a sharper, grittier, more detailed image. Set to negative values to soften and deepen the colors."}),
                    "noise_boost_step":       ("FLOAT",                      {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set to positive values to create a sharper, grittier, more detailed image. Set to negative values to soften and deepen the colors."}),
                    "noise_boost_substep":    ("FLOAT",                      {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set to positive values to create a sharper, grittier, more detailed image. Set to negative values to soften and deepen the colors."}),
                    "noise_anchor":           ("FLOAT",                      {"default": 1.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Typically set to between 1.0 and 0.0. Lower values create a grittier, more detailed image."}),
                    "s_noise":                ("FLOAT",                      {"default": 1.0, "min": -10000, "max": 10000, "step":0.01, "tooltip": "Adds extra SDE noise. Values around 1.03-1.07 can lead to a moderate boost in detail and paint textures."}),
                    "s_noise_substep":        ("FLOAT",                      {"default": 1.0, "min": -10000, "max": 10000, "step":0.01, "tooltip": "Adds extra SDE noise. Values around 1.03-1.07 can lead to a moderate boost in detail and paint textures."}),
                    "d_noise":                ("FLOAT",                      {"default": 1.0, "min": -10000, "max": 10000, "step":0.01, "tooltip": "Downscales the sigma schedule. Values around 0.98-0.95 can lead to a large boost in detail and paint textures."}),
                    # NOTE(review): UI default for momentum is 1.0 while main()'s
                    # signature defaults it to 0.0 — confirm which is intended.
                    "momentum":               ("FLOAT",                      {"default": 1.0, "min": -10000, "max": 10000, "step":0.01, "tooltip": "Accelerate convergence with positive values when sampling, negative values when unsampling."}),
                    "noise_seed_sde":         ("INT",                        {"default": -1, "min": -1, "max": 0xffffffffffffffff}),
                    "sampler_name":           (get_sampler_name_list(),      {"default": get_default_sampler_name()}),
                    "implicit_type":          (IMPLICIT_TYPE_NAMES,          {"default": "predictor-corrector"}),
                    "implicit_type_substeps": (IMPLICIT_TYPE_NAMES,          {"default": "predictor-corrector"}),
                    "implicit_steps":         ("INT",                        {"default": 0, "min": 0, "max": 10000}),
                    "implicit_substeps":      ("INT",                        {"default": 0, "min": 0, "max": 10000}),
                    "bongmath":               ("BOOLEAN",                    {"default": True}),
                    },
                "optional":
                    {
                    "guides":        ("GUIDES", ),
                    "automation":    ("AUTOMATION", ),
                    "extra_options": ("STRING", {"default": "", "multiline": True}),
                    "options":       ("OPTIONS", ),
                    }
                }

    RETURN_TYPES = ("SAMPLER",)
    RETURN_NAMES = ("sampler", )
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/samplers"
    EXPERIMENTAL = True

    def main(self,
            noise_type_sde                : str   = "gaussian",
            noise_type_sde_substep        : str   = "gaussian",
            noise_mode_sde                : str   = "hard",
            overshoot_mode                : str   = "hard",
            overshoot_mode_substep        : str   = "hard",
            eta                           : float = 0.5,
            eta_substep                   : float = 0.5,
            momentum                      : float = 0.0,
            noise_scaling_weight          : float = 0.0,
            noise_scaling_type            : str   = "sampler",
            noise_scaling_mode            : str   = "linear",
            noise_scaling_eta             : float = 0.0,
            noise_scaling_cycles          : int   = 1,
            noise_scaling_weights         : Optional[Tensor] = None,
            noise_scaling_etas            : Optional[Tensor] = None,
            noise_boost_step              : float = 0.0,
            noise_boost_substep           : float = 0.0,
            noise_boost_normalize         : bool  = True,
            noise_anchor                  : float = 1.0,
            s_noise                       : float = 1.0,
            s_noise_substep               : float = 1.0,
            d_noise                       : float = 1.0,
            d_noise_start_step            : int   = 0,
            d_noise_inv                   : float = 1.0,
            d_noise_inv_start_step        : int   = 0,
            alpha_sde                     : float = -1.0,
            k_sde                         : float = 1.0,
            cfgpp                         : float = 0.0,
            c1                            : float = 0.0,
            c2                            : float = 0.5,
            c3                            : float = 1.0,
            noise_seed_sde                : int   = -1,
            sampler_name                  : str   = "res_2m",
            implicit_sampler_name         : str   = "gauss-legendre_2s",
            implicit_substeps             : int   = 0,
            implicit_steps                : int   = 0,
            rescale_floor                 : bool  = True,
            sigmas_override               : Optional[Tensor] = None,
            guides                                = None,
            options                               = None,
            sde_noise                             = None,
            sde_noise_steps               : int   = 1,
            extra_options                 : str   = "",
            automation                            = None,
            etas                          : Optional[Tensor] = None,
            etas_substep                  : Optional[Tensor] = None,
            s_noises                      : Optional[Tensor] = None,
            s_noises_substep              : Optional[Tensor] = None,
            epsilon_scales                : Optional[Tensor] = None,
            regional_conditioning_weights : Optional[Tensor] = None,
            frame_weights_mgr                     = None,
            noise_mode_sde_substep        : str   = "hard",
            overshoot                     : float = 0.0,
            overshoot_substep             : float = 0.0,
            bongmath                      : bool  = True,
            implicit_type                 : str   = "predictor-corrector",
            implicit_type_substeps        : str   = "predictor-corrector",
            rk_swap_step                  : int   = MAX_STEPS,
            rk_swap_print                 : bool  = False,
            rk_swap_threshold             : float = 0.0,
            rk_swap_type                  : str   = "",
            steps_to_run                  : int   = -1,
            sde_mask                      : Optional[Tensor] = None,
            **kwargs,
            ):
        """Assemble all sampler settings and return an rk_beta SAMPLER.

        Returns:
            A one-tuple containing the comfy.samplers.ksampler object.
        """
        options_mgr    = OptionsManager(options, **kwargs)
        extra_options += "\n" + options_mgr.get('extra_options', "")
        EO             = ExtraOptions(extra_options)
        default_dtype  = EO("default_dtype", torch.float64)

        # "sampler/implicit" combo names are split into their two parts.
        sampler_name, implicit_sampler_name = process_sampler_name(sampler_name)

        implicit_steps_diag = implicit_substeps
        implicit_steps_full = implicit_steps

        # "none" disables SDE noise via eta=0 but must map to a valid mode.
        if noise_mode_sde == "none":
            eta            = 0.0
            noise_mode_sde = "hard"

        # Options-dict values override the node's direct inputs.
        noise_type_sde         = options_mgr.get('noise_type_sde'        , noise_type_sde)
        noise_mode_sde         = options_mgr.get('noise_mode_sde'        , noise_mode_sde)
        eta                    = options_mgr.get('eta'                   , eta)
        eta_substep            = options_mgr.get('eta_substep'           , eta_substep)
        noise_scaling_weight   = options_mgr.get('noise_scaling_weight'  , noise_scaling_weight)
        noise_scaling_type     = options_mgr.get('noise_scaling_type'    , noise_scaling_type)
        noise_scaling_mode     = options_mgr.get('noise_scaling_mode'    , noise_scaling_mode)
        noise_scaling_eta      = options_mgr.get('noise_scaling_eta'     , noise_scaling_eta)
        noise_scaling_cycles   = options_mgr.get('noise_scaling_cycles'  , noise_scaling_cycles)
        noise_scaling_weights  = options_mgr.get('noise_scaling_weights' , noise_scaling_weights)
        noise_scaling_etas     = options_mgr.get('noise_scaling_etas'    , noise_scaling_etas)
        noise_boost_step       = options_mgr.get('noise_boost_step'      , noise_boost_step)
        noise_boost_substep    = options_mgr.get('noise_boost_substep'   , noise_boost_substep)
        noise_boost_normalize  = options_mgr.get('noise_boost_normalize' , noise_boost_normalize)
        noise_anchor           = options_mgr.get('noise_anchor'          , noise_anchor)
        s_noise                = options_mgr.get('s_noise'               , s_noise)
        s_noise_substep        = options_mgr.get('s_noise_substep'       , s_noise_substep)
        d_noise                = options_mgr.get('d_noise'               , d_noise)
        d_noise_start_step     = options_mgr.get('d_noise_start_step'    , d_noise_start_step)
        d_noise_inv            = options_mgr.get('d_noise_inv'           , d_noise_inv)
        d_noise_inv_start_step = options_mgr.get('d_noise_inv_start_step', d_noise_inv_start_step)
        alpha_sde              = options_mgr.get('alpha_sde'             , alpha_sde)
        k_sde                  = options_mgr.get('k_sde'                 , k_sde)
        c1                     = options_mgr.get('c1'                    , c1)
        c2                     = options_mgr.get('c2'                    , c2)
        c3                     = options_mgr.get('c3'                    , c3)
        frame_weights_mgr      = options_mgr.get('frame_weights_mgr'     , frame_weights_mgr)
        sde_noise              = options_mgr.get('sde_noise'             , sde_noise)
        sde_noise_steps        = options_mgr.get('sde_noise_steps'       , sde_noise_steps)
        rk_swap_step           = options_mgr.get('rk_swap_step'          , rk_swap_step)
        rk_swap_print          = options_mgr.get('rk_swap_print'         , rk_swap_print)
        rk_swap_threshold      = options_mgr.get('rk_swap_threshold'     , rk_swap_threshold)
        rk_swap_type           = options_mgr.get('rk_swap_type'          , rk_swap_type)
        steps_to_run           = options_mgr.get('steps_to_run'          , steps_to_run)
        noise_seed_sde         = options_mgr.get('noise_seed_sde'        , noise_seed_sde)
        momentum               = options_mgr.get('momentum'              , momentum)
        sde_mask               = options_mgr.get('sde_mask'              , sde_mask)

        # NOTE(review): this unconditionally replaces the rescale_floor
        # parameter with the extra_options flag — confirm that the input
        # parameter is intentionally ignored.
        rescale_floor = EO("rescale_floor")

        # Automation node output supersedes the direct tensor inputs.
        if automation is not None:
            etas              = automation.get('etas')
            etas_substep      = automation.get('etas_substep')
            s_noises          = automation.get('s_noises')
            s_noises_substep  = automation.get('s_noises_substep')
            epsilon_scales    = automation.get('epsilon_scales')
            frame_weights_mgr = automation.get('frame_weights_mgr')

        etas             = options_mgr.get('etas',             etas)
        etas_substep     = options_mgr.get('etas_substep',     etas_substep)
        s_noises         = options_mgr.get('s_noises',         s_noises)
        s_noises_substep = options_mgr.get('s_noises_substep', s_noises_substep)

        # Expand scalars into per-step schedules and pad so any step index
        # up to MAX_STEPS is valid (etas pad with 0, s_noises pad with 1).
        etas             = initialize_or_scale(etas,             eta,             MAX_STEPS).to(default_dtype)
        etas_substep     = initialize_or_scale(etas_substep,     eta_substep,     MAX_STEPS).to(default_dtype)
        s_noises         = initialize_or_scale(s_noises,         s_noise,         MAX_STEPS).to(default_dtype)
        s_noises_substep = initialize_or_scale(s_noises_substep, s_noise_substep, MAX_STEPS).to(default_dtype)

        etas             = F.pad(etas,             (0, MAX_STEPS), value=0.0)
        etas_substep     = F.pad(etas_substep,     (0, MAX_STEPS), value=0.0)
        s_noises         = F.pad(s_noises,         (0, MAX_STEPS), value=1.0)
        s_noises_substep = F.pad(s_noises_substep, (0, MAX_STEPS), value=1.0)

        # Deep-copy so normalization never mutates the caller's noise list.
        if sde_noise is None:
            sde_noise = []
        else:
            sde_noise = copy.deepcopy(sde_noise)
            sde_noise = normalize_zscore(sde_noise, channelwise=True, inplace=True)

        # Fix: the original dict listed "eta_substep" twice (the duplicate
        # silently overwrote the first entry); it now appears exactly once.
        sampler = comfy.samplers.ksampler("rk_beta",
            {
                "eta"                           : eta,
                "eta_substep"                   : eta_substep,
                "alpha"                         : alpha_sde,
                "k"                             : k_sde,
                "c1"                            : c1,
                "c2"                            : c2,
                "c3"                            : c3,
                "cfgpp"                         : cfgpp,
                "noise_sampler_type"            : noise_type_sde,
                "noise_sampler_type_substep"    : noise_type_sde_substep,
                "noise_mode_sde"                : noise_mode_sde,
                "noise_seed"                    : noise_seed_sde,
                "rk_type"                       : sampler_name,
                "implicit_sampler_name"         : implicit_sampler_name,
                "implicit_steps_diag"           : implicit_steps_diag,
                "implicit_steps_full"           : implicit_steps_full,
                "LGW_MASK_RESCALE_MIN"          : rescale_floor,
                "sigmas_override"               : sigmas_override,
                "sde_noise"                     : sde_noise,
                "extra_options"                 : extra_options,
                "sampler_mode"                  : "standard",
                "etas"                          : etas,
                "etas_substep"                  : etas_substep,
                "s_noises"                      : s_noises,
                "s_noises_substep"              : s_noises_substep,
                "epsilon_scales"                : epsilon_scales,
                "regional_conditioning_weights" : regional_conditioning_weights,
                "guides"                        : guides,
                "frame_weights_mgr"             : frame_weights_mgr,
                "noise_mode_sde_substep"        : noise_mode_sde_substep,
                "noise_scaling_weight"          : noise_scaling_weight,
                "noise_scaling_type"            : noise_scaling_type,
                "noise_scaling_mode"            : noise_scaling_mode,
                "noise_scaling_eta"             : noise_scaling_eta,
                "noise_scaling_cycles"          : noise_scaling_cycles,
                "noise_scaling_weights"         : noise_scaling_weights,
                "noise_scaling_etas"            : noise_scaling_etas,
                "noise_boost_step"              : noise_boost_step,
                "noise_boost_substep"           : noise_boost_substep,
                "noise_boost_normalize"         : noise_boost_normalize,
                "noise_anchor"                  : noise_anchor,
                "s_noise"                       : s_noise,
                "s_noise_substep"               : s_noise_substep,
                "d_noise"                       : d_noise,
                "d_noise_start_step"            : d_noise_start_step,
                "d_noise_inv"                   : d_noise_inv,
                "d_noise_inv_start_step"        : d_noise_inv_start_step,
                "overshoot_mode"                : overshoot_mode,
                "overshoot_mode_substep"        : overshoot_mode_substep,
                "overshoot"                     : overshoot,
                "overshoot_substep"             : overshoot_substep,
                "BONGMATH"                      : bongmath,
                "implicit_type"                 : implicit_type,
                "implicit_type_substeps"        : implicit_type_substeps,
                "rk_swap_step"                  : rk_swap_step,
                "rk_swap_print"                 : rk_swap_print,
                "rk_swap_threshold"             : rk_swap_threshold,
                "rk_swap_type"                  : rk_swap_type,
                "steps_to_run"                  : steps_to_run,
                "sde_mask"                      : sde_mask,
                "momentum"                      : momentum,
            })

        return (sampler, )
class ClownsharKSampler_Beta:
    """All-in-one sampler node.

    Builds a SAMPLER via ClownSamplerAdvanced_Beta from the settings below
    (most of which can be overridden through the chained ``options`` dict),
    then immediately executes it with SharkSampler.

    Returns:
        (output, denoised, options) — the raw output latent, the denoised
        latent, and the accumulated options dict for chaining downstream.
    """
    @classmethod
    def INPUT_TYPES(cls):
        # ComfyUI node schema: required widgets plus optional connection inputs.
        inputs = {"required":
            {
                "eta": ("FLOAT", {"default": 0.5, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Calculated noise amount to be added, then removed, after each step."}),
                "sampler_name": (get_sampler_name_list (), {"default": get_default_sampler_name()}),
                "scheduler": (get_res4lyf_scheduler_list(), {"default": "beta57"},),
                "steps": ("INT", {"default": 30, "min": 1, "max": MAX_STEPS}),
                "steps_to_run": ("INT", {"default": -1, "min": -1, "max": MAX_STEPS}),
                "denoise": ("FLOAT", {"default": 1.0, "min": -10000, "max": MAX_STEPS, "step":0.01}),
                "cfg": ("FLOAT", {"default": 5.5, "min": -100.0, "max": 100.0, "step":0.01, "round": False, }),
                "seed": ("INT", {"default": 0, "min": -1, "max": 0xffffffffffffffff}),
                "sampler_mode": (['unsample', 'standard', 'resample'], {"default": "standard"}),
                "bongmath": ("BOOLEAN", {"default": True}),
            },
            "optional":
            {
                "model": ("MODEL",),
                "positive": ("CONDITIONING",),
                "negative": ("CONDITIONING",),
                "latent_image": ("LATENT",),
                "sigmas": ("SIGMAS",),
                "guides": ("GUIDES",),
                "options": ("OPTIONS", {}),
            }
        }
        return inputs

    RETURN_TYPES = ("LATENT",
                    "LATENT",
                    "OPTIONS",
                    )
    RETURN_NAMES = ("output",
                    "denoised",
                    "options",
                    )
    FUNCTION = "main"
    CATEGORY = "RES4LYF/samplers"

    def main(self,
            model = None,
            denoise : float = 1.0,
            scheduler : str = "beta57",
            cfg : float = 1.0,
            seed : int = -1,
            positive = None,
            negative = None,
            latent_image : Optional[dict[Tensor]] = None,
            steps : int = 30,
            steps_to_run : int = -1,
            bongmath : bool = True,
            sampler_mode : str = "standard",
            noise_type_sde : str = "gaussian",
            noise_type_sde_substep : str = "gaussian",
            noise_mode_sde : str = "hard",
            noise_mode_sde_substep : str = "hard",
            overshoot_mode : str = "hard",
            overshoot_mode_substep : str = "hard",
            overshoot : float = 0.0,
            overshoot_substep : float = 0.0,
            eta : float = 0.5,
            eta_substep : float = 0.5,
            momentum : float = 0.0,
            noise_scaling_weight : float = 0.0,
            noise_scaling_type : str = "sampler",
            noise_scaling_mode : str = "linear",
            noise_scaling_eta : float = 0.0,
            noise_scaling_cycles : int = 1,
            noise_scaling_weights : Optional[Tensor] = None,
            noise_scaling_etas : Optional[Tensor] = None,
            noise_boost_step : float = 0.0,
            noise_boost_substep : float = 0.0,
            noise_boost_normalize : bool = True,
            noise_anchor : float = 1.0,
            s_noise : float = 1.0,
            s_noise_substep : float = 1.0,
            d_noise : float = 1.0,
            d_noise_start_step : int = 0,
            d_noise_inv : float = 1.0,
            d_noise_inv_start_step : int = 0,
            alpha_sde : float = -1.0,
            k_sde : float = 1.0,
            cfgpp : float = 0.0,
            c1 : float = 0.0,
            c2 : float = 0.5,
            c3 : float = 1.0,
            noise_seed_sde : int = -1,
            sampler_name : str = "res_2m",
            implicit_sampler_name : str = "use_explicit",
            implicit_type : str = "bongmath",
            implicit_type_substeps : str = "bongmath",
            implicit_steps : int = 0,
            implicit_substeps : int = 0,
            sigmas : Optional[Tensor] = None,
            sigmas_override : Optional[Tensor] = None,
            guides = None,
            options = None,
            sde_noise = None,
            sde_noise_steps : int = 1,
            extra_options : str = "",
            automation = None,
            epsilon_scales : Optional[Tensor] = None,
            regional_conditioning_weights : Optional[Tensor] = None,
            frame_weights_mgr = None,
            rescale_floor : bool = True,
            rk_swap_step : int = MAX_STEPS,
            rk_swap_print : bool = False,
            rk_swap_threshold : float = 0.0,
            rk_swap_type : str = "",
            sde_mask : Optional[Tensor] = None,
            #start_at_step : int = 0,
            #stop_at_step : int = MAX_STEPS,
            **kwargs
            ):
        # Merge the chained options dict with any loose kwargs; values in the
        # manager act as overrides for the explicit parameters below.
        options_mgr = OptionsManager(options, **kwargs)
        extra_options += "\n" + options_mgr.get('extra_options', "")

        #if model is None:
        #    model = latent_image['model']

        # defaults for ClownSampler
        # NOTE(review): this overwrites the eta_substep parameter with eta before
        # the options lookup below, so only an options-supplied 'eta_substep' can
        # differ from 'eta' -- confirm this is intentional.
        eta_substep = eta

        # defaults for SharkSampler
        noise_type_init = "gaussian"
        noise_stdev = 1.0
        denoise_alt = 1.0
        channelwise_cfg = False

        # Negative denoise doubles as a flag: its magnitude becomes denoise_alt
        # and denoise itself is reset to 1.0.
        if denoise < 0:
            denoise_alt = -denoise
            denoise = 1.0

        # When chained, upstream nodes stash conditioning/model in the latent
        # dict; pull them out only when the corresponding input is unconnected.
        is_chained = False
        if latent_image is not None and 'positive' in latent_image and positive is None:
            positive = latent_image['positive']
            is_chained = True
        if latent_image is not None and 'negative' in latent_image and negative is None:
            negative = latent_image['negative']
            is_chained = True
        if latent_image is not None and 'model' in latent_image and model is None:
            model = latent_image['model']
            is_chained = True

        # An options-supplied guider can provide the model, but only for
        # non-chained invocations.
        guider = options_mgr.get('guider', None)
        if is_chained is False and guider is not None:
            model = guider.model_patcher

        # Stable Cascade stage B needs its dedicated pyramid noise type.
        if model.model.model_config.unet_config.get('stable_cascade_stage') == 'b':
            noise_type_sde = "pyramid-cascade_B"
            noise_type_sde_substep = "pyramid-cascade_B"

        #if options is not None:
        #options_mgr = OptionsManager(options_inputs)
        # Options-dict values take precedence over the explicit parameters;
        # a handful are written back so downstream nodes see the final values.
        noise_seed_sde = options_mgr.get('noise_seed_sde' , noise_seed_sde)
        noise_type_sde = options_mgr.get('noise_type_sde' , noise_type_sde)
        noise_type_sde_substep = options_mgr.get('noise_type_sde_substep', noise_type_sde_substep)
        options_mgr.update('noise_type_sde', noise_type_sde)
        options_mgr.update('noise_type_sde_substep', noise_type_sde_substep)
        noise_mode_sde = options_mgr.get('noise_mode_sde' , noise_mode_sde)
        noise_mode_sde_substep = options_mgr.get('noise_mode_sde_substep', noise_mode_sde_substep)
        overshoot_mode = options_mgr.get('overshoot_mode' , overshoot_mode)
        overshoot_mode_substep = options_mgr.get('overshoot_mode_substep', overshoot_mode_substep)
        eta = options_mgr.get('eta' , eta)
        eta_substep = options_mgr.get('eta_substep' , eta_substep)
        options_mgr.update('eta', eta)
        options_mgr.update('eta_substep', eta_substep)
        overshoot = options_mgr.get('overshoot' , overshoot)
        overshoot_substep = options_mgr.get('overshoot_substep' , overshoot_substep)
        noise_scaling_weight = options_mgr.get('noise_scaling_weight' , noise_scaling_weight)
        noise_scaling_type = options_mgr.get('noise_scaling_type' , noise_scaling_type)
        noise_scaling_mode = options_mgr.get('noise_scaling_mode' , noise_scaling_mode)
        noise_scaling_eta = options_mgr.get('noise_scaling_eta' , noise_scaling_eta)
        noise_scaling_cycles = options_mgr.get('noise_scaling_cycles' , noise_scaling_cycles)
        noise_scaling_weights = options_mgr.get('noise_scaling_weights' , noise_scaling_weights)
        noise_scaling_etas = options_mgr.get('noise_scaling_etas' , noise_scaling_etas)
        noise_boost_step = options_mgr.get('noise_boost_step' , noise_boost_step)
        noise_boost_substep = options_mgr.get('noise_boost_substep' , noise_boost_substep)
        noise_boost_normalize = options_mgr.get('noise_boost_normalize' , noise_boost_normalize)
        noise_anchor = options_mgr.get('noise_anchor' , noise_anchor)
        s_noise = options_mgr.get('s_noise' , s_noise)
        s_noise_substep = options_mgr.get('s_noise_substep' , s_noise_substep)
        d_noise = options_mgr.get('d_noise' , d_noise)
        d_noise_start_step = options_mgr.get('d_noise_start_step' , d_noise_start_step)
        d_noise_inv = options_mgr.get('d_noise_inv' , d_noise_inv)
        d_noise_inv_start_step = options_mgr.get('d_noise_inv_start_step', d_noise_inv_start_step)
        momentum = options_mgr.get('momentum' , momentum)
        implicit_type = options_mgr.get('implicit_type' , implicit_type)
        implicit_type_substeps = options_mgr.get('implicit_type_substeps', implicit_type_substeps)
        implicit_steps = options_mgr.get('implicit_steps' , implicit_steps)
        implicit_substeps = options_mgr.get('implicit_substeps' , implicit_substeps)
        alpha_sde = options_mgr.get('alpha_sde' , alpha_sde)
        k_sde = options_mgr.get('k_sde' , k_sde)
        c1 = options_mgr.get('c1' , c1)
        c2 = options_mgr.get('c2' , c2)
        c3 = options_mgr.get('c3' , c3)
        frame_weights_mgr = options_mgr.get('frame_weights_mgr' , frame_weights_mgr)
        sde_noise = options_mgr.get('sde_noise' , sde_noise)
        sde_noise_steps = options_mgr.get('sde_noise_steps' , sde_noise_steps)
        extra_options = options_mgr.get('extra_options' , extra_options)
        automation = options_mgr.get('automation' , automation)

        # SharkSampler Options
        noise_type_init = options_mgr.get('noise_type_init' , noise_type_init)
        noise_stdev = options_mgr.get('noise_stdev' , noise_stdev)
        sampler_mode = options_mgr.get('sampler_mode' , sampler_mode)
        denoise_alt = options_mgr.get('denoise_alt' , denoise_alt)
        channelwise_cfg = options_mgr.get('channelwise_cfg' , channelwise_cfg)
        options_mgr.update('noise_type_init', noise_type_init)
        options_mgr.update('noise_stdev', noise_stdev)
        options_mgr.update('denoise_alt', denoise_alt)
        #options_mgr.update('channelwise_cfg', channelwise_cfg)
        sigmas = options_mgr.get('sigmas' , sigmas)
        rk_swap_type = options_mgr.get('rk_swap_type' , rk_swap_type)
        rk_swap_step = options_mgr.get('rk_swap_step' , rk_swap_step)
        rk_swap_threshold = options_mgr.get('rk_swap_threshold' , rk_swap_threshold)
        rk_swap_print = options_mgr.get('rk_swap_print' , rk_swap_print)
        sde_mask = options_mgr.get('sde_mask' , sde_mask)
        #start_at_step = options_mgr.get('start_at_step' , start_at_step)
        #stop_at_ste = options_mgr.get('stop_at_step' , stop_at_step)

        if channelwise_cfg: # != 1.0:
            cfg = -abs(cfg) # set cfg negative for shark, to flag as cfg_cw

        # Stage 1: construct the SAMPLER object from the resolved settings.
        sampler, = ClownSamplerAdvanced_Beta().main(
            noise_type_sde = noise_type_sde,
            noise_type_sde_substep = noise_type_sde_substep,
            noise_mode_sde = noise_mode_sde,
            noise_mode_sde_substep = noise_mode_sde_substep,
            eta = eta,
            eta_substep = eta_substep,
            overshoot = overshoot,
            overshoot_substep = overshoot_substep,
            overshoot_mode = overshoot_mode,
            overshoot_mode_substep = overshoot_mode_substep,
            momentum = momentum,
            alpha_sde = alpha_sde,
            k_sde = k_sde,
            cfgpp = cfgpp,
            c1 = c1,
            c2 = c2,
            c3 = c3,
            sampler_name = sampler_name,
            implicit_sampler_name = implicit_sampler_name,
            implicit_type = implicit_type,
            implicit_type_substeps = implicit_type_substeps,
            implicit_steps = implicit_steps,
            implicit_substeps = implicit_substeps,
            rescale_floor = rescale_floor,
            sigmas_override = sigmas_override,
            noise_seed_sde = noise_seed_sde,
            guides = guides,
            options = options_mgr.as_dict(),
            extra_options = extra_options,
            automation = automation,
            noise_scaling_weight = noise_scaling_weight,
            noise_scaling_type = noise_scaling_type,
            noise_scaling_mode = noise_scaling_mode,
            noise_scaling_eta = noise_scaling_eta,
            noise_scaling_cycles = noise_scaling_cycles,
            noise_scaling_weights = noise_scaling_weights,
            noise_scaling_etas = noise_scaling_etas,
            noise_boost_step = noise_boost_step,
            noise_boost_substep = noise_boost_substep,
            noise_boost_normalize = noise_boost_normalize,
            noise_anchor = noise_anchor,
            s_noise = s_noise,
            s_noise_substep = s_noise_substep,
            d_noise = d_noise,
            d_noise_start_step = d_noise_start_step,
            d_noise_inv = d_noise_inv,
            d_noise_inv_start_step = d_noise_inv_start_step,
            epsilon_scales = epsilon_scales,
            regional_conditioning_weights = regional_conditioning_weights,
            frame_weights_mgr = frame_weights_mgr,
            sde_noise = sde_noise,
            sde_noise_steps = sde_noise_steps,
            rk_swap_step = rk_swap_step,
            rk_swap_print = rk_swap_print,
            rk_swap_threshold = rk_swap_threshold,
            rk_swap_type = rk_swap_type,
            steps_to_run = steps_to_run,
            sde_mask = sde_mask,
            bongmath = bongmath,
            )

        # Stage 2: run the sampler through SharkSampler.
        output, denoised, sde_noise = SharkSampler().main(
            model = model,
            cfg = cfg,
            scheduler = scheduler,
            steps = steps,
            steps_to_run = steps_to_run,
            denoise = denoise,
            latent_image = latent_image,
            positive = positive,
            negative = negative,
            sampler = sampler,
            cfgpp = cfgpp,
            noise_seed = seed,
            options = options_mgr.as_dict(),
            sde_noise = sde_noise,
            sde_noise_steps = sde_noise_steps,
            noise_type_init = noise_type_init,
            noise_stdev = noise_stdev,
            sampler_mode = sampler_mode,
            denoise_alt = denoise_alt,
            sigmas = sigmas,
            extra_options = extra_options)

        return (output, denoised, options_mgr.as_dict(),) # {'model':model,},)
class ClownsharkChainsampler_Beta(ClownsharKSampler_Beta):
    """Chain-sampling variant: resumes or extends sampling from a latent
    produced by an upstream sampler node, recovering the sigma schedule and
    step count from latent_image['state_info'] before delegating to
    ClownsharKSampler_Beta.main()."""
    @classmethod
    def INPUT_TYPES(cls):
        # Narrower schema than the parent: scheduler/steps/denoise come from
        # the chained state, and sampler_mode is limited to unsample/resample.
        return {
            "required": {
                "eta": ("FLOAT", {"default": 0.5, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Calculated noise amount to be added, then removed, after each step."}),
                "sampler_name": (get_sampler_name_list(), {"default": get_default_sampler_name()}),
                "steps_to_run": ("INT", {"default": -1, "min": -1, "max": MAX_STEPS}),
                "cfg": ("FLOAT", {"default": 5.5, "min": -10000.0, "max": 10000.0, "step":0.01, "round": False, "tooltip": "Negative values use channelwise CFG." }),
                "sampler_mode": (['unsample', 'resample'],{"default": "resample"}),
                "bongmath": ("BOOLEAN", {"default": True}),
            },
            "optional": {
                "model": ("MODEL",),
                "positive": ("CONDITIONING", ),
                "negative": ("CONDITIONING", ),
                #"sampler": ("SAMPLER", ),
                "sigmas": ("SIGMAS", ),
                "latent_image": ("LATENT", ),
                "guides": ("GUIDES", ),
                "options": ("OPTIONS", ),
            }
        }

    def main(self,
            eta = 0.5,
            sampler_name = "res_2m",
            steps_to_run = -1,
            cfg = 5.5,
            bongmath = True,
            seed : int = -1,
            latent_image = None,
            sigmas = None,
            sampler_mode = "",
            **kwargs):
        # Derive the total step count from the stored schedule length; the -3
        # presumably trims bookkeeping entries appended by the upstream sampler
        # state -- TODO confirm against the state_info layout.
        # NOTE(review): latent_image is required here despite being an optional
        # input; a disconnected latent raises TypeError.
        steps = latent_image['state_info']['sigmas'].shape[-1] - 3
        sigmas = latent_image['state_info']['sigmas'] if sigmas is None else sigmas
        # An unsample pass leaves an ascending sigma schedule; flip it back to
        # descending when resuming with resample.
        if len(sigmas) > 2 and sigmas[1] < sigmas[2] and latent_image['state_info']['sampler_mode'] == "unsample" and sampler_mode == "resample":
            sigmas = torch.flip(sigmas, dims=[0])
        return super().main(eta=eta, sampler_name=sampler_name, sampler_mode=sampler_mode, sigmas=sigmas, steps_to_run=steps_to_run, steps=steps, cfg=cfg, bongmath=bongmath, seed=seed, latent_image=latent_image, **kwargs)
class ClownSampler_Beta:
    """Sampler-construction node: resolves settings (options dict overrides
    explicit parameters) and returns a SAMPLER built by
    ClownSamplerAdvanced_Beta, without executing it. Pair with a SharkSampler
    node for execution."""
    @classmethod
    def INPUT_TYPES(cls):
        inputs = {"required":
            {
                "eta": ("FLOAT", {"default": 0.5, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Calculated noise amount to be added, then removed, after each step."}),
                "sampler_name": (get_sampler_name_list (), {"default": get_default_sampler_name()}),
                "seed": ("INT", {"default": -1, "min": -1, "max": 0xffffffffffffffff}),
                "bongmath": ("BOOLEAN", {"default": True}),
            },
            "optional":
            {
                "guides": ("GUIDES",),
                "options": ("OPTIONS", {}),
            }
        }
        return inputs

    RETURN_TYPES = ("SAMPLER",)
    RETURN_NAMES = ("sampler",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/samplers"

    def main(self,
            model = None,
            denoise : float = 1.0,
            scheduler : str = "beta57",
            cfg : float = 1.0,
            seed : int = -1,
            positive = None,
            negative = None,
            latent_image : Optional[dict[Tensor]] = None,
            steps : int = 30,
            steps_to_run : int = -1,
            bongmath : bool = True,
            sampler_mode : str = "standard",
            noise_type_sde : str = "gaussian",
            noise_type_sde_substep : str = "gaussian",
            noise_mode_sde : str = "hard",
            noise_mode_sde_substep : str = "hard",
            overshoot_mode : str = "hard",
            overshoot_mode_substep : str = "hard",
            overshoot : float = 0.0,
            overshoot_substep : float = 0.0,
            eta : float = 0.5,
            eta_substep : float = 0.5,
            noise_scaling_weight : float = 0.0,
            noise_boost_step : float = 0.0,
            noise_boost_substep : float = 0.0,
            noise_anchor : float = 1.0,
            s_noise : float = 1.0,
            s_noise_substep : float = 1.0,
            d_noise : float = 1.0,
            d_noise_start_step : int = 0,
            d_noise_inv : float = 1.0,
            d_noise_inv_start_step : int = 0,
            alpha_sde : float = -1.0,
            k_sde : float = 1.0,
            cfgpp : float = 0.0,
            c1 : float = 0.0,
            c2 : float = 0.5,
            c3 : float = 1.0,
            noise_seed_sde : int = -1,
            sampler_name : str = "res_2m",
            implicit_sampler_name : str = "use_explicit",
            implicit_type : str = "bongmath",
            implicit_type_substeps : str = "bongmath",
            implicit_steps : int = 0,
            implicit_substeps : int = 0,
            sigmas : Optional[Tensor] = None,
            sigmas_override : Optional[Tensor] = None,
            guides = None,
            options = None,
            sde_noise = None,
            sde_noise_steps : int = 1,
            extra_options : str = "",
            automation = None,
            epsilon_scales : Optional[Tensor] = None,
            regional_conditioning_weights : Optional[Tensor] = None,
            frame_weights_mgr = None,
            rescale_floor : bool = True,
            rk_swap_step : int = MAX_STEPS,
            rk_swap_print : bool = False,
            rk_swap_threshold : float = 0.0,
            rk_swap_type : str = "",
            sde_mask : Optional[Tensor] = None,
            #start_at_step : int = 0,
            #stop_at_step : int = MAX_STEPS,
            **kwargs
            ):
        # Merge chained options with loose kwargs; options values override the
        # explicit parameters below.
        options_mgr = OptionsManager(options, **kwargs)
        extra_options += "\n" + options_mgr.get('extra_options', "")

        # defaults for ClownSampler
        # NOTE(review): overwrites the eta_substep parameter with eta before the
        # options lookup, so only an options-supplied 'eta_substep' can differ.
        eta_substep = eta

        # defaults for SharkSampler
        noise_type_init = "gaussian"
        noise_stdev = 1.0
        denoise_alt = 1.0
        channelwise_cfg = False #1.0

        #if options is not None:
        #options_mgr = OptionsManager(options_inputs)
        noise_type_sde = options_mgr.get('noise_type_sde' , noise_type_sde)
        noise_type_sde_substep = options_mgr.get('noise_type_sde_substep', noise_type_sde_substep)
        noise_mode_sde = options_mgr.get('noise_mode_sde' , noise_mode_sde)
        noise_mode_sde_substep = options_mgr.get('noise_mode_sde_substep', noise_mode_sde_substep)
        overshoot_mode = options_mgr.get('overshoot_mode' , overshoot_mode)
        overshoot_mode_substep = options_mgr.get('overshoot_mode_substep', overshoot_mode_substep)
        eta = options_mgr.get('eta' , eta)
        eta_substep = options_mgr.get('eta_substep' , eta_substep)
        overshoot = options_mgr.get('overshoot' , overshoot)
        overshoot_substep = options_mgr.get('overshoot_substep' , overshoot_substep)
        noise_scaling_weight = options_mgr.get('noise_scaling_weight' , noise_scaling_weight)
        noise_boost_step = options_mgr.get('noise_boost_step' , noise_boost_step)
        noise_boost_substep = options_mgr.get('noise_boost_substep' , noise_boost_substep)
        noise_anchor = options_mgr.get('noise_anchor' , noise_anchor)
        s_noise = options_mgr.get('s_noise' , s_noise)
        s_noise_substep = options_mgr.get('s_noise_substep' , s_noise_substep)
        d_noise = options_mgr.get('d_noise' , d_noise)
        d_noise_start_step = options_mgr.get('d_noise_start_step' , d_noise_start_step)
        d_noise_inv = options_mgr.get('d_noise_inv' , d_noise_inv)
        d_noise_inv_start_step = options_mgr.get('d_noise_inv_start_step', d_noise_inv_start_step)
        implicit_type = options_mgr.get('implicit_type' , implicit_type)
        implicit_type_substeps = options_mgr.get('implicit_type_substeps', implicit_type_substeps)
        implicit_steps = options_mgr.get('implicit_steps' , implicit_steps)
        implicit_substeps = options_mgr.get('implicit_substeps' , implicit_substeps)
        alpha_sde = options_mgr.get('alpha_sde' , alpha_sde)
        k_sde = options_mgr.get('k_sde' , k_sde)
        c1 = options_mgr.get('c1' , c1)
        c2 = options_mgr.get('c2' , c2)
        c3 = options_mgr.get('c3' , c3)
        frame_weights_mgr = options_mgr.get('frame_weights_mgr' , frame_weights_mgr)
        sde_noise = options_mgr.get('sde_noise' , sde_noise)
        sde_noise_steps = options_mgr.get('sde_noise_steps' , sde_noise_steps)
        extra_options = options_mgr.get('extra_options' , extra_options)
        automation = options_mgr.get('automation' , automation)

        # SharkSampler Options
        noise_type_init = options_mgr.get('noise_type_init' , noise_type_init)
        noise_stdev = options_mgr.get('noise_stdev' , noise_stdev)
        sampler_mode = options_mgr.get('sampler_mode' , sampler_mode)
        denoise_alt = options_mgr.get('denoise_alt' , denoise_alt)
        channelwise_cfg = options_mgr.get('channelwise_cfg' , channelwise_cfg)
        sigmas = options_mgr.get('sigmas' , sigmas)
        rk_swap_type = options_mgr.get('rk_swap_type' , rk_swap_type)
        rk_swap_step = options_mgr.get('rk_swap_step' , rk_swap_step)
        rk_swap_threshold = options_mgr.get('rk_swap_threshold' , rk_swap_threshold)
        rk_swap_print = options_mgr.get('rk_swap_print' , rk_swap_print)
        sde_mask = options_mgr.get('sde_mask' , sde_mask)
        #start_at_step = options_mgr.get('start_at_step' , start_at_step)
        #stop_at_ste = options_mgr.get('stop_at_step' , stop_at_step)

        if channelwise_cfg: # != 1.0:
            cfg = -abs(cfg) # set cfg negative for shark, to flag as cfg_cw

        # This node exposes a single seed widget; it feeds the SDE noise seed.
        noise_seed_sde = seed

        sampler, = ClownSamplerAdvanced_Beta().main(
            noise_type_sde = noise_type_sde,
            noise_type_sde_substep = noise_type_sde_substep,
            noise_mode_sde = noise_mode_sde,
            noise_mode_sde_substep = noise_mode_sde_substep,
            eta = eta,
            eta_substep = eta_substep,
            s_noise = s_noise,
            s_noise_substep = s_noise_substep,
            overshoot = overshoot,
            overshoot_substep = overshoot_substep,
            overshoot_mode = overshoot_mode,
            overshoot_mode_substep = overshoot_mode_substep,
            d_noise = d_noise,
            d_noise_start_step = d_noise_start_step,
            d_noise_inv = d_noise_inv,
            d_noise_inv_start_step = d_noise_inv_start_step,
            alpha_sde = alpha_sde,
            k_sde = k_sde,
            cfgpp = cfgpp,
            c1 = c1,
            c2 = c2,
            c3 = c3,
            sampler_name = sampler_name,
            implicit_sampler_name = implicit_sampler_name,
            implicit_type = implicit_type,
            implicit_type_substeps = implicit_type_substeps,
            implicit_steps = implicit_steps,
            implicit_substeps = implicit_substeps,
            rescale_floor = rescale_floor,
            sigmas_override = sigmas_override,
            noise_seed_sde = noise_seed_sde,
            guides = guides,
            options = options_mgr.as_dict(),
            extra_options = extra_options,
            automation = automation,
            noise_scaling_weight = noise_scaling_weight,
            noise_boost_step = noise_boost_step,
            noise_boost_substep = noise_boost_substep,
            epsilon_scales = epsilon_scales,
            regional_conditioning_weights = regional_conditioning_weights,
            frame_weights_mgr = frame_weights_mgr,
            sde_noise = sde_noise,
            sde_noise_steps = sde_noise_steps,
            rk_swap_step = rk_swap_step,
            rk_swap_print = rk_swap_print,
            rk_swap_threshold = rk_swap_threshold,
            rk_swap_type = rk_swap_type,
            steps_to_run = steps_to_run,
            sde_mask = sde_mask,
            bongmath = bongmath,
            )

        return (sampler,)
class BongSampler:
    """Simplified all-in-one sampler node exposing only the res_* samplers.
    Choosing an "_sde" variant enables SDE noise (eta 0.5); otherwise eta is
    forced to 0.0. Internally builds a sampler via ClownSamplerAdvanced_Beta
    and runs it with SharkSampler, returning just the output latent."""
    @classmethod
    def INPUT_TYPES(cls):
        inputs = {"required":
            {
                "model": ("MODEL",),
                "seed": ("INT", {"default": 0, "min": -1, "max": 0xffffffffffffffff}),
                "steps": ("INT", {"default": 30, "min": 1, "max": MAX_STEPS}),
                "cfg": ("FLOAT", {"default": 5.5, "min": -100.0, "max": 100.0, "step":0.01, "round": False, }),
                "sampler_name": (["res_2m", "res_3m", "res_2s", "res_3s","res_2m_sde", "res_3m_sde", "res_2s_sde", "res_3s_sde"], {"default": "res_2s_sde"}),
                "scheduler": (get_res4lyf_scheduler_list(), {"default": "beta57"},),
                "denoise": ("FLOAT", {"default": 1.0, "min": -10000, "max": MAX_STEPS, "step":0.01}),
            },
            "optional":
            {
                "positive": ("CONDITIONING",),
                "negative": ("CONDITIONING",),
                "latent_image": ("LATENT",),
            }
        }
        return inputs

    RETURN_TYPES = ("LATENT", )
    RETURN_NAMES = ("output", )
    FUNCTION = "main"
    CATEGORY = "RES4LYF/samplers"

    def main(self,
            model = None,
            denoise : float = 1.0,
            scheduler : str = "beta57",
            cfg : float = 1.0,
            seed : int = 42,
            positive = None,
            negative = None,
            latent_image : Optional[dict[Tensor]] = None,
            steps : int = 30,
            steps_to_run : int = -1,
            bongmath : bool = True,
            sampler_mode : str = "standard",
            noise_type_sde : str = "brownian",
            noise_type_sde_substep : str = "brownian",
            noise_mode_sde : str = "hard",
            noise_mode_sde_substep : str = "hard",
            overshoot_mode : str = "hard",
            overshoot_mode_substep : str = "hard",
            overshoot : float = 0.0,
            overshoot_substep : float = 0.0,
            eta : float = 0.5,
            eta_substep : float = 0.5,
            d_noise : float = 1.0,
            s_noise : float = 1.0,
            s_noise_substep : float = 1.0,
            alpha_sde : float = -1.0,
            k_sde : float = 1.0,
            cfgpp : float = 0.0,
            c1 : float = 0.0,
            c2 : float = 0.5,
            c3 : float = 1.0,
            noise_seed_sde : int = -1,
            sampler_name : str = "res_2m",
            implicit_sampler_name : str = "use_explicit",
            implicit_type : str = "bongmath",
            implicit_type_substeps : str = "bongmath",
            implicit_steps : int = 0,
            implicit_substeps : int = 0,
            sigmas : Optional[Tensor] = None,
            sigmas_override : Optional[Tensor] = None,
            guides = None,
            options = None,
            sde_noise = None,
            sde_noise_steps : int = 1,
            extra_options : str = "",
            automation = None,
            epsilon_scales : Optional[Tensor] = None,
            regional_conditioning_weights : Optional[Tensor] = None,
            frame_weights_mgr = None,
            noise_scaling_weight : float = 0.0,
            noise_boost_step : float = 0.0,
            noise_boost_substep : float = 0.0,
            noise_anchor : float = 1.0,
            rescale_floor : bool = True,
            rk_swap_step : int = MAX_STEPS,
            rk_swap_print : bool = False,
            rk_swap_threshold : float = 0.0,
            rk_swap_type : str = "",
            #start_at_step : int = 0,
            #stop_at_step : int = MAX_STEPS,
            **kwargs
            ):
        # Merge chained options with loose kwargs; options values override the
        # explicit parameters below.
        options_mgr = OptionsManager(options, **kwargs)
        extra_options += "\n" + options_mgr.get('extra_options', "")

        # Stable Cascade stage B needs its dedicated pyramid noise type.
        if model.model.model_config.unet_config.get('stable_cascade_stage') == 'b':
            noise_type_sde = "pyramid-cascade_B"
            noise_type_sde_substep = "pyramid-cascade_B"

        # The "_sde" suffix is a UI convenience: strip it and enable SDE noise
        # (eta 0.5); plain variants run deterministically (eta 0.0). This
        # overrides the eta parameter either way.
        if sampler_name.endswith("_sde"):
            sampler_name = sampler_name[:-4]
            eta = 0.5
        else:
            eta = 0.0

        # defaults for ClownSampler
        eta_substep = eta

        # defaults for SharkSampler
        noise_type_init = "gaussian"
        noise_stdev = 1.0
        denoise_alt = 1.0
        channelwise_cfg = False #1.0

        #if options is not None:
        #options_mgr = OptionsManager(options_inputs)
        noise_type_sde = options_mgr.get('noise_type_sde' , noise_type_sde)
        noise_type_sde_substep = options_mgr.get('noise_type_sde_substep', noise_type_sde_substep)
        noise_mode_sde = options_mgr.get('noise_mode_sde' , noise_mode_sde)
        noise_mode_sde_substep = options_mgr.get('noise_mode_sde_substep', noise_mode_sde_substep)
        overshoot_mode = options_mgr.get('overshoot_mode' , overshoot_mode)
        overshoot_mode_substep = options_mgr.get('overshoot_mode_substep', overshoot_mode_substep)
        eta = options_mgr.get('eta' , eta)
        eta_substep = options_mgr.get('eta_substep' , eta_substep)
        overshoot = options_mgr.get('overshoot' , overshoot)
        overshoot_substep = options_mgr.get('overshoot_substep' , overshoot_substep)
        noise_scaling_weight = options_mgr.get('noise_scaling_weight' , noise_scaling_weight)
        noise_boost_step = options_mgr.get('noise_boost_step' , noise_boost_step)
        noise_boost_substep = options_mgr.get('noise_boost_substep' , noise_boost_substep)
        noise_anchor = options_mgr.get('noise_anchor' , noise_anchor)
        s_noise = options_mgr.get('s_noise' , s_noise)
        s_noise_substep = options_mgr.get('s_noise_substep' , s_noise_substep)
        d_noise = options_mgr.get('d_noise' , d_noise)
        implicit_type = options_mgr.get('implicit_type' , implicit_type)
        implicit_type_substeps = options_mgr.get('implicit_type_substeps', implicit_type_substeps)
        implicit_steps = options_mgr.get('implicit_steps' , implicit_steps)
        implicit_substeps = options_mgr.get('implicit_substeps' , implicit_substeps)
        alpha_sde = options_mgr.get('alpha_sde' , alpha_sde)
        k_sde = options_mgr.get('k_sde' , k_sde)
        c1 = options_mgr.get('c1' , c1)
        c2 = options_mgr.get('c2' , c2)
        c3 = options_mgr.get('c3' , c3)
        frame_weights_mgr = options_mgr.get('frame_weights_mgr' , frame_weights_mgr)
        sde_noise = options_mgr.get('sde_noise' , sde_noise)
        sde_noise_steps = options_mgr.get('sde_noise_steps' , sde_noise_steps)
        extra_options = options_mgr.get('extra_options' , extra_options)
        automation = options_mgr.get('automation' , automation)

        # SharkSampler Options
        noise_type_init = options_mgr.get('noise_type_init' , noise_type_init)
        noise_stdev = options_mgr.get('noise_stdev' , noise_stdev)
        sampler_mode = options_mgr.get('sampler_mode' , sampler_mode)
        denoise_alt = options_mgr.get('denoise_alt' , denoise_alt)
        channelwise_cfg = options_mgr.get('channelwise_cfg' , channelwise_cfg)
        sigmas = options_mgr.get('sigmas' , sigmas)
        rk_swap_type = options_mgr.get('rk_swap_type' , rk_swap_type)
        rk_swap_step = options_mgr.get('rk_swap_step' , rk_swap_step)
        rk_swap_threshold = options_mgr.get('rk_swap_threshold' , rk_swap_threshold)
        rk_swap_print = options_mgr.get('rk_swap_print' , rk_swap_print)
        #start_at_step = options_mgr.get('start_at_step' , start_at_step)
        #stop_at_ste = options_mgr.get('stop_at_step' , stop_at_step)

        if channelwise_cfg: # != 1.0:
            cfg = -abs(cfg) # set cfg negative for shark, to flag as cfg_cw

        # NOTE(review): unlike ClownSampler_Beta, noise_seed_sde is never set
        # from the node's seed widget here (it stays -1 unless options supply
        # it); the widget seed only feeds SharkSampler's noise_seed. Confirm
        # whether the SDE seed is derived downstream.
        sampler, = ClownSamplerAdvanced_Beta().main(
            noise_type_sde = noise_type_sde,
            noise_type_sde_substep = noise_type_sde_substep,
            noise_mode_sde = noise_mode_sde,
            noise_mode_sde_substep = noise_mode_sde_substep,
            eta = eta,
            eta_substep = eta_substep,
            s_noise = s_noise,
            s_noise_substep = s_noise_substep,
            overshoot = overshoot,
            overshoot_substep = overshoot_substep,
            overshoot_mode = overshoot_mode,
            overshoot_mode_substep = overshoot_mode_substep,
            d_noise = d_noise,
            #d_noise_start_step = d_noise_start_step,
            #d_noise_inv = d_noise_inv,
            #d_noise_inv_start_step = d_noise_inv_start_step,
            alpha_sde = alpha_sde,
            k_sde = k_sde,
            cfgpp = cfgpp,
            c1 = c1,
            c2 = c2,
            c3 = c3,
            sampler_name = sampler_name,
            implicit_sampler_name = implicit_sampler_name,
            implicit_type = implicit_type,
            implicit_type_substeps = implicit_type_substeps,
            implicit_steps = implicit_steps,
            implicit_substeps = implicit_substeps,
            rescale_floor = rescale_floor,
            sigmas_override = sigmas_override,
            noise_seed_sde = noise_seed_sde,
            guides = guides,
            options = options_mgr.as_dict(),
            extra_options = extra_options,
            automation = automation,
            noise_scaling_weight = noise_scaling_weight,
            noise_boost_step = noise_boost_step,
            noise_boost_substep = noise_boost_substep,
            epsilon_scales = epsilon_scales,
            regional_conditioning_weights = regional_conditioning_weights,
            frame_weights_mgr = frame_weights_mgr,
            sde_noise = sde_noise,
            sde_noise_steps = sde_noise_steps,
            rk_swap_step = rk_swap_step,
            rk_swap_print = rk_swap_print,
            rk_swap_threshold = rk_swap_threshold,
            rk_swap_type = rk_swap_type,
            steps_to_run = steps_to_run,
            bongmath = bongmath,
            )

        # Execute the freshly-built sampler; only the output latent is exposed.
        output, denoised, sde_noise = SharkSampler().main(
            model = model,
            cfg = cfg,
            scheduler = scheduler,
            steps = steps,
            steps_to_run = steps_to_run,
            denoise = denoise,
            latent_image = latent_image,
            positive = positive,
            negative = negative,
            sampler = sampler,
            cfgpp = cfgpp,
            noise_seed = seed,
            options = options_mgr.as_dict(),
            sde_noise = sde_noise,
            sde_noise_steps = sde_noise_steps,
            noise_type_init = noise_type_init,
            noise_stdev = noise_stdev,
            sampler_mode = sampler_mode,
            denoise_alt = denoise_alt,
            sigmas = sigmas,
            extra_options = extra_options)

        return (output, )
================================================
FILE: beta/samplers_extensions.py
================================================
import torch
from torch import Tensor
import torch.nn.functional as F
from dataclasses import dataclass, asdict
from typing import Optional, Callable, Tuple, Dict, Any, Union
import copy
from nodes import MAX_RESOLUTION
from ..latents import get_edge_mask
from ..helper import OptionsManager, FrameWeightsManager, initialize_or_scale, get_res4lyf_scheduler_list, parse_range_string, parse_tile_sizes, parse_range_string_int
from .rk_coefficients_beta import RK_SAMPLER_NAMES_BETA_FOLDERS, get_default_sampler_name, get_sampler_name_list, process_sampler_name
from .noise_classes import NOISE_GENERATOR_NAMES_SIMPLE
from .rk_noise_sampler_beta import NOISE_MODE_NAMES
from .constants import IMPLICIT_TYPE_NAMES, GUIDE_MODE_NAMES_BETA_SIMPLE, MAX_STEPS, FRAME_WEIGHTS_CONFIG_NAMES, FRAME_WEIGHTS_DYNAMICS_NAMES, FRAME_WEIGHTS_SCHEDULE_NAMES
class ClownSamplerSelector_Beta:
    """Dropdown node that resolves a (possibly folder-qualified) sampler
    selection to a flat sampler name for downstream sampler nodes."""
    @classmethod
    def INPUT_TYPES(cls):
        schema = {"required":
            {
                "sampler_name": (get_sampler_name_list(), {"default": get_default_sampler_name()}),
            },
            "optional":
            {
            }
        }
        return schema

    RETURN_TYPES = (RK_SAMPLER_NAMES_BETA_FOLDERS,)
    RETURN_NAMES = ("sampler_name",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/sampler_options"

    def main(self,
            sampler_name = "res_2m",
            ):
        # Split the selection into explicit/implicit parts; the implicit name
        # wins unless it is the "use_explicit" sentinel.
        explicit_name, implicit_name = process_sampler_name(sampler_name)
        if implicit_name == "use_explicit":
            resolved = explicit_name
        else:
            resolved = implicit_name
        return (resolved,)
class ClownOptions_SDE_Beta:
    """Options node collecting SDE noise settings (noise types/modes, eta
    values, seed, optional per-step eta schedules) into the chained options
    dict. Selecting "none" for a type or mode disables that noise by zeroing
    the corresponding eta and restoring the default type/mode. Mutates and
    returns the incoming options dict when one is connected."""
    @classmethod
    def INPUT_TYPES(cls):
        schema = {"required":
            {
                "noise_type_sde": (NOISE_GENERATOR_NAMES_SIMPLE, {"default": "gaussian"}),
                "noise_type_sde_substep": (NOISE_GENERATOR_NAMES_SIMPLE, {"default": "gaussian"}),
                "noise_mode_sde": (NOISE_MODE_NAMES, {"default": 'hard', "tooltip": "How noise scales with the sigma schedule. Hard is the most aggressive, the others start strong and drop rapidly."}),
                "noise_mode_sde_substep": (NOISE_MODE_NAMES, {"default": 'hard', "tooltip": "How noise scales with the sigma schedule. Hard is the most aggressive, the others start strong and drop rapidly."}),
                "eta": ("FLOAT", {"default": 0.5, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Calculated noise amount to be added, then removed, after each step."}),
                "eta_substep": ("FLOAT", {"default": 0.5, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Calculated noise amount to be added, then removed, after each step."}),
                "seed": ("INT", {"default": -1, "min": -1, "max": 0xffffffffffffffff}),
            },
            "optional":
            {
                "etas": ("SIGMAS", ),
                "etas_substep": ("SIGMAS", ),
                "options": ("OPTIONS", ),
            }
        }
        return schema

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/sampler_options"

    def main(self,
            noise_type_sde = "gaussian",
            noise_type_sde_substep = "gaussian",
            noise_mode_sde = "hard",
            noise_mode_sde_substep = "hard",
            eta = 0.5,
            eta_substep = 0.5,
            seed : int = -1,
            etas : Optional[Tensor] = None,
            etas_substep : Optional[Tensor] = None,
            options = None,
            ):
        if options is None:
            options = {}

        # "none" selections disable SDE noise for that level: zero out the eta
        # and fall back to the default mode/type.
        if noise_mode_sde == "none":
            noise_mode_sde, eta = "hard", 0.0
        if noise_mode_sde_substep == "none":
            noise_mode_sde_substep, eta_substep = "hard", 0.0
        if noise_type_sde == "none":
            noise_type_sde, eta = "gaussian", 0.0
        if noise_type_sde_substep == "none":
            noise_type_sde_substep, eta_substep = "gaussian", 0.0

        options.update({
            'noise_type_sde'         : noise_type_sde,
            'noise_type_sde_substep' : noise_type_sde_substep,
            'noise_mode_sde'         : noise_mode_sde,
            'noise_mode_sde_substep' : noise_mode_sde_substep,
            'eta'                    : eta,
            'eta_substep'            : eta_substep,
            'noise_seed_sde'         : seed,
            'etas'                   : etas,
            'etas_substep'           : etas_substep,
        })

        return (options,)
class ClownOptions_StepSize_Beta:
    """ComfyUI node: record step/substep overshoot amounts and their scaling
    modes in an OPTIONS dict."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "overshoot_mode":         (NOISE_MODE_NAMES, {"default": 'hard', "tooltip": "How step size overshoot scales with the sigma schedule. Hard is the most aggressive, the others start strong and drop rapidly."}),
                "overshoot_mode_substep": (NOISE_MODE_NAMES, {"default": 'hard', "tooltip": "How substep size overshoot scales with the sigma schedule. Hard is the most aggressive, the others start strong and drop rapidly."}),
                "overshoot":              ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Boost the size of each denoising step, then rescale to match the original. Has a softening effect."}),
                "overshoot_substep":      ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Boost the size of each denoising substep, then rescale to match the original. Has a softening effect."}),
            },
            "optional": {
                "options": ("OPTIONS", ),
            },
        }

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_options"

    def main(self,
            overshoot_mode="hard",
            overshoot_mode_substep="hard",
            overshoot=0.0,
            overshoot_substep=0.0,
            options=None,
            ):
        if options is None:
            options = {}
        options.update({
            'overshoot_mode':         overshoot_mode,
            'overshoot_mode_substep': overshoot_mode_substep,
            'overshoot':              overshoot,
            'overshoot_substep':      overshoot_substep,
        })
        return (options,)
@dataclass
class DetailBoostOptions:
    # Default knobs for the "detail boost" noise-scaling feature.
    # NOTE(review): the only visible use is a commented-out constructor call in
    # ClownOptions_DetailBoost_Beta that passes fields which do not exist here
    # (noise_scaling_type, etc.) — confirm before reviving that code.
    noise_scaling_weight : float = 0.0   # strength of the noise-scaling effect
    noise_boost_step     : float = 0.0   # extra noise added per full step
    noise_boost_substep  : float = 0.0   # extra noise added per substep
    noise_anchor         : float = 1.0   # anchor for the SDE noise level
    s_noise              : float = 1.0   # SDE noise multiplier (steps)
    s_noise_substep      : float = 1.0   # SDE noise multiplier (substeps)
    d_noise              : float = 1.0   # sigma-schedule downscale ("lying")
# Method choices for ClownOptions_DetailBoost_Beta's "method" input. A
# "_normal" suffix is stripped at runtime and turns on normalization (see
# ClownOptions_DetailBoost_Beta.main()).
DETAIL_BOOST_METHODS = [
    'sampler',
    'sampler_normal',
    'sampler_substep',
    'sampler_substep_normal',
    'model',
    'model_alpha',
]
class ClownOptions_DetailBoost_Beta:
    """ComfyUI node: build per-step noise-scaling weight/eta schedules for the
    "detail boost" effect and store them (plus related flags) in OPTIONS."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "weight":     ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set to positive values to create a sharper, grittier, more detailed image. Set to negative values to soften and deepen the colors."}),
                "method":     (DETAIL_BOOST_METHODS, {"default": "model", "tooltip": "Determines whether the sampler or the model underestimates the noise level."}),
                #"noise_scaling_mode": (['linear'] + NOISE_MODE_NAMES, {"default": 'hard', "tooltip": "Changes the steps where the effect is greatest. Most affect early steps, sinusoidal affects middle steps."}),
                "mode":       (NOISE_MODE_NAMES, {"default": 'hard', "tooltip": "Changes the steps where the effect is greatest. Most affect early steps, sinusoidal affects middle steps."}),
                "eta":        ("FLOAT", {"default": 0.5, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "The strength of the effect of the noise_scaling_mode. Linear ignores this parameter."}),
                "start_step": ("INT", {"default": 3, "min": 0, "max": MAX_STEPS}),
                "end_step":   ("INT", {"default": 10, "min": -1, "max": MAX_STEPS}),
            },
            "optional": {
                "weights": ("SIGMAS", ),
                "etas":    ("SIGMAS", ),
                "options": ("OPTIONS", ),
            },
        }

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_options"

    def main(self,
            weight                    : float = 0.0,
            method                    : str   = "sampler",
            mode                      : str   = "linear",
            eta                       : float = 0.5,
            start_step                : int   = 0,
            end_step                  : int   = -1,
            noise_scaling_cycles      : int   = 1,
            noise_boost_step          : float = 0.0,
            noise_boost_substep       : float = 0.0,
            sampler_scaling_normalize : bool  = False,
            weights                   : Optional[Tensor] = None,
            etas                      : Optional[Tensor] = None,
            options = None
            ):
        """Assemble the noise-scaling schedules and write them into options.

        weights/etas (SIGMAS), when supplied, override the flat schedules
        generated from the scalar weight/eta values.
        """
        noise_scaling_weight     = weight
        noise_scaling_type       = method
        noise_scaling_mode       = mode
        noise_scaling_eta        = eta
        noise_scaling_start_step = start_step
        noise_scaling_end_step   = end_step
        noise_scaling_weights    = weights
        noise_scaling_etas       = etas

        options = options if options is not None else {}

        default_dtype = torch.float64
        # FIX: fall back to CPU when CUDA is unavailable; the previous
        # hard-coded torch.device('cuda') crashed on CPU-only/MPS setups.
        default_device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

        # A "_normal" method suffix selects the same method with
        # normalization enabled.
        if noise_scaling_type.endswith("_normal"):
            sampler_scaling_normalize = True
            noise_scaling_type = noise_scaling_type[:-7]

        if noise_scaling_end_step == -1:
            noise_scaling_end_step = MAX_STEPS

        # Default schedules: flat weight/eta across MAX_STEPS.
        # ("is None" instead of "== None" — same behavior, idiomatic.)
        if noise_scaling_weights is None:
            noise_scaling_weights = initialize_or_scale(None, noise_scaling_weight, MAX_STEPS).to(default_dtype).to(default_device)
        if noise_scaling_etas is None:
            noise_scaling_etas = initialize_or_scale(None, noise_scaling_eta, MAX_STEPS).to(default_dtype).to(default_device)

        # Delay the schedules until start_step, truncate at end_step, then
        # zero-pad out to MAX_STEPS so downstream indexing never overruns.
        noise_scaling_prepend = torch.zeros((noise_scaling_start_step,), dtype=default_dtype, device=default_device)
        noise_scaling_weights = torch.cat((noise_scaling_prepend, noise_scaling_weights), dim=0)
        noise_scaling_etas    = torch.cat((noise_scaling_prepend, noise_scaling_etas),    dim=0)

        if noise_scaling_weights.shape[-1] > noise_scaling_end_step:
            noise_scaling_weights = noise_scaling_weights[:noise_scaling_end_step]
        if noise_scaling_etas.shape[-1] > noise_scaling_end_step:
            noise_scaling_etas = noise_scaling_etas[:noise_scaling_end_step]

        noise_scaling_weights = F.pad(noise_scaling_weights, (0, MAX_STEPS), value=0.0)
        noise_scaling_etas    = F.pad(noise_scaling_etas,    (0, MAX_STEPS), value=0.0)

        options['noise_scaling_weight']  = noise_scaling_weight
        options['noise_scaling_type']    = noise_scaling_type
        options['noise_scaling_mode']    = noise_scaling_mode
        options['noise_scaling_eta']     = noise_scaling_eta
        options['noise_scaling_cycles']  = noise_scaling_cycles
        options['noise_scaling_weights'] = noise_scaling_weights
        options['noise_scaling_etas']    = noise_scaling_etas
        options['noise_boost_step']      = noise_boost_step
        options['noise_boost_substep']   = noise_boost_substep
        options['noise_boost_normalize'] = sampler_scaling_normalize

        return (options,)
class ClownOptions_SigmaScaling_Beta:
    """ComfyUI node: record SDE noise multipliers and sigma-schedule scaling
    ("lying") settings in an OPTIONS dict.

    Note: the UI's "lying"/"lying_inv" inputs are stored under the internal
    keys 'd_noise'/'d_noise_inv'.
    """

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "s_noise":              ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step":0.01, "tooltip": "Adds extra SDE noise. Values around 1.03-1.07 can lead to a moderate boost in detail and paint textures."}),
                "s_noise_substep":      ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step":0.01, "tooltip": "Adds extra SDE noise. Values around 1.03-1.07 can lead to a moderate boost in detail and paint textures."}),
                "noise_anchor_sde":     ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Typically set to between 1.0 and 0.0. Lower values cerate a grittier, more detailed image."}),
                "lying":                ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step":0.01, "tooltip": "Downscales the sigma schedule. Values around 0.98-0.95 can lead to a large boost in detail and paint textures."}),
                "lying_inv":            ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step":0.01, "tooltip": "Upscales the sigma schedule. Will soften the image and deepen colors. Use after d_noise to counteract desaturation."}),
                "lying_start_step":     ("INT", {"default": 0, "min": 0, "max": MAX_STEPS}),
                "lying_inv_start_step": ("INT", {"default": 1, "min": 0, "max": MAX_STEPS}),
            },
            "optional": {
                "s_noises":         ("SIGMAS", ),
                "s_noises_substep": ("SIGMAS", ),
                "options":          ("OPTIONS", ),
            },
        }

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_options"

    def main(self,
            noise_anchor_sde     : float = 1.0,
            s_noise              : float = 1.0,
            s_noise_substep      : float = 1.0,
            lying                : float = 1.0,
            lying_start_step     : int   = 0,
            lying_inv            : float = 1.0,
            lying_inv_start_step : int   = 1,
            s_noises             : Optional[Tensor] = None,
            s_noises_substep     : Optional[Tensor] = None,
            options = None
            ):
        options = options if options is not None else {}

        # FIX (cleanup): removed unused locals default_dtype/default_device —
        # this method only stores scalars and the user-supplied schedules.
        options['noise_anchor']           = noise_anchor_sde
        options['s_noise']                = s_noise
        options['s_noise_substep']        = s_noise_substep
        options['d_noise']                = lying
        options['d_noise_start_step']     = lying_start_step
        options['d_noise_inv']            = lying_inv
        options['d_noise_inv_start_step'] = lying_inv_start_step
        options['s_noises']               = s_noises
        options['s_noises_substep']       = s_noises_substep

        return (options,)
class ClownOptions_FlowGuide:
    """ComfyUI node: store the flow-guide epsilon sync strength in OPTIONS."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "sync_eps": ("FLOAT", {"default": 0.75, "min": -10000.0, "max": 10000.0, "step":0.01, "round": False, "tooltip": "Accelerate convergence with positive values when sampling, negative values when unsampling."}),
            },
            "optional": {
                "options": ("OPTIONS", ),
            },
        }

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_options"

    def main(self, sync_eps=0.75, options=None):
        if options is None:
            options = {}
        options['flow_sync_eps'] = sync_eps
        return (options,)
class ClownOptions_Momentum_Beta:
    """ComfyUI node: store the sampler momentum value in OPTIONS."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "momentum": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step":0.01, "round": False, "tooltip": "Accelerate convergence with positive values when sampling, negative values when unsampling."}),
            },
            "optional": {
                "options": ("OPTIONS", ),
            },
        }

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_options"

    def main(self, momentum=0.0, options=None):
        if options is None:
            options = {}
        options['momentum'] = momentum
        return (options,)
class ClownOptions_ImplicitSteps_Beta:
    """ComfyUI node: store implicit step/substep counts and refinement types
    in OPTIONS."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "implicit_type":          (IMPLICIT_TYPE_NAMES, {"default": "bongmath"}),
                "implicit_type_substeps": (IMPLICIT_TYPE_NAMES, {"default": "bongmath"}),
                "implicit_steps":         ("INT", {"default": 0, "min": 0, "max": 10000}),
                "implicit_substeps":      ("INT", {"default": 0, "min": 0, "max": 10000}),
            },
            "optional": {
                "options": ("OPTIONS", ),
            },
        }

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_options"

    def main(self,
            implicit_type="bongmath",
            implicit_type_substeps="bongmath",
            implicit_steps=0,
            implicit_substeps=0,
            options=None
            ):
        if options is None:
            options = {}
        options.update({
            'implicit_type':          implicit_type,
            'implicit_type_substeps': implicit_type_substeps,
            'implicit_steps':         implicit_steps,
            'implicit_substeps':      implicit_substeps,
        })
        return (options,)
class ClownOptions_Cycles_Beta:
    """ComfyUI node: configure sample/unsample cycling ("rebounds") and the
    unsampler's eta/cfg/sampler overrides in OPTIONS."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "cycles":                ("FLOAT", {"default": 0.0, "min": 0.0, "max": 10000, "step":0.5, "round": 0.5}),
                "eta_decay_scale":       ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step":0.01, "tooltip": "Multiplies etas by this number after every cycle. May help drive convergence." }),
                "unsample_eta":          ("FLOAT", {"default": 0.5, "min": -10000, "max": 10000, "step":0.01}),
                "unsampler_override":    (get_sampler_name_list(), {"default": "none"}),
                "unsample_steps_to_run": ("INT", {"default": -1, "min": -1, "max": 10000, "step":1, "round": 1}),
                "unsample_cfg":          ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step":0.01}),
                "unsample_bongmath":     ("BOOLEAN", {"default": False}),
            },
            "optional": {
                "options": ("OPTIONS", ),
            },
        }

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_options"

    def main(self,
            cycles=0,
            unsample_eta=0.5,
            eta_decay_scale=1.0,
            unsample_cfg=1.0,
            unsampler_override="none",
            unsample_steps_to_run=-1,
            unsample_bongmath=False,
            options=None
            ):
        if options is None:
            options = {}
        # One cycle = one sample + one unsample pass, hence two "rebounds".
        options.update({
            'rebounds':              int(cycles * 2),
            'unsample_eta':          unsample_eta,
            'unsampler_name':        unsampler_override,
            'eta_decay_scale':       eta_decay_scale,
            'unsample_steps_to_run': unsample_steps_to_run,
            'unsample_cfg':          unsample_cfg,
            'unsample_bongmath':     unsample_bongmath,
        })
        return (options,)
class SharkOptions_StartStep_Beta:
    """ComfyUI node: store the step at which sampling begins in OPTIONS."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "start_at_step": ("INT", {"default": 0, "min": -1, "max": 10000, "step":1,}),
            },
            "optional": {
                "options": ("OPTIONS", ),
            },
        }

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_options"

    def main(self, start_at_step=0, options=None):
        if options is None:
            options = {}
        options['start_at_step'] = start_at_step
        return (options,)
class ClownOptions_Tile_Beta:
    """ComfyUI node: append one (height, width) tile size to the OPTIONS
    'tile_sizes' list; chained Tile nodes accumulate entries."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "tile_width":  ("INT", {"default": 1024, "min": -1, "max": 10000, "step":1,}),
                "tile_height": ("INT", {"default": 1024, "min": -1, "max": 10000, "step":1,}),
            },
            "optional": {
                "options": ("OPTIONS", ),
            },
        }

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_options"

    def main(self, tile_height=1024, tile_width=1024, options=None):
        if options is None:
            options = {}
        # Stored as (height, width) tuples.
        options.setdefault('tile_sizes', []).append((tile_height, tile_width))
        return (options,)
class ClownOptions_Tile_Advanced_Beta:
    """ComfyUI node: parse a multiline tile-size string into the OPTIONS
    'tile_sizes' list."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "tile_sizes": ("STRING", {"default": "1024,1024", "multiline": True}),
            },
            "optional": {
                "options": ("OPTIONS", ),
            },
        }

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_options"

    def main(self, tile_sizes="1024,1024", options=None):
        if options is None:
            options = {}
        parsed = parse_tile_sizes(tile_sizes)
        # Swap the last two parsed values so entries are stored consistently
        # with the other tile nodes... width, height
        options['tile_sizes'] = [(entry[-1], entry[-2]) for entry in parsed]
        return (options,)
class ClownOptions_ExtraOptions_Beta:
    """ComfyUI node: append a free-form extra_options string to OPTIONS,
    joining multiple chained nodes with newlines."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "extra_options": ("STRING", {"default": "", "multiline": True}),
            },
            "optional": {
                "options": ("OPTIONS", ),
            },
        }

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_options"

    def main(self, extra_options="", options=None):
        if options is None:
            options = {}
        # Concatenate onto any previously stored extra options.
        prior = options['extra_options'] + '\n' if 'extra_options' in options else ''
        options['extra_options'] = prior + extra_options
        return (options, )
class ClownOptions_DenoisedSampling_Beta:
    """ComfyUI node: configure cycled (denoised) sampling — rebounds plus the
    unsampler's eta/cfg/sampler overrides — in OPTIONS."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "cycles":                ("FLOAT", {"default": 0.0, "min": 0.0, "max": 10000, "step":0.5, "round": 0.5}),
                "eta_decay_scale":       ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step":0.01, "tooltip": "Multiplies etas by this number after every cycle. May help drive convergence." }),
                "unsample_eta":          ("FLOAT", {"default": 0.5, "min": -10000, "max": 10000, "step":0.01}),
                "unsampler_override":    (get_sampler_name_list(), {"default": "none"}),
                "unsample_steps_to_run": ("INT", {"default": -1, "min": -1, "max": 10000, "step":1, "round": 1}),
                "unsample_cfg":          ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step":0.01}),
                "unsample_bongmath":     ("BOOLEAN", {"default": False}),
            },
            "optional": {
                "options": ("OPTIONS", ),
            },
        }

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_options"

    def main(self,
            cycles=0,
            unsample_eta=0.5,
            eta_decay_scale=1.0,
            unsample_cfg=1.0,
            unsampler_override="none",
            unsample_steps_to_run=-1,
            unsample_bongmath=False,
            options=None,
            extra_options="",
            ):
        # FIX: main() previously accepted only (extra_options, options) — a
        # copy/paste of ClownOptions_ExtraOptions_Beta.main — so ComfyUI's
        # call with the inputs declared in INPUT_TYPES raised TypeError.
        # Accept the declared inputs and store them exactly as
        # ClownOptions_Cycles_Beta does (this node declares the same inputs).
        # extra_options is kept as a trailing optional parameter so any direct
        # callers of the old signature keep working.
        options = options if options is not None else {}

        options['rebounds']              = int(cycles * 2)
        options['unsample_eta']          = unsample_eta
        options['unsampler_name']        = unsampler_override
        options['eta_decay_scale']       = eta_decay_scale
        options['unsample_steps_to_run'] = unsample_steps_to_run
        options['unsample_cfg']          = unsample_cfg
        options['unsample_bongmath']     = unsample_bongmath

        if extra_options:
            if 'extra_options' in options:
                options['extra_options'] += '\n' + extra_options
            else:
                options['extra_options'] = extra_options

        return (options, )
class ClownOptions_Automation_Beta:
    """ComfyUI node: bundle per-step automation schedules (etas, s_noises,
    epsilon scales, frame weights) into options['automation']."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {},
            "optional": {
                "etas":             ("SIGMAS", ),
                "etas_substep":     ("SIGMAS", ),
                "s_noises":         ("SIGMAS", ),
                "s_noises_substep": ("SIGMAS", ),
                "epsilon_scales":   ("SIGMAS", ),
                "frame_weights":    ("SIGMAS", ),
                "options":          ("OPTIONS",),
            },
        }

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_options"

    def main(self,
            etas=None,
            etas_substep=None,
            s_noises=None,
            s_noises_substep=None,
            epsilon_scales=None,
            frame_weights=None,
            options=None
            ):
        if options is None:
            options = {}

        # Reuse a frame-weights manager from chained options when one exists;
        # otherwise build a fresh one from the raw frame_weights schedule.
        frame_weights_mgr = OptionsManager(options).get("frame_weights_mgr")
        if frame_weights_mgr is None and frame_weights is not None:
            frame_weights_mgr = FrameWeightsManager()
            frame_weights_mgr.set_custom_weights("frame_weights", frame_weights)

        options["automation"] = {
            "etas":              etas,
            "etas_substep":      etas_substep,
            "s_noises":          s_noises,
            "s_noises_substep":  s_noises_substep,
            "epsilon_scales":    epsilon_scales,
            "frame_weights_mgr": frame_weights_mgr,
        }
        return (options, )
class SharkOptions_GuideCond_Beta:
    """ComfyUI node: store guide conditioning (positive/negative/cfg) under
    options['flow_cond']."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {},
            "optional": {
                "positive": ("CONDITIONING", ),
                "negative": ("CONDITIONING", ),
                "cfg":      ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step":0.01}),
                "options":  ("OPTIONS",),
            },
        }

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_options"

    def main(self, positive=None, negative=None, cfg=1.0, options=None):
        if options is None:
            options = {}
        options["flow_cond"] = {
            "yt_positive": positive,
            "yt_negative": negative,
            "yt_cfg":      cfg,
        }
        return (options, )
class SharkOptions_GuideConds_Beta:
    """ComfyUI node: store masked and unmasked guide conditioning pairs (with
    separate cfg values) under options['flow_cond']."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {},
            "optional": {
                "positive_masked":   ("CONDITIONING", ),
                "positive_unmasked": ("CONDITIONING", ),
                "negative_masked":   ("CONDITIONING", ),
                "negative_unmasked": ("CONDITIONING", ),
                "cfg_masked":        ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step":0.01}),
                "cfg_unmasked":      ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step":0.01}),
                "options":           ("OPTIONS",),
            },
        }

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_options"

    def main(self,
            positive_masked=None,
            negative_masked=None,
            cfg_masked=1.0,
            positive_unmasked=None,
            negative_unmasked=None,
            cfg_unmasked=1.0,
            options=None,
            ):
        if options is None:
            options = {}
        # "masked" conditioning maps to the yt_* keys; "unmasked" ("inverted"
        # mask region) maps to yt_inv_*.
        options["flow_cond"] = {
            "yt_positive":     positive_masked,
            "yt_negative":     negative_masked,
            "yt_cfg":          cfg_masked,
            "yt_inv_positive": positive_unmasked,
            "yt_inv_negative": negative_unmasked,
            "yt_inv_cfg":      cfg_unmasked,
        }
        return (options, )
class SharkOptions_Beta:
    """ComfyUI node: store initial-noise settings, denoise_alt, and the
    channelwise CFG flag in OPTIONS."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "noise_type_init": (NOISE_GENERATOR_NAMES_SIMPLE, {"default": "gaussian"}),
                "s_noise_init":    ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step":0.01, "round": False, }),
                "denoise_alt":     ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step":0.01}),
                "channelwise_cfg": ("BOOLEAN", {"default": False}),
            },
            "optional": {
                "options": ("OPTIONS", ),
            },
        }

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_options"

    def main(self,
            noise_type_init="gaussian",
            s_noise_init=1.0,
            denoise_alt=1.0,
            channelwise_cfg=False,
            options=None
            ):
        if options is None:
            options = {}
        options.update({
            'noise_type_init':  noise_type_init,
            'noise_init_stdev': s_noise_init,
            'denoise_alt':      denoise_alt,
            'channelwise_cfg':  channelwise_cfg,
        })
        return (options,)
class SharkOptions_UltraCascade_Latent_Beta:
    """ComfyUI node: store the UltraCascade latent width/height in OPTIONS."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "width":  ("INT", {"default": 60, "min": 1, "max": MAX_RESOLUTION, "step": 1}),
                "height": ("INT", {"default": 36, "min": 1, "max": MAX_RESOLUTION, "step": 1}),
            },
            "optional": {
                "options": ("OPTIONS",),
            },
        }

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_options"

    def main(self, width: int = 60, height: int = 36, options=None):
        if options is None:
            options = {}
        options['ultracascade_latent_width']  = width
        options['ultracascade_latent_height'] = height
        return (options,)
class ClownOptions_SwapSampler_Beta:
    """ComfyUI node: configure swapping to a second sampler mid-run, either at
    a fixed step or when the per-step error drops below a threshold."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "sampler_name":       (get_sampler_name_list(), {"default": get_default_sampler_name()}),
                "swap_below_err":     ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Swap samplers if the error per step falls below this threshold."}),
                "swap_at_step":       ("INT", {"default": 30, "min": 1, "max": 10000}),
                "log_err_to_console": ("BOOLEAN", {"default": False}),
            },
            "optional": {
                "options": ("OPTIONS", ),
            },
        }

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_options"

    def main(self,
            sampler_name="res_3m",
            swap_below_err=0.0,
            swap_at_step=30,
            log_err_to_console=False,
            options=None,
            ):
        # Resolve the menu entry to a concrete sampler (implicit variant wins
        # when one is defined) — same resolution as ClownSamplerSelector_Beta.
        explicit_name, implicit_name = process_sampler_name(sampler_name)
        resolved = explicit_name if implicit_name == "use_explicit" else implicit_name

        if options is None:
            options = {}
        options['rk_swap_type']      = resolved
        options['rk_swap_threshold'] = swap_below_err
        options['rk_swap_step']      = swap_at_step
        options['rk_swap_print']     = log_err_to_console
        return (options,)
class ClownOptions_SDE_Mask_Beta:
    """ComfyUI node: store a (optionally inverted and rescaled) SDE noise mask
    under options['sde_mask'].

    The mask is linearly rescaled so its values span [min, max].
    """

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "max":         ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Clamp the max value for the mask."}),
                "min":         ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Clamp the min value for the mask."}),
                "invert_mask": ("BOOLEAN", {"default": False}),
            },
            "optional": {
                "mask":    ("MASK", ),
                "options": ("OPTIONS", ),
            },
        }

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_options"

    def main(self,
            max=1.0,   # parameter names "max"/"min" shadow builtins but are part of the node interface
            min=0.0,
            invert_mask=False,
            mask=None,
            options=None,
            ):
        # deepcopy so chained option dicts are not mutated in place (this node
        # deliberately differs from the others, which mutate options directly).
        options = copy.deepcopy(options) if options is not None else {}

        # FIX: the mask input is optional, but the old code unconditionally
        # computed 1-mask / mask.min() and crashed with TypeError when it was
        # absent. Only transform the mask when one was supplied.
        if mask is not None:
            if invert_mask:
                mask = 1 - mask

            # FIX: guard the degenerate constant-mask case, which previously
            # divided by zero and produced NaN/Inf values.
            mask_range = mask.max() - mask.min()
            if mask_range == 0:
                mask = torch.full_like(mask, min)
            else:
                mask = ((mask - mask.min()) * (max - min)) / mask_range + min

        options['sde_mask'] = mask
        return (options,)
class ClownGuide_Mean_Beta:
    # ComfyUI node: attach a "mean" guide latent, its weight schedule, mask,
    # and step range to a GUIDES dict consumed by the beta samplers.
    @classmethod
    def INPUT_TYPES(cls):
        # "weights" (SIGMAS) can override the generated per-step schedule;
        # "guides" chains an existing GUIDES dict.
        return {"required":
                    {
                    "weight": ("FLOAT", {"default": 0.75, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide."}),
                    "cutoff": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step":0.01, "round": False, "tooltip": "Disables the guide for the next step when the denoised image is similar to the guide. Higher values will strengthen the effect."}),
                    "weight_scheduler": (["constant"] + get_res4lyf_scheduler_list(), {"default": "beta57"},),
                    "start_step": ("INT", {"default": 0, "min": 0, "max": 10000}),
                    "end_step": ("INT", {"default": 15, "min": -1, "max": 10000}),
                    "invert_mask": ("BOOLEAN", {"default": False}),
                    },
                "optional":
                    {
                    "guide": ("LATENT", ),
                    "mask": ("MASK", ),
                    "weights": ("SIGMAS", ),
                    "guides": ("GUIDES", ),
                    }
                }

    RETURN_TYPES = ("GUIDES",)
    RETURN_NAMES = ("guides",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/sampler_extensions"

    def main(self,
            weight_scheduler = "constant",
            start_step = 0,
            end_step = 30,
            cutoff = 1.0,
            guide = None,
            weight = 0.0,
            channelwise_mode = False,   # accepted but not stored here
            projection_mode = False,    # accepted but not stored here
            weights = None,
            mask = None,
            invert_mask = False,
            guides = None,
            ):
        default_dtype = torch.float64

        # Mask is stored inverted relative to the UI input.
        # NOTE(review): the inversion is unconditional — the "invert_mask"
        # input is accepted but never consulted. Confirm whether that is
        # intentional (the Style guide node has the same pattern).
        mask = 1-mask if mask is not None else None

        # end_step == -1 means "until the end of sampling".
        if end_step == -1:
            end_step = MAX_STEPS

        if guide is not None:
            # Prefer the sampler's raw latent state when the guide came from a
            # previous sampling pass; otherwise use the plain latent samples.
            raw_x = guide.get('state_info', {}).get('raw_x', None)
            if raw_x is not None:
                guide = {'samples': guide['state_info']['raw_x'].clone()}
            else:
                guide = {'samples': guide['samples'].clone()}

        if weight_scheduler == "constant": # and weights == None:
            # Constant schedule: flat weight through end_step, zero after.
            # NOTE(review): unlike ClownGuide_Style_Beta, start_step is not
            # prepended with zeros here — confirm whether that is deliberate.
            weights = initialize_or_scale(None, weight, end_step).to(default_dtype)
            weights = F.pad(weights, (0, MAX_STEPS), value=0.0)

        # deepcopy so chained GUIDES dicts are not mutated in place.
        guides = copy.deepcopy(guides) if guides is not None else {}

        guides['weight_mean'] = weight
        guides['weights_mean'] = weights
        guides['guide_mean'] = guide
        guides['mask_mean'] = mask

        guides['weight_scheduler_mean'] = weight_scheduler
        guides['start_step_mean'] = start_step
        guides['end_step_mean'] = end_step
        guides['cutoff_mean'] = cutoff

        return (guides, )
class ClownGuide_FrequencySeparation:
    """ComfyUI node: store frequency-separation settings (lowpass method,
    kernel/stride sizes, band weights, optional mask) in a GUIDES dict."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "apply_to":          (["AdaIN"], {"default": "AdaIN"}),
                "method":            (["gaussian", "gaussian_pw", "median", "median_pw",], {"default": "median"}),
                "sigma":             ("FLOAT", {"default": 3.0, "min": -10000.0, "max": 10000.0, "step":0.01, "round": False, "tooltip": "Low values produce results closer to the guide image. No effect with median."}),
                "kernel_size":       ("INT", {"default": 8, "min": 1, "max": 11111, "step": 1, "tooltip": "Primary control with median. Set the Re___Patcher node to float32 or lower precision if you have OOMs. You may have them regardless at higher kernel sizes with median."}),
                "inner_kernel_size": ("INT", {"default": 2, "min": 1, "max": 11111, "step": 1, "tooltip": "Should be equal to, or less than, kernel_size."}),
                "stride":            ("INT", {"default": 2, "min": 1, "max": 11111, "step": 1, "tooltip": "Should be equal to, or less than, inner_kernel_size."}),
                "lowpass_weight":    ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step":0.01, "round": False, "tooltip": "Typically should be set to 1.0. Lower values may sharpen the image, higher values may blur the image."}),
                "highpass_weight":   ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step":0.01, "round": False, "tooltip": "Typically should be set to 1.0. Higher values may sharpen the image, lower values may blur the image."}),
                "guides":            ("GUIDES", ),
            },
            "optional": {
                "mask": ("MASK",),
            },
        }

    RETURN_TYPES = ("GUIDES",)
    RETURN_NAMES = ("guides",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_extensions"
    EXPERIMENTAL = True

    def main(self,
            apply_to="AdaIN",
            method="median",
            sigma=3.0,
            kernel_size=9,
            inner_kernel_size=2,
            stride=2,
            lowpass_weight=1.0,
            highpass_weight=1.0,
            guides=None,
            mask=None,
            ):
        # deepcopy so chained GUIDES dicts are not mutated in place.
        guides = {} if guides is None else copy.deepcopy(guides)
        guides.update({
            'freqsep_apply_to':          apply_to,
            'freqsep_lowpass_method':    method,
            'freqsep_sigma':             sigma,
            'freqsep_kernel_size':       kernel_size,
            'freqsep_inner_kernel_size': inner_kernel_size,
            'freqsep_stride':            stride,
            'freqsep_lowpass_weight':    lowpass_weight,
            'freqsep_highpass_weight':   highpass_weight,
            'freqsep_mask':              mask,
        })
        return (guides, )
class ClownGuide_Style_Beta:
    # ComfyUI node: configure style-transfer guidance (AdaIN/WCT/WCT2/
    # scattersort) from a reference latent, applied to the positive, negative,
    # and/or denoised branches.
    @classmethod
    def INPUT_TYPES(cls):
        return {"required":
                    {
                    "apply_to": (["positive", "negative", "denoised"], {"default": "positive", "tooltip": "When using CFG, decides whether to apply the guide to the positive or negative conditioning."}),
                    "method": (["AdaIN", "WCT", "WCT2", "scattersort","none"], {"default": "WCT"}),
                    "weight": ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide by multiplying all other weights by this value."}),
                    "synweight": ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the relative strength of the guide on the opposite conditioning to what was selected: i.e., negative if positive in apply_to. Recommended to avoid CFG burn."}),
                    "weight_scheduler": (["constant"] + get_res4lyf_scheduler_list(), {"default": "constant", "tooltip": "Selecting any scheduler except constant will cause the strength to gradually decay to zero. Try beta57 vs. linear quadratic."},),
                    "start_step": ("INT", {"default": 0, "min": 0, "max": 10000}),
                    "end_step": ("INT", {"default": -1, "min": -1, "max": 10000}),
                    "invert_mask": ("BOOLEAN", {"default": False}),
                    },
                "optional":
                    {
                    "guide": ("LATENT", ),
                    "mask": ("MASK", ),
                    "weights": ("SIGMAS", ),
                    "guides": ("GUIDES", ),
                    }
                }

    RETURN_TYPES = ("GUIDES",)
    RETURN_NAMES = ("guides",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/sampler_extensions"

    DESCRIPTION = "Transfer some visual aspects of style from a guide (reference) image. If nothing about style is specified in the prompt, it may just transfer the lighting and color scheme." + \
        "If using CFG results in burn, or a very dark/bright image in the preview followed by a bad output, try duplicating and chaining this node, so that the guide may be applied to both positive and negative conditioning." + \
        "Currently supported models: SD1.5, SDXL, Stable Cascade, SD3.5, AuraFlow, Flux, HiDream, WAN, and LTXV."

    def main(self,
            apply_to = "all",   # UI offers positive/negative/denoised; "all" (programmatic default) hits every branch below
            method = "WCT",
            weight = 1.0,
            synweight = 1.0,
            weight_scheduler = "constant",
            start_step = 0,
            end_step = 15,
            invert_mask = False,
            guide = None,
            mask = None,
            weights = None,
            guides = None,
            ):
        default_dtype = torch.float64

        # Mask is stored inverted relative to the UI input.
        # NOTE(review): inversion is unconditional — the "invert_mask" input
        # is accepted but never consulted. Confirm intent (ClownGuide_Mean_Beta
        # has the same pattern).
        mask = 1-mask if mask is not None else None

        # end_step == -1 means "until the end of sampling".
        if end_step == -1:
            end_step = MAX_STEPS

        if guide is not None:
            # Prefer the sampler's raw latent state when the guide came from a
            # previous sampling pass; otherwise use the plain latent samples.
            raw_x = guide.get('state_info', {}).get('raw_x', None)
            if raw_x is not None:
                guide = {'samples': guide['state_info']['raw_x'].clone()}
            else:
                guide = {'samples': guide['samples'].clone()}

        if weight_scheduler == "constant": # and weights == None:
            # Constant schedule: zeros before start_step, flat weight through
            # end_step, zero-padded out to MAX_STEPS.
            weights = initialize_or_scale(None, weight, end_step).to(default_dtype)
            prepend = torch.zeros(start_step).to(weights)
            weights = torch.cat([prepend, weights])
            weights = F.pad(weights, (0, MAX_STEPS), value=0.0)

        # deepcopy so chained GUIDES dicts are not mutated in place.
        guides = copy.deepcopy(guides) if guides is not None else {}

        guides['style_method'] = method

        # The same guide/weights/mask bundle is fanned out to whichever
        # branches apply_to selects (keys differ only in their suffix).
        if apply_to in {"positive", "all"}:
            guides['weight_style_pos'] = weight
            guides['weights_style_pos'] = weights
            guides['synweight_style_pos'] = synweight
            guides['guide_style_pos'] = guide
            guides['mask_style_pos'] = mask
            guides['weight_scheduler_style_pos'] = weight_scheduler
            guides['start_step_style_pos'] = start_step
            guides['end_step_style_pos'] = end_step

        if apply_to in {"negative", "all"}:
            guides['weight_style_neg'] = weight
            guides['weights_style_neg'] = weights
            guides['synweight_style_neg'] = synweight
            guides['guide_style_neg'] = guide
            guides['mask_style_neg'] = mask
            guides['weight_scheduler_style_neg'] = weight_scheduler
            guides['start_step_style_neg'] = start_step
            guides['end_step_style_neg'] = end_step

        if apply_to in {"denoised", "all"}:
            guides['weight_style_denoised'] = weight
            guides['weights_style_denoised'] = weights
            guides['synweight_style_denoised'] = synweight
            guides['guide_style_denoised'] = guide
            guides['mask_style_denoised'] = mask
            guides['weight_scheduler_style_denoised'] = weight_scheduler
            guides['start_step_style_denoised'] = start_step
            guides['end_step_style_denoised'] = end_step

        return (guides, )
class ClownGuide_Style_EdgeWidth:
    """ComfyUI node that derives edge masks from any style masks already
    present in a GUIDES dict. Only keys 'mask_style_pos'/'mask_style_neg'
    are inspected; results are stored under 'mask_edge_style_pos'/'_neg'."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "edge_width": ("INT", {"default": 20, "min": 1, "max": 10000}),
            },
            "optional": {
                "guides": ("GUIDES", ),
            },
        }

    RETURN_TYPES = ("GUIDES",)
    RETURN_NAMES = ("guides",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_extensions"
    DESCRIPTION  = "Set an edge mask for some style guide types such as scattersort. Can help mitigate seams."

    def main(self, edge_width=20, guides=None):
        """Return a 1-tuple with a deep copy of `guides`, augmented with an
        edge mask (width `edge_width`) for each style mask that is present."""
        guides = {} if guides is None else copy.deepcopy(guides)
        for polarity in ("pos", "neg"):
            style_mask = guides.get(f"mask_style_{polarity}")
            if style_mask is not None:
                guides[f"mask_edge_style_{polarity}"] = get_edge_mask(style_mask, edge_width)
        return (guides, )
class ClownGuide_Style_TileSize:
    """ComfyUI node that records a tile geometry (in latent units) in a
    GUIDES dict under 'style_tile_height'/'style_tile_width'/'style_tile_padding'."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "height": ("INT", {"default": 128, "min": 16, "max": 10000, "step": 16}),
                "width" : ("INT", {"default": 128, "min": 16, "max": 10000, "step": 16}),
                "padding" : ("INT", {"default": 64, "min": 0, "max": 10000, "step": 16}),
            },
            "optional": {
                "guides": ("GUIDES", ),
            },
        }

    RETURN_TYPES = ("GUIDES",)
    RETURN_NAMES = ("guides",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_extensions"
    DESCRIPTION  = "Set a tile size for some style guide types such as scattersort. Can improve adherence to the input image."

    def main(self, height=128, width=128, padding=64, guides=None):
        """Return a 1-tuple with a deep copy of `guides` carrying the tile
        dimensions converted from pixels to latent cells (16 px per cell)."""
        out = {} if guides is None else copy.deepcopy(guides)
        for key, pixels in (("height", height), ("width", width), ("padding", padding)):
            out[f"style_tile_{key}"] = pixels // 16
        return (out, )
class ClownGuides_Sync:
    """ComfyUI node: builds a 'sync' GUIDES dict from masked/unmasked guide
    latents, per-step weight/sync schedules, and an optional mask.

    Constant-scheduler curves are materialized here as float64 tensors; any
    other scheduler name is left to be resolved downstream by the sampler
    (NOTE(review): presumably in rk_guide_func_beta.py — confirm)."""

    @classmethod
    def INPUT_TYPES(cls):
        return {"required":
                    {
                    "weight_masked":              ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide."}),
                    "weight_unmasked":            ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide_bkg."}),
                    "weight_scheduler_masked":    (["constant"] + get_res4lyf_scheduler_list(), {"default": "beta57"},),
                    "weight_scheduler_unmasked":  (["constant"] + get_res4lyf_scheduler_list(), {"default": "constant"},),
                    "weight_start_step_masked":   ("INT", {"default": 0, "min": 0, "max": 10000}),
                    "weight_start_step_unmasked": ("INT", {"default": 0, "min": 0, "max": 10000}),
                    "weight_end_step_masked":     ("INT", {"default": 15, "min": -1, "max": 10000}),
                    "weight_end_step_unmasked":   ("INT", {"default": 15, "min": -1, "max": 10000}),
                    "sync_masked":                ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide."}),
                    "sync_unmasked":              ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide_bkg."}),
                    "sync_scheduler_masked":      (["constant"] + get_res4lyf_scheduler_list(), {"default": "beta57"},),
                    "sync_scheduler_unmasked":    (["constant"] + get_res4lyf_scheduler_list(), {"default": "constant"},),
                    "sync_start_step_masked":     ("INT", {"default": 0, "min": 0, "max": 10000}),
                    "sync_start_step_unmasked":   ("INT", {"default": 0, "min": 0, "max": 10000}),
                    "sync_end_step_masked":       ("INT", {"default": 15, "min": -1, "max": 10000}),
                    "sync_end_step_unmasked":     ("INT", {"default": 15, "min": -1, "max": 10000}),
                    "invert_mask":                ("BOOLEAN", {"default": False}),
                    },
                "optional":
                    {
                    "guide_masked":     ("LATENT", ),
                    "guide_unmasked":   ("LATENT", ),
                    "mask":             ("MASK", ),
                    "weights_masked":   ("SIGMAS", ),
                    "weights_unmasked": ("SIGMAS", ),
                    "syncs_masked":     ("SIGMAS", ),
                    "syncs_unmasked":   ("SIGMAS", ),
                    }
                }

    RETURN_TYPES = ("GUIDES",)
    RETURN_NAMES = ("guides",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_extensions"
    EXPERIMENTAL = True

    @staticmethod
    def _constant_schedule(weight, start_step, end_step):
        """Flat per-step curve: `start_step` zeros, then `weight` through
        `end_step`, right-padded with zeros out to MAX_STEPS (float64)."""
        sched   = initialize_or_scale(None, weight, end_step).to(torch.float64)
        prepend = torch.zeros(start_step, dtype=torch.float64, device=sched.device)
        sched   = torch.cat((prepend, sched), dim=0)
        return F.pad(sched, (0, MAX_STEPS), value=0.0)

    def main(self,
            weight_masked              = 0.0,
            weight_unmasked            = 0.0,
            weight_scheduler_masked    = "constant",
            weight_scheduler_unmasked  = "constant",
            weight_start_step_masked   = 0,
            weight_start_step_unmasked = 0,
            weight_end_step_masked     = 30,
            weight_end_step_unmasked   = 30,
            sync_masked                = 0.0,
            sync_unmasked              = 0.0,
            sync_scheduler_masked      = "constant",
            sync_scheduler_unmasked    = "constant",
            sync_start_step_masked     = 0,
            sync_start_step_unmasked   = 0,
            sync_end_step_masked       = 30,
            sync_end_step_unmasked     = 30,
            guide_masked               = None,
            guide_unmasked             = None,
            weights_masked             = None,
            weights_unmasked           = None,
            syncs_masked               = None,
            syncs_unmasked             = None,
            mask                       = None,
            unmask                     = None,
            invert_mask                = False,
            guide_mode                 = "sync",
            channelwise_mode           = False,
            projection_mode            = False,
            cutoff_masked              = 1.0,
            cutoff_unmasked            = 1.0,
            ):
        """Assemble and return the GUIDES configuration as a 1-tuple (guides,)."""

        # -1 is a sentinel meaning "run through the final step".
        if weight_end_step_masked == -1:
            weight_end_step_masked = MAX_STEPS
        if weight_end_step_unmasked == -1:
            weight_end_step_unmasked = MAX_STEPS
        if sync_end_step_masked == -1:
            sync_end_step_masked = MAX_STEPS
        if sync_end_step_unmasked == -1:
            sync_end_step_unmasked = MAX_STEPS

        # A disconnected guide neutralizes its entire side (weight 0.0,
        # constant schedule, default step window).
        if guide_masked is None:
            weight_scheduler_masked  = "constant"
            weight_start_step_masked = 0
            weight_end_step_masked   = 30
            weight_masked            = 0.0
            weights_masked           = None
            sync_scheduler_masked    = "constant"
            sync_start_step_masked   = 0
            sync_end_step_masked     = 30
            sync_masked              = 0.0
            syncs_masked             = None

        if guide_unmasked is None:
            weight_scheduler_unmasked  = "constant"
            weight_start_step_unmasked = 0
            weight_end_step_unmasked   = 30
            weight_unmasked            = 0.0
            weights_unmasked           = None
            sync_scheduler_unmasked    = "constant"
            sync_start_step_unmasked   = 0
            sync_end_step_unmasked     = 30
            sync_unmasked              = 0.0
            syncs_unmasked             = None

        # Detach the guide latents from the caller's dicts.
        # NOTE(review): a disabled branch previously preferred
        # state_info['raw_x'] over 'samples'; only 'samples' is used now.
        if guide_masked is not None:
            guide_masked = {'samples': guide_masked['samples'].clone()}
        if guide_unmasked is not None:
            guide_unmasked = {'samples': guide_unmasked['samples'].clone()}

        if invert_mask and mask is not None:
            mask = 1 - mask

        # Mode-name munging: projection/channelwise suffixes, with
        # unsample/resample exempt from the channelwise suffix.
        if projection_mode:
            guide_mode = guide_mode + "_projection"
        if channelwise_mode:
            guide_mode = guide_mode + "_cw"
        if guide_mode == "unsample_cw":
            guide_mode = "unsample"
        if guide_mode == "resample_cw":
            guide_mode = "resample"

        # Materialize constant schedules unless an explicit SIGMAS curve was
        # supplied ("is None" instead of "== None": these are torch tensors
        # when connected, and identity is the intended check).
        if weight_scheduler_masked == "constant" and weights_masked is None:
            weights_masked = self._constant_schedule(weight_masked, weight_start_step_masked, weight_end_step_masked)
        if weight_scheduler_unmasked == "constant" and weights_unmasked is None:
            weights_unmasked = self._constant_schedule(weight_unmasked, weight_start_step_unmasked, weight_end_step_unmasked)

        # Values for the sync scheduler will be inverted in rk_guide_func_beta.py as it's easier to understand:
        # makes it so that a sync weight of 1.0 = full guide strength (which previously was 0.0)
        if sync_scheduler_masked == "constant" and syncs_masked is None:
            syncs_masked = self._constant_schedule(sync_masked, sync_start_step_masked, sync_end_step_masked)
        if sync_scheduler_unmasked == "constant" and syncs_unmasked is None:
            syncs_unmasked = self._constant_schedule(sync_unmasked, sync_start_step_unmasked, sync_end_step_unmasked)

        guides = {
            "guide_mode"                     : guide_mode,
            "guide_masked"                   : guide_masked,
            "guide_unmasked"                 : guide_unmasked,
            "mask"                           : mask,
            "unmask"                         : unmask,
            "weight_masked"                  : weight_masked,
            "weight_unmasked"                : weight_unmasked,
            "weight_scheduler_masked"        : weight_scheduler_masked,
            "weight_scheduler_unmasked"      : weight_scheduler_unmasked,
            "start_step_masked"              : weight_start_step_masked,
            "start_step_unmasked"            : weight_start_step_unmasked,
            "end_step_masked"                : weight_end_step_masked,
            "end_step_unmasked"              : weight_end_step_unmasked,
            "weights_masked"                 : weights_masked,
            "weights_unmasked"               : weights_unmasked,
            "weight_masked_sync"             : sync_masked,
            "weight_unmasked_sync"           : sync_unmasked,
            "weight_scheduler_masked_sync"   : sync_scheduler_masked,
            "weight_scheduler_unmasked_sync" : sync_scheduler_unmasked,
            "start_step_masked_sync"         : sync_start_step_masked,
            "start_step_unmasked_sync"       : sync_start_step_unmasked,
            "end_step_masked_sync"           : sync_end_step_masked,
            "end_step_unmasked_sync"         : sync_end_step_unmasked,
            "weights_masked_sync"            : syncs_masked,
            "weights_unmasked_sync"          : syncs_unmasked,
            "cutoff_masked"                  : cutoff_masked,
            "cutoff_unmasked"                : cutoff_unmasked
        }
        return (guides, )
class ClownGuides_Sync_Advanced:
    """ComfyUI node: builds an advanced 'sync' GUIDES dict with independent
    weight/sync/drift_x/drift_y/lure_x/lure_y schedules for the masked and
    unmasked guides.

    Each channel has its own strength, scheduler name, start/end step,
    optional precomputed SIGMAS curve, and (for most channels) its own mask.
    Constant-scheduler curves are materialized here as float64 tensors; any
    other scheduler name is left to be resolved downstream by the sampler
    (NOTE(review): presumably in rk_guide_func_beta.py — confirm)."""

    @classmethod
    def INPUT_TYPES(cls):
        return {"required":
                    {
                    "weight_masked":                ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide."}),
                    "weight_unmasked":              ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide_bkg."}),
                    "weight_scheduler_masked":      (["constant"] + get_res4lyf_scheduler_list(), {"default": "constant"},),
                    "weight_scheduler_unmasked":    (["constant"] + get_res4lyf_scheduler_list(), {"default": "constant"},),
                    "weight_start_step_masked":     ("INT", {"default": 0, "min": 0, "max": 10000}),
                    "weight_start_step_unmasked":   ("INT", {"default": 0, "min": 0, "max": 10000}),
                    "weight_end_step_masked":       ("INT", {"default": 30, "min": -1, "max": 10000}),
                    "weight_end_step_unmasked":     ("INT", {"default": -1, "min": -1, "max": 10000}),
                    "sync_masked":                  ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide."}),
                    "sync_unmasked":                ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide_bkg."}),
                    "sync_scheduler_masked":        (["constant"] + get_res4lyf_scheduler_list(), {"default": "constant"},),
                    "sync_scheduler_unmasked":      (["constant"] + get_res4lyf_scheduler_list(), {"default": "constant"},),
                    "sync_start_step_masked":       ("INT", {"default": 0, "min": 0, "max": 10000}),
                    "sync_start_step_unmasked":     ("INT", {"default": 0, "min": 0, "max": 10000}),
                    "sync_end_step_masked":         ("INT", {"default": -1, "min": -1, "max": 10000}),
                    "sync_end_step_unmasked":       ("INT", {"default": -1, "min": -1, "max": 10000}),
                    "drift_x_data":                 ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide."}),
                    "drift_x_sync":                 ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide."}),
                    "drift_x_masked":               ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide."}),
                    "drift_x_unmasked":             ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide_bkg."}),
                    "drift_x_scheduler_masked":     (["constant"] + get_res4lyf_scheduler_list(), {"default": "constant"},),
                    "drift_x_scheduler_unmasked":   (["constant"] + get_res4lyf_scheduler_list(), {"default": "constant"},),
                    "drift_x_start_step_masked":    ("INT", {"default": 0, "min": 0, "max": 10000}),
                    "drift_x_start_step_unmasked":  ("INT", {"default": 0, "min": 0, "max": 10000}),
                    "drift_x_end_step_masked":      ("INT", {"default": -1, "min": -1, "max": 10000}),
                    "drift_x_end_step_unmasked":    ("INT", {"default": -1, "min": -1, "max": 10000}),
                    "drift_y_data":                 ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide."}),
                    "drift_y_sync":                 ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide."}),
                    "drift_y_guide":                ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide."}),
                    "drift_y_masked":               ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide."}),
                    "drift_y_unmasked":             ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide_bkg."}),
                    "drift_y_scheduler_masked":     (["constant"] + get_res4lyf_scheduler_list(), {"default": "constant"},),
                    "drift_y_scheduler_unmasked":   (["constant"] + get_res4lyf_scheduler_list(), {"default": "constant"},),
                    "drift_y_start_step_masked":    ("INT", {"default": 0, "min": 0, "max": 10000}),
                    "drift_y_start_step_unmasked":  ("INT", {"default": 0, "min": 0, "max": 10000}),
                    "drift_y_end_step_masked":      ("INT", {"default": -1, "min": -1, "max": 10000}),
                    "drift_y_end_step_unmasked":    ("INT", {"default": -1, "min": -1, "max": 10000}),
                    "lure_x_masked":                ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide."}),
                    "lure_x_unmasked":              ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide_bkg."}),
                    "lure_x_scheduler_masked":      (["constant"] + get_res4lyf_scheduler_list(), {"default": "constant"},),
                    "lure_x_scheduler_unmasked":    (["constant"] + get_res4lyf_scheduler_list(), {"default": "constant"},),
                    "lure_x_start_step_masked":     ("INT", {"default": 0, "min": 0, "max": 10000}),
                    "lure_x_start_step_unmasked":   ("INT", {"default": 0, "min": 0, "max": 10000}),
                    "lure_x_end_step_masked":       ("INT", {"default": -1, "min": -1, "max": 10000}),
                    "lure_x_end_step_unmasked":     ("INT", {"default": -1, "min": -1, "max": 10000}),
                    "lure_y_masked":                ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide."}),
                    "lure_y_unmasked":              ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide_bkg."}),
                    "lure_y_scheduler_masked":      (["constant"] + get_res4lyf_scheduler_list(), {"default": "constant"},),
                    "lure_y_scheduler_unmasked":    (["constant"] + get_res4lyf_scheduler_list(), {"default": "constant"},),
                    "lure_y_start_step_masked":     ("INT", {"default": 0, "min": 0, "max": 10000}),
                    "lure_y_start_step_unmasked":   ("INT", {"default": 0, "min": 0, "max": 10000}),
                    "lure_y_end_step_masked":       ("INT", {"default": -1, "min": -1, "max": 10000}),
                    "lure_y_end_step_unmasked":     ("INT", {"default": -1, "min": -1, "max": 10000}),
                    "lure_iter":                    ("INT", {"default": 0, "min": 0, "max": 10000}),
                    "lure_sequence":                (["x -> y", "y -> x", "xy -> xy"], {"default": "y -> x"}),
                    "invert_mask":                  ("BOOLEAN", {"default": False}),
                    "invert_mask_sync":             ("BOOLEAN", {"default": False}),
                    "invert_mask_drift_x":          ("BOOLEAN", {"default": False}),
                    "invert_mask_drift_y":          ("BOOLEAN", {"default": False}),
                    "invert_mask_lure_x":           ("BOOLEAN", {"default": False}),
                    "invert_mask_lure_y":           ("BOOLEAN", {"default": False}),
                    },
                "optional":
                    {
                    "guide_masked":       ("LATENT", ),
                    "guide_unmasked":     ("LATENT", ),
                    "mask":               ("MASK", ),
                    "mask_sync":          ("MASK", ),
                    "mask_drift_x":       ("MASK", ),
                    "mask_drift_y":       ("MASK", ),
                    "mask_lure_x":        ("MASK", ),
                    "mask_lure_y":        ("MASK", ),
                    "weights_masked":     ("SIGMAS", ),
                    "weights_unmasked":   ("SIGMAS", ),
                    "syncs_masked":       ("SIGMAS", ),
                    "syncs_unmasked":     ("SIGMAS", ),
                    "drift_xs_masked":    ("SIGMAS", ),
                    "drift_xs_unmasked":  ("SIGMAS", ),
                    "drift_ys_masked":    ("SIGMAS", ),
                    "drift_ys_unmasked":  ("SIGMAS", ),
                    "lure_xs_masked":     ("SIGMAS", ),
                    "lure_xs_unmasked":   ("SIGMAS", ),
                    "lure_ys_masked":     ("SIGMAS", ),
                    "lure_ys_unmasked":   ("SIGMAS", ),
                    }
                }

    RETURN_TYPES = ("GUIDES",)
    RETURN_NAMES = ("guides",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_extensions"
    EXPERIMENTAL = True

    @staticmethod
    def _constant_schedule(weight, start_step, end_step):
        """Flat per-step curve: `start_step` zeros, then `weight` through
        `end_step`, right-padded with zeros out to MAX_STEPS (float64)."""
        sched   = initialize_or_scale(None, weight, end_step).to(torch.float64)
        prepend = torch.zeros(start_step, dtype=torch.float64, device=sched.device)
        sched   = torch.cat((prepend, sched), dim=0)
        return F.pad(sched, (0, MAX_STEPS), value=0.0)

    def main(self,
            weight_masked                = 0.0,
            weight_unmasked              = 0.0,
            weight_scheduler_masked      = "constant",
            weight_scheduler_unmasked    = "constant",
            weight_start_step_masked     = 0,
            weight_start_step_unmasked   = 0,
            weight_end_step_masked       = 30,
            weight_end_step_unmasked     = 30,
            sync_masked                  = 0.0,
            sync_unmasked                = 0.0,
            sync_scheduler_masked        = "constant",
            sync_scheduler_unmasked      = "constant",
            sync_start_step_masked       = 0,
            sync_start_step_unmasked     = 0,
            sync_end_step_masked         = 30,
            sync_end_step_unmasked       = 30,
            drift_x_data                 = 0.0,
            drift_x_sync                 = 0.0,
            drift_y_data                 = 0.0,
            drift_y_sync                 = 0.0,
            drift_y_guide                = 0.0,
            drift_x_masked               = 0.0,
            drift_x_unmasked             = 0.0,
            drift_x_scheduler_masked     = "constant",
            drift_x_scheduler_unmasked   = "constant",
            drift_x_start_step_masked    = 0,
            drift_x_start_step_unmasked  = 0,
            drift_x_end_step_masked      = 30,
            drift_x_end_step_unmasked    = 30,
            drift_y_masked               = 0.0,
            drift_y_unmasked             = 0.0,
            drift_y_scheduler_masked     = "constant",
            drift_y_scheduler_unmasked   = "constant",
            drift_y_start_step_masked    = 0,
            drift_y_start_step_unmasked  = 0,
            drift_y_end_step_masked      = 30,
            drift_y_end_step_unmasked    = 30,
            lure_x_masked                = 0.0,
            lure_x_unmasked              = 0.0,
            lure_x_scheduler_masked      = "constant",
            lure_x_scheduler_unmasked    = "constant",
            lure_x_start_step_masked     = 0,
            lure_x_start_step_unmasked   = 0,
            lure_x_end_step_masked       = 30,
            lure_x_end_step_unmasked     = 30,
            lure_y_masked                = 0.0,
            lure_y_unmasked              = 0.0,
            lure_y_scheduler_masked      = "constant",
            lure_y_scheduler_unmasked    = "constant",
            lure_y_start_step_masked     = 0,
            lure_y_start_step_unmasked   = 0,
            lure_y_end_step_masked       = 30,
            lure_y_end_step_unmasked     = 30,
            guide_masked                 = None,
            guide_unmasked               = None,
            weights_masked               = None,
            weights_unmasked             = None,
            syncs_masked                 = None,
            syncs_unmasked               = None,
            drift_xs_masked              = None,
            drift_xs_unmasked            = None,
            drift_ys_masked              = None,
            drift_ys_unmasked            = None,
            lure_xs_masked               = None,
            lure_xs_unmasked             = None,
            lure_ys_masked               = None,
            lure_ys_unmasked             = None,
            lure_iter                    = 0,
            lure_sequence                = "x -> y",
            mask                         = None,
            unmask                       = None,
            mask_sync                    = None,
            mask_drift_x                 = None,
            mask_drift_y                 = None,
            mask_lure_x                  = None,
            mask_lure_y                  = None,
            invert_mask                  = False,
            invert_mask_sync             = False,
            invert_mask_drift_x          = False,
            invert_mask_drift_y          = False,
            invert_mask_lure_x           = False,
            invert_mask_lure_y           = False,
            guide_mode                   = "sync",
            channelwise_mode             = False,
            projection_mode              = False,
            cutoff_masked                = 1.0,
            cutoff_unmasked              = 1.0,
            ):
        """Assemble and return the GUIDES configuration as a 1-tuple (guides,)."""

        # -1 is a sentinel meaning "run through the final step".
        if weight_end_step_masked == -1:
            weight_end_step_masked = MAX_STEPS
        if weight_end_step_unmasked == -1:
            weight_end_step_unmasked = MAX_STEPS
        if sync_end_step_masked == -1:
            sync_end_step_masked = MAX_STEPS
        if sync_end_step_unmasked == -1:
            sync_end_step_unmasked = MAX_STEPS
        if drift_x_end_step_masked == -1:
            drift_x_end_step_masked = MAX_STEPS
        if drift_x_end_step_unmasked == -1:
            drift_x_end_step_unmasked = MAX_STEPS
        if drift_y_end_step_masked == -1:
            drift_y_end_step_masked = MAX_STEPS
        if drift_y_end_step_unmasked == -1:
            drift_y_end_step_unmasked = MAX_STEPS
        if lure_x_end_step_masked == -1:
            lure_x_end_step_masked = MAX_STEPS
        if lure_x_end_step_unmasked == -1:
            lure_x_end_step_unmasked = MAX_STEPS
        if lure_y_end_step_masked == -1:
            lure_y_end_step_masked = MAX_STEPS
        if lure_y_end_step_unmasked == -1:
            lure_y_end_step_unmasked = MAX_STEPS

        # A disconnected guide neutralizes every channel on its side
        # (weight 0.0, constant schedule, default step window).
        if guide_masked is None:
            weight_scheduler_masked     = "constant"
            weight_start_step_masked    = 0
            weight_end_step_masked      = 30
            weight_masked               = 0.0
            weights_masked              = None
            sync_scheduler_masked       = "constant"
            sync_start_step_masked      = 0
            sync_end_step_masked        = 30
            sync_masked                 = 0.0
            syncs_masked                = None
            drift_x_scheduler_masked    = "constant"
            drift_x_start_step_masked   = 0
            drift_x_end_step_masked     = 30
            drift_x_masked              = 0.0
            drift_xs_masked             = None
            drift_y_scheduler_masked    = "constant"
            drift_y_start_step_masked   = 0
            drift_y_end_step_masked     = 30
            drift_y_masked              = 0.0
            drift_ys_masked             = None
            lure_x_scheduler_masked     = "constant"
            lure_x_start_step_masked    = 0
            lure_x_end_step_masked      = 30
            lure_x_masked               = 0.0
            lure_xs_masked              = None
            lure_y_scheduler_masked     = "constant"
            lure_y_start_step_masked    = 0
            lure_y_end_step_masked      = 30
            lure_y_masked               = 0.0
            lure_ys_masked              = None

        if guide_unmasked is None:
            weight_scheduler_unmasked   = "constant"
            weight_start_step_unmasked  = 0
            weight_end_step_unmasked    = 30
            weight_unmasked             = 0.0
            weights_unmasked            = None
            sync_scheduler_unmasked     = "constant"
            sync_start_step_unmasked    = 0
            sync_end_step_unmasked      = 30
            sync_unmasked               = 0.0
            syncs_unmasked              = None
            drift_x_scheduler_unmasked  = "constant"
            drift_x_start_step_unmasked = 0
            drift_x_end_step_unmasked   = 30
            drift_x_unmasked            = 0.0
            drift_xs_unmasked           = None
            drift_y_scheduler_unmasked  = "constant"
            drift_y_start_step_unmasked = 0
            drift_y_end_step_unmasked   = 30
            drift_y_unmasked            = 0.0
            drift_ys_unmasked           = None
            lure_x_scheduler_unmasked   = "constant"
            lure_x_start_step_unmasked  = 0
            lure_x_end_step_unmasked    = 30
            lure_x_unmasked             = 0.0
            lure_xs_unmasked            = None
            lure_y_scheduler_unmasked   = "constant"
            lure_y_start_step_unmasked  = 0
            lure_y_end_step_unmasked    = 30
            lure_y_unmasked             = 0.0
            lure_ys_unmasked            = None

        # Detach the guide latents from the caller's dicts.
        # NOTE(review): a disabled branch previously preferred
        # state_info['raw_x'] over 'samples'; only 'samples' is used now.
        if guide_masked is not None:
            guide_masked = {'samples': guide_masked['samples'].clone()}
        if guide_unmasked is not None:
            guide_unmasked = {'samples': guide_unmasked['samples'].clone()}

        # Per-channel mask inversion.
        if invert_mask and mask is not None:
            mask = 1 - mask
        if invert_mask_sync and mask_sync is not None:
            mask_sync = 1 - mask_sync
        if invert_mask_drift_x and mask_drift_x is not None:
            mask_drift_x = 1 - mask_drift_x
        if invert_mask_drift_y and mask_drift_y is not None:
            mask_drift_y = 1 - mask_drift_y
        if invert_mask_lure_x and mask_lure_x is not None:
            mask_lure_x = 1 - mask_lure_x
        if invert_mask_lure_y and mask_lure_y is not None:
            mask_lure_y = 1 - mask_lure_y

        # Mode-name munging: projection/channelwise suffixes, with
        # unsample/resample exempt from the channelwise suffix.
        if projection_mode:
            guide_mode = guide_mode + "_projection"
        if channelwise_mode:
            guide_mode = guide_mode + "_cw"
        if guide_mode == "unsample_cw":
            guide_mode = "unsample"
        if guide_mode == "resample_cw":
            guide_mode = "resample"

        # Materialize constant schedules unless an explicit SIGMAS curve was
        # supplied ("is None" instead of "== None": these are torch tensors
        # when connected, and identity is the intended check).
        if weight_scheduler_masked == "constant" and weights_masked is None:
            weights_masked = self._constant_schedule(weight_masked, weight_start_step_masked, weight_end_step_masked)
        if weight_scheduler_unmasked == "constant" and weights_unmasked is None:
            weights_unmasked = self._constant_schedule(weight_unmasked, weight_start_step_unmasked, weight_end_step_unmasked)

        # Values for the sync scheduler will be inverted in rk_guide_func_beta.py as it's easier to understand:
        # makes it so that a sync weight of 1.0 = full guide strength (which previously was 0.0)
        if sync_scheduler_masked == "constant" and syncs_masked is None:
            syncs_masked = self._constant_schedule(sync_masked, sync_start_step_masked, sync_end_step_masked)
        if sync_scheduler_unmasked == "constant" and syncs_unmasked is None:
            syncs_unmasked = self._constant_schedule(sync_unmasked, sync_start_step_unmasked, sync_end_step_unmasked)

        if drift_x_scheduler_masked == "constant" and drift_xs_masked is None:
            drift_xs_masked = self._constant_schedule(drift_x_masked, drift_x_start_step_masked, drift_x_end_step_masked)
        if drift_x_scheduler_unmasked == "constant" and drift_xs_unmasked is None:
            drift_xs_unmasked = self._constant_schedule(drift_x_unmasked, drift_x_start_step_unmasked, drift_x_end_step_unmasked)

        if drift_y_scheduler_masked == "constant" and drift_ys_masked is None:
            drift_ys_masked = self._constant_schedule(drift_y_masked, drift_y_start_step_masked, drift_y_end_step_masked)
        if drift_y_scheduler_unmasked == "constant" and drift_ys_unmasked is None:
            drift_ys_unmasked = self._constant_schedule(drift_y_unmasked, drift_y_start_step_unmasked, drift_y_end_step_unmasked)

        if lure_x_scheduler_masked == "constant" and lure_xs_masked is None:
            lure_xs_masked = self._constant_schedule(lure_x_masked, lure_x_start_step_masked, lure_x_end_step_masked)
        if lure_x_scheduler_unmasked == "constant" and lure_xs_unmasked is None:
            lure_xs_unmasked = self._constant_schedule(lure_x_unmasked, lure_x_start_step_unmasked, lure_x_end_step_unmasked)

        if lure_y_scheduler_masked == "constant" and lure_ys_masked is None:
            lure_ys_masked = self._constant_schedule(lure_y_masked, lure_y_start_step_masked, lure_y_end_step_masked)
        if lure_y_scheduler_unmasked == "constant" and lure_ys_unmasked is None:
            lure_ys_unmasked = self._constant_schedule(lure_y_unmasked, lure_y_start_step_unmasked, lure_y_end_step_unmasked)

        guides = {
            "guide_mode"                        : guide_mode,
            "guide_masked"                      : guide_masked,
            "guide_unmasked"                    : guide_unmasked,
            "mask"                              : mask,
            "unmask"                            : unmask,
            "mask_sync"                         : mask_sync,
            # FIX: the drift masks were accepted and inverted above but
            # previously never forwarded, so they silently had no effect.
            "mask_drift_x"                      : mask_drift_x,
            "mask_drift_y"                      : mask_drift_y,
            "mask_lure_x"                       : mask_lure_x,
            "mask_lure_y"                       : mask_lure_y,
            "weight_masked"                     : weight_masked,
            "weight_unmasked"                   : weight_unmasked,
            "weight_scheduler_masked"           : weight_scheduler_masked,
            "weight_scheduler_unmasked"         : weight_scheduler_unmasked,
            "start_step_masked"                 : weight_start_step_masked,
            "start_step_unmasked"               : weight_start_step_unmasked,
            "end_step_masked"                   : weight_end_step_masked,
            "end_step_unmasked"                 : weight_end_step_unmasked,
            "weights_masked"                    : weights_masked,
            "weights_unmasked"                  : weights_unmasked,
            "weight_masked_sync"                : sync_masked,
            "weight_unmasked_sync"              : sync_unmasked,
            "weight_scheduler_masked_sync"      : sync_scheduler_masked,
            "weight_scheduler_unmasked_sync"    : sync_scheduler_unmasked,
            "start_step_masked_sync"            : sync_start_step_masked,
            "start_step_unmasked_sync"          : sync_start_step_unmasked,
            "end_step_masked_sync"              : sync_end_step_masked,
            "end_step_unmasked_sync"            : sync_end_step_unmasked,
            "weights_masked_sync"               : syncs_masked,
            "weights_unmasked_sync"             : syncs_unmasked,
            "drift_x_data"                      : drift_x_data,
            "drift_x_sync"                      : drift_x_sync,
            "drift_y_data"                      : drift_y_data,
            "drift_y_sync"                      : drift_y_sync,
            "drift_y_guide"                     : drift_y_guide,
            "weight_masked_drift_x"             : drift_x_masked,
            "weight_unmasked_drift_x"           : drift_x_unmasked,
            "weight_scheduler_masked_drift_x"   : drift_x_scheduler_masked,
            "weight_scheduler_unmasked_drift_x" : drift_x_scheduler_unmasked,
            "start_step_masked_drift_x"         : drift_x_start_step_masked,
            "start_step_unmasked_drift_x"       : drift_x_start_step_unmasked,
            "end_step_masked_drift_x"           : drift_x_end_step_masked,
            "end_step_unmasked_drift_x"         : drift_x_end_step_unmasked,
            "weights_masked_drift_x"            : drift_xs_masked,
            "weights_unmasked_drift_x"          : drift_xs_unmasked,
            "weight_masked_drift_y"             : drift_y_masked,
            "weight_unmasked_drift_y"           : drift_y_unmasked,
            "weight_scheduler_masked_drift_y"   : drift_y_scheduler_masked,
            "weight_scheduler_unmasked_drift_y" : drift_y_scheduler_unmasked,
            "start_step_masked_drift_y"         : drift_y_start_step_masked,
            "start_step_unmasked_drift_y"       : drift_y_start_step_unmasked,
            "end_step_masked_drift_y"           : drift_y_end_step_masked,
            "end_step_unmasked_drift_y"         : drift_y_end_step_unmasked,
            "weights_masked_drift_y"            : drift_ys_masked,
            "weights_unmasked_drift_y"          : drift_ys_unmasked,
            "weight_masked_lure_x"              : lure_x_masked,
            "weight_unmasked_lure_x"            : lure_x_unmasked,
            "weight_scheduler_masked_lure_x"    : lure_x_scheduler_masked,
            "weight_scheduler_unmasked_lure_x"  : lure_x_scheduler_unmasked,
            "start_step_masked_lure_x"          : lure_x_start_step_masked,
            "start_step_unmasked_lure_x"        : lure_x_start_step_unmasked,
            "end_step_masked_lure_x"            : lure_x_end_step_masked,
            "end_step_unmasked_lure_x"          : lure_x_end_step_unmasked,
            "weights_masked_lure_x"             : lure_xs_masked,
            "weights_unmasked_lure_x"           : lure_xs_unmasked,
            "weight_masked_lure_y"              : lure_y_masked,
            "weight_unmasked_lure_y"            : lure_y_unmasked,
            "weight_scheduler_masked_lure_y"    : lure_y_scheduler_masked,
            "weight_scheduler_unmasked_lure_y"  : lure_y_scheduler_unmasked,
            "start_step_masked_lure_y"          : lure_y_start_step_masked,
            "start_step_unmasked_lure_y"        : lure_y_start_step_unmasked,
            "end_step_masked_lure_y"            : lure_y_end_step_masked,
            "end_step_unmasked_lure_y"          : lure_y_end_step_unmasked,
            "weights_masked_lure_y"             : lure_ys_masked,
            "weights_unmasked_lure_y"           : lure_ys_unmasked,
            "sync_lure_iter"                    : lure_iter,
            "sync_lure_sequence"                : lure_sequence,
            "cutoff_masked"                     : cutoff_masked,
            "cutoff_unmasked"                   : cutoff_unmasked
        }
        return (guides, )
class ClownGuide_Beta:
    """ComfyUI node: configure a single latent guide.

    Convenience wrapper around ClownGuides_Beta that routes one guide (plus an
    optional mask and explicit per-step weights) through the "masked" channel,
    leaving the unmasked channel at its inert defaults.
    """

    @classmethod
    def INPUT_TYPES(cls):
        return {"required":
                    {
                    "guide_mode":       (GUIDE_MODE_NAMES_BETA_SIMPLE, {"default": 'epsilon', "tooltip": "Recommended: epsilon or mean/mean_std with sampler_mode = standard, and unsample/resample with sampler_mode = unsample/resample. Epsilon_dynamic_mean, etc. are only used with two latent inputs and a mask. Blend/hard_light/mean/mean_std etc. require low strengths, start with 0.01-0.02."}),
                    "channelwise_mode": ("BOOLEAN", {"default": True}),
                    "projection_mode":  ("BOOLEAN", {"default": True}),
                    "weight":           ("FLOAT", {"default": 0.75, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide."}),
                    "cutoff":           ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step":0.01, "round": False, "tooltip": "Disables the guide for the next step when the denoised image is similar to the guide. Higher values will strengthen the effect."}),
                    "weight_scheduler": (["constant"] + get_res4lyf_scheduler_list(), {"default": "beta57"},),
                    "start_step":       ("INT", {"default": 0, "min": 0, "max": 10000}),
                    "end_step":         ("INT", {"default": 15, "min": -1, "max": 10000}),
                    "invert_mask":      ("BOOLEAN", {"default": False}),
                    },
                "optional":
                    {
                    "guide":   ("LATENT", ),
                    "mask":    ("MASK", ),
                    "weights": ("SIGMAS", ),
                    }
                }

    RETURN_TYPES = ("GUIDES",)
    RETURN_NAMES = ("guides",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_extensions"

    def main(self,
            weight_scheduler          = "constant",
            weight_scheduler_unmasked = "constant",
            start_step                = 0,
            start_step_unmasked       = 0,
            end_step                  = 30,
            end_step_unmasked         = 30,
            cutoff                    = 1.0,
            cutoff_unmasked           = 1.0,
            guide                     = None,
            guide_unmasked            = None,
            weight                    = 0.0,
            weight_unmasked           = 0.0,
            guide_mode                = "epsilon",
            channelwise_mode          = False,
            projection_mode           = False,
            weights                   = None,
            weights_unmasked          = None,
            mask                      = None,
            unmask                    = None,
            invert_mask               = False,
            ):
        """Build and return a GUIDES dict by delegating to ClownGuides_Beta.main,
        mapping this node's single-channel inputs onto the masked channel."""
        CG = ClownGuides_Beta()

        # NOTE(review): the mask is unconditionally inverted here AND
        # invert_mask is still forwarded to ClownGuides_Beta (which may invert
        # again) — confirm this double-inversion path is intended.
        mask = 1 - mask if mask is not None else None

        # -1 means "run to the end of sampling".
        if end_step == -1:
            end_step = MAX_STEPS

        # Clone the guide latents so downstream sampling cannot mutate the
        # caller's tensors. (A disabled experiment previously pulled
        # 'state_info.raw_x' instead of 'samples'.)
        if guide is not None:
            guide = {'samples': guide['samples'].clone()}
        if guide_unmasked is not None:
            guide_unmasked = {'samples': guide_unmasked['samples'].clone()}

        guides, = CG.main(
            weight_scheduler_masked   = weight_scheduler,
            weight_scheduler_unmasked = weight_scheduler_unmasked,
            start_step_masked         = start_step,
            start_step_unmasked       = start_step_unmasked,
            end_step_masked           = end_step,
            end_step_unmasked         = end_step_unmasked,
            cutoff_masked             = cutoff,
            cutoff_unmasked           = cutoff_unmasked,
            guide_masked              = guide,
            guide_unmasked            = guide_unmasked,
            weight_masked             = weight,
            weight_unmasked           = weight_unmasked,
            guide_mode                = guide_mode,
            channelwise_mode          = channelwise_mode,
            projection_mode           = projection_mode,
            weights_masked            = weights,
            weights_unmasked          = weights_unmasked,
            mask                      = mask,
            unmask                    = unmask,
            invert_mask               = invert_mask,
            )
        return (guides, )
class ClownGuides_Beta:
    """ComfyUI node: configure masked and unmasked latent guides.

    Produces the GUIDES dict consumed by the Clown sampler nodes: cloned guide
    latents, the (optionally inverted) mask, the composed guide mode, and
    per-step weight tensors for each channel.
    """

    @classmethod
    def INPUT_TYPES(cls):
        return {"required":
                    {
                    "guide_mode":                (GUIDE_MODE_NAMES_BETA_SIMPLE, {"default": 'epsilon', "tooltip": "Recommended: epsilon or mean/mean_std with sampler_mode = standard, and unsample/resample with sampler_mode = unsample/resample. Epsilon_dynamic_mean, etc. are only used with two latent inputs and a mask. Blend/hard_light/mean/mean_std etc. require low strengths, start with 0.01-0.02."}),
                    "channelwise_mode":          ("BOOLEAN", {"default": True}),
                    "projection_mode":           ("BOOLEAN", {"default": True}),
                    "weight_masked":             ("FLOAT", {"default": 0.75, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide."}),
                    "weight_unmasked":           ("FLOAT", {"default": 0.75, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide_bkg."}),
                    "cutoff_masked":             ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step":0.01, "round": False, "tooltip": "Disables the guide for the next step when the denoised image is similar to the guide. Higher values will strengthen the effect."}),
                    # NOTE(review): cutoff_unmasked allows -100..100 while cutoff_masked is 0..1 — looks like a copy-paste slip; confirm before tightening.
                    "cutoff_unmasked":           ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Disables the guide for the next step when the denoised image is similar to the guide. Higher values will strengthen the effect."}),
                    "weight_scheduler_masked":   (["constant"] + get_res4lyf_scheduler_list(), {"default": "beta57"},),
                    "weight_scheduler_unmasked": (["constant"] + get_res4lyf_scheduler_list(), {"default": "constant"},),
                    "start_step_masked":         ("INT", {"default": 0, "min": 0, "max": 10000}),
                    "start_step_unmasked":       ("INT", {"default": 0, "min": 0, "max": 10000}),
                    "end_step_masked":           ("INT", {"default": 15, "min": -1, "max": 10000}),
                    "end_step_unmasked":         ("INT", {"default": 15, "min": -1, "max": 10000}),
                    "invert_mask":               ("BOOLEAN", {"default": False}),
                    },
                "optional":
                    {
                    "guide_masked":     ("LATENT", ),
                    "guide_unmasked":   ("LATENT", ),
                    "mask":             ("MASK", ),
                    "weights_masked":   ("SIGMAS", ),
                    "weights_unmasked": ("SIGMAS", ),
                    }
                }

    RETURN_TYPES = ("GUIDES",)
    RETURN_NAMES = ("guides",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_extensions"

    def main(self,
            weight_scheduler_masked   = "constant",
            weight_scheduler_unmasked = "constant",
            start_step_masked         = 0,
            start_step_unmasked       = 0,
            end_step_masked           = 30,
            end_step_unmasked         = 30,
            cutoff_masked             = 1.0,
            cutoff_unmasked           = 1.0,
            guide_masked              = None,
            guide_unmasked            = None,
            weight_masked             = 0.0,
            weight_unmasked           = 0.0,
            guide_mode                = "epsilon",
            channelwise_mode          = False,
            projection_mode           = False,
            weights_masked            = None,
            weights_unmasked          = None,
            mask                      = None,
            unmask                    = None,
            invert_mask               = False,
            ):
        """Assemble the GUIDES dict.

        A channel whose guide is absent is reset to inert defaults. Guide
        latents are cloned so sampling cannot mutate the caller's tensors.
        Constant schedulers are expanded into explicit per-step weight tensors
        padded out to MAX_STEPS.
        """
        default_dtype = torch.float64  # weight schedules are kept in float64

        # -1 means "run to the end of sampling".
        if end_step_masked == -1:
            end_step_masked = MAX_STEPS
        if end_step_unmasked == -1:
            end_step_unmasked = MAX_STEPS

        # Reset a channel to inert defaults when its guide is absent.
        if guide_masked is None:
            weight_scheduler_masked = "constant"
            start_step_masked       = 0
            end_step_masked         = 30
            cutoff_masked           = 1.0
            weight_masked           = 0.0
            weights_masked          = None

        if guide_unmasked is None:
            weight_scheduler_unmasked = "constant"
            start_step_unmasked       = 0
            end_step_unmasked         = 30
            cutoff_unmasked           = 1.0
            weight_unmasked           = 0.0
            weights_unmasked          = None

        # Clone guide latents so downstream use cannot mutate caller tensors.
        # (A disabled experiment previously preferred 'state_info.raw_x'.)
        if guide_masked is not None:
            guide_masked = {'samples': guide_masked['samples'].clone()}
        if guide_unmasked is not None:
            guide_unmasked = {'samples': guide_unmasked['samples'].clone()}

        if invert_mask and mask is not None:
            mask = 1 - mask

        # Compose the effective guide mode; unsample/resample never take the
        # channelwise suffix.
        if projection_mode:
            guide_mode = guide_mode + "_projection"
        if channelwise_mode:
            guide_mode = guide_mode + "_cw"
        if guide_mode == "unsample_cw":
            guide_mode = "unsample"
        if guide_mode == "resample_cw":
            guide_mode = "resample"

        # Expand a constant scheduler into an explicit weight tensor: zeros
        # before start_step, the constant weight through end_step, then
        # zero-padding out to MAX_STEPS.
        if weight_scheduler_masked == "constant" and weights_masked is None:
            weights_masked = initialize_or_scale(None, weight_masked, end_step_masked).to(default_dtype)
            prepend        = torch.zeros(start_step_masked, dtype=default_dtype, device=weights_masked.device)
            weights_masked = torch.cat((prepend, weights_masked), dim=0)
            weights_masked = F.pad(weights_masked, (0, MAX_STEPS), value=0.0)

        if weight_scheduler_unmasked == "constant" and weights_unmasked is None:
            weights_unmasked = initialize_or_scale(None, weight_unmasked, end_step_unmasked).to(default_dtype)
            prepend          = torch.zeros(start_step_unmasked, dtype=default_dtype, device=weights_unmasked.device)
            weights_unmasked = torch.cat((prepend, weights_unmasked), dim=0)
            weights_unmasked = F.pad(weights_unmasked, (0, MAX_STEPS), value=0.0)

        guides = {
            "guide_mode"                : guide_mode,
            "weight_masked"             : weight_masked,
            "weight_unmasked"           : weight_unmasked,
            "weights_masked"            : weights_masked,
            "weights_unmasked"          : weights_unmasked,
            "guide_masked"              : guide_masked,
            "guide_unmasked"            : guide_unmasked,
            "mask"                      : mask,
            "unmask"                    : unmask,
            "weight_scheduler_masked"   : weight_scheduler_masked,
            "weight_scheduler_unmasked" : weight_scheduler_unmasked,
            "start_step_masked"         : start_step_masked,
            "start_step_unmasked"       : start_step_unmasked,
            "end_step_masked"           : end_step_masked,
            "end_step_unmasked"         : end_step_unmasked,
            "cutoff_masked"             : cutoff_masked,
            "cutoff_unmasked"           : cutoff_unmasked,
        }
        return (guides, )
class ClownGuidesAB_Beta:
    """ComfyUI node: configure two latent guides (A and B) with independent
    masks; A maps onto the masked channel and B onto the unmasked channel of
    the resulting GUIDES dict."""

    @classmethod
    def INPUT_TYPES(cls):
        return {"required":
                    {
                    "guide_mode":         (GUIDE_MODE_NAMES_BETA_SIMPLE, {"default": 'epsilon', "tooltip": "Recommended: epsilon or mean/mean_std with sampler_mode = standard, and unsample/resample with sampler_mode = unsample/resample. Epsilon_dynamic_mean, etc. are only used with two latent inputs and a mask. Blend/hard_light/mean/mean_std etc. require low strengths, start with 0.01-0.02."}),
                    "channelwise_mode":   ("BOOLEAN", {"default": False}),
                    "projection_mode":    ("BOOLEAN", {"default": False}),
                    "weight_A":           ("FLOAT", {"default": 0.75, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide."}),
                    "weight_B":           ("FLOAT", {"default": 0.75, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide_bkg."}),
                    "cutoff_A":           ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step":0.01, "round": False, "tooltip": "Disables the guide for the next step when the denoised image is similar to the guide. Higher values will strengthen the effect."}),
                    # NOTE(review): cutoff_B allows -100..100 while cutoff_A is 0..1 — looks like a copy-paste slip; confirm before tightening.
                    "cutoff_B":           ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Disables the guide for the next step when the denoised image is similar to the guide. Higher values will strengthen the effect."}),
                    "weight_scheduler_A": (["constant"] + get_res4lyf_scheduler_list(), {"default": "beta57"},),
                    "weight_scheduler_B": (["constant"] + get_res4lyf_scheduler_list(), {"default": "constant"},),
                    "start_step_A":       ("INT", {"default": 0, "min": 0, "max": 10000}),
                    "start_step_B":       ("INT", {"default": 0, "min": 0, "max": 10000}),
                    "end_step_A":         ("INT", {"default": 15, "min": -1, "max": 10000}),
                    "end_step_B":         ("INT", {"default": 15, "min": -1, "max": 10000}),
                    "invert_masks":       ("BOOLEAN", {"default": False}),
                    },
                "optional":
                    {
                    "guide_A":   ("LATENT", ),
                    "guide_B":   ("LATENT", ),
                    "mask_A":    ("MASK", ),
                    "mask_B":    ("MASK", ),
                    "weights_A": ("SIGMAS", ),
                    "weights_B": ("SIGMAS", ),
                    }
                }

    RETURN_TYPES = ("GUIDES",)
    RETURN_NAMES = ("guides",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_extensions"

    def main(self,
            weight_scheduler_A = "constant",
            weight_scheduler_B = "constant",
            start_step_A       = 0,
            start_step_B       = 0,
            end_step_A         = 30,
            end_step_B         = 30,
            cutoff_A           = 1.0,
            cutoff_B           = 1.0,
            guide_A            = None,
            guide_B            = None,
            weight_A           = 0.0,
            weight_B           = 0.0,
            guide_mode         = "epsilon",
            channelwise_mode   = False,
            projection_mode    = False,
            weights_A          = None,
            weights_B          = None,
            mask_A             = None,
            mask_B             = None,
            invert_masks : bool = False,
            ):
        """Assemble the GUIDES dict from the A/B channel inputs.

        If guide_A is missing, B is promoted to the primary (masked) channel.
        If only mask_B is supplied, mask_A becomes its complement. Constant
        schedulers are expanded into explicit per-step weight tensors padded
        out to MAX_STEPS.
        """
        default_dtype = torch.float64  # weight schedules are kept in float64

        # -1 means "run to the end of sampling".
        if end_step_A == -1:
            end_step_A = MAX_STEPS
        if end_step_B == -1:
            end_step_B = MAX_STEPS

        # Clone guide latents so downstream use cannot mutate caller tensors.
        # (A disabled experiment previously preferred 'state_info.raw_x'.)
        if guide_A is not None:
            guide_A = {'samples': guide_A['samples'].clone()}
        if guide_B is not None:
            guide_B = {'samples': guide_B['samples'].clone()}

        # Promote B to the primary channel when A is absent.
        if guide_A is None:
            guide_A  = guide_B
            guide_B  = None
            mask_A   = mask_B
            mask_B   = None
            weight_B = 0.0

        if guide_B is None:
            weight_B = 0.0

        # With only mask_B supplied, derive mask_A as its complement.
        if mask_A is None and mask_B is not None:
            mask_A = 1 - mask_B

        # Compose the effective guide mode; unsample/resample never take the
        # channelwise suffix.
        if projection_mode:
            guide_mode = guide_mode + "_projection"
        if channelwise_mode:
            guide_mode = guide_mode + "_cw"
        if guide_mode == "unsample_cw":
            guide_mode = "unsample"
        if guide_mode == "resample_cw":
            guide_mode = "resample"

        # Expand a constant scheduler into an explicit weight tensor: zeros
        # before start_step, the constant weight through end_step, then
        # zero-padding out to MAX_STEPS.
        if weight_scheduler_A == "constant" and weights_A is None:
            weights_A = initialize_or_scale(None, weight_A, end_step_A).to(default_dtype)
            prepend   = torch.zeros(start_step_A, dtype=default_dtype, device=weights_A.device)
            weights_A = torch.cat((prepend, weights_A), dim=0)
            weights_A = F.pad(weights_A, (0, MAX_STEPS), value=0.0)

        if weight_scheduler_B == "constant" and weights_B is None:
            weights_B = initialize_or_scale(None, weight_B, end_step_B).to(default_dtype)
            prepend   = torch.zeros(start_step_B, dtype=default_dtype, device=weights_B.device)
            weights_B = torch.cat((prepend, weights_B), dim=0)
            weights_B = F.pad(weights_B, (0, MAX_STEPS), value=0.0)

        if invert_masks:
            mask_A = 1 - mask_A if mask_A is not None else None
            mask_B = 1 - mask_B if mask_B is not None else None

        # A feeds the masked channel, B the unmasked channel.
        guides = {
            "guide_mode"                : guide_mode,
            "weight_masked"             : weight_A,
            "weight_unmasked"           : weight_B,
            "weights_masked"            : weights_A,
            "weights_unmasked"          : weights_B,
            "guide_masked"              : guide_A,
            "guide_unmasked"            : guide_B,
            "mask"                      : mask_A,
            "unmask"                    : mask_B,
            "weight_scheduler_masked"   : weight_scheduler_A,
            "weight_scheduler_unmasked" : weight_scheduler_B,
            "start_step_masked"         : start_step_A,
            "start_step_unmasked"       : start_step_B,
            "end_step_masked"           : end_step_A,
            "end_step_unmasked"         : end_step_B,
            "cutoff_masked"             : cutoff_A,
            "cutoff_unmasked"           : cutoff_B,
        }
        return (guides, )
class ClownOptions_Combine:
    """ComfyUI node: merge incoming OPTIONS inputs into one OPTIONS dict."""

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "options": ("OPTIONS",),
            },
        }

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_options"

    def main(self, options, **kwargs):
        """Fold the options (and any extra keyword inputs) into an
        OptionsManager and return the merged dict."""
        merged = OptionsManager(options, **kwargs)
        return (merged.as_dict(),)
class ClownOptions_Frameweights:
    """ComfyUI node: register a per-frame weight configuration on the OPTIONS dict."""

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "config_name": (FRAME_WEIGHTS_CONFIG_NAMES, {"default": "frame_weights", "tooltip": "Apply to specific type of per-frame weights."}),
                "dynamics":    (FRAME_WEIGHTS_DYNAMICS_NAMES, {"default": "ease_out", "tooltip": "The function type used for the dynamic period. constant: no change, linear: steady change, ease_out: starts fast, ease_in: starts slow"}),
                "schedule":    (FRAME_WEIGHTS_SCHEDULE_NAMES, {"default": "moderate_early", "tooltip": "fast_early: fast change starts immediately, slow_late: slow change starts later"}),
                "scale":       ("FLOAT", {"default": 0.5, "min": 0.0, "max": 1.0, "step": 0.01, "tooltip": "The amount of change over the course of the frame weights. 1.0 means that the guides have no influence by the end."}),
                "reverse":     ("BOOLEAN", {"default": False, "tooltip": "Reverse the frame weights"}),
            },
            "optional": {
                "frame_weights": ("SIGMAS", {"tooltip": "Overrides all other settings EXCEPT reverse."}),
                "custom_string": ("STRING", {"tooltip": "Overrides all other settings EXCEPT reverse.", "multiline": True}),
                "options":       ("OPTIONS",),
            },
        }

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_options"

    def main(self,
            config_name,
            dynamics,
            schedule,
            scale,
            reverse,
            frame_weights = None,
            custom_string = None,
            options       = None,
            ):
        """Add (or extend) the frame-weights manager stored in options and
        return the updated options dict."""
        opts = OptionsManager(options if options is not None else {})

        # Reuse the manager already on the options chain, or start a new one.
        mgr = opts.get("frame_weights_mgr")
        if mgr is None:
            mgr = FrameWeightsManager()

        # A blank / whitespace-only custom string counts as "not provided".
        if custom_string is not None and custom_string.strip() == "":
            custom_string = None

        mgr.add_weight_config(
            config_name,
            dynamics      = dynamics,
            schedule      = schedule,
            scale         = scale,
            is_reversed   = reverse,
            frame_weights = frame_weights,
            custom_string = custom_string,
        )

        opts.update("frame_weights_mgr", mgr)
        return (opts.as_dict(),)
class SharkOptions_GuiderInput:
    """ComfyUI node: attach a guider (latent dict or tensor) to the OPTIONS dict."""

    @classmethod
    def INPUT_TYPES(s):
        return {"required":
                    {"guider": ("GUIDER", ),
                    },
                "optional":
                    {"options": ("OPTIONS", ),
                    }
                }

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_options"

    def main(self, guider, options=None):
        """Normalize the guider and store it under the "guider" options key.

        Latent dicts are unwrapped to their 'samples' tensor; tensors are
        detached and moved to CPU so the options dict carries no autograd or
        device references. (Removed a dead "options_mgr is None" re-check —
        the manager is always constructed just above.)
        """
        options_mgr = OptionsManager(options if options is not None else {})

        if isinstance(guider, dict):
            guider = guider.get('samples', None)
        if isinstance(guider, torch.Tensor):
            guider = guider.detach().cpu()

        options_mgr.update("guider", guider)
        return (options_mgr.as_dict(), )
class ClownGuide_AdaIN_MMDiT_Beta:
    """ComfyUI node: configure per-block AdaIN guidance for MMDiT models,
    storing the settings under the 'adain' keys of the GUIDES dict."""

    @classmethod
    def INPUT_TYPES(cls):
        return {"required":
                    {
                    "weight":           ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide by multiplying all other weights by this value."}),
                    "weight_scheduler": (["constant"] + get_res4lyf_scheduler_list(), {"default": "constant"},),
                    "double_blocks" :   ("STRING", {"default": "", "multiline": True}),
                    "double_weights" :  ("STRING", {"default": "", "multiline": True}),
                    "single_blocks" :   ("STRING", {"default": "20", "multiline": True}),
                    "single_weights" :  ("STRING", {"default": "0.5", "multiline": True}),
                    "start_step":       ("INT", {"default": 0, "min": 0, "max": 10000}),
                    "end_step":         ("INT", {"default": 15, "min": -1, "max": 10000}),
                    "invert_mask":      ("BOOLEAN", {"default": False}),
                    },
                "optional":
                    {
                    "guide":   ("LATENT", ),
                    "mask":    ("MASK", ),
                    "weights": ("SIGMAS", ),
                    "guides":  ("GUIDES", ),
                    }
                }

    RETURN_TYPES = ("GUIDES",)
    RETURN_NAMES = ("guides",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_extensions"

    @staticmethod
    def _expand_block_weights(blocks, weights):
        """Parse block/weight range strings into (block indices, 100-entry weight list).

        A single weight is broadcast to all 100 slots; "all" selects every
        block index 0-99; an explicit block list scatters its weights into a
        zero-initialized 100-entry list.
        """
        weights = parse_range_string(weights)
        if len(weights) == 0:
            weights.append(0.0)
        if len(weights) == 1:
            weights = weights * 100  # broadcast a single weight to all slots
        if type(weights[0]) == int:
            weights = [float(v) for v in weights]
        if blocks == "all":
            blocks = list(range(100))
        else:
            blocks = parse_range_string(blocks)
            expanded = [0.0] * 100
            for b, w in zip(blocks, weights):
                expanded[b] = w
            weights = expanded
        return blocks, weights

    def main(self,
            weight           = 1.0,
            weight_scheduler = "constant",
            double_weights   = "0.1",
            single_weights   = "0.0",
            double_blocks    = "all",
            single_blocks    = "all",
            start_step       = 0,
            end_step         = 15,
            invert_mask      = False,
            guide            = None,
            mask             = None,
            weights          = None,
            guides           = None,
            ):
        """Merge AdaIN guide settings into a (deep-copied) GUIDES dict."""
        default_dtype = torch.float64

        # NOTE(review): the mask is unconditionally inverted and invert_mask
        # is never consulted — confirm whether invert_mask should gate this.
        mask = 1 - mask if mask is not None else None

        double_blocks, double_weights = self._expand_block_weights(double_blocks, double_weights)
        single_blocks, single_weights = self._expand_block_weights(single_blocks, single_weights)

        # -1 means "run to the end of sampling".
        if end_step == -1:
            end_step = MAX_STEPS

        # Prefer the sampler's raw_x state when present; clone either way so
        # downstream use cannot mutate the caller's tensors.
        if guide is not None:
            raw_x = guide.get('state_info', {}).get('raw_x', None)
            if raw_x is not None:
                guide = {'samples': raw_x.clone()}
            else:
                guide = {'samples': guide['samples'].clone()}

        # NOTE(review): the 'weights' input is always overwritten here when
        # the scheduler is constant (the 'weights == None' guard is disabled)
        # — confirm this is intended.
        if weight_scheduler == "constant":
            weights = initialize_or_scale(None, weight, end_step).to(default_dtype)
            prepend = torch.zeros(start_step).to(weights)
            weights = torch.cat([prepend, weights])
            weights = F.pad(weights, (0, MAX_STEPS), value=0.0)

        # Deep-copy to avoid mutating an upstream node's GUIDES dict.
        guides = copy.deepcopy(guides) if guides is not None else {}

        guides['weight_adain']  = weight
        guides['weights_adain'] = weights
        guides['blocks_adain_mmdit'] = {
            "double_weights": double_weights,
            "single_weights": single_weights,
            "double_blocks" : double_blocks,
            "single_blocks" : single_blocks,
        }
        guides['guide_adain']            = guide
        guides['mask_adain']             = mask
        guides['weight_scheduler_adain'] = weight_scheduler
        guides['start_step_adain']       = start_step
        guides['end_step_adain']         = end_step
        return (guides, )
class ClownGuide_AttnInj_MMDiT_Beta:
    """ComfyUI node: configure per-block attention injection for MMDiT models,
    storing the settings under the 'attninj' keys of the GUIDES dict."""

    @classmethod
    def INPUT_TYPES(cls):
        return {"required":
                    {
                    "weight":           ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide by multiplying all other weights by this value."}),
                    "weight_scheduler": (["constant"] + get_res4lyf_scheduler_list(), {"default": "constant"},),
                    "double_blocks" :   ("STRING", {"default": "0,1,3", "multiline": True}),
                    "double_weights" :  ("STRING", {"default": "1.0", "multiline": True}),
                    "single_blocks" :   ("STRING", {"default": "20", "multiline": True}),
                    "single_weights" :  ("STRING", {"default": "0.5", "multiline": True}),
                    "img_q":            ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set relative injection strength."}),
                    "img_k":            ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set relative injection strength."}),
                    "img_v":            ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set relative injection strength."}),
                    "txt_q":            ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set relative injection strength."}),
                    "txt_k":            ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set relative injection strength."}),
                    "txt_v":            ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set relative injection strength."}),
                    "img_q_norm":       ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set relative injection strength."}),
                    "img_k_norm":       ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set relative injection strength."}),
                    "img_v_norm":       ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set relative injection strength."}),
                    "txt_q_norm":       ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set relative injection strength."}),
                    "txt_k_norm":       ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set relative injection strength."}),
                    "txt_v_norm":       ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set relative injection strength."}),
                    "start_step":       ("INT", {"default": 0, "min": 0, "max": 10000}),
                    "end_step":         ("INT", {"default": 15, "min": -1, "max": 10000}),
                    "invert_mask":      ("BOOLEAN", {"default": False}),
                    },
                "optional":
                    {
                    "guide":   ("LATENT", ),
                    "mask":    ("MASK", ),
                    "weights": ("SIGMAS", ),
                    "guides":  ("GUIDES", ),
                    }
                }

    RETURN_TYPES = ("GUIDES",)
    RETURN_NAMES = ("guides",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_extensions"

    @staticmethod
    def _expand_block_weights(blocks, weights):
        """Parse block/weight range strings into (block indices, 100-entry weight list).

        A single weight is broadcast to all 100 slots; "all" selects every
        block index 0-99; an explicit block list scatters its weights into a
        zero-initialized 100-entry list.
        """
        weights = parse_range_string(weights)
        if len(weights) == 0:
            weights.append(0.0)
        if len(weights) == 1:
            weights = weights * 100  # broadcast a single weight to all slots
        if type(weights[0]) == int:
            weights = [float(v) for v in weights]
        if blocks == "all":
            blocks = list(range(100))
        else:
            blocks = parse_range_string(blocks)
            expanded = [0.0] * 100
            for b, w in zip(blocks, weights):
                expanded[b] = w
            weights = expanded
        return blocks, weights

    def main(self,
            weight           = 1.0,
            weight_scheduler = "constant",
            double_weights   = "0.1",
            single_weights   = "0.0",
            double_blocks    = "all",
            single_blocks    = "all",
            img_q            = 0.0,
            img_k            = 0.0,
            img_v            = 0.0,
            txt_q            = 0.0,
            txt_k            = 0.0,
            txt_v            = 0.0,
            img_q_norm       = 0.0,
            img_k_norm       = 0.0,
            img_v_norm       = 0.0,
            txt_q_norm       = 0.0,
            txt_k_norm       = 0.0,
            txt_v_norm       = 0.0,
            start_step       = 0,
            end_step         = 15,
            invert_mask      = False,
            guide            = None,
            mask             = None,
            weights          = None,
            guides           = None,
            ):
        """Merge attention-injection guide settings into a (deep-copied) GUIDES dict."""
        default_dtype = torch.float64

        # NOTE(review): the mask is unconditionally inverted and invert_mask
        # is never consulted — confirm whether invert_mask should gate this.
        mask = 1 - mask if mask is not None else None

        double_blocks, double_weights = self._expand_block_weights(double_blocks, double_weights)
        single_blocks, single_weights = self._expand_block_weights(single_blocks, single_weights)

        # -1 means "run to the end of sampling".
        if end_step == -1:
            end_step = MAX_STEPS

        # Prefer the sampler's raw_x state when present; clone either way so
        # downstream use cannot mutate the caller's tensors.
        if guide is not None:
            raw_x = guide.get('state_info', {}).get('raw_x', None)
            if raw_x is not None:
                guide = {'samples': raw_x.clone()}
            else:
                guide = {'samples': guide['samples'].clone()}

        # NOTE(review): the 'weights' input is always overwritten here when
        # the scheduler is constant (the 'weights == None' guard is disabled)
        # — confirm this is intended.
        if weight_scheduler == "constant":
            weights = initialize_or_scale(None, weight, end_step).to(default_dtype)
            prepend = torch.zeros(start_step).to(weights)
            weights = torch.cat([prepend, weights])
            weights = F.pad(weights, (0, MAX_STEPS), value=0.0)

        # Deep-copy to avoid mutating an upstream node's GUIDES dict.
        guides = copy.deepcopy(guides) if guides is not None else {}

        guides['weight_attninj']  = weight
        guides['weights_attninj'] = weights
        guides['blocks_attninj_mmdit'] = {
            "double_weights": double_weights,
            "single_weights": single_weights,
            "double_blocks" : double_blocks,
            "single_blocks" : single_blocks,
        }
        guides['blocks_attninj_qkv'] = {
            "img_q": img_q,
            "img_k": img_k,
            "img_v": img_v,
            "txt_q": txt_q,
            "txt_k": txt_k,
            "txt_v": txt_v,
            "img_q_norm": img_q_norm,
            "img_k_norm": img_k_norm,
            "img_v_norm": img_v_norm,
            "txt_q_norm": txt_q_norm,
            "txt_k_norm": txt_k_norm,
            "txt_v_norm": txt_v_norm,
        }
        guides['guide_attninj']            = guide
        guides['mask_attninj']             = mask
        guides['weight_scheduler_attninj'] = weight_scheduler
        guides['start_step_attninj']       = start_step
        guides['end_step_attninj']         = end_step
        return (guides, )
class ClownGuide_StyleNorm_Advanced_HiDream:
    """ComfyUI node (experimental): fine-grained style-normalization guidance
    for HiDream, selecting which internal tensors (feed-forward, MoE, attention,
    norms) are matched via scattersort/AdaIN. Stored under the 'adain' keys of
    the GUIDES dict."""

    @classmethod
    def INPUT_TYPES(cls):
        return {"required":
                    {
                    "weight":           ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide by multiplying all other weights by this value."}),
                    "weight_scheduler": (["constant"] + get_res4lyf_scheduler_list(), {"default": "constant"},),
                    "double_blocks" :   ("STRING", {"default": "all", "multiline": True}),
                    "double_weights" :  ("STRING", {"default": "1.0", "multiline": True}),
                    "single_blocks" :   ("STRING", {"default": "all", "multiline": True}),
                    "single_weights" :  ("STRING", {"default": "1.0", "multiline": True}),
                    "mode":             (["scattersort", "AdaIN"], {"default": "scattersort"},),
                    "noise_mode":       (["direct", "update", "smart", "recon", "bonanza"], {"default": "smart"},),
                    #"shared_experts": ("BOOLEAN", {"default": False}),
                    "ff_1" :                  ("BOOLEAN", {"default": False}),
                    "ff_1_silu" :             ("BOOLEAN", {"default": False}),
                    "ff_3" :                  ("BOOLEAN", {"default": False}),
                    "ff_13" :                 ("BOOLEAN", {"default": False}),
                    "ff_2" :                  ("BOOLEAN", {"default": False}),
                    "moe_gate" :              ("BOOLEAN", {"default": False}),
                    "topk_weight" :           ("BOOLEAN", {"default": False}),
                    "moe_ff_1" :              ("BOOLEAN", {"default": False}),
                    "moe_ff_1_silu" :         ("BOOLEAN", {"default": False}),
                    "moe_ff_3" :              ("BOOLEAN", {"default": False}),
                    "moe_ff_13" :             ("BOOLEAN", {"default": False}),
                    "moe_ff_2" :              ("BOOLEAN", {"default": False}),
                    "moe_sum" :               ("BOOLEAN", {"default": False}),
                    "moe_out" :               ("BOOLEAN", {"default": False}),
                    "double_img_io":          ("BOOLEAN", {"default": False}),
                    "double_img_norm0":       ("BOOLEAN", {"default": False}),
                    "double_img_attn":        ("BOOLEAN", {"default": False}),
                    "double_img_attn_gated":  ("BOOLEAN", {"default": False}),
                    "double_img":             ("BOOLEAN", {"default": False}),
                    "double_img_norm1":       ("BOOLEAN", {"default": False}),
                    "double_img_ff_i":        ("BOOLEAN", {"default": False}),
                    "double_txt_io":          ("BOOLEAN", {"default": False}),
                    "double_txt_norm0":       ("BOOLEAN", {"default": False}),
                    "double_txt_attn":        ("BOOLEAN", {"default": False}),
                    "double_txt_attn_gated":  ("BOOLEAN", {"default": False}),
                    "double_txt":             ("BOOLEAN", {"default": False}),
                    "double_txt_norm1":       ("BOOLEAN", {"default": False}),
                    "double_txt_ff_t":        ("BOOLEAN", {"default": False}),
                    "single_img_io":          ("BOOLEAN", {"default": False}),
                    "single_img_norm0":       ("BOOLEAN", {"default": False}),
                    "single_img_attn":        ("BOOLEAN", {"default": False}),
                    "single_img_attn_gated":  ("BOOLEAN", {"default": False}),
                    "single_img":             ("BOOLEAN", {"default": False}),
                    "single_img_norm1":       ("BOOLEAN", {"default": False}),
                    "single_img_ff_i":        ("BOOLEAN", {"default": False}),
                    "attn_img_q_norm" :       ("BOOLEAN", {"default": False}),
                    "attn_img_k_norm" :       ("BOOLEAN", {"default": False}),
                    "attn_img_v_norm" :       ("BOOLEAN", {"default": False}),
                    "attn_txt_q_norm" :       ("BOOLEAN", {"default": False}),
                    "attn_txt_k_norm" :       ("BOOLEAN", {"default": False}),
                    "attn_txt_v_norm" :       ("BOOLEAN", {"default": False}),
                    "attn_img_double" :       ("BOOLEAN", {"default": False}),
                    "attn_txt_double" :       ("BOOLEAN", {"default": False}),
                    "attn_img_single" :       ("BOOLEAN", {"default": False}),
                    "proj_out" :              ("BOOLEAN", {"default": False}),
                    "start_step":             ("INT", {"default": 0, "min": 0, "max": 10000}),
                    "end_step":               ("INT", {"default": 15, "min": -1, "max": 10000}),
                    "invert_mask":            ("BOOLEAN", {"default": False}),
                    },
                "optional":
                    {
                    "guide":   ("LATENT", ),
                    "mask":    ("MASK", ),
                    "weights": ("SIGMAS", ),
                    "guides":  ("GUIDES", ),
                    }
                }

    RETURN_TYPES = ("GUIDES",)
    RETURN_NAMES = ("guides",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_extensions"
    EXPERIMENTAL = True

    @staticmethod
    def _expand_block_weights(blocks, weights):
        """Parse block/weight range strings into (block indices, 100-entry weight list).

        A single weight is broadcast to all 100 slots; "all" selects every
        block index 0-99; an explicit block list scatters its weights into a
        zero-initialized 100-entry list.
        """
        weights = parse_range_string(weights)
        if len(weights) == 0:
            weights.append(0.0)
        if len(weights) == 1:
            weights = weights * 100  # broadcast a single weight to all slots
        if type(weights[0]) == int:
            weights = [float(v) for v in weights]
        if blocks == "all":
            blocks = list(range(100))
        else:
            blocks = parse_range_string(blocks)
            expanded = [0.0] * 100
            for b, w in zip(blocks, weights):
                expanded[b] = w
            weights = expanded
        return blocks, weights

    def main(self,
            weight           = 1.0,
            weight_scheduler = "constant",
            mode             = "scattersort",
            noise_mode       = "smart",
            double_weights   = "0.1",
            single_weights   = "0.0",
            double_blocks    = "all",
            single_blocks    = "all",
            start_step       = 0,
            end_step         = 15,
            invert_mask      = False,
            moe_gate         = False,
            topk_weight      = False,
            moe_out          = False,
            moe_sum          = False,
            ff_1             = False,
            ff_1_silu        = False,
            ff_3             = False,
            ff_13            = False,
            ff_2             = False,
            shared_experts   = False,
            moe_ff_1         = False,
            moe_ff_1_silu    = False,
            moe_ff_3         = False,
            moe_ff_13        = False,
            moe_ff_2         = False,
            double_img_io    = False,
            double_img_norm0 = False,
            double_img_attn  = False,
            double_img_norm1 = False,
            double_img_attn_gated = False,
            double_img       = False,
            double_img_ff_i  = False,
            double_txt_io    = False,
            double_txt_norm0 = False,
            double_txt_attn  = False,
            double_txt_attn_gated = False,
            double_txt       = False,
            double_txt_norm1 = False,
            double_txt_ff_t  = False,
            single_img_io    = False,
            single_img_norm0 = False,
            single_img_attn  = False,
            single_img_attn_gated = False,
            single_img       = False,
            single_img_norm1 = False,
            single_img_ff_i  = False,
            attn_img_q_norm  = False,
            attn_img_k_norm  = False,
            attn_img_v_norm  = False,
            attn_txt_q_norm  = False,
            attn_txt_k_norm  = False,
            attn_txt_v_norm  = False,
            attn_img_single  = False,
            attn_img_double  = False,
            attn_txt_double  = False,
            proj_out         = False,
            guide            = None,
            mask             = None,
            weights          = None,
            guides           = None,
            ):
        """Merge the style-normalization settings into a (deep-copied) GUIDES dict."""
        default_dtype = torch.float64

        # NOTE(review): the mask is unconditionally inverted and invert_mask
        # is never consulted — confirm whether invert_mask should gate this.
        mask = 1 - mask if mask is not None else None

        double_blocks, double_weights = self._expand_block_weights(double_blocks, double_weights)
        single_blocks, single_weights = self._expand_block_weights(single_blocks, single_weights)

        # -1 means "run to the end of sampling".
        if end_step == -1:
            end_step = MAX_STEPS

        # Prefer the sampler's raw_x state when present; clone either way so
        # downstream use cannot mutate the caller's tensors.
        if guide is not None:
            raw_x = guide.get('state_info', {}).get('raw_x', None)
            if raw_x is not None:
                guide = {'samples': raw_x.clone()}
            else:
                guide = {'samples': guide['samples'].clone()}

        # NOTE(review): the 'weights' input is always overwritten here when
        # the scheduler is constant (the 'weights == None' guard is disabled)
        # — confirm this is intended.
        if weight_scheduler == "constant":
            weights = initialize_or_scale(None, weight, end_step).to(default_dtype)
            prepend = torch.zeros(start_step).to(weights)
            weights = torch.cat([prepend, weights])
            weights = F.pad(weights, (0, MAX_STEPS), value=0.0)

        # Deep-copy to avoid mutating an upstream node's GUIDES dict.
        guides = copy.deepcopy(guides) if guides is not None else {}

        guides['weight_adain']  = weight
        guides['weights_adain'] = weights
        guides['blocks_adain_mmdit'] = {
            "double_weights": double_weights,
            "single_weights": single_weights,
            "double_blocks" : double_blocks,
            "single_blocks" : single_blocks,
        }
        guides['sort_and_scatter'] = {
            "mode"                   : mode,
            "noise_mode"             : noise_mode,
            "moe_gate"               : moe_gate,
            "topk_weight"            : topk_weight,
            "moe_sum"                : moe_sum,
            "moe_out"                : moe_out,
            "ff_1"                   : ff_1,
            "ff_1_silu"              : ff_1_silu,
            "ff_3"                   : ff_3,
            "ff_13"                  : ff_13,
            "ff_2"                   : ff_2,
            "moe_ff_1"               : moe_ff_1,
            "moe_ff_1_silu"          : moe_ff_1_silu,
            "moe_ff_3"               : moe_ff_3,
            "moe_ff_13"              : moe_ff_13,
            "moe_ff_2"               : moe_ff_2,
            "shared_experts"         : shared_experts,
            "double_img_io"          : double_img_io,
            "double_img_norm0"       : double_img_norm0,
            "double_img_attn"        : double_img_attn,
            "double_img_norm1"       : double_img_norm1,
            "double_img_attn_gated"  : double_img_attn_gated,
            "double_img"             : double_img,
            "double_img_ff_i"        : double_img_ff_i,
            "double_txt_io"          : double_txt_io,
            "double_txt_norm0"       : double_txt_norm0,
            "double_txt_attn"        : double_txt_attn,
            "double_txt_attn_gated"  : double_txt_attn_gated,
            "double_txt"             : double_txt,
            "double_txt_norm1"       : double_txt_norm1,
            "double_txt_ff_t"        : double_txt_ff_t,
            "single_img_io"          : single_img_io,
            "single_img_norm0"       : single_img_norm0,
            "single_img_attn"        : single_img_attn,
            "single_img_attn_gated"  : single_img_attn_gated,
            "single_img"             : single_img,
            "single_img_norm1"       : single_img_norm1,
            "single_img_ff_i"        : single_img_ff_i,
            "attn_img_q_norm"        : attn_img_q_norm,
            "attn_img_k_norm"        : attn_img_k_norm,
            "attn_img_v_norm"        : attn_img_v_norm,
            "attn_txt_q_norm"        : attn_txt_q_norm,
            "attn_txt_k_norm"        : attn_txt_k_norm,
            "attn_txt_v_norm"        : attn_txt_v_norm,
            "attn_img_single"        : attn_img_single,
            "attn_img_double"        : attn_img_double,
            # Fix: attn_txt_double was accepted as an input but silently
            # dropped from this dict; store it so the toggle can take effect
            # (confirm downstream reads it via .get()).
            "attn_txt_double"        : attn_txt_double,
            "proj_out"               : proj_out,
        }
        guides['guide_adain']            = guide
        guides['mask_adain']             = mask
        guides['weight_scheduler_adain'] = weight_scheduler
        guides['start_step_adain']       = start_step
        guides['end_step_adain']         = end_step
        return (guides, )
from ..style_transfer import StyleMMDiT_Model, StyleUNet_Model, DEFAULT_BLOCK_WEIGHTS_MMDIT, DEFAULT_ATTN_WEIGHTS_MMDIT, DEFAULT_BASE_WEIGHTS_MMDIT
# Style-transfer operating modes offered by the Clown style nodes.
# "none" disables the transfer; the rest select the feature-matching
# algorithm applied to guide activations ("tiled_" variants operate on
# spatial tiles and are demoted to their untiled form for text tokens).
STYLE_MODES = [
    "none",
    #"sinkhornsort",
    "scattersort_dir",
    "scattersort_dir2",
    "scattersort",
    "tiled_scattersort",
    "AdaIN",
    "tiled_AdaIN",
    "WCT",
    "WCT2",
    "injection",
]
class ClownStyle_Boost:
    """Node that tunes global style-transfer behavior on the StyleMMDiT model
    carried in `guides`: noise handling mode, the "recon lure" mode, and the
    "data shock" boost applied over a step range."""

    @classmethod
    def INPUT_TYPES(cls):
        return {"required":
                    {
                    "noise_mode": (["direct", "update", "smart", "recon", "bonanza"], {"default": "update"},),
                    "recon_lure": (STYLE_MODES, {"default": "WCT", "tooltip": "Only used if noise_mode = recon. Can increase the strength of the style."},),
                    "datashock": (STYLE_MODES, {"default": "scattersort", "tooltip": "Will drastically increase the strength at low denoise levels. Use with img2img workflows."},),
                    "datashock_weight": ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide by multiplying all other weights by this value."}),
                    "datashock_start_step": ("INT", {"default": 0, "min": 0, "max": 10000, "step": 1, "tooltip": "Start step for data shock."}),
                    "datashock_end_step" : ("INT", {"default": 1, "min": 1, "max": 10000, "step": 1, "tooltip": "End step for data shock."}),
                    "tile_h" : ("INT", {"default": 128, "min": 16, "max": 10000, "step": 16, "tooltip": "Tile size for tiled modes. Lower values will transfer composition more effectively. Dimensions of image must be divisible by this value."}),
                    "tile_w" : ("INT", {"default": 128, "min": 16, "max": 10000, "step": 16, "tooltip": "Tile size for tiled modes. Lower values will transfer composition more effectively. Dimensions of image must be divisible by this value."}),
                    },
                "optional":
                    {
                    "guides": ("GUIDES", ),
                    }
                }

    RETURN_TYPES = ("GUIDES",)
    RETURN_NAMES = ("guides",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_extensions"

    def main(self,
            noise_mode           = "update",
            recon_lure           = "default",   # NOTE(review): "default" is not in STYLE_MODES; UI default is "WCT" — confirm intended fallback
            datashock            = None,
            datashock_weight     = 1.0,
            datashock_start_step = None,
            datashock_end_step   = None,
            tile_h               = 0,
            tile_w               = 0,
            guides               = None,
            ):
        """Return a deep-copied `guides` dict whose StyleMMDiT model carries the
        requested noise/datashock settings. The incoming dict is never mutated.

        Returns:
            (guides,) — single-element tuple, ComfyUI node convention.
        """
        # Work on a copy so upstream nodes sharing this guides dict are unaffected.
        guides = copy.deepcopy(guides) if guides is not None else {}

        StyleMMDiT = guides.get('StyleMMDiT')
        if StyleMMDiT is None:
            StyleMMDiT = StyleMMDiT_Model()

        # Tile sizes are given in pixels; latent space is 16x smaller for MMDiT.
        StyleMMDiT.set_weights(h_tile=tile_h // 16, w_tile=tile_w // 16)

        StyleMMDiT.noise_mode            = noise_mode
        StyleMMDiT.recon_lure            = recon_lure
        StyleMMDiT.data_shock            = datashock
        StyleMMDiT.data_shock_weight     = datashock_weight
        StyleMMDiT.data_shock_start_step = datashock_start_step
        StyleMMDiT.data_shock_end_step   = datashock_end_step

        guides['StyleMMDiT'] = StyleMMDiT
        return (guides,)
class ClownStyle_MMDiT:
    """Node that configures model-level (proj_in/proj_out) style transfer for
    MMDiT models and merges the result into the `guides` dict."""

    @classmethod
    def INPUT_TYPES(cls):
        return {"required":
                    {
                    "mode": (STYLE_MODES, {"default": "scattersort"},),
                    "proj_in": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Strength of effect on layer; skips extra calculation if set to 0.0. Skips interpolation if set to 1.0."}),
                    "proj_out": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Strength of effect on layer; skips extra calculation if set to 0.0. Skips interpolation if set to 1.0."}),
                    "tile_h" : ("INT", {"default": 128, "min": 16, "max": 10000, "step": 16, "tooltip": "Tile size for tiled modes. Lower values will transfer composition more effectively. Dimensions of image must be divisible by this value."}),
                    "tile_w" : ("INT", {"default": 128, "min": 16, "max": 10000, "step": 16, "tooltip": "Tile size for tiled modes. Lower values will transfer composition more effectively. Dimensions of image must be divisible by this value."}),
                    "invert_mask": ("BOOLEAN", {"default": False}),
                    },
                "optional":
                    {
                    "positive" : ("CONDITIONING", ),
                    "negative" : ("CONDITIONING", ),
                    "guide": ("LATENT", ),
                    "mask": ("MASK", ),
                    "blocks": ("BLOCKS", ),
                    "guides": ("GUIDES", ),
                    }
                }

    RETURN_TYPES = ("GUIDES",)
    RETURN_NAMES = ("guides",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_extensions"

    def main(self,
            mode        = "scattersort",
            proj_in     = 0.0,
            proj_out    = 0.0,
            tile_h      = 128,
            tile_w      = 128,
            invert_mask = False,
            positive    = None,
            negative    = None,
            guide       = None,
            mask        = None,
            blocks      = None,
            guides      = None,
            ):
        """Build/extend a style model from `blocks`, attach guide latent, mask,
        and conditioning, then merge its weights into any style model already
        present in `guides`. Returns (guides,)."""
        if guide is not None:
            # Prefer the sampler's raw state if present; otherwise the plain latent.
            raw_x  = guide.get('state_info', {}).get('raw_x', None)
            source = raw_x if raw_x is not None else guide['samples']
            guide  = {'samples': source.clone()}

        # Deep-copy both inputs so upstream dicts stay untouched.
        guides = {} if guides is None else copy.deepcopy(guides)
        blocks = {} if blocks is None else copy.deepcopy(blocks)

        style = blocks.get('StyleMMDiT')
        if style is None:
            style = StyleMMDiT_Model()

        style.set_mode(mode)
        # Pixel tile sizes -> latent tile sizes (MMDiT latents are 16x downscaled).
        style.set_weights(proj_in=proj_in, proj_out=proj_out, h_tile=tile_h // 16, w_tile=tile_w // 16)
        style.set_conditioning(positive, negative)
        style.mask   = [mask]
        style.guides = [guide]

        merged = guides.get('StyleMMDiT')
        if merged is not None:
            merged.merge_weights(style)
        else:
            merged = style

        guides['StyleMMDiT'] = merged
        return (guides, )
class ClownStyle_Block_MMDiT:
    """Node that sets per-block style-transfer weights on the MMDiT double/
    single stream blocks (attention and feed-forward sublayers)."""

    @classmethod
    def INPUT_TYPES(cls):
        return {"required":
                    {
                    "mode": (STYLE_MODES, {"default": "scattersort"},),
                    "apply_to": (["img", "img+txt","img,txt", "txt",], {"default": "img+txt"},),
                    "block_type": (["double", "double,single", "single"], {"default": "single"},),
                    "block_list": ("STRING", {"default": "all", "multiline": True}),
                    "block_weights": ("STRING", {"default": "1.0", "multiline": True}),
                    "attn_norm": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Strength of effect on layer; skips extra calculation if set to 0.0. Skips interpolation if set to 1.0."}),
                    "attn_norm_mod": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Strength of effect on layer; skips extra calculation if set to 0.0. Skips interpolation if set to 1.0."}),
                    "attn": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Strength of effect on layer; skips extra calculation if set to 0.0. Skips interpolation if set to 1.0."}),
                    "attn_gated": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Strength of effect on layer; skips extra calculation if set to 0.0. Skips interpolation if set to 1.0."}),
                    "attn_res": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Strength of effect on layer; skips extra calculation if set to 0.0. Skips interpolation if set to 1.0."}),
                    "ff_norm": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Strength of effect on layer; skips extra calculation if set to 0.0. Skips interpolation if set to 1.0."}),
                    "ff_norm_mod": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Strength of effect on layer; skips extra calculation if set to 0.0. Skips interpolation if set to 1.0."}),
                    "ff": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Strength of effect on layer; skips extra calculation if set to 0.0. Skips interpolation if set to 1.0."}),
                    "ff_gated": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Strength of effect on layer; skips extra calculation if set to 0.0. Skips interpolation if set to 1.0."}),
                    "ff_res": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Strength of effect on layer; skips extra calculation if set to 0.0. Skips interpolation if set to 1.0."}),
                    "tile_h": ("INT", {"default": 128, "min": 16, "max": 10000, "step": 16, "tooltip": "Tile size for tiled modes. Lower values will transfer composition more effectively. Dimensions of image must be divisible by this value."}),
                    "tile_w": ("INT", {"default": 128, "min": 16, "max": 10000, "step": 16, "tooltip": "Tile size for tiled modes. Lower values will transfer composition more effectively. Dimensions of image must be divisible by this value."}),
                    "invert_mask": ("BOOLEAN",{"default": False}),
                    },
                "optional":
                    {
                    "mask": ("MASK", ),
                    "blocks": ("BLOCKS", ),
                    }
                }

    RETURN_TYPES = ("BLOCKS",)
    RETURN_NAMES = ("blocks",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_extensions"

    def main(self,
            mode          = "scattersort",
            noise_mode    = "update",
            apply_to      = "joint",
            block_type    = "double",
            block_list    = "all",
            block_weights = "1.0",
            attn_norm     = 0.0,
            attn_norm_mod = 0.0,
            attn          = 0.0,
            attn_gated    = 0.0,
            attn_res      = 0.0,
            ff_norm       = 0.0,
            ff_norm_mod   = 0.0,
            ff            = 0.0,
            ff_gated      = 0.0,
            ff_res        = 0.0,
            tile_h        = 128,
            tile_w        = 128,
            invert_mask   = False,
            Attn          = None,
            MoE           = None,
            FF            = None,
            mask          = None,
            blocks        = None,
            ):
        """Parse `block_list`/`block_weights`, then apply mode and per-sublayer
        weights (scaled by each block's weight) to the selected double/single
        blocks. Returns (blocks,) with the updated StyleMMDiT model.
        """
        blocks = copy.deepcopy(blocks) if blocks is not None else {}

        # --- parse block weights: a single value is broadcast to all 100 slots ---
        block_weights = parse_range_string(block_weights)
        if len(block_weights) == 0:
            block_weights.append(0.0)
        if len(block_weights) == 1:
            block_weights = block_weights * 100
        if type(block_weights[0]) == int:
            block_weights = [float(val) for val in block_weights]

        # --- parse block list: keywords or an explicit range string ---
        if "all" in block_list:
            block_list = list(range(100))
        elif "even" in block_list:
            block_list = list(range(0, 100, 2))
        elif "odd" in block_list:
            block_list = list(range(1, 100, 2))
        else:
            block_list = parse_range_string_int(block_list)
            # Positional weights map onto the listed block ids; others get 0.0.
            weights_expanded = [0.0] * 100
            for b, w in zip(block_list, block_weights):
                weights_expanded[b] = w
            block_weights = weights_expanded

        # Guard: a short explicit weight list combined with a keyword block list
        # would otherwise IndexError at block_weights[bid]; pad with no-op 0.0.
        if len(block_weights) < 100:
            block_weights = block_weights + [0.0] * (100 - len(block_weights))

        StyleMMDiT = blocks.get('StyleMMDiT')
        if StyleMMDiT is None:
            StyleMMDiT = StyleMMDiT_Model()

        weights = {
            "attn_norm"    : attn_norm,
            "attn_norm_mod": attn_norm_mod,
            "attn"         : attn,
            "attn_gated"   : attn_gated,
            "attn_res"     : attn_res,
            "ff_norm"      : ff_norm,
            "ff_norm_mod"  : ff_norm_mod,
            "ff"           : ff,
            "ff_gated"     : ff_gated,
            "ff_res"       : ff_res,
            "h_tile"       : tile_h // 16,
            "w_tile"       : tile_w // 16,
        }

        block_types = block_type.split(",")
        for block_type in block_types:
            if block_type == "double":
                style_blocks = StyleMMDiT.double_blocks
            elif block_type == "single":
                style_blocks = StyleMMDiT.single_blocks

            for bid in block_list:
                block = style_blocks[bid]
                scaled_weights = {
                    k: (v * block_weights[bid]) if isinstance(v, float) else v
                    for k, v in weights.items()
                }
                if "img" in apply_to or block_type == "single":
                    block.img.set_mode(mode)
                    block.img.set_weights(**scaled_weights)
                    block.img.apply_to = [apply_to]
                if "txt" in apply_to and block_type == "double":
                    # Tiled modes are spatial; text tokens fall back to the untiled
                    # variant. Use a local so `mode` is not clobbered for later
                    # img applications (previously a tiled img mode degraded to
                    # untiled after the first txt block was processed).
                    txt_mode = "scattersort" if mode == "tiled_scattersort" else mode
                    txt_mode = "AdaIN" if txt_mode == "tiled_AdaIN" else txt_mode
                    block.txt.set_mode(txt_mode)
                    block.txt.set_weights(**scaled_weights)
                    block.txt.apply_to = [apply_to]
                block.img.apply_to = [apply_to]
                if hasattr(block, "txt"):
                    block.txt.apply_to = [apply_to]
                block.mask = [mask]

        blocks['StyleMMDiT'] = StyleMMDiT
        return (blocks, )
class ClownStyle_Attn_MMDiT:
    """Node that sets per-block style-transfer weights on the attention
    projections/norms (q/k/v, q_norm/k_norm, out) of MMDiT blocks."""

    @classmethod
    def INPUT_TYPES(cls):
        return {"required":
                    {
                    "mode": (STYLE_MODES, {"default": "scattersort"},),
                    "apply_to": (["img","img+txt","img,txt","txt"], {"default": "img+txt"},),
                    "block_type": (["double", "double,single", "single"], {"default": "single"},),
                    "block_list": ("STRING", {"default": "all", "multiline": True}),
                    "block_weights": ("STRING", {"default": "1.0", "multiline": True}),
                    "q_proj": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Strength of effect on layer; skips extra calculation if set to 0.0. Skips interpolation if set to 1.0."}),
                    "k_proj": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Strength of effect on layer; skips extra calculation if set to 0.0. Skips interpolation if set to 1.0."}),
                    "v_proj": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Strength of effect on layer; skips extra calculation if set to 0.0. Skips interpolation if set to 1.0."}),
                    "q_norm": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Strength of effect on layer; skips extra calculation if set to 0.0. Skips interpolation if set to 1.0."}),
                    "k_norm": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Strength of effect on layer; skips extra calculation if set to 0.0. Skips interpolation if set to 1.0."}),
                    "out": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Strength of effect on layer; skips extra calculation if set to 0.0. Skips interpolation if set to 1.0."}),
                    "tile_h": ("INT", {"default": 128, "min": 16, "max": 10000, "step": 16, "tooltip": "Tile size for tiled modes. Lower values will transfer composition more effectively. Dimensions of image must be divisible by this value."}),
                    "tile_w": ("INT", {"default": 128, "min": 16, "max": 10000, "step": 16, "tooltip": "Tile size for tiled modes. Lower values will transfer composition more effectively. Dimensions of image must be divisible by this value."}),
                    "invert_mask": ("BOOLEAN", {"default": False}),
                    },
                "optional":
                    {
                    "mask": ("MASK", ),
                    "blocks": ("BLOCKS", ),
                    }
                }

    RETURN_TYPES = ("BLOCKS",)
    RETURN_NAMES = ("blocks",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_extensions"

    def main(self,
            mode          = "scattersort",
            noise_mode    = "update",
            apply_to      = "joint",
            block_type    = "double",
            block_list    = "all",
            block_weights = "1.0",
            q_proj        = 0.0,
            k_proj        = 0.0,
            v_proj        = 0.0,
            q_norm        = 0.0,
            k_norm        = 0.0,
            out           = 0.0,
            tile_h        = 128,
            tile_w        = 128,
            invert_mask   = False,
            mask          = None,
            blocks        = None,
            ):
        """Parse `block_list`/`block_weights`, then apply mode and attention
        weights (scaled per block) to the ATTN sublayer of the selected
        double/single blocks. Returns (blocks,).
        """
        blocks = copy.deepcopy(blocks) if blocks is not None else {}

        # --- parse block weights: a single value is broadcast to all 100 slots ---
        block_weights = parse_range_string(block_weights)
        if len(block_weights) == 0:
            block_weights.append(0.0)
        if len(block_weights) == 1:
            block_weights = block_weights * 100
        if type(block_weights[0]) == int:
            block_weights = [float(val) for val in block_weights]

        # --- parse block list: keywords or an explicit range string ---
        if "all" in block_list:
            block_list = list(range(100))
        elif "even" in block_list:
            block_list = list(range(0, 100, 2))
        elif "odd" in block_list:
            block_list = list(range(1, 100, 2))
        else:
            block_list = parse_range_string_int(block_list)
            # Positional weights map onto the listed block ids; others get 0.0.
            weights_expanded = [0.0] * 100
            for b, w in zip(block_list, block_weights):
                weights_expanded[b] = w
            block_weights = weights_expanded

        # Guard: pad short explicit weight lists so block_weights[bid] can't
        # IndexError when combined with a keyword block list.
        if len(block_weights) < 100:
            block_weights = block_weights + [0.0] * (100 - len(block_weights))

        StyleMMDiT = blocks.get('StyleMMDiT')
        if StyleMMDiT is None:
            StyleMMDiT = StyleMMDiT_Model()

        weights = {
            "q_proj": q_proj,
            "k_proj": k_proj,
            "v_proj": v_proj,
            "q_norm": q_norm,
            "k_norm": k_norm,
            "out"   : out,
            "h_tile": tile_h // 16,
            "w_tile": tile_w // 16,
        }

        block_types = block_type.split(",")
        for block_type in block_types:
            if block_type == "double":
                style_blocks = StyleMMDiT.double_blocks
            elif block_type == "single":
                style_blocks = StyleMMDiT.single_blocks

            for bid in block_list:
                block = style_blocks[bid]
                scaled_weights = {
                    k: (v * block_weights[bid]) if isinstance(v, float) else v
                    for k, v in weights.items()
                }
                if "img" in apply_to or block_type == "single":
                    block.img.ATTN.set_mode(mode)
                    block.img.ATTN.set_weights(**scaled_weights)
                    block.img.ATTN.apply_to = [apply_to]
                if "txt" in apply_to and block_type == "double":
                    # Tiled modes are spatial; text tokens use the untiled variant.
                    # A local avoids clobbering `mode` for later img applications
                    # (previously tiled img modes degraded after the first txt block).
                    txt_mode = "scattersort" if mode == "tiled_scattersort" else mode
                    txt_mode = "AdaIN" if txt_mode == "tiled_AdaIN" else txt_mode
                    block.txt.ATTN.set_mode(txt_mode)
                    block.txt.ATTN.set_weights(**scaled_weights)
                    block.txt.ATTN.apply_to = [apply_to]
                block.img.ATTN.apply_to = [apply_to]
                if hasattr(block, "txt"):
                    block.txt.ATTN.apply_to = [apply_to]
                block.attn_mask = [mask]

        blocks['StyleMMDiT'] = StyleMMDiT
        return (blocks, )
class ClownStyle_UNet:
    """Node that configures model-level (proj_in/proj_out) style transfer for
    UNet models and merges the result into the `guides` dict."""

    @classmethod
    def INPUT_TYPES(cls):
        return {"required":
                    {
                    "mode": (STYLE_MODES, {"default": "scattersort"},),
                    "proj_in": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Strength of effect on layer; skips extra calculation if set to 0.0. Skips interpolation if set to 1.0."}),
                    "proj_out": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Strength of effect on layer; skips extra calculation if set to 0.0. Skips interpolation if set to 1.0."}),
                    "tile_h" : ("INT", {"default": 128, "min": 16, "max": 10000, "step": 16, "tooltip": "Tile size for tiled modes. Lower values will transfer composition more effectively. Dimensions of image must be divisible by this value."}),
                    "tile_w" : ("INT", {"default": 128, "min": 16, "max": 10000, "step": 16, "tooltip": "Tile size for tiled modes. Lower values will transfer composition more effectively. Dimensions of image must be divisible by this value."}),
                    "invert_mask": ("BOOLEAN", {"default": False}),
                    },
                "optional":
                    {
                    "positive" : ("CONDITIONING", ),
                    "negative" : ("CONDITIONING", ),
                    "guide": ("LATENT", ),
                    "mask": ("MASK", ),
                    "blocks": ("BLOCKS", ),
                    "guides": ("GUIDES", ),
                    }
                }

    RETURN_TYPES = ("GUIDES",)
    RETURN_NAMES = ("guides",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_extensions"

    def main(self,
            mode        = "scattersort",
            proj_in     = 0.0,
            proj_out    = 0.0,
            tile_h      = 128,
            tile_w      = 128,
            invert_mask = False,
            positive    = None,
            negative    = None,
            guide       = None,
            mask        = None,
            blocks      = None,
            guides      = None,
            ):
        """Build/extend a StyleUNet model from `blocks`, attach guide latent,
        mask, and conditioning, then merge into any style model already in
        `guides`. Returns (guides,). (Stored under the 'StyleMMDiT' key, which
        the sampler uses for both architectures.)"""
        if guide is not None:
            # Prefer the sampler's raw state if present; otherwise the plain latent.
            raw_x  = guide.get('state_info', {}).get('raw_x', None)
            source = raw_x if raw_x is not None else guide['samples']
            guide  = {'samples': source.clone()}

        # Deep-copy both inputs so upstream dicts stay untouched.
        guides = {} if guides is None else copy.deepcopy(guides)
        blocks = {} if blocks is None else copy.deepcopy(blocks)

        style = blocks.get('StyleMMDiT')
        if style is None:
            style = StyleUNet_Model()

        style.set_mode(mode)
        # Pixel tile sizes -> latent tile sizes (UNet latents are 8x downscaled).
        style.set_weights(proj_in=proj_in, proj_out=proj_out, h_tile=tile_h // 8, w_tile=tile_w // 8)
        style.set_conditioning(positive, negative)
        style.mask   = [mask]
        style.guides = [guide]

        merged = guides.get('StyleMMDiT')
        if merged is not None:
            merged.merge_weights(style)
        else:
            merged = style

        guides['StyleMMDiT'] = merged
        return (guides, )
# Selectable UNet stage groups for the per-block style nodes: each entry is a
# comma-separated combination of the three stages, split on "," at apply time.
UNET_BLOCK_TYPES = [
    "input",
    "middle",
    "output",
    "input,middle",
    "input,output",
    "middle,output",
    "input,middle,output",
]
class ClownStyle_Block_UNet:
    """Node that sets per-block style-transfer weights on UNet input/middle/
    output blocks (resample/res/spatial sublayers)."""

    @classmethod
    def INPUT_TYPES(cls):
        return {"required":
                    {
                    "mode": (STYLE_MODES, {"default": "scattersort"},),
                    "block_type": (UNET_BLOCK_TYPES, {"default": "input"},),
                    "block_list": ("STRING", {"default": "all", "multiline": True}),
                    "block_weights": ("STRING", {"default": "1.0", "multiline": True}),
                    "resample": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Strength of effect on layer; skips extra calculation if set to 0.0. Skips interpolation if set to 1.0."}),
                    "res": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Strength of effect on layer; skips extra calculation if set to 0.0. Skips interpolation if set to 1.0."}),
                    "spatial": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Strength of effect on layer; skips extra calculation if set to 0.0. Skips interpolation if set to 1.0."}),
                    "tile_h": ("INT", {"default": 128, "min": 16, "max": 10000, "step": 16, "tooltip": "Tile size for tiled modes. Lower values will transfer composition more effectively. Dimensions of image must be divisible by this value."}),
                    "tile_w": ("INT", {"default": 128, "min": 16, "max": 10000, "step": 16, "tooltip": "Tile size for tiled modes. Lower values will transfer composition more effectively. Dimensions of image must be divisible by this value."}),
                    "invert_mask": ("BOOLEAN",{"default": False}),
                    },
                "optional":
                    {
                    "mask": ("MASK", ),
                    "blocks": ("BLOCKS", ),
                    }
                }

    RETURN_TYPES = ("BLOCKS",)
    RETURN_NAMES = ("blocks",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_extensions"

    def main(self,
            mode          = "scattersort",
            noise_mode    = "update",
            apply_to      = "",
            block_type    = "input",
            block_list    = "all",
            block_weights = "1.0",
            resample      = 0.0,
            res           = 0.0,
            spatial       = 0.0,
            tile_h        = 128,
            tile_w        = 128,
            invert_mask   = False,
            mask          = None,
            blocks        = None,
            ):
        """Parse `block_list`/`block_weights`, then apply mode and sublayer
        weights (scaled per block) to the selected UNet stages. Returns (blocks,).
        """
        blocks = copy.deepcopy(blocks) if blocks is not None else {}

        # --- parse block weights: a single value is broadcast to all 100 slots ---
        block_weights = parse_range_string(block_weights)
        if len(block_weights) == 0:
            block_weights.append(0.0)
        if len(block_weights) == 1:
            block_weights = block_weights * 100
        if type(block_weights[0]) == int:
            block_weights = [float(val) for val in block_weights]

        # --- parse block list: keywords or an explicit range string ---
        if "all" in block_list:
            block_list = list(range(100))
        elif "even" in block_list:
            block_list = list(range(0, 100, 2))
        elif "odd" in block_list:
            block_list = list(range(1, 100, 2))
        else:
            block_list = parse_range_string_int(block_list)
            # Positional weights map onto the listed block ids; others get 0.0.
            weights_expanded = [0.0] * 100
            for b, w in zip(block_list, block_weights):
                weights_expanded[b] = w
            block_weights = weights_expanded

        # Guard: pad short explicit weight lists so block_weights[bid] can't
        # IndexError when combined with a keyword block list.
        if len(block_weights) < 100:
            block_weights = block_weights + [0.0] * (100 - len(block_weights))

        StyleMMDiT = blocks.get('StyleMMDiT')
        if StyleMMDiT is None:
            StyleMMDiT = StyleUNet_Model()

        weights = {
            "resample": resample,
            "res": res,
            "spatial": spatial,
            # NOTE(review): //16 differs from every other UNet node here (//8);
            # looks like a copy-paste from the MMDiT variant — confirm intended.
            "h_tile"  : tile_h // 16,
            "w_tile"  : tile_w // 16,
        }

        block_types = block_type.split(",")
        for block_type in block_types:
            if block_type == "input":
                style_blocks = StyleMMDiT.input_blocks
            elif block_type == "middle":
                style_blocks = StyleMMDiT.middle_blocks
            elif block_type == "output":
                style_blocks = StyleMMDiT.output_blocks

            for bid in block_list:
                block = style_blocks[bid]
                scaled_weights = {
                    k: (v * block_weights[bid]) if isinstance(v, float) else v
                    for k, v in weights.items()
                }
                block.set_mode(mode)
                block.set_weights(**scaled_weights)
                block.apply_to = [apply_to]
                block.mask     = [mask]

        blocks['StyleMMDiT'] = StyleMMDiT
        return (blocks, )
class ClownStyle_Attn_UNet:
    """Node that sets per-block style-transfer weights on the self/cross
    attention layers (ATTN1/ATTN2) of UNet transformer blocks."""

    @classmethod
    def INPUT_TYPES(cls):
        return {"required":
                    {
                    "mode": (STYLE_MODES, {"default": "scattersort"},),
                    "apply_to": (["self","self,cross","cross"], {"default": "self"},),
                    "block_type": (UNET_BLOCK_TYPES, {"default": "input"},),
                    "block_list": ("STRING", {"default": "all", "multiline": True}),
                    "block_weights": ("STRING", {"default": "1.0", "multiline": True}),
                    "q_proj": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Strength of effect on layer; skips extra calculation if set to 0.0. Skips interpolation if set to 1.0."}),
                    "k_proj": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Strength of effect on layer; skips extra calculation if set to 0.0. Skips interpolation if set to 1.0."}),
                    "v_proj": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Strength of effect on layer; skips extra calculation if set to 0.0. Skips interpolation if set to 1.0."}),
                    "out": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Strength of effect on layer; skips extra calculation if set to 0.0. Skips interpolation if set to 1.0."}),
                    "tile_h": ("INT", {"default": 128, "min": 16, "max": 10000, "step": 16, "tooltip": "Tile size for tiled modes. Lower values will transfer composition more effectively. Dimensions of image must be divisible by this value."}),
                    "tile_w": ("INT", {"default": 128, "min": 16, "max": 10000, "step": 16, "tooltip": "Tile size for tiled modes. Lower values will transfer composition more effectively. Dimensions of image must be divisible by this value."}),
                    "invert_mask": ("BOOLEAN", {"default": False}),
                    },
                "optional":
                    {
                    "mask": ("MASK", ),
                    "blocks": ("BLOCKS", ),
                    }
                }

    RETURN_TYPES = ("BLOCKS",)
    RETURN_NAMES = ("blocks",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_extensions"

    def main(self,
            mode          = "scattersort",
            noise_mode    = "update",
            apply_to      = "self",
            block_type    = "input",
            block_list    = "all",
            block_weights = "1.0",
            q_proj        = 0.0,
            k_proj        = 0.0,
            v_proj        = 0.0,
            out           = 0.0,
            tile_h        = 128,
            tile_w        = 128,
            invert_mask   = False,
            mask          = None,
            blocks        = None,
            ):
        """Parse `block_list`/`block_weights`, then apply mode and q/k/v/out
        weights (scaled per block) to ATTN1 (self) and/or ATTN2 (cross) of the
        selected UNet stages. Returns (blocks,).
        """
        blocks = copy.deepcopy(blocks) if blocks is not None else {}

        # --- parse block weights: a single value is broadcast to all 100 slots ---
        block_weights = parse_range_string(block_weights)
        if len(block_weights) == 0:
            block_weights.append(0.0)
        if len(block_weights) == 1:
            block_weights = block_weights * 100
        if type(block_weights[0]) == int:
            block_weights = [float(val) for val in block_weights]

        # --- parse block list: keywords or an explicit range string ---
        if "all" in block_list:
            block_list = list(range(100))
        elif "even" in block_list:
            block_list = list(range(0, 100, 2))
        elif "odd" in block_list:
            block_list = list(range(1, 100, 2))
        else:
            block_list = parse_range_string_int(block_list)
            # Positional weights map onto the listed block ids; others get 0.0.
            weights_expanded = [0.0] * 100
            for b, w in zip(block_list, block_weights):
                weights_expanded[b] = w
            block_weights = weights_expanded

        # Guard: pad short explicit weight lists so block_weights[bid] can't
        # IndexError when combined with a keyword block list.
        if len(block_weights) < 100:
            block_weights = block_weights + [0.0] * (100 - len(block_weights))

        StyleMMDiT = blocks.get('StyleMMDiT')
        if StyleMMDiT is None:
            StyleMMDiT = StyleUNet_Model()

        weights = {
            "q_proj": q_proj,
            "k_proj": k_proj,
            "v_proj": v_proj,
            "out"   : out,
            "h_tile": tile_h // 8,
            "w_tile": tile_w // 8,
        }

        block_types = block_type.split(",")
        for block_type in block_types:
            if block_type == "input":
                style_blocks = StyleMMDiT.input_blocks
            elif block_type == "middle":
                style_blocks = StyleMMDiT.middle_blocks
            elif block_type == "output":
                style_blocks = StyleMMDiT.output_blocks

            for bid in block_list:
                block = style_blocks[bid]
                scaled_weights = {
                    k: (v * block_weights[bid]) if isinstance(v, float) else v
                    for k, v in weights.items()
                }
                # TFMR is accessed as a single transformer block here
                # (an earlier iteration looped over it).
                tfmr_block = block.spatial_block.TFMR
                if "self" in apply_to:
                    tfmr_block.ATTN1.set_mode(mode)
                    tfmr_block.ATTN1.set_weights(**scaled_weights)
                    tfmr_block.ATTN1.apply_to = [apply_to]
                if "cross" in apply_to:
                    tfmr_block.ATTN2.set_mode(mode)
                    tfmr_block.ATTN2.set_weights(**scaled_weights)
                    tfmr_block.ATTN2.apply_to = [apply_to]
                block.attn_mask = [mask]

        blocks['StyleMMDiT'] = StyleMMDiT
        return (blocks, )
class ClownStyle_ResBlock_UNet:
    """Node that sets per-block style-transfer weights on the residual blocks
    (norm/silu/conv/embedding sublayers) of UNet stages."""

    @classmethod
    def INPUT_TYPES(cls):
        return {"required":
                    {
                    "mode": (STYLE_MODES, {"default": "scattersort"},),
                    "block_type": (UNET_BLOCK_TYPES, {"default": "input"},),
                    "block_list": ("STRING", {"default": "all", "multiline": True}),
                    "block_weights": ("STRING", {"default": "1.0", "multiline": True}),
                    "in_norm": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Strength of effect on layer; skips extra calculation if set to 0.0. Skips interpolation if set to 1.0."}),
                    "in_silu": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Strength of effect on layer; skips extra calculation if set to 0.0. Skips interpolation if set to 1.0."}),
                    "in_conv": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Strength of effect on layer; skips extra calculation if set to 0.0. Skips interpolation if set to 1.0."}),
                    "emb_silu": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Strength of effect on layer; skips extra calculation if set to 0.0. Skips interpolation if set to 1.0."}),
                    "emb_linear": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Strength of effect on layer; skips extra calculation if set to 0.0. Skips interpolation if set to 1.0."}),
                    "emb_res": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Strength of effect on layer; skips extra calculation if set to 0.0. Skips interpolation if set to 1.0."}),
                    "out_norm": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Strength of effect on layer; skips extra calculation if set to 0.0. Skips interpolation if set to 1.0."}),
                    "out_silu": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Strength of effect on layer; skips extra calculation if set to 0.0. Skips interpolation if set to 1.0."}),
                    "out_conv": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Strength of effect on layer; skips extra calculation if set to 0.0. Skips interpolation if set to 1.0."}),
                    "residual": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Strength of effect on layer; skips extra calculation if set to 0.0. Skips interpolation if set to 1.0."}),
                    "tile_h": ("INT", {"default": 128, "min": 16, "max": 10000, "step": 16, "tooltip": "Tile size for tiled modes. Lower values will transfer composition more effectively. Dimensions of image must be divisible by this value."}),
                    "tile_w": ("INT", {"default": 128, "min": 16, "max": 10000, "step": 16, "tooltip": "Tile size for tiled modes. Lower values will transfer composition more effectively. Dimensions of image must be divisible by this value."}),
                    "invert_mask": ("BOOLEAN",{"default": False}),
                    },
                "optional":
                    {
                    "mask": ("MASK", ),
                    "blocks": ("BLOCKS", ),
                    }
                }

    RETURN_TYPES = ("BLOCKS",)
    RETURN_NAMES = ("blocks",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_extensions"

    def main(self,
            mode          = "scattersort",
            noise_mode    = "update",
            apply_to      = "",
            block_type    = "input",
            block_list    = "all",
            block_weights = "1.0",
            in_norm       = 0.0,
            in_silu       = 0.0,
            in_conv       = 0.0,
            emb_silu      = 0.0,
            emb_linear    = 0.0,
            emb_res       = 0.0,
            out_norm      = 0.0,
            out_silu      = 0.0,
            out_conv      = 0.0,
            residual      = 0.0,
            tile_h        = 128,
            tile_w        = 128,
            invert_mask   = False,
            mask          = None,
            blocks        = None,
            ):
        """Parse `block_list`/`block_weights`, then apply mode and res-block
        sublayer weights (scaled per block) to the selected UNet stages.
        Returns (blocks,).
        """
        blocks = copy.deepcopy(blocks) if blocks is not None else {}

        # --- parse block weights: a single value is broadcast to all 100 slots ---
        block_weights = parse_range_string(block_weights)
        if len(block_weights) == 0:
            block_weights.append(0.0)
        if len(block_weights) == 1:
            block_weights = block_weights * 100
        if type(block_weights[0]) == int:
            block_weights = [float(val) for val in block_weights]

        # --- parse block list: keywords or an explicit range string ---
        if "all" in block_list:
            block_list = list(range(100))
        elif "even" in block_list:
            block_list = list(range(0, 100, 2))
        elif "odd" in block_list:
            block_list = list(range(1, 100, 2))
        else:
            block_list = parse_range_string_int(block_list)
            # Positional weights map onto the listed block ids; others get 0.0.
            weights_expanded = [0.0] * 100
            for b, w in zip(block_list, block_weights):
                weights_expanded[b] = w
            block_weights = weights_expanded

        # Guard: pad short explicit weight lists so block_weights[bid] can't
        # IndexError when combined with a keyword block list.
        if len(block_weights) < 100:
            block_weights = block_weights + [0.0] * (100 - len(block_weights))

        StyleMMDiT = blocks.get('StyleMMDiT')
        if StyleMMDiT is None:
            StyleMMDiT = StyleUNet_Model()

        weights = {
            "in_norm": in_norm,
            "in_silu": in_silu,
            "in_conv": in_conv,
            "emb_silu": emb_silu,
            "emb_linear": emb_linear,
            "emb_res": emb_res,
            "out_norm": out_norm,
            "out_silu": out_silu,
            "out_conv": out_conv,
            "residual": residual,
            "h_tile": tile_h // 8,
            "w_tile": tile_w // 8,
        }

        block_types = block_type.split(",")
        for block_type in block_types:
            if block_type == "input":
                style_blocks = StyleMMDiT.input_blocks
            elif block_type == "middle":
                style_blocks = StyleMMDiT.middle_blocks
            elif block_type == "output":
                style_blocks = StyleMMDiT.output_blocks

            for bid in block_list:
                block = style_blocks[bid]
                scaled_weights = {
                    k: (v * block_weights[bid]) if isinstance(v, float) else v
                    for k, v in weights.items()
                }
                block.res_block.set_mode(mode)
                block.res_block.set_weights(**scaled_weights)
                block.res_block.apply_to = [apply_to]
                # NOTE(review): mask lands on res_block.mask here, while sibling
                # nodes set block.mask / block.attn_mask — confirm the sampler
                # reads this attribute.
                block.res_block.mask = [mask]

        blocks['StyleMMDiT'] = StyleMMDiT
        return (blocks, )
class ClownStyle_SpatialBlock_UNet:
    """ComfyUI node: configure style-transfer weights on the UNet spatial blocks.

    Accumulates its settings into a BLOCKS dict (creating a StyleUNet_Model
    under the 'StyleMMDiT' key when absent) so several style nodes can chain.
    """

    @classmethod
    def INPUT_TYPES(cls):
        return {"required":
                    {
                    "mode":              (STYLE_MODES, {"default": "scattersort"},),
                    #"apply_to":         (["img", "img+txt","img,txt", "txt",], {"default": "img+txt"},),
                    "block_type":        (UNET_BLOCK_TYPES, {"default": "input"},),
                    "block_list":        ("STRING", {"default": "all", "multiline": True}),
                    "block_weights":     ("STRING", {"default": "1.0", "multiline": True}),
                    "norm_in":           ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Strength of effect on layer; skips extra calculation if set to 0.0. Skips interpolation if set to 1.0."}),
                    "proj_in":           ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Strength of effect on layer; skips extra calculation if set to 0.0. Skips interpolation if set to 1.0."}),
                    "transformer_block": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Strength of effect on layer; skips extra calculation if set to 0.0. Skips interpolation if set to 1.0."}),
                    "transformer":       ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Strength of effect on layer; skips extra calculation if set to 0.0. Skips interpolation if set to 1.0."}),
                    "proj_out":          ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Strength of effect on layer; skips extra calculation if set to 0.0. Skips interpolation if set to 1.0."}),
                    "res":               ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Strength of effect on layer; skips extra calculation if set to 0.0. Skips interpolation if set to 1.0."}),
                    "tile_h":            ("INT", {"default": 128, "min": 16, "max": 10000, "step": 16, "tooltip": "Tile size for tiled modes. Lower values will transfer composition more effectively. Dimensions of image must be divisible by this value."}),
                    "tile_w":            ("INT", {"default": 128, "min": 16, "max": 10000, "step": 16, "tooltip": "Tile size for tiled modes. Lower values will transfer composition more effectively. Dimensions of image must be divisible by this value."}),
                    "invert_mask":       ("BOOLEAN",{"default": False}),
                    },
                "optional":
                    {
                    "mask":   ("MASK", ),
                    "blocks": ("BLOCKS", ),
                    }
                }

    RETURN_TYPES = ("BLOCKS",)
    RETURN_NAMES = ("blocks",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_extensions"

    def main(self,
             mode          = "scattersort",
             noise_mode    = "update",
             apply_to      = "",
             block_type    = "input",
             block_list    = "all",
             block_weights = "1.0",
             norm_in           = 0.0,
             proj_in           = 0.0,
             transformer_block = 0.0,
             transformer       = 0.0,
             proj_out          = 0.0,
             res               = 0.0,
             tile_h        = 128,
             tile_w        = 128,
             invert_mask   = False,
             mask          = None,
             blocks        = None,
             ):
        """Set spatial-block style weights on the shared StyleUNet_Model.

        Returns a one-tuple with a deep copy of the incoming BLOCKS dict.
        """
        # Never mutate the caller's BLOCKS dict.
        blocks = copy.deepcopy(blocks) if blocks is not None else {}

        # Fix: invert_mask was previously accepted but silently ignored.
        if invert_mask and mask is not None:
            mask = 1 - mask

        block_weights = parse_range_string(block_weights)
        if len(block_weights) == 0:
            block_weights.append(0.0)
        if len(block_weights) == 1:
            # A single weight applies uniformly to every possible block id.
            block_weights = block_weights * 100
        if type(block_weights[0]) == int:
            block_weights = [float(val) for val in block_weights]

        if "all" in block_list:
            block_list = list(range(100))
        elif "even" in block_list:
            block_list = list(range(0, 100, 2))
        elif "odd" in block_list:
            block_list = list(range(1, 100, 2))
        else:
            block_list = parse_range_string_int(block_list)
            # Scatter the listed weights onto their block ids; unlisted ids get 0.0.
            weights_expanded = [0.0] * 100
            for b, w in zip(block_list, block_weights):
                weights_expanded[b] = w
            block_weights = weights_expanded

        StyleMMDiT = blocks.get('StyleMMDiT')
        if StyleMMDiT is None:
            StyleMMDiT = StyleUNet_Model()

        weights = {
            "spatial_norm_in"          : norm_in,
            "spatial_proj_in"          : proj_in,
            "spatial_transformer_block": transformer_block,
            "spatial_transformer"      : transformer,
            "spatial_proj_out"         : proj_out,
            "spatial_res"              : res,
            # Tile sizes are given in pixels; latent space is 8x smaller.
            "h_tile": tile_h // 8,
            "w_tile": tile_w // 8,
        }

        for btype in block_type.split(","):
            btype = btype.strip()   # fix: "input, middle" used to leave a stray leading space
            if btype == "input":
                style_blocks = StyleMMDiT.input_blocks
            elif btype == "middle":
                style_blocks = StyleMMDiT.middle_blocks
            elif btype == "output":
                style_blocks = StyleMMDiT.output_blocks
            else:
                # fix: unknown entries previously reused a stale (or unbound) style_blocks
                continue
            for bid in block_list:
                block = style_blocks[bid]
                # Scale only the float layer weights; the int tile sizes pass through.
                scaled_weights = {
                    k: (v * block_weights[bid]) if isinstance(v, float) else v
                    for k, v in weights.items()
                }
                block.spatial_block.set_mode(mode)
                block.spatial_block.set_weights(**scaled_weights)
                block.spatial_block.apply_to = [apply_to]
                block.spatial_block.mask = [mask]

        blocks['StyleMMDiT'] = StyleMMDiT
        return (blocks, )
class ClownStyle_TransformerBlock_UNet:
    """ComfyUI node: configure style-transfer weights on the UNet transformer
    sub-blocks (the TFMR inside each spatial block).

    Accumulates its settings into a BLOCKS dict (creating a StyleUNet_Model
    under the 'StyleMMDiT' key when absent) so several style nodes can chain.
    """

    @classmethod
    def INPUT_TYPES(cls):
        return {"required":
                    {
                    "mode":           (STYLE_MODES, {"default": "scattersort"},),
                    #"apply_to":      (["img", "img+txt","img,txt", "txt",], {"default": "img+txt"},),
                    "block_type":     (UNET_BLOCK_TYPES, {"default": "input"},),
                    "block_list":     ("STRING", {"default": "all", "multiline": True}),
                    "block_weights":  ("STRING", {"default": "1.0", "multiline": True}),
                    "norm1":          ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Strength of effect on layer; skips extra calculation if set to 0.0. Skips interpolation if set to 1.0."}),
                    "norm2":          ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Strength of effect on layer; skips extra calculation if set to 0.0. Skips interpolation if set to 1.0."}),
                    "norm3":          ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Strength of effect on layer; skips extra calculation if set to 0.0. Skips interpolation if set to 1.0."}),
                    "self_attn":      ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Strength of effect on layer; skips extra calculation if set to 0.0. Skips interpolation if set to 1.0."}),
                    "cross_attn":     ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Strength of effect on layer; skips extra calculation if set to 0.0. Skips interpolation if set to 1.0."}),
                    "ff":             ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Strength of effect on layer; skips extra calculation if set to 0.0. Skips interpolation if set to 1.0."}),
                    "self_attn_res":  ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Strength of effect on layer; skips extra calculation if set to 0.0. Skips interpolation if set to 1.0."}),
                    "cross_attn_res": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Strength of effect on layer; skips extra calculation if set to 0.0. Skips interpolation if set to 1.0."}),
                    "ff_res":         ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Strength of effect on layer; skips extra calculation if set to 0.0. Skips interpolation if set to 1.0."}),
                    "tile_h":         ("INT", {"default": 128, "min": 16, "max": 10000, "step": 16, "tooltip": "Tile size for tiled modes. Lower values will transfer composition more effectively. Dimensions of image must be divisible by this value."}),
                    "tile_w":         ("INT", {"default": 128, "min": 16, "max": 10000, "step": 16, "tooltip": "Tile size for tiled modes. Lower values will transfer composition more effectively. Dimensions of image must be divisible by this value."}),
                    "invert_mask":    ("BOOLEAN",{"default": False}),
                    },
                "optional":
                    {
                    "mask":   ("MASK", ),
                    "blocks": ("BLOCKS", ),
                    }
                }

    RETURN_TYPES = ("BLOCKS",)
    RETURN_NAMES = ("blocks",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_extensions"

    def main(self,
             mode          = "scattersort",
             noise_mode    = "update",
             apply_to      = "",
             block_type    = "input",
             block_list    = "all",
             block_weights = "1.0",
             norm1          = 0.0,
             norm2          = 0.0,
             norm3          = 0.0,
             self_attn      = 0.0,
             cross_attn     = 0.0,
             ff             = 0.0,
             self_attn_res  = 0.0,
             cross_attn_res = 0.0,
             ff_res         = 0.0,
             tile_h        = 128,
             tile_w        = 128,
             invert_mask   = False,
             mask          = None,
             blocks        = None,
             ):
        """Set transformer-sublayer style weights on the shared StyleUNet_Model.

        Returns a one-tuple with a deep copy of the incoming BLOCKS dict.
        """
        # Never mutate the caller's BLOCKS dict.
        blocks = copy.deepcopy(blocks) if blocks is not None else {}

        # Fix: invert_mask was previously accepted but silently ignored.
        if invert_mask and mask is not None:
            mask = 1 - mask

        block_weights = parse_range_string(block_weights)
        if len(block_weights) == 0:
            block_weights.append(0.0)
        if len(block_weights) == 1:
            # A single weight applies uniformly to every possible block id.
            block_weights = block_weights * 100
        if type(block_weights[0]) == int:
            block_weights = [float(val) for val in block_weights]

        if "all" in block_list:
            block_list = list(range(100))
        elif "even" in block_list:
            block_list = list(range(0, 100, 2))
        elif "odd" in block_list:
            block_list = list(range(1, 100, 2))
        else:
            block_list = parse_range_string_int(block_list)
            # Scatter the listed weights onto their block ids; unlisted ids get 0.0.
            weights_expanded = [0.0] * 100
            for b, w in zip(block_list, block_weights):
                weights_expanded[b] = w
            block_weights = weights_expanded

        StyleMMDiT = blocks.get('StyleMMDiT')
        if StyleMMDiT is None:
            StyleMMDiT = StyleUNet_Model()

        weights = {
            "norm1"         : norm1,
            "norm2"         : norm2,
            "norm3"         : norm3,
            "self_attn"     : self_attn,
            "cross_attn"    : cross_attn,
            "ff"            : ff,
            "self_attn_res" : self_attn_res,
            "cross_attn_res": cross_attn_res,
            "ff_res"        : ff_res,
            # Tile sizes are given in pixels; latent space is 8x smaller.
            "h_tile": tile_h // 8,
            "w_tile": tile_w // 8,
        }

        for btype in block_type.split(","):
            btype = btype.strip()   # fix: "input, middle" used to leave a stray leading space
            if btype == "input":
                style_blocks = StyleMMDiT.input_blocks
            elif btype == "middle":
                style_blocks = StyleMMDiT.middle_blocks
            elif btype == "output":
                style_blocks = StyleMMDiT.output_blocks
            else:
                # fix: unknown entries previously reused a stale (or unbound) style_blocks
                continue
            for bid in block_list:
                block = style_blocks[bid]
                # Scale only the float layer weights; the int tile sizes pass through.
                scaled_weights = {
                    k: (v * block_weights[bid]) if isinstance(v, float) else v
                    for k, v in weights.items()
                }
                block.spatial_block.TFMR.set_mode(mode)
                block.spatial_block.TFMR.set_weights(**scaled_weights)
                block.spatial_block.TFMR.apply_to = [apply_to]
                block.spatial_block.TFMR.mask = [mask]

        blocks['StyleMMDiT'] = StyleMMDiT
        return (blocks, )
================================================
FILE: chroma/layers.py
================================================
import torch
from torch import Tensor, nn
#from comfy.ldm.flux.math import attention
from comfy.ldm.flux.layers import (
MLPEmbedder,
RMSNorm,
QKNorm,
SelfAttention,
ModulationOut,
)
from .math import attention, rope, apply_rope
class ChromaModulationOut(ModulationOut):
    """ModulationOut built by slicing three consecutive rows out of a packed tensor."""

    @classmethod
    def from_offset(cls, tensor: torch.Tensor, offset: int = 0) -> ModulationOut:
        # Rows offset, offset+1, offset+2 along dim 1 hold shift, scale, gate.
        shift, scale, gate = (
            tensor[:, i : i + 1, :] for i in (offset, offset + 1, offset + 2)
        )
        return cls(shift=shift, scale=scale, gate=gate)
class Approximator(nn.Module):
    """Residual MLP stack mapping a conditioning embedding to modulation vectors.

    Structure: input projection, then n_layers of pre-norm residual MLPEmbedder
    blocks, then an output projection.
    """

    def __init__(self, in_dim: int, out_dim: int, hidden_dim: int, n_layers = 5, dtype=None, device=None, operations=None):
        super().__init__()
        self.in_proj = operations.Linear(in_dim, hidden_dim, bias=True, dtype=dtype, device=device)
        self.layers = nn.ModuleList(
            MLPEmbedder(hidden_dim, hidden_dim, dtype=dtype, device=device, operations=operations)
            for _ in range(n_layers)
        )
        self.norms = nn.ModuleList(
            RMSNorm(hidden_dim, dtype=dtype, device=device, operations=operations)
            for _ in range(n_layers)
        )
        self.out_proj = operations.Linear(hidden_dim, out_dim, dtype=dtype, device=device)

    @property
    def device(self):
        # All parameters are assumed to share one device; report the first one's.
        return next(self.parameters()).device

    def forward(self, x: Tensor) -> Tensor:
        h = self.in_proj(x)
        # Pre-norm residual blocks: h <- h + MLP(RMSNorm(h)).
        for mlp, norm in zip(self.layers, self.norms):
            h = h + mlp(norm(h))
        return self.out_proj(h)
class ReChromaDoubleStreamBlock(nn.Module):
    """Flux-style double-stream block: the img and txt streams have separate
    attention/MLP parameters but attend jointly over the concatenated
    (txt, img) token sequence.

    Unlike the stock Flux block, per-sublayer modulation (shift/scale/gate)
    arrives precomputed through the ``vec`` argument instead of being derived
    inside the block (Chroma distills it externally).
    """

    def __init__(self, hidden_size: int, num_heads: int, mlp_ratio: float, qkv_bias: bool = False, flipped_img_txt=False, dtype=None, device=None, operations=None):
        super().__init__()

        mlp_hidden_dim = int(hidden_size * mlp_ratio)
        self.num_heads = num_heads
        self.hidden_size = hidden_size
        # Image stream: pre-norm -> self-attention, pre-norm -> GELU(tanh) MLP.
        self.img_norm1 = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device)
        self.img_attn = SelfAttention(dim=hidden_size, num_heads=num_heads, qkv_bias=qkv_bias, dtype=dtype, device=device, operations=operations)

        self.img_norm2 = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device)
        self.img_mlp = nn.Sequential(
            operations.Linear(hidden_size, mlp_hidden_dim, bias=True, dtype=dtype, device=device),
            nn.GELU(approximate="tanh"),
            operations.Linear(mlp_hidden_dim, hidden_size, bias=True, dtype=dtype, device=device),
        )

        # Text stream: same layout as the image stream, separate parameters.
        self.txt_norm1 = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device)
        self.txt_attn = SelfAttention(dim=hidden_size, num_heads=num_heads, qkv_bias=qkv_bias, dtype=dtype, device=device, operations=operations)

        self.txt_norm2 = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device)
        self.txt_mlp = nn.Sequential(
            operations.Linear(hidden_size, mlp_hidden_dim, bias=True, dtype=dtype, device=device),
            nn.GELU(approximate="tanh"),
            operations.Linear(mlp_hidden_dim, hidden_size, bias=True, dtype=dtype, device=device),
        )
        # Stored but not used inside this block (read elsewhere — TODO confirm).
        self.flipped_img_txt = flipped_img_txt

    def forward(self, img: Tensor, txt: Tensor, pe: Tensor, vec: Tensor, attn_mask=None):
        # vec is a pair of pairs: ((img_mod1, img_mod2), (txt_mod1, txt_mod2)),
        # each a ModulationOut with .shift/.scale/.gate.
        (img_mod1, img_mod2), (txt_mod1, txt_mod2) = vec

        # prepare image for attention: modulated pre-norm, then qkv projection
        img_modulated = self.img_norm1(img)
        img_modulated = (1 + img_mod1.scale) * img_modulated + img_mod1.shift
        img_qkv = self.img_attn.qkv(img_modulated)
        # split qkv -> (3, B, heads, seq, head_dim)
        img_q, img_k, img_v = img_qkv.view(img_qkv.shape[0], img_qkv.shape[1], 3, self.num_heads, -1).permute(2, 0, 3, 1, 4)
        img_q, img_k = self.img_attn.norm(img_q, img_k, img_v)

        # prepare txt for attention (same recipe as the image stream)
        txt_modulated = self.txt_norm1(txt)
        txt_modulated = (1 + txt_mod1.scale) * txt_modulated + txt_mod1.shift
        txt_qkv = self.txt_attn.qkv(txt_modulated)
        txt_q, txt_k, txt_v = txt_qkv.view(txt_qkv.shape[0], txt_qkv.shape[1], 3, self.num_heads, -1).permute(2, 0, 3, 1, 4)
        txt_q, txt_k = self.txt_attn.norm(txt_q, txt_k, txt_v)

        # run actual attention over the joint (txt, img) sequence
        attn = attention(torch.cat((txt_q, img_q), dim=2),
                         torch.cat((txt_k, img_k), dim=2),
                         torch.cat((txt_v, img_v), dim=2),
                         pe=pe, mask=attn_mask)

        # split the joint attention output back into txt and img parts
        txt_attn, img_attn = attn[:, : txt.shape[1]], attn[:, txt.shape[1] :]

        # calculate the img bloks: gated residual attention, then gated residual MLP
        img = img + img_mod1.gate * self.img_attn.proj(img_attn)
        img = img + img_mod2.gate * self.img_mlp((1 + img_mod2.scale) * self.img_norm2(img) + img_mod2.shift)

        # calculate the txt bloks (note: in-place += mutates the incoming txt tensor)
        txt += txt_mod1.gate * self.txt_attn.proj(txt_attn)
        txt += txt_mod2.gate * self.txt_mlp((1 + txt_mod2.scale) * self.txt_norm2(txt) + txt_mod2.shift)

        if txt.dtype == torch.float16:
            # clamp fp16 overflow (65504 is the fp16 max finite value)
            txt = torch.nan_to_num(txt, nan=0.0, posinf=65504, neginf=-65504)

        return img, txt
class ReChromaSingleStreamBlock(nn.Module):
    """
    A DiT block with parallel linear layers as described in
    https://arxiv.org/abs/2302.05442 and adapted modulation interface.

    One fused linear produces qkv and the MLP input together; a second fused
    linear consumes the concatenated attention output and MLP activation.
    Modulation arrives precomputed through ``vec`` (a single ModulationOut).
    """

    def __init__(
        self,
        hidden_size: int,
        num_heads: int,
        mlp_ratio: float = 4.0,
        qk_scale: float = None,
        dtype=None,
        device=None,
        operations=None
    ):
        super().__init__()
        self.hidden_dim = hidden_size
        self.num_heads = num_heads
        head_dim = hidden_size // num_heads
        # qk_scale defaults to 1/sqrt(head_dim); stored but not used in forward here.
        self.scale = qk_scale or head_dim**-0.5

        self.mlp_hidden_dim = int(hidden_size * mlp_ratio)
        # qkv and mlp_in (fused projection)
        self.linear1 = operations.Linear(hidden_size, hidden_size * 3 + self.mlp_hidden_dim, dtype=dtype, device=device)
        # proj and mlp_out (fused projection)
        self.linear2 = operations.Linear(hidden_size + self.mlp_hidden_dim, hidden_size, dtype=dtype, device=device)

        self.norm = QKNorm(head_dim, dtype=dtype, device=device, operations=operations)

        self.hidden_size = hidden_size
        self.pre_norm = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device)

        self.mlp_act = nn.GELU(approximate="tanh")

    def forward(self, x: Tensor, pe: Tensor, vec: Tensor, attn_mask=None) -> Tensor:
        mod = vec
        # Modulated pre-norm, then one fused projection split into qkv + mlp input.
        x_mod = (1 + mod.scale) * self.pre_norm(x) + mod.shift
        qkv, mlp = torch.split(self.linear1(x_mod), [3 * self.hidden_size, self.mlp_hidden_dim], dim=-1)

        # split qkv -> (3, B, heads, seq, head_dim)
        q, k, v = qkv.view(qkv.shape[0], qkv.shape[1], 3, self.num_heads, -1).permute(2, 0, 3, 1, 4)
        q, k = self.norm(q, k, v)

        # compute attention
        attn = attention(q, k, v, pe=pe, mask=attn_mask)
        # compute activation in mlp stream, cat again and run second linear layer
        output = self.linear2(torch.cat((attn, self.mlp_act(mlp)), 2))
        # gated residual; note in-place += mutates the incoming x tensor
        x += mod.gate * output
        if x.dtype == torch.float16:
            # clamp fp16 overflow (65504 is the fp16 max finite value)
            x = torch.nan_to_num(x, nan=0.0, posinf=65504, neginf=-65504)
        return x
class LastLayer(nn.Module):
def __init__(self, hidden_size: int, patch_size: int, out_channels: int, dtype=None, device=None, operations=None):
super().__init__()
self.norm_final = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device)
self.linear = operations.Linear(hidden_size, out_channels, bias=True, dtype=dtype, device=device)
def forward(self, x: Tensor, vec: Tensor) -> Tensor:
shift, scale = vec
shift = shift.squeeze(1)
scale = scale.squeeze(1)
x = (1 + scale[:, None, :]) * self.norm_final(x) + shift[:, None, :]
x = self.linear(x)
return x
================================================
FILE: chroma/math.py
================================================
import torch
from einops import rearrange
from torch import Tensor
from comfy.ldm.modules.attention import attention_pytorch
import comfy.model_management
def attention(q: Tensor, k: Tensor, v: Tensor, pe: Tensor, mask=None) -> Tensor:
    """Apply rotary embeddings to q/k, then run masked attention via the
    PyTorch SDPA backend (inputs are already in (B, heads, seq, head_dim))."""
    q, k = apply_rope(q, k, pe)
    n_heads = q.shape[1]
    return attention_pytorch(q, k, v, n_heads, skip_reshape=True, mask=mask)
def rope(pos: Tensor, dim: int, theta: int) -> Tensor:
    """Build per-position 2x2 rotation matrices for rotary embeddings.

    Returns a float32 tensor shaped (..., n, dim/2, 2, 2) on pos's device.
    """
    assert dim % 2 == 0
    # Some backends (MPS / Intel XPU / DirectML) lack float64 support, so the
    # frequency math is done on CPU there and moved back afterwards.
    if comfy.model_management.is_device_mps(pos.device) or comfy.model_management.is_intel_xpu() or comfy.model_management.is_directml_enabled():
        device = torch.device("cpu")
    else:
        device = pos.device

    scale = torch.linspace(0, (dim - 2) / dim, steps=dim // 2, dtype=torch.float64, device=device)
    omega = 1.0 / (theta ** scale)
    angles = torch.einsum("...n,d->...nd", pos.to(dtype=torch.float32, device=device), omega)
    cos = torch.cos(angles)
    sin = torch.sin(angles)
    # Row-major 2x2 rotation: [[cos, -sin], [sin, cos]].
    rot = torch.stack([cos, -sin, sin, cos], dim=-1)
    rot = rearrange(rot, "b n d (i j) -> b n d i j", i=2, j=2)
    return rot.to(dtype=torch.float32, device=pos.device)
def apply_rope(xq: Tensor, xk: Tensor, freqs_cis: Tensor):
xq_ = xq.float().reshape(*xq.shape[:-1], -1, 1, 2)
xk_ = xk.float().reshape(*xk.shape[:-1], -1, 1, 2)
xq_out = freqs_cis[..., 0] * xq_[..., 0] + freqs_cis[..., 1] * xq_[..., 1]
xk_out = freqs_cis[..., 0] * xk_[..., 0] + freqs_cis[..., 1] * xk_[..., 1]
return xq_out.reshape(*xq.shape).type_as(xq), xk_out.reshape(*xk.shape).type_as(xk)
================================================
FILE: chroma/model.py
================================================
#Original code can be found on: https://github.com/black-forest-labs/flux
from dataclasses import dataclass
import torch
import torch.nn.functional as F
from torch import Tensor, nn
from einops import rearrange, repeat
import comfy.ldm.common_dit
from ..helper import ExtraOptions
from ..latents import tile_latent, untile_latent, gaussian_blur_2d, median_blur_2d
from ..style_transfer import apply_scattersort_masked, apply_scattersort_tiled, adain_seq_inplace, adain_patchwise_row_batch_med, adain_patchwise_row_batch
from comfy.ldm.flux.layers import (
EmbedND,
timestep_embedding,
)
from .layers import (
ReChromaDoubleStreamBlock,
LastLayer,
ReChromaSingleStreamBlock,
Approximator,
ChromaModulationOut,
)
@dataclass
class ChromaParams:
    # Hyperparameters for the ReChroma transformer (consumed in ReChroma.__init__).
    in_channels        : int    # width of each image token fed to img_in
    out_channels       : int    # width produced by the final layer
    context_in_dim     : int    # width of the incoming text context (txt_in input)
    hidden_size        : int    # transformer width; must divide evenly by num_heads
    mlp_ratio          : float  # MLP hidden width as a multiple of hidden_size
    num_heads          : int    # attention head count
    depth              : int    # number of double-stream blocks
    depth_single_blocks: int    # number of single-stream blocks
    axes_dim           : list   # per-axis rotary dims; must sum to hidden_size // num_heads
    theta              : int    # rotary frequency base passed to EmbedND
    patch_size         : int    # spatial patch size (stored on the model for patchify logic)
    qkv_bias           : bool   # whether attention qkv projections carry a bias
    in_dim             : int    # Approximator (distilled_guidance_layer) input width
    out_dim            : int    # Approximator output width
    hidden_dim         : int    # Approximator hidden width
    n_layers           : int    # Approximator residual-layer count
class ReChroma(nn.Module):
"""
Transformer model for flow matching on sequences.
"""
def __init__(self, image_model=None, final_layer=True, dtype=None, device=None, operations=None, **kwargs):
    """Build the Chroma transformer from ChromaParams given as keyword args.

    Args:
        image_model: accepted for loader compatibility; not used in this constructor.
        final_layer: when False, no LastLayer head is created.
        dtype, device, operations: forwarded to every submodule so the
            `operations` factory controls how weights are created/offloaded.
        **kwargs: the ChromaParams fields.

    Raises:
        ValueError: if hidden_size is not divisible by num_heads, or if
            sum(axes_dim) does not equal the per-head dimension.
    """
    super().__init__()
    self.dtype = dtype
    params = ChromaParams(**kwargs)
    self.params = params
    self.patch_size = params.patch_size
    self.in_channels = params.in_channels
    self.out_channels = params.out_channels
    # Validate that the width splits evenly across heads.
    if params.hidden_size % params.num_heads != 0:
        raise ValueError(
            f"Hidden size {params.hidden_size} must be divisible by num_heads {params.num_heads}"
        )
    pe_dim = params.hidden_size // params.num_heads
    # The rotary axes must exactly fill the per-head dimension.
    if sum(params.axes_dim) != pe_dim:
        raise ValueError(f"Got {params.axes_dim} but expected positional dim {pe_dim}")
    self.hidden_size = params.hidden_size
    self.num_heads = params.num_heads
    self.in_dim = params.in_dim
    self.out_dim = params.out_dim
    self.hidden_dim = params.hidden_dim
    self.n_layers = params.n_layers
    # Positional embedder producing the rotary matrices consumed by attention.
    self.pe_embedder = EmbedND(dim=pe_dim, theta=params.theta, axes_dim=params.axes_dim)
    # Token input projections for the image and text streams.
    self.img_in = operations.Linear(self.in_channels, self.hidden_size, bias=True, dtype=dtype, device=device)
    self.txt_in = operations.Linear(params.context_in_dim, self.hidden_size, dtype=dtype, device=device)

    # set as nn identity for now, will overwrite it later.
    # (Approximator distills timestep/guidance into per-block modulation vectors.)
    self.distilled_guidance_layer = Approximator(
                in_dim=self.in_dim,
                hidden_dim=self.hidden_dim,
                out_dim=self.out_dim,
                n_layers=self.n_layers,
                dtype=dtype, device=device, operations=operations
            )

    # Double-stream (img/txt) blocks followed by single-stream blocks.
    self.double_blocks = nn.ModuleList(
        [
            ReChromaDoubleStreamBlock(
                self.hidden_size,
                self.num_heads,
                mlp_ratio=params.mlp_ratio,
                qkv_bias=params.qkv_bias,
                dtype=dtype, device=device, operations=operations
            )
            for _ in range(params.depth)
        ]
    )

    self.single_blocks = nn.ModuleList(
        [
            ReChromaSingleStreamBlock(self.hidden_size, self.num_heads, mlp_ratio=params.mlp_ratio, dtype=dtype, device=device, operations=operations)
            for _ in range(params.depth_single_blocks)
        ]
    )

    if final_layer:
        self.final_layer = LastLayer(self.hidden_size, 1, self.out_channels, dtype=dtype, device=device, operations=operations)

    # Block indices to skip at inference time (populated externally).
    self.skip_mmdit = []
    self.skip_dit = []
    self.lite = False
def get_modulations(self, tensor: torch.Tensor, block_type: str, *, idx: int = 0):
    """Slice one block's modulation vectors out of the packed tensor.

    Packed layout along dim 1:
        single     : depth_single_blocks * 3 rows
        double_img : depth * 6 rows
        double_txt : depth * 6 rows
        final      : 2 rows

    Returns a ChromaModulationOut (single), a pair of them (double_*),
    or a (shift, scale) tensor pair (final).
    """
    if block_type == "final":
        # The final layer uses a plain (shift, scale) pair — no gate.
        return (tensor[:, -2:-1, :], tensor[:, -1:, :])

    if block_type == "single":
        return ChromaModulationOut.from_offset(tensor, 3 * idx)

    if block_type in {"double_img", "double_txt"}:
        # Skip past all single-block rows; each double block owns 6 rows
        # (two modulation triples).
        base = 3 * self.params.depth_single_blocks + 6 * idx
        if block_type == "double_txt":
            # txt modulations are stored after all img modulations.
            base += 6 * self.params.depth
        return (
            ChromaModulationOut.from_offset(tensor, base),
            ChromaModulationOut.from_offset(tensor, base + 3),
        )

    raise ValueError("Bad block_type")
def forward_blocks(
                    self,
                    img       : Tensor,
                    img_ids   : Tensor,
                    txt       : Tensor,
                    txt_ids   : Tensor,
                    timesteps : Tensor,
                    guidance  : Tensor = None,
                    control             = None,
                    update_cross_attn   = None,
                    transformer_options ={},
                    attn_mask : Tensor  = None,
                    UNCOND    : bool    = False,
                ) -> Tensor:
    """Run the Chroma stack: double-stream blocks, then single-stream blocks.

    Builds the distilled modulation vectors from timestep/guidance embeddings,
    applies optional regional attention masks taken from transformer_options,
    and supports ControlNet residuals and patched ("dit") block replacement.

    Returns the final-layer projection of the image tokens,
    shape (N, T, patch_size ** 2 * out_channels).
    """
    patches_replace = transformer_options.get("patches_replace", {})

    if img.ndim != 3 or txt.ndim != 3:
        raise ValueError("Input img and txt tensors must have 3 dimensions.")

    # running on sequences img
    img = self.img_in(img)

    # Distilled vector guidance: 344 modulation rows, each conditioned on the
    # (timestep embedding, guidance embedding, row-index embedding) triple.
    mod_index_length = 344
    distill_timestep = timestep_embedding(timesteps.detach().clone(), 16).to(img.device, img.dtype)
    distil_guidance  = timestep_embedding(guidance.detach().clone(),  16).to(img.device, img.dtype)

    # get all modulation index
    modulation_index = timestep_embedding(torch.arange(mod_index_length), 32).to(img.device, img.dtype)
    # broadcast the modulation index so each batch has all of the indices
    modulation_index = modulation_index.unsqueeze(0).repeat(img.shape[0], 1, 1).to(img.device, img.dtype)
    # and broadcast timestep/guidance along the modulation rows too
    timestep_guidance = torch.cat([distill_timestep, distil_guidance], dim=1).unsqueeze(1).repeat(1, mod_index_length, 1).to(img.dtype).to(img.device, img.dtype)
    input_vec   = torch.cat([timestep_guidance, modulation_index], dim=-1).to(img.device, img.dtype)
    mod_vectors = self.distilled_guidance_layer(input_vec)

    txt = self.txt_in(txt)

    ids = torch.cat((txt_ids, img_ids), dim=1)
    pe  = self.pe_embedder(ids)

    # Regional conditioning: weight/floor control how deep into the stack the
    # regional mask is applied (sign selects early- vs late-layer behavior).
    weight = -1 * transformer_options.get("regional_conditioning_weight", 0.0)
    floor  = -1 * transformer_options.get("regional_conditioning_floor",  0.0)

    mask_zero = None
    mask      = None
    text_len  = txt.shape[1]

    if not UNCOND and 'AttnMask' in transformer_options:
        AttnMask = transformer_options['AttnMask']
        # fix: device was hard-coded to 'cuda'; follow the model's device instead
        mask = AttnMask.attn_mask.mask.to(img.device)
        if mask_zero is None:
            # mask_zero keeps only the txt rows/cols of the regional mask.
            mask_zero = torch.ones_like(mask)
        img_len = AttnMask.img_len
        mask_zero[:text_len, :] = mask[:text_len, :]
        mask_zero[:, :text_len] = mask[:, :text_len]
        if weight == 0:
            mask = None

    if UNCOND and 'AttnMask_neg' in transformer_options:
        AttnMask = transformer_options['AttnMask_neg']
        # fix: mask was never assigned in this branch, so torch.ones_like(mask)
        # below crashed (mask is still None here); mirror the other branches.
        mask = AttnMask.attn_mask.mask.to(img.device)
        if mask_zero is None:
            mask_zero = torch.ones_like(mask)
        img_len = AttnMask.img_len
        mask_zero[:text_len, :] = mask[:text_len, :]
        mask_zero[:, :text_len] = mask[:, :text_len]
        if weight == 0:
            mask = None

    elif UNCOND and 'AttnMask' in transformer_options:
        AttnMask = transformer_options['AttnMask']
        mask = AttnMask.attn_mask.mask.to(img.device)
        if mask_zero is None:
            mask_zero = torch.ones_like(mask)
        img_len = AttnMask.img_len
        mask_zero[:text_len, :] = mask[:text_len, :]
        mask_zero[:, :text_len] = mask[:, :text_len]
        if weight == 0:
            mask = None

    # Boolean masks pass through; float masks must match the model dtype.
    if mask is not None and not isinstance(mask[0][0].item(), bool):
        mask = mask.to(img.dtype)
    if mask_zero is not None and not isinstance(mask_zero[0][0].item(), bool):
        mask_zero = mask_zero.to(img.dtype)

    total_layers = len(self.double_blocks) + len(self.single_blocks)

    attn_mask = mask if attn_mask is None else attn_mask

    blocks_replace = patches_replace.get("dit", {})
    for i, block in enumerate(self.double_blocks):
        if i not in self.skip_mmdit:
            double_mod = (
                self.get_modulations(mod_vectors, "double_img", idx=i),
                self.get_modulations(mod_vectors, "double_txt", idx=i),
            )

            if ("double_block", i) in blocks_replace:
                def block_wrap(args):
                    out = {}
                    out["img"], out["txt"] = block(img  = args["img"],
                                                   txt  = args["txt"],
                                                   vec  = args["vec"],
                                                   pe   = args["pe"],
                                                   attn_mask = args.get("attn_mask"))
                    return out
                out = blocks_replace[("double_block", i)]({"img"       : img,
                                                           "txt"       : txt,
                                                           "vec"       : double_mod,
                                                           "pe"        : pe,
                                                           "attn_mask" : attn_mask},
                                                          {"original_block": block_wrap})
                txt = out["txt"]
                img = out["img"]
            else:
                if weight > 0 and mask is not None and weight <= i/total_layers:
                    # Deep enough: drop the img<->img regional masking.
                    img, txt = block(img=img, txt=txt, vec=double_mod, pe=pe, attn_mask=mask_zero)
                elif (weight < 0 and mask is not None and abs(weight) <= (1 - i/total_layers)):
                    # Dual pass: txt is taken from the masked pass; img is then
                    # recomputed with mask_zero (second call sees the updated txt).
                    img_tmpZ, txt_tmpZ = img.clone(), txt.clone()
                    img_tmpZ, txt = block(img=img_tmpZ, txt=txt_tmpZ, vec=double_mod, pe=pe, attn_mask=mask)
                    img, txt_tmpZ = block(img=img     , txt=txt     , vec=double_mod, pe=pe, attn_mask=mask_zero)
                elif floor > 0 and mask is not None and floor >= i/total_layers:
                    # Early layers under floor: open img<->img attention fully.
                    mask_tmp = mask.clone()
                    mask_tmp[text_len:, text_len:] = 1.0
                    img, txt = block(img=img, txt=txt, vec=double_mod, pe=pe, attn_mask=mask_tmp)
                elif floor < 0 and mask is not None and abs(floor) >= (1 - i/total_layers):
                    mask_tmp = mask.clone()
                    mask_tmp[text_len:, text_len:] = 1.0
                    img, txt = block(img=img, txt=txt, vec=double_mod, pe=pe, attn_mask=mask_tmp)
                elif update_cross_attn is not None and update_cross_attn['skip_cross_attn']:
                    print("update_cross_attn not yet implemented for Chroma.", flush=True)
                else:
                    img, txt = block(img=img, txt=txt, vec=double_mod, pe=pe, attn_mask=attn_mask)

            if control is not None:  # Controlnet residual on the image stream
                control_i = control.get("input")
                if i < len(control_i):
                    add = control_i[i]
                    if add is not None:
                        img += add

    # Single-stream blocks operate on the concatenated (txt, img) sequence.
    img = torch.cat((txt, img), 1)

    for i, block in enumerate(self.single_blocks):
        if i not in self.skip_dit:
            single_mod = self.get_modulations(mod_vectors, "single", idx=i)
            if ("single_block", i) in blocks_replace:
                def block_wrap(args):
                    out = {}
                    out["img"] = block(args["img"],
                                       vec       = args["vec"],
                                       pe        = args["pe"],
                                       attn_mask = args.get("attn_mask"))
                    return out
                out = blocks_replace[("single_block", i)]({"img"       : img,
                                                           "vec"       : single_mod,
                                                           "pe"        : pe,
                                                           "attn_mask" : attn_mask},
                                                          {"original_block": block_wrap})
                img = out["img"]
            else:
                # Layer depth measured across the whole (double + single) stack.
                layer_frac = (i + len(self.double_blocks)) / total_layers
                if weight > 0 and mask is not None and weight <= layer_frac:
                    img = block(img, vec=single_mod, pe=pe, attn_mask=mask_zero)
                elif weight < 0 and mask is not None and abs(weight) <= (1 - layer_frac):
                    img = block(img, vec=single_mod, pe=pe, attn_mask=mask_zero)
                elif floor > 0 and mask is not None and floor >= layer_frac:
                    mask_tmp = mask.clone()
                    mask_tmp[text_len:, text_len:] = 1.0
                    img = block(img, vec=single_mod, pe=pe, attn_mask=mask_tmp)
                elif floor < 0 and mask is not None and abs(floor) >= (1 - layer_frac):
                    mask_tmp = mask.clone()
                    mask_tmp[text_len:, text_len:] = 1.0
                    img = block(img, vec=single_mod, pe=pe, attn_mask=mask_tmp)
                else:
                    img = block(img, vec=single_mod, pe=pe, attn_mask=attn_mask)

            if control is not None:  # Controlnet residual on the image tokens only
                control_o = control.get("output")
                if i < len(control_o):
                    add = control_o[i]
                    if add is not None:
                        img[:, txt.shape[1] :, ...] += add

    # Drop the txt prefix; only image tokens go through the final layer.
    img = img[:, txt.shape[1] :, ...]
    final_mod = self.get_modulations(mod_vectors, "final")
    img = self.final_layer(img, vec=final_mod)  # (N, T, patch_size ** 2 * out_channels)
    return img
def forward_chroma_depr(self, x, timestep, context, guidance, control=None, transformer_options={}, **kwargs):
    """Deprecated chroma entry point.

    Pads `x` to the 2x2 patch grid, flattens the patches into tokens, builds
    row/column positional ids, delegates to `forward_orig`, then folds the
    output tokens back into an image cropped to the original height/width.
    """
    bs, c, h, w = x.shape
    patch_size = 2

    padded = comfy.ldm.common_dit.pad_to_patch_size(x, (patch_size, patch_size))
    tokens = rearrange(padded, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=patch_size, pw=patch_size)

    rows = ((h + (patch_size // 2)) // patch_size)
    cols = ((w + (patch_size // 2)) // patch_size)

    # Positional ids: channel 1 carries the row index, channel 2 the column
    # index; channel 0 is left at zero.
    grid = torch.zeros((rows, cols, 3), device=padded.device, dtype=padded.dtype)
    grid[:, :, 1] = grid[:, :, 1] + torch.linspace(0, rows - 1, steps=rows, device=padded.device, dtype=padded.dtype).unsqueeze(1)
    grid[:, :, 2] = grid[:, :, 2] + torch.linspace(0, cols - 1, steps=cols, device=padded.device, dtype=padded.dtype).unsqueeze(0)
    img_ids = repeat(grid, "h w c -> b (h w) c", b=bs)

    txt_ids = torch.zeros((bs, context.shape[1], 3), device=padded.device, dtype=padded.dtype)

    out = self.forward_orig(tokens, img_ids, context, txt_ids, timestep, guidance, control, transformer_options, attn_mask=kwargs.get("attention_mask", None))
    # Un-patchify and crop away any padding added above.
    return rearrange(out, "b (h w) (c ph pw) -> b c (h ph) (w pw)", h=rows, w=cols, ph=2, pw=2)[:, :, :h, :w]
def _get_img_ids(self, x, bs, h_len, w_len, h_start, h_end, w_start, w_end):
img_ids = torch.zeros( (h_len, w_len, 3), device=x.device, dtype=x.dtype)
img_ids[..., 1] += torch.linspace(h_start, h_end - 1, steps=h_len, device=x.device, dtype=x.dtype)[:, None]
img_ids[..., 2] += torch.linspace(w_start, w_end - 1, steps=w_len, device=x.device, dtype=x.dtype)[None, :]
img_ids = repeat(img_ids, "h w c -> b (h w) c", b=bs)
return img_ids
def forward(self,
            x,
            timestep,
            context,
            #y,
            guidance,
            control = None,
            transformer_options = {},
            **kwargs
            ):
    """Chroma entry point with regional-conditioning and style-guide hooks.

    Runs one `forward_blocks` pass per cond/uncond sample (so each sample can
    carry its own regional attention mask and context), un-patchifies to eps,
    then optionally re-styles eps against reference latents `y0_style_pos` /
    `y0_style_neg` via scattersort, AdaIN, or WCT in the patch-embedding space.

    NOTE(review): `transformer_options` is a mutable default and is mutated
    below ('original_shape', 'patch_size') — confirm callers always pass one.
    """
    x_orig = x.clone()
    SIGMA = timestep[0].unsqueeze(0)
    update_cross_attn = transformer_options.get("update_cross_attn")
    EO = transformer_options.get("ExtraOptions", ExtraOptions(""))
    if EO is not None:
        EO.mute = True

    # Style-guide latents and blend weights; "synweight" scales the opposite
    # cond/uncond branch and is pre-multiplied by the main weight.
    y0_style_pos = transformer_options.get("y0_style_pos")
    y0_style_neg = transformer_options.get("y0_style_neg")

    y0_style_pos_weight    = transformer_options.get("y0_style_pos_weight", 0.0)
    y0_style_pos_synweight = transformer_options.get("y0_style_pos_synweight", 0.0)
    y0_style_pos_synweight *= y0_style_pos_weight

    y0_style_neg_weight    = transformer_options.get("y0_style_neg_weight", 0.0)
    y0_style_neg_synweight = transformer_options.get("y0_style_neg_synweight", 0.0)
    y0_style_neg_synweight *= y0_style_neg_weight

    # Regional conditioning strengths, sign-flipped here; forward_blocks
    # compares them against per-layer depth fractions.
    weight = -1 * transformer_options.get("regional_conditioning_weight", 0.0)
    floor  = -1 * transformer_options.get("regional_conditioning_floor", 0.0)

    # Frequency-separation options used by the AdaIN style path below.
    freqsep_lowpass_method = transformer_options.get("freqsep_lowpass_method")
    freqsep_sigma          = transformer_options.get("freqsep_sigma")
    freqsep_kernel_size    = transformer_options.get("freqsep_kernel_size")
    freqsep_inner_kernel_size = transformer_options.get("freqsep_inner_kernel_size")
    freqsep_stride         = transformer_options.get("freqsep_stride")

    freqsep_lowpass_weight = transformer_options.get("freqsep_lowpass_weight")
    freqsep_highpass_weight= transformer_options.get("freqsep_highpass_weight")
    freqsep_mask           = transformer_options.get("freqsep_mask")

    out_list = []
    # One forward pass per sample so cond and uncond can use different
    # regional masks/contexts.
    for i in range(len(transformer_options['cond_or_uncond'])):
        UNCOND = transformer_options['cond_or_uncond'][i] == 1

        if update_cross_attn is not None:
            update_cross_attn['UNCOND'] = UNCOND

        img = x
        bs, c, h, w = x.shape
        patch_size = 2
        img = comfy.ldm.common_dit.pad_to_patch_size(img, (patch_size, patch_size))    # 1,16,192,192
        transformer_options['original_shape'] = img.shape
        transformer_options['patch_size']     = patch_size

        h_len = ((h + (patch_size // 2)) // patch_size) # h_len 96
        w_len = ((w + (patch_size // 2)) // patch_size) # w_len 96

        img = rearrange(img, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=patch_size, pw=patch_size)    # img 1,9216,64     1,16,128,128 -> 1,4096,64

        # Pick the regional context/mask for this sample. weight == 0 means
        # "use the regional context but no attention mask".
        context_tmp = None

        if not UNCOND and 'AttnMask' in transformer_options: # and weight != 0:
            AttnMask = transformer_options['AttnMask']
            mask     = transformer_options['AttnMask'].attn_mask.mask.to('cuda')
            if weight == 0:
                context_tmp = transformer_options['RegContext'].context.to(context.dtype).to(context.device)
                mask = None
            else:
                context_tmp = transformer_options['RegContext'].context.to(context.dtype).to(context.device)

        if UNCOND and 'AttnMask_neg' in transformer_options: # and weight != 0:
            AttnMask = transformer_options['AttnMask_neg']
            mask     = transformer_options['AttnMask_neg'].attn_mask.mask.to('cuda')
            if weight == 0:
                context_tmp = transformer_options['RegContext_neg'].context.to(context.dtype).to(context.device)
                mask = None
            else:
                context_tmp = transformer_options['RegContext_neg'].context.to(context.dtype).to(context.device)
        elif UNCOND and 'AttnMask' in transformer_options:
            AttnMask = transformer_options['AttnMask']
            mask     = transformer_options['AttnMask'].attn_mask.mask.to('cuda')
            # Tile the uncond context to the regional context's length so the
            # mask geometry still lines up.
            A       = context
            B       = transformer_options['RegContext'].context
            context_tmp = A.repeat(1, (B.shape[1] // A.shape[1]) + 1, 1)[:, :B.shape[1], :]

        if context_tmp is None:
            context_tmp = context[i][None,...].clone()

        txt_ids      = torch.zeros((bs, context_tmp.shape[1], 3), device=img.device, dtype=img.dtype)   # txt_ids 1, 256,3
        img_ids_orig = self._get_img_ids(img, bs, h_len, w_len, 0, h_len, 0, w_len)                     # img_ids_orig = 1,9216,3

        out_tmp = self.forward_blocks(img         [i][None,...].clone(),
                                      img_ids_orig[i][None,...].clone(),
                                      context_tmp,
                                      txt_ids     [i][None,...].clone(),
                                      timestep    [i][None,...].clone(),
                                      #y          [i][None,...].clone(),
                                      guidance    [i][None,...].clone(),
                                      control,
                                      update_cross_attn=update_cross_attn,
                                      transformer_options=transformer_options,
                                      UNCOND = UNCOND,
                                      )  # context 1,256,4096   y 1,768
        out_list.append(out_tmp)

    out = torch.stack(out_list, dim=0).squeeze(dim=1)
    # Un-patchify tokens back to latent-image eps, cropping any pad.
    eps = rearrange(out, "b (h w) (c ph pw) -> b c (h ph) (w pw)", h=h_len, w=w_len, ph=2, pw=2)[:,:,:h,:w]

    dtype = eps.dtype if self.style_dtype is None else self.style_dtype

    # ---- positive style guide: restyle the denoised estimate in the model's
    # patch-embedding space (via Retrojector), then convert back to eps.
    # NOTE(review): UNCOND / h_len / w_len leak out of the loop above (last
    # sample wins) — confirm this is intended.
    if y0_style_pos is not None:
        y0_style_pos_weight    = transformer_options.get("y0_style_pos_weight")
        y0_style_pos_synweight = transformer_options.get("y0_style_pos_synweight")
        y0_style_pos_synweight *= y0_style_pos_weight
        y0_style_pos_mask = transformer_options.get("y0_style_pos_mask")
        y0_style_pos_mask_edge = transformer_options.get("y0_style_pos_mask_edge")

        y0_style_pos = y0_style_pos.to(dtype)
        x   = x_orig.clone().to(dtype)
        eps = eps.to(dtype)
        eps_orig = eps.clone()

        sigma = SIGMA #t_orig[0].to(torch.float32) / 1000
        denoised = x - sigma * eps

        denoised_embed = self.Retrojector.embed(denoised)
        y0_adain_embed = self.Retrojector.embed(y0_style_pos)

        if transformer_options['y0_style_method'] == "scattersort":
            tile_h, tile_w = transformer_options.get('y0_style_tile_height'), transformer_options.get('y0_style_tile_width')
            pad = transformer_options.get('y0_style_tile_padding')
            if pad is not None and tile_h is not None and tile_w is not None:

                denoised_spatial = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)
                y0_adain_spatial = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)

                if EO("scattersort_median_LP"):
                    # Optional: scattersort only the low-pass band, keep the
                    # content's high-pass detail.
                    denoised_spatial_LP = median_blur_2d(denoised_spatial, kernel_size=EO("scattersort_median_LP",7))
                    y0_adain_spatial_LP = median_blur_2d(y0_adain_spatial, kernel_size=EO("scattersort_median_LP",7))

                    denoised_spatial_HP = denoised_spatial - denoised_spatial_LP
                    y0_adain_spatial_HP = y0_adain_spatial - y0_adain_spatial_LP

                    denoised_spatial_LP = apply_scattersort_tiled(denoised_spatial_LP, y0_adain_spatial_LP, tile_h, tile_w, pad)

                    denoised_spatial = denoised_spatial_LP + denoised_spatial_HP
                    denoised_embed = rearrange(denoised_spatial, "b c h w -> b (h w) c")
                else:
                    denoised_spatial = apply_scattersort_tiled(denoised_spatial, y0_adain_spatial, tile_h, tile_w, pad)

                    denoised_embed = rearrange(denoised_spatial, "b c h w -> b (h w) c")

            else:
                denoised_embed = apply_scattersort_masked(denoised_embed, y0_adain_embed, y0_style_pos_mask, y0_style_pos_mask_edge, h_len, w_len)

        elif transformer_options['y0_style_method'] == "AdaIN":
            if freqsep_mask is not None:
                freqsep_mask = freqsep_mask.view(1, 1, *freqsep_mask.shape[-2:]).float()
                freqsep_mask = F.interpolate(freqsep_mask.float(), size=(h_len, w_len), mode='nearest-exact')

            if hasattr(self, "adain_tile"):
                # Tiled AdaIN; adain_flag alternates a half-tile offset each
                # call to hide tile seams.
                tile_h, tile_w = self.adain_tile

                denoised_pretile = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)
                y0_adain_pretile = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)

                if self.adain_flag:
                    h_off = tile_h // 2
                    w_off = tile_w // 2
                    denoised_pretile = denoised_pretile[:,:,h_off:-h_off, w_off:-w_off]
                    self.adain_flag = False
                else:
                    h_off = 0
                    w_off = 0
                    self.adain_flag = True

                tiles,    orig_shape, grid, strides = tile_latent(denoised_pretile, tile_size=(tile_h,tile_w))
                y0_tiles, orig_shape, grid, strides = tile_latent(y0_adain_pretile, tile_size=(tile_h,tile_w))

                tiles_out = []
                for i in range(tiles.shape[0]):
                    tile = tiles[i].unsqueeze(0)
                    y0_tile = y0_tiles[i].unsqueeze(0)

                    tile    = rearrange(tile,    "b c h w -> b (h w) c", h=tile_h, w=tile_w)
                    y0_tile = rearrange(y0_tile, "b c h w -> b (h w) c", h=tile_h, w=tile_w)

                    tile = adain_seq_inplace(tile, y0_tile)
                    tiles_out.append(rearrange(tile, "b (h w) c -> b c h w", h=tile_h, w=tile_w))

                tiles_out_tensor = torch.cat(tiles_out, dim=0)
                tiles_out_tensor = untile_latent(tiles_out_tensor, orig_shape, grid, strides)

                if h_off == 0:
                    denoised_pretile = tiles_out_tensor
                else:
                    denoised_pretile[:,:,h_off:-h_off, w_off:-w_off] = tiles_out_tensor
                denoised_embed = rearrange(denoised_pretile, "b c h w -> b (h w) c", h=h_len, w=w_len)

            elif freqsep_lowpass_method is not None and freqsep_lowpass_method.endswith("pw"): #EO("adain_pw"):

                denoised_spatial = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)
                y0_adain_spatial = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)

                if   freqsep_lowpass_method == "median_pw":
                    denoised_spatial_new = adain_patchwise_row_batch_med(denoised_spatial.clone(), y0_adain_spatial.clone().repeat(denoised_spatial.shape[0],1,1,1), sigma=freqsep_sigma, kernel_size=freqsep_kernel_size, use_median_blur=True, lowpass_weight=freqsep_lowpass_weight, highpass_weight=freqsep_highpass_weight)
                elif freqsep_lowpass_method == "gaussian_pw":
                    denoised_spatial_new = adain_patchwise_row_batch(denoised_spatial.clone(), y0_adain_spatial.clone().repeat(denoised_spatial.shape[0],1,1,1), sigma=freqsep_sigma, kernel_size=freqsep_kernel_size)

                denoised_embed = rearrange(denoised_spatial_new, "b c h w -> b (h w) c", h=h_len, w=w_len)

            elif freqsep_lowpass_method is not None:
                # Frequency-separated AdaIN: swap in the style's low-pass
                # band, keep (weighted) high-pass detail from the content.
                denoised_spatial = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)
                y0_adain_spatial = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)

                if   freqsep_lowpass_method == "median":
                    denoised_spatial_LP = median_blur_2d(denoised_spatial, kernel_size=freqsep_kernel_size)
                    y0_adain_spatial_LP = median_blur_2d(y0_adain_spatial, kernel_size=freqsep_kernel_size)
                elif freqsep_lowpass_method == "gaussian":
                    denoised_spatial_LP = gaussian_blur_2d(denoised_spatial, sigma=freqsep_sigma, kernel_size=freqsep_kernel_size)
                    y0_adain_spatial_LP = gaussian_blur_2d(y0_adain_spatial, sigma=freqsep_sigma, kernel_size=freqsep_kernel_size)

                denoised_spatial_HP = denoised_spatial - denoised_spatial_LP

                if EO("adain_fs_uhp"):
                    # Ultra-high-pass split: take the style's UHP band on top
                    # of the content's ULP band.
                    y0_adain_spatial_HP = y0_adain_spatial - y0_adain_spatial_LP

                    denoised_spatial_ULP = gaussian_blur_2d(denoised_spatial, sigma=EO("adain_fs_uhp_sigma", 1.0), kernel_size=EO("adain_fs_uhp_kernel_size", 3))
                    y0_adain_spatial_ULP = gaussian_blur_2d(y0_adain_spatial, sigma=EO("adain_fs_uhp_sigma", 1.0), kernel_size=EO("adain_fs_uhp_kernel_size", 3))

                    denoised_spatial_UHP = denoised_spatial_HP  - denoised_spatial_ULP
                    y0_adain_spatial_UHP = y0_adain_spatial_HP  - y0_adain_spatial_ULP

                    #denoised_spatial_HP  = y0_adain_spatial_ULP + denoised_spatial_UHP
                    denoised_spatial_HP  = denoised_spatial_ULP + y0_adain_spatial_UHP

                denoised_spatial_new = freqsep_lowpass_weight * y0_adain_spatial_LP + freqsep_highpass_weight * denoised_spatial_HP
                denoised_embed = rearrange(denoised_spatial_new, "b c h w -> b (h w) c", h=h_len, w=w_len)

            else:
                denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed)

                for adain_iter in range(EO("style_iter", 0)):
                    denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed)
                    denoised_embed = self.Retrojector.embed(self.Retrojector.unembed(denoised_embed))
                    denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed)

        elif transformer_options['y0_style_method'] == "WCT":
            self.StyleWCT.set(y0_adain_embed)
            denoised_embed = self.StyleWCT.get(denoised_embed)

            if transformer_options.get('y0_standard_guide') is not None:
                y0_standard_guide = transformer_options.get('y0_standard_guide')

                y0_standard_guide_embed = self.Retrojector.embed(y0_standard_guide)
                f_cs = self.StyleWCT.get(y0_standard_guide_embed)
                self.y0_standard_guide = self.Retrojector.unembed(f_cs)

            if transformer_options.get('y0_inv_standard_guide') is not None:
                y0_inv_standard_guide = transformer_options.get('y0_inv_standard_guide')

                y0_inv_standard_guide_embed = self.Retrojector.embed(y0_inv_standard_guide)
                f_cs = self.StyleWCT.get(y0_inv_standard_guide_embed)
                self.y0_inv_standard_guide = self.Retrojector.unembed(f_cs)

        denoised_approx = self.Retrojector.unembed(denoised_embed)

        eps = (x - denoised_approx) / sigma

        # Blend restyled eps with the original according to (syn)weight.
        if not UNCOND:
            if eps.shape[0] == 2:
                eps[1] = eps_orig[1] + y0_style_pos_weight * (eps[1] - eps_orig[1])
                eps[0] = eps_orig[0] + y0_style_pos_synweight * (eps[0] - eps_orig[0])
            else:
                eps[0] = eps_orig[0] + y0_style_pos_weight * (eps[0] - eps_orig[0])
        elif eps.shape[0] == 1 and UNCOND:
            eps[0] = eps_orig[0] + y0_style_pos_synweight * (eps[0] - eps_orig[0])

        eps = eps.float()

    # ---- negative style guide: same pipeline, applied on the uncond branch.
    if y0_style_neg is not None:
        y0_style_neg_weight    = transformer_options.get("y0_style_neg_weight")
        y0_style_neg_synweight = transformer_options.get("y0_style_neg_synweight")
        y0_style_neg_synweight *= y0_style_neg_weight
        y0_style_neg_mask = transformer_options.get("y0_style_neg_mask")
        y0_style_neg_mask_edge = transformer_options.get("y0_style_neg_mask_edge")

        y0_style_neg = y0_style_neg.to(dtype)
        x   = x_orig.clone().to(dtype)
        eps = eps.to(dtype)
        eps_orig = eps.clone()

        sigma = SIGMA #t_orig[0].to(torch.float32) / 1000
        denoised = x - sigma * eps

        denoised_embed = self.Retrojector.embed(denoised)
        y0_adain_embed = self.Retrojector.embed(y0_style_neg)

        if transformer_options['y0_style_method'] == "scattersort":
            tile_h, tile_w = transformer_options.get('y0_style_tile_height'), transformer_options.get('y0_style_tile_width')
            pad = transformer_options.get('y0_style_tile_padding')
            if pad is not None and tile_h is not None and tile_w is not None:

                denoised_spatial = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)
                y0_adain_spatial = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)

                denoised_spatial = apply_scattersort_tiled(denoised_spatial, y0_adain_spatial, tile_h, tile_w, pad)

                denoised_embed = rearrange(denoised_spatial, "b c h w -> b (h w) c")
            else:
                denoised_embed = apply_scattersort_masked(denoised_embed, y0_adain_embed, y0_style_neg_mask, y0_style_neg_mask_edge, h_len, w_len)

        elif transformer_options['y0_style_method'] == "AdaIN":
            denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed)
            for adain_iter in range(EO("style_iter", 0)):
                denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed)
                denoised_embed = self.Retrojector.embed(self.Retrojector.unembed(denoised_embed))
                denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed)

        elif transformer_options['y0_style_method'] == "WCT":
            self.StyleWCT.set(y0_adain_embed)
            denoised_embed = self.StyleWCT.get(denoised_embed)

        denoised_approx = self.Retrojector.unembed(denoised_embed)

        if UNCOND:
            eps = (x - denoised_approx) / sigma
            eps[0] = eps_orig[0] + y0_style_neg_weight * (eps[0] - eps_orig[0])
            if eps.shape[0] == 2:
                eps[1] = eps_orig[1] + y0_style_neg_synweight * (eps[1] - eps_orig[1])
        elif eps.shape[0] == 1 and not UNCOND:
            eps[0] = eps_orig[0] + y0_style_neg_synweight * (eps[0] - eps_orig[0])

        eps = eps.float()

    return eps

    # NOTE(review): everything below this `return` appears to be unreachable
    # legacy code (an older img_in-weight-based style pipeline, superseded by
    # the Retrojector path above). Confirm before deleting.
    dtype = eps.dtype if self.style_dtype is None else self.style_dtype
    pinv_dtype = torch.float32 if dtype != torch.float64 else dtype
    W_inv = None

    #if eps.shape[0] == 2 or (eps.shape[0] == 1): #: and not UNCOND):
    if y0_style_pos is not None and y0_style_pos_weight != 0.0:
        y0_style_pos = y0_style_pos.to(dtype)
        x   = x.to(dtype)
        eps = eps.to(dtype)
        eps_orig = eps.clone()

        sigma = SIGMA #t_orig[0].to(torch.float32) / 1000
        denoised = x - sigma * eps

        img = comfy.ldm.common_dit.pad_to_patch_size(denoised, (self.patch_size, self.patch_size))

        img = rearrange(img, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=patch_size, pw=patch_size)    # img 1,9216,64     1,16,128,128 -> 1,4096,64

        img_y0_adain = comfy.ldm.common_dit.pad_to_patch_size(y0_style_pos, (self.patch_size, self.patch_size))

        img_y0_adain = rearrange(img_y0_adain, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=patch_size, pw=patch_size)    # img 1,9216,64     1,16,128,128 -> 1,4096,64

        W = self.img_in.weight.data.to(dtype)   # shape [2560, 64]
        b = self.img_in.bias.data.to(dtype)     # shape [2560]

        denoised_embed = F.linear(img         .to(W), W, b).to(img)
        y0_adain_embed = F.linear(img_y0_adain.to(W), W, b).to(img_y0_adain)

        if transformer_options['y0_style_method'] == "AdaIN":
            if freqsep_mask is not None:
                freqsep_mask = freqsep_mask.view(1, 1, *freqsep_mask.shape[-2:]).float()
                freqsep_mask = F.interpolate(freqsep_mask.float(), size=(h_len, w_len), mode='nearest-exact')

            if freqsep_lowpass_method is not None and freqsep_lowpass_method.endswith("pw"): #EO("adain_pw"):
                #if self.y0_adain_embed is None or self.y0_adain_embed.shape != y0_adain_embed.shape or torch.norm(self.y0_adain_embed - y0_adain_embed) > 0:
                #    self.y0_adain_embed = y0_adain_embed
                #    self.adain_pw_cache = None

                denoised_spatial = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)
                y0_adain_spatial = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)

                if   freqsep_lowpass_method == "median_alt":
                    denoised_spatial_new = adain_patchwise_row_batch_medblur(denoised_spatial.clone(), y0_adain_spatial.clone().repeat(denoised_spatial.shape[0],1,1,1), sigma=freqsep_sigma, kernel_size=freqsep_kernel_size, use_median_blur=True)
                elif freqsep_lowpass_method == "median_pw":
                    denoised_spatial_new = adain_patchwise_row_batch_realmedblur(denoised_spatial.clone(), y0_adain_spatial.clone().repeat(denoised_spatial.shape[0],1,1,1), sigma=freqsep_sigma, kernel_size=freqsep_kernel_size, use_median_blur=True, lowpass_weight=freqsep_lowpass_weight, highpass_weight=freqsep_highpass_weight)
                elif freqsep_lowpass_method == "gaussian_pw":
                    denoised_spatial_new = adain_patchwise_row_batch(denoised_spatial.clone(), y0_adain_spatial.clone().repeat(denoised_spatial.shape[0],1,1,1), sigma=freqsep_sigma, kernel_size=freqsep_kernel_size)

                denoised_embed = rearrange(denoised_spatial_new, "b c h w -> b (h w) c", h=h_len, w=w_len)

            elif freqsep_lowpass_method is not None and freqsep_lowpass_method == "distribution":
                denoised_spatial = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)
                y0_adain_spatial = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)

                denoised_spatial_new = adain_patchwise_strict_sortmatch9(denoised_spatial.clone(), y0_adain_spatial.clone().repeat(denoised_spatial.shape[0],1,1,1), kernel_size=freqsep_kernel_size, inner_kernel_size=freqsep_inner_kernel_size, mask=freqsep_mask, stride=freqsep_stride)

                denoised_embed = rearrange(denoised_spatial_new, "b c h w -> b (h w) c", h=h_len, w=w_len)

            elif freqsep_lowpass_method is not None:
                denoised_spatial = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)
                y0_adain_spatial = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)

                if   freqsep_lowpass_method == "median":
                    denoised_spatial_LP = median_blur_2d(denoised_spatial, kernel_size=freqsep_kernel_size)
                    y0_adain_spatial_LP = median_blur_2d(y0_adain_spatial, kernel_size=freqsep_kernel_size)
                elif freqsep_lowpass_method == "gaussian":
                    denoised_spatial_LP = gaussian_blur_2d(denoised_spatial, sigma=freqsep_sigma, kernel_size=freqsep_kernel_size)
                    y0_adain_spatial_LP = gaussian_blur_2d(y0_adain_spatial, sigma=freqsep_sigma, kernel_size=freqsep_kernel_size)

                denoised_spatial_HP = denoised_spatial - denoised_spatial_LP

                if EO("adain_fs_uhp"):
                    y0_adain_spatial_HP = y0_adain_spatial - y0_adain_spatial_LP

                    denoised_spatial_ULP = gaussian_blur_2d(denoised_spatial, sigma=EO("adain_fs_uhp_sigma", 1.0), kernel_size=EO("adain_fs_uhp_kernel_size", 3))
                    y0_adain_spatial_ULP = gaussian_blur_2d(y0_adain_spatial, sigma=EO("adain_fs_uhp_sigma", 1.0), kernel_size=EO("adain_fs_uhp_kernel_size", 3))

                    denoised_spatial_UHP = denoised_spatial_HP  - denoised_spatial_ULP
                    y0_adain_spatial_UHP = y0_adain_spatial_HP  - y0_adain_spatial_ULP

                    #denoised_spatial_HP  = y0_adain_spatial_ULP + denoised_spatial_UHP
                    denoised_spatial_HP  = denoised_spatial_ULP + y0_adain_spatial_UHP

                denoised_spatial_new = freqsep_lowpass_weight * y0_adain_spatial_LP + freqsep_highpass_weight * denoised_spatial_HP
                denoised_embed = rearrange(denoised_spatial_new, "b c h w -> b (h w) c", h=h_len, w=w_len)

            else:
                denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed)
            #denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed)
            #for adain_iter in range(EO("style_iter", 0)):
            #    denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed)
            #    denoised_embed = (denoised_embed - b) @ torch.linalg.pinv(W.to(pinv_dtype)).T.to(dtype)
            #    denoised_embed = F.linear(denoised_embed         .to(W), W, b).to(img)
            #    denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed)

        elif transformer_options['y0_style_method'] == "WCT":
            if self.y0_adain_embed is None or self.y0_adain_embed.shape != y0_adain_embed.shape or torch.norm(self.y0_adain_embed - y0_adain_embed) > 0:
                self.y0_adain_embed = y0_adain_embed

                f_s          = y0_adain_embed[0].clone()
                self.mu_s    = f_s.mean(dim=0, keepdim=True)
                f_s_centered = f_s - self.mu_s

                cov = (f_s_centered.T.double() @ f_s_centered.double()) / (f_s_centered.size(0) - 1)

                S_eig, U_eig = torch.linalg.eigh(cov + 1e-5 * torch.eye(cov.size(0), dtype=cov.dtype, device=cov.device))
                S_eig_sqrt   = S_eig.clamp(min=0).sqrt() # eigenvalues -> singular values

                whiten = U_eig @ torch.diag(S_eig_sqrt) @ U_eig.T
                self.y0_color  = whiten.to(f_s_centered)

            for wct_i in range(eps.shape[0]):
                f_c          = denoised_embed[wct_i].clone()
                mu_c         = f_c.mean(dim=0, keepdim=True)
                f_c_centered = f_c - mu_c

                cov = (f_c_centered.T.double() @ f_c_centered.double()) / (f_c_centered.size(0) - 1)

                S_eig, U_eig  = torch.linalg.eigh(cov + 1e-5 * torch.eye(cov.size(0), dtype=cov.dtype, device=cov.device))
                inv_sqrt_eig  = S_eig.clamp(min=0).rsqrt()

                whiten = U_eig @ torch.diag(inv_sqrt_eig) @ U_eig.T
                whiten = whiten.to(f_c_centered)

                f_c_whitened = f_c_centered @ whiten.T
                f_cs         = f_c_whitened @ self.y0_color.T + self.mu_s

                denoised_embed[wct_i] = f_cs

        denoised_approx = (denoised_embed - b.to(denoised_embed)) @ torch.linalg.pinv(W).T.to(denoised_embed)
        denoised_approx = denoised_approx.to(eps)

        denoised_approx = rearrange(denoised_approx, "b (h w) (c ph pw) -> b c (h ph) (w pw)", h=h_len, w=w_len, ph=2, pw=2)[:,:,:h,:w]

        eps = (x - denoised_approx) / sigma

        if not UNCOND:
            if eps.shape[0] == 2:
                eps[1] = eps_orig[1] + y0_style_pos_weight * (eps[1] - eps_orig[1])
                eps[0] = eps_orig[0] + y0_style_pos_synweight * (eps[0] - eps_orig[0])
            else:
                eps[0] = eps_orig[0] + y0_style_pos_weight * (eps[0] - eps_orig[0])
        elif eps.shape[0] == 1 and UNCOND:
            eps[0] = eps_orig[0] + y0_style_pos_synweight * (eps[0] - eps_orig[0])
            #if eps.shape[0] == 2:
            #    eps[1] = eps_orig[1] + y0_style_neg_synweight * (eps[1] - eps_orig[1])

        eps = eps.float()

    #if eps.shape[0] == 2 or (eps.shape[0] == 1): # and UNCOND):
    if y0_style_neg is not None and y0_style_neg_weight != 0.0:
        y0_style_neg = y0_style_neg.to(dtype)
        x   = x.to(dtype)
        eps = eps.to(dtype)
        eps_orig = eps.clone()

        sigma = SIGMA #t_orig[0].to(torch.float32) / 1000
        denoised = x - sigma * eps

        img = comfy.ldm.common_dit.pad_to_patch_size(denoised, (self.patch_size, self.patch_size))

        h_len = ((h + (patch_size // 2)) // patch_size) # h_len 96
        w_len = ((w + (patch_size // 2)) // patch_size) # w_len 96

        img = rearrange(img, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=patch_size, pw=patch_size)    # img 1,9216,64     1,16,128,128 -> 1,4096,64

        img_y0_adain = comfy.ldm.common_dit.pad_to_patch_size(y0_style_neg, (self.patch_size, self.patch_size))

        img_y0_adain = rearrange(img_y0_adain, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=patch_size, pw=patch_size)    # img 1,9216,64     1,16,128,128 -> 1,4096,64

        W = self.img_in.weight.data.to(dtype)   # shape [2560, 64]
        b = self.img_in.bias.data.to(dtype)     # shape [2560]

        denoised_embed = F.linear(img         .to(W), W, b).to(img)
        y0_adain_embed = F.linear(img_y0_adain.to(W), W, b).to(img_y0_adain)

        if transformer_options['y0_style_method'] == "AdaIN":
            if freqsep_mask is not None:
                freqsep_mask = freqsep_mask.view(1, 1, *freqsep_mask.shape[-2:]).float()
                freqsep_mask = F.interpolate(freqsep_mask.float(), size=(h_len, w_len), mode='nearest-exact')

            if freqsep_lowpass_method is not None and freqsep_lowpass_method.endswith("pw"): #EO("adain_pw"):
                #if self.y0_adain_embed is None or self.y0_adain_embed.shape != y0_adain_embed.shape or torch.norm(self.y0_adain_embed - y0_adain_embed) > 0:
                #    self.y0_adain_embed = y0_adain_embed
                #    self.adain_pw_cache = None

                denoised_spatial = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)
                y0_adain_spatial = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)

                if   freqsep_lowpass_method == "median_alt":
                    denoised_spatial_new = adain_patchwise_row_batch_medblur(denoised_spatial.clone(), y0_adain_spatial.clone(), sigma=freqsep_sigma, kernel_size=freqsep_kernel_size, use_median_blur=True)
                elif freqsep_lowpass_method == "median_pw":
                    denoised_spatial_new = adain_patchwise_row_batch_realmedblur(denoised_spatial.clone(), y0_adain_spatial.clone(), sigma=freqsep_sigma, kernel_size=freqsep_kernel_size, use_median_blur=True, lowpass_weight=freqsep_lowpass_weight, highpass_weight=freqsep_highpass_weight)
                elif freqsep_lowpass_method == "gaussian_pw":
                    denoised_spatial_new = adain_patchwise_row_batch(denoised_spatial.clone(), y0_adain_spatial.clone(), sigma=freqsep_sigma, kernel_size=freqsep_kernel_size)

                denoised_embed = rearrange(denoised_spatial_new, "b c h w -> b (h w) c", h=h_len, w=w_len)

            elif freqsep_lowpass_method is not None and freqsep_lowpass_method == "distribution":
                denoised_spatial = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)
                y0_adain_spatial = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)

                denoised_spatial_new = adain_patchwise_strict_sortmatch9(denoised_spatial.clone(), y0_adain_spatial.clone(), kernel_size=freqsep_kernel_size, inner_kernel_size=freqsep_inner_kernel_size, mask=freqsep_mask, stride=freqsep_stride)

                denoised_embed = rearrange(denoised_spatial_new, "b c h w -> b (h w) c", h=h_len, w=w_len)

            elif freqsep_lowpass_method is not None:
                denoised_spatial = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)
                y0_adain_spatial = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)

                if   freqsep_lowpass_method == "median":
                    denoised_spatial_LP = median_blur_2d(denoised_spatial, kernel_size=freqsep_kernel_size)
                    y0_adain_spatial_LP = median_blur_2d(y0_adain_spatial, kernel_size=freqsep_kernel_size)
                elif freqsep_lowpass_method == "gaussian":
                    denoised_spatial_LP = gaussian_blur_2d(denoised_spatial, sigma=freqsep_sigma, kernel_size=freqsep_kernel_size)
                    y0_adain_spatial_LP = gaussian_blur_2d(y0_adain_spatial, sigma=freqsep_sigma, kernel_size=freqsep_kernel_size)

                denoised_spatial_HP = denoised_spatial - denoised_spatial_LP

                if EO("adain_fs_uhp"):
                    y0_adain_spatial_HP = y0_adain_spatial - y0_adain_spatial_LP

                    denoised_spatial_ULP = gaussian_blur_2d(denoised_spatial, sigma=EO("adain_fs_uhp_sigma", 1.0), kernel_size=EO("adain_fs_uhp_kernel_size", 3))
                    y0_adain_spatial_ULP = gaussian_blur_2d(y0_adain_spatial, sigma=EO("adain_fs_uhp_sigma", 1.0), kernel_size=EO("adain_fs_uhp_kernel_size", 3))

                    denoised_spatial_UHP = denoised_spatial_HP  - denoised_spatial_ULP
                    y0_adain_spatial_UHP = y0_adain_spatial_HP  - y0_adain_spatial_ULP

                    #denoised_spatial_HP  = y0_adain_spatial_ULP + denoised_spatial_UHP
                    denoised_spatial_HP  = denoised_spatial_ULP + y0_adain_spatial_UHP

                denoised_spatial_new = freqsep_lowpass_weight * y0_adain_spatial_LP + freqsep_highpass_weight * denoised_spatial_HP
                denoised_embed = rearrange(denoised_spatial_new, "b c h w -> b (h w) c", h=h_len, w=w_len)

            else:
                denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed)
            #denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed)
            #for adain_iter in range(EO("style_iter", 0)):
            #    denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed)
            #    denoised_embed = (denoised_embed - b) @ torch.linalg.pinv(W.to(pinv_dtype)).T.to(dtype)
            #    denoised_embed = F.linear(denoised_embed         .to(W), W, b).to(img)
            #    denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed)

        elif transformer_options['y0_style_method'] == "WCT":
            if self.y0_adain_embed is None or self.y0_adain_embed.shape != y0_adain_embed.shape or torch.norm(self.y0_adain_embed - y0_adain_embed) > 0:
                self.y0_adain_embed = y0_adain_embed

                f_s          = y0_adain_embed[0].clone()
                self.mu_s    = f_s.mean(dim=0, keepdim=True)
                f_s_centered = f_s - self.mu_s

                cov = (f_s_centered.T.double() @ f_s_centered.double()) / (f_s_centered.size(0) - 1)

                S_eig, U_eig = torch.linalg.eigh(cov + 1e-5 * torch.eye(cov.size(0), dtype=cov.dtype, device=cov.device))
                S_eig_sqrt   = S_eig.clamp(min=0).sqrt() # eigenvalues -> singular values

                whiten = U_eig @ torch.diag(S_eig_sqrt) @ U_eig.T
                self.y0_color  = whiten.to(f_s_centered)

            for wct_i in range(eps.shape[0]):
                f_c          = denoised_embed[wct_i].clone()
                mu_c         = f_c.mean(dim=0, keepdim=True)
                f_c_centered = f_c - mu_c

                cov = (f_c_centered.T.double() @ f_c_centered.double()) / (f_c_centered.size(0) - 1)

                S_eig, U_eig  = torch.linalg.eigh(cov + 1e-5 * torch.eye(cov.size(0), dtype=cov.dtype, device=cov.device))
                inv_sqrt_eig  = S_eig.clamp(min=0).rsqrt()

                whiten = U_eig @ torch.diag(inv_sqrt_eig) @ U_eig.T
                whiten = whiten.to(f_c_centered)

                f_c_whitened = f_c_centered @ whiten.T
                f_cs         = f_c_whitened @ self.y0_color.T + self.mu_s

                denoised_embed[wct_i] = f_cs

        denoised_approx = (denoised_embed - b.to(denoised_embed)) @ torch.linalg.pinv(W).T.to(denoised_embed)
        denoised_approx = denoised_approx.to(eps)

        denoised_approx = rearrange(denoised_approx, "b (h w) (c ph pw) -> b c (h ph) (w pw)", h=h_len, w=w_len, ph=2, pw=2)[:,:,:h,:w]

        if UNCOND:
            eps = (x - denoised_approx) / sigma
            eps[0] = eps_orig[0] + y0_style_neg_weight * (eps[0] - eps_orig[0])
            if eps.shape[0] == 2:
                eps[1] = eps_orig[1] + y0_style_neg_synweight * (eps[1] - eps_orig[1])
        elif eps.shape[0] == 1 and not UNCOND:
            eps[0] = eps_orig[0] + y0_style_neg_synweight * (eps[0] - eps_orig[0])

        eps = eps.float()

    return eps
def adain_seq(content: torch.Tensor, style: torch.Tensor, eps: float = 1e-7) -> torch.Tensor:
    """AdaIN over the token axis (dim 1): re-standardize `content` so each
    channel matches `style`'s per-channel mean and std. Non-mutating.
    """
    c_mean = content.mean(1, keepdim=True)
    c_std  = content.std(1, keepdim=True) + eps
    s_mean = style.mean(1, keepdim=True)
    s_std  = style.std(1, keepdim=True) + eps
    return (content - c_mean) / c_std * s_std + s_mean
def adain_seq_inplace(content: torch.Tensor, style: torch.Tensor, eps: float = 1e-7) -> torch.Tensor:
    """In-place AdaIN over the token axis (dim 1).

    Mutates `content` to carry `style`'s per-channel mean/std and returns the
    same tensor object.
    """
    s_mean = style.mean(1, keepdim=True)
    s_std  = style.std(1, keepdim=True).add_(eps)
    c_mean = content.mean(1, keepdim=True)
    c_std  = content.std(1, keepdim=True).add_(eps)
    # Single fused in-place chain: standardize, then re-color with style stats.
    content.sub_(c_mean).div_(c_std).mul_(s_std).add_(s_mean)
    return content
def gaussian_blur_2d(img: torch.Tensor, sigma: float, kernel_size: int = None) -> torch.Tensor:
    """Depthwise Gaussian blur with reflect padding; output keeps img's shape.

    When kernel_size is None it defaults to covering +/- 3 sigma; even sizes
    are bumped to the next odd so the kernel has a center tap.
    """
    _, channels, _, _ = img.shape

    if kernel_size is None:
        kernel_size = int(2 * math.ceil(3 * sigma) + 1)
    if kernel_size % 2 == 0:
        kernel_size += 1
    half = kernel_size // 2

    # Build the separable 1-D profile in float64 for accuracy, then form the
    # normalized 2-D kernel and cast to the image's dtype/device.
    offsets = torch.arange(kernel_size, dtype=torch.float64) - half
    taps = torch.exp(-0.5 * (offsets / sigma) ** 2)
    taps = taps / taps.sum()
    kern = (taps[:, None] * taps[None, :]).to(dtype=img.dtype, device=img.device)

    weight = kern.expand(channels, 1, kernel_size, kernel_size)
    padded = F.pad(img, (half, half, half, half), mode='reflect')
    # groups=channels makes the convolution depthwise (one kernel per channel).
    return F.conv2d(padded, weight, groups=channels)
def median_blur_2d(img: torch.Tensor, kernel_size: int = 3) -> torch.Tensor:
    """Per-channel median filter with reflect padding.

    img: [B, C, H, W]. Even kernel sizes are bumped to odd. Each output pixel
    is the median of its kernel_size x kernel_size neighborhood.
    """
    if kernel_size % 2 == 0:
        kernel_size += 1
    half = kernel_size // 2
    B, C, H, W = img.shape
    padded = F.pad(img, (half,) * 4, mode='reflect')
    # Sliding windows: [B, C, H, W, k, k], then flattened per window.
    windows = padded.unfold(2, kernel_size, 1).unfold(3, kernel_size, 1)
    flat = windows.contiguous().view(B, C, H, W, -1)
    return flat.median(dim=-1).values
def adain_patchwise(content: torch.Tensor, style: torch.Tensor, sigma: float = 1.0, kernel_size: int = None, eps: float = 1e-5) -> torch.Tensor:
    """Pixelwise AdaIN over Gaussian-weighted local windows.

    For every pixel, a kernel_size x kernel_size reflect-padded window around it
    supplies weighted mean/std for content and style; the window's center pixel
    is re-normalized from content stats to style stats. Returns a new tensor.
    """
    B, C, H, W = content.shape
    if kernel_size is None:
        kernel_size = int(2 * math.ceil(3 * sigma) + 1)
    if kernel_size % 2 == 0:
        kernel_size += 1
    half = kernel_size // 2
    xs = torch.arange(kernel_size, dtype=torch.float64, device=content.device) - half
    g = torch.exp(-0.5 * (xs / sigma) ** 2)
    g = g / g.sum()
    w2d = (g[:, None] * g[None, :]).to(dtype=content.dtype).view(1, 1, kernel_size, kernel_size)
    cp = F.pad(content, (half,) * 4, mode='reflect')
    sp = F.pad(style, (half,) * 4, mode='reflect')
    out = torch.zeros_like(content)
    for y in range(H):
        for x in range(W):
            cwin = cp[:, :, y:y + kernel_size, x:x + kernel_size]
            swin = sp[:, :, y:y + kernel_size, x:x + kernel_size]
            wts = w2d.expand_as(cwin)
            cm = (cwin * wts).sum(dim=(-1, -2), keepdim=True)
            cs = ((cwin - cm) ** 2 * wts).sum(dim=(-1, -2), keepdim=True).sqrt() + eps
            sm = (swin * wts).sum(dim=(-1, -2), keepdim=True)
            ss = ((swin - sm) ** 2 * wts).sum(dim=(-1, -2), keepdim=True).sqrt() + eps
            centered = (cwin[:, :, half:half + 1, half:half + 1] - cm) / cs
            out[:, :, y, x] = (centered * ss + sm).squeeze(-1).squeeze(-1)
    return out
def adain_patchwise_row_batch(content: torch.Tensor, style: torch.Tensor, sigma: float = 1.0, kernel_size: int = None, eps: float = 1e-5) -> torch.Tensor:
    """Row-batched variant of adain_patchwise: identical math, but all windows of
    one image row are stacked and their statistics computed in a single pass."""
    B, C, H, W = content.shape
    if kernel_size is None:
        kernel_size = int(2 * math.ceil(3 * sigma) + 1)
    if kernel_size % 2 == 0:
        kernel_size += 1
    half = kernel_size // 2
    xs = torch.arange(kernel_size, dtype=torch.float64, device=content.device) - half
    g = torch.exp(-0.5 * (xs / sigma) ** 2)
    g = (g / g.sum()).to(content.dtype)
    w2d = (g[:, None] * g[None, :]).view(1, 1, kernel_size, kernel_size)
    cp = F.pad(content, (half,) * 4, mode='reflect')
    sp = F.pad(style, (half,) * 4, mode='reflect')
    out = torch.zeros_like(content)
    for y in range(H):
        # Stack every window of this row: [W, B, C, k, k].
        cwins = torch.stack([cp[:, :, y:y + kernel_size, x:x + kernel_size] for x in range(W)])
        swins = torch.stack([sp[:, :, y:y + kernel_size, x:x + kernel_size] for x in range(W)])
        wts = w2d.expand_as(cwins[0])
        cm = (cwins * wts).sum(dim=(-1, -2), keepdim=True)
        cs = ((cwins - cm) ** 2 * wts).sum(dim=(-1, -2), keepdim=True).sqrt() + eps
        sm = (swins * wts).sum(dim=(-1, -2), keepdim=True)
        ss = ((swins - sm) ** 2 * wts).sum(dim=(-1, -2), keepdim=True).sqrt() + eps
        centers = cwins[:, :, :, half:half + 1, half:half + 1]
        stylized = (centers - cm) / cs * ss + sm
        # [W, B, C, 1, 1] -> [B, C, W]
        out[:, :, y, :] = stylized.squeeze(-1).squeeze(-1).permute(1, 2, 0)
    return out
def adain_patchwise_row_batch_medblur(content: torch.Tensor, style: torch.Tensor, sigma: float = 1.0, kernel_size: int = None, eps: float = 1e-5, mask: torch.Tensor = None, use_median_blur: bool = False) -> torch.Tensor:
    """Per-pixel AdaIN transfer with optional mask-aware edge softening.

    For each output pixel, statistics are taken over a kernel_size x kernel_size
    reflect-padded window. With use_median_blur=False, Gaussian-weighted mean/std
    are matched (classic AdaIN); with use_median_blur=True, median and mean
    absolute deviation are matched instead. When `mask` is given, pixels near
    mask boundaries get their stylization strength (and Gaussian sigma) faded.

    NOTE(review): the median branch yields [B, C, 1] tensors that are blended
    with a [B, 1, 1, 1] scaling factor; the broadcast only lines up for B == 1.
    Presumably this is only exercised with batch size 1 — confirm before using
    larger batches.
    """
    B, C, H, W = content.shape
    device, dtype = content.device, content.dtype
    if kernel_size is None:
        # Cover +/- 3 sigma; abs() tolerates negative-sigma conventions from callers.
        kernel_size = int(2 * math.ceil(3 * abs(sigma)) + 1)
    if kernel_size % 2 == 0:
        kernel_size += 1  # force odd so a center pixel exists
    pad = kernel_size // 2
    content_padded = F.pad(content, (pad, pad, pad, pad), mode='reflect')
    style_padded = F.pad(style, (pad, pad, pad, pad), mode='reflect')
    result = torch.zeros_like(content)
    # Per-pixel stylization strength (1 = full AdaIN, 0 = passthrough).
    scaling = torch.ones((B, 1, H, W), device=device, dtype=dtype)
    sigma_scale = torch.ones((H, W), device=device, dtype=torch.float32)
    if mask is not None:
        with torch.no_grad():
            # Blur the mask; edge_proximity peaks at 0.25 where the blurred mask
            # crosses 0.5 (i.e. at mask boundaries), so strength fades to 0 there.
            padded_mask = F.pad(mask.float(), (pad, pad, pad, pad), mode="reflect")
            blurred_mask = F.avg_pool2d(padded_mask, kernel_size=kernel_size, stride=1, padding=pad)
            blurred_mask = blurred_mask[..., pad:-pad, pad:-pad]
            edge_proximity = blurred_mask * (1.0 - blurred_mask)
            scaling = 1.0 - (edge_proximity / 0.25).clamp(0.0, 1.0)
            sigma_scale = scaling[0, 0]  # assuming single-channel mask broadcasted across B, C
    if not use_median_blur:
        coords = torch.arange(kernel_size, dtype=torch.float64, device=device) - pad
        # NOTE(review): base_gauss is computed but never read below; the per-scale
        # table supersedes it. Presumably leftover — confirm before removing.
        base_gauss = torch.exp(-0.5 * (coords / sigma) ** 2)
        base_gauss = (base_gauss / base_gauss.sum()).to(dtype)
        # One 2D Gaussian kernel per distinct edge-scaling value in the mask.
        gaussian_table = {}
        for s in sigma_scale.unique():
            sig = float((sigma * s + eps).clamp(min=1e-3))
            gauss_local = torch.exp(-0.5 * (coords / sig) ** 2)
            gauss_local = (gauss_local / gauss_local.sum()).to(dtype)
            kernel_2d = gauss_local[:, None] * gauss_local[None, :]
            gaussian_table[s.item()] = kernel_2d
    for i in range(H):
        row_result = torch.zeros(B, C, W, dtype=dtype, device=device)
        for j in range(W):
            c_patch = content_padded[:, :, i:i+kernel_size, j:j+kernel_size]
            s_patch = style_padded[:, :, i:i+kernel_size, j:j+kernel_size]
            if use_median_blur:
                # Robust statistics: median + mean absolute deviation.
                c_flat = c_patch.reshape(B, C, -1)
                s_flat = s_patch.reshape(B, C, -1)
                c_median = c_flat.median(dim=-1, keepdim=True).values
                s_median = s_flat.median(dim=-1, keepdim=True).values
                c_std = (c_flat - c_median).abs().mean(dim=-1, keepdim=True) + eps
                s_std = (s_flat - s_median).abs().mean(dim=-1, keepdim=True) + eps
                center = kernel_size // 2
                central = c_patch[:, :, center, center].unsqueeze(-1)
                normed = (central - c_median) / c_std
                stylized = normed * s_std + s_median
            else:
                # Gaussian-weighted mean/std matching for the window's center pixel;
                # kernel width shrinks near mask edges via sigma_scale.
                k = gaussian_table[float(sigma_scale[i, j].item())]
                local_weight = k.view(1, 1, kernel_size, kernel_size).expand(B, C, kernel_size, kernel_size)
                c_mean = (c_patch * local_weight).sum(dim=(-1, -2), keepdim=True)
                c_std = ((c_patch - c_mean) ** 2 * local_weight).sum(dim=(-1, -2), keepdim=True).sqrt() + eps
                s_mean = (s_patch * local_weight).sum(dim=(-1, -2), keepdim=True)
                s_std = ((s_patch - s_mean) ** 2 * local_weight).sum(dim=(-1, -2), keepdim=True).sqrt() + eps
                center = kernel_size // 2
                central = c_patch[:, :, center:center+1, center:center+1]
                normed = (central - c_mean) / c_std
                stylized = normed * s_std + s_mean
            # Blend stylized vs. original center pixel by the edge-aware strength.
            local_scaling = scaling[:, :, i, j].view(B, 1, 1, 1)
            stylized = central * (1 - local_scaling) + stylized * local_scaling
            row_result[:, :, j] = stylized.squeeze(-1).squeeze(-1)
        result[:, :, i, :] = row_result
    return result
def adain_patchwise_row_batch_realmedblur(content: torch.Tensor, style: torch.Tensor, sigma: float = 1.0, kernel_size: int = None, eps: float = 1e-5, mask: torch.Tensor = None, use_median_blur: bool = False, lowpass_weight=1.0, highpass_weight=1.0) -> torch.Tensor:
    """Patchwise style transfer with a "real median blur" variant.

    With use_median_blur=True each pixel is rebuilt as
    lowpass_weight * style_median + highpass_weight * (center - content_median):
    the style supplies the local low-frequency component while the content's
    high-frequency residual is restored. With use_median_blur=False this falls
    back to Gaussian-weighted AdaIN like adain_patchwise_row_batch_medblur.
    A mask fades the effect near mask boundaries.

    NOTE(review): in the Gaussian branch `stylized` stays 4-D but only one
    trailing dim is squeezed before the [B, C] row assignment, and the
    [B, 1, 1] scaling blend only broadcasts cleanly for B == 1. The median
    branch looks like the intended/used path — confirm before relying on the
    Gaussian path or batches > 1.
    """
    B, C, H, W = content.shape
    device, dtype = content.device, content.dtype
    if kernel_size is None:
        # Cover +/- 3 sigma; abs() tolerates negative-sigma conventions.
        kernel_size = int(2 * math.ceil(3 * abs(sigma)) + 1)
    if kernel_size % 2 == 0:
        kernel_size += 1  # force odd so a center pixel exists
    pad = kernel_size // 2
    content_padded = F.pad(content, (pad, pad, pad, pad), mode='reflect')
    style_padded = F.pad(style, (pad, pad, pad, pad), mode='reflect')
    result = torch.zeros_like(content)
    # Per-pixel stylization strength (1 = full effect, 0 = passthrough).
    scaling = torch.ones((B, 1, H, W), device=device, dtype=dtype)
    sigma_scale = torch.ones((H, W), device=device, dtype=torch.float32)
    if mask is not None:
        with torch.no_grad():
            # edge_proximity peaks (0.25) where the blurred mask crosses 0.5,
            # i.e. at mask boundaries, so strength fades to 0 there.
            padded_mask = F.pad(mask.float(), (pad, pad, pad, pad), mode="reflect")
            blurred_mask = F.avg_pool2d(padded_mask, kernel_size=kernel_size, stride=1, padding=pad)
            blurred_mask = blurred_mask[..., pad:-pad, pad:-pad]
            edge_proximity = blurred_mask * (1.0 - blurred_mask)
            scaling = 1.0 - (edge_proximity / 0.25).clamp(0.0, 1.0)
            sigma_scale = scaling[0, 0]  # assuming single-channel mask broadcasted across B, C
    if not use_median_blur:
        coords = torch.arange(kernel_size, dtype=torch.float64, device=device) - pad
        # NOTE(review): base_gauss is never read below (the per-scale table is);
        # presumably leftover — confirm before removing.
        base_gauss = torch.exp(-0.5 * (coords / sigma) ** 2)
        base_gauss = (base_gauss / base_gauss.sum()).to(dtype)
        # One 2D Gaussian kernel per distinct edge-scaling value in the mask.
        gaussian_table = {}
        for s in sigma_scale.unique():
            sig = float((sigma * s + eps).clamp(min=1e-3))
            gauss_local = torch.exp(-0.5 * (coords / sig) ** 2)
            gauss_local = (gauss_local / gauss_local.sum()).to(dtype)
            kernel_2d = gauss_local[:, None] * gauss_local[None, :]
            gaussian_table[s.item()] = kernel_2d
    for i in range(H):
        row_result = torch.zeros(B, C, W, dtype=dtype, device=device)
        for j in range(W):
            c_patch = content_padded[:, :, i:i+kernel_size, j:j+kernel_size]
            s_patch = style_padded[:, :, i:i+kernel_size, j:j+kernel_size]
            if use_median_blur:
                # Median blur with residual restoration
                unfolded_c = c_patch.reshape(B, C, -1)
                unfolded_s = s_patch.reshape(B, C, -1)
                c_median = unfolded_c.median(dim=-1, keepdim=True).values
                s_median = unfolded_s.median(dim=-1, keepdim=True).values
                center = kernel_size // 2
                central = c_patch[:, :, center, center].view(B, C, 1)
                residual = central - c_median  # content high-frequency detail
                stylized = lowpass_weight * s_median + residual * highpass_weight
            else:
                # Gaussian-weighted mean/std matching for the window's center pixel.
                k = gaussian_table[float(sigma_scale[i, j].item())]
                local_weight = k.view(1, 1, kernel_size, kernel_size).expand(B, C, kernel_size, kernel_size)
                c_mean = (c_patch * local_weight).sum(dim=(-1, -2), keepdim=True)
                c_std = ((c_patch - c_mean) ** 2 * local_weight).sum(dim=(-1, -2), keepdim=True).sqrt() + eps
                s_mean = (s_patch * local_weight).sum(dim=(-1, -2), keepdim=True)
                s_std = ((s_patch - s_mean) ** 2 * local_weight).sum(dim=(-1, -2), keepdim=True).sqrt() + eps
                center = kernel_size // 2
                central = c_patch[:, :, center:center+1, center:center+1]
                normed = (central - c_mean) / c_std
                stylized = normed * s_std + s_mean
            # Blend stylized vs. original center by the edge-aware strength.
            local_scaling = scaling[:, :, i, j].view(B, 1, 1)
            stylized = central * (1 - local_scaling) + stylized * local_scaling
            row_result[:, :, j] = stylized.squeeze(-1)
        result[:, :, i, :] = row_result
    return result
def patchwise_sort_transfer9(src: torch.Tensor, ref: torch.Tensor) -> torch.Tensor:
    """
    Rank-match transfer along the last dim: each position in `src` receives the
    `ref` value of equal rank (smallest src gets smallest ref, etc.).
    src, ref: [B, C, N] where N = K*K. Returns [B, C, N].
    """
    _, order = src.sort(dim=-1)
    ref_sorted = ref.sort(dim=-1).values
    result = torch.zeros_like(src)
    result.scatter_(dim=-1, index=order, src=ref_sorted)
    return result
def masked_patchwise_sort_transfer9(
    src       : torch.Tensor,  # [B, C, N]
    ref       : torch.Tensor,  # [B, C, N]
    mask_flat : torch.Tensor   # [B, N] bool
) -> torch.Tensor:
    """
    Rank-match transfer restricted to masked positions: for each batch item,
    only positions where mask_flat[b] is True take part in the sort/transfer;
    everything else is copied from src unchanged.
    """
    out = src.clone()
    for b in range(src.shape[0]):
        keep = mask_flat[b]  # [N] boolean selector for this batch item
        if not keep.any():
            continue
        src_sel = src[b, :, keep]  # [C, M]
        ref_sel = ref[b, :, keep]  # [C, M]
        _, order = src_sel.sort(dim=-1)
        ref_sorted = ref_sel.sort(dim=-1).values
        transferred = torch.zeros_like(src_sel)
        transferred.scatter_(dim=-1, index=order, src=ref_sorted)
        out[b, :, keep] = transferred
    return out
def adain_patchwise_strict_sortmatch9(
    content           : torch.Tensor,        # [B,C,H,W]
    style             : torch.Tensor,        # [B,C,H,W]
    kernel_size       : int,
    inner_kernel_size : int = 1,
    stride            : int = 1,
    mask              : torch.Tensor = None  # [B,1,H,W]
) -> torch.Tensor:
    """Strict sort-match transfer: for each kernel window, content values are
    rank-matched to style values (patchwise_sort_transfer9), and only the central
    inner_kernel_size x inner_kernel_size sub-window is written back.

    The optional mask restricts which destination pixels may be overwritten.
    Returns a new tensor (a clone of `content` is edited); inputs are untouched.
    """
    B,C,H,W = content.shape
    assert inner_kernel_size <= kernel_size
    pad = kernel_size//2
    inner_off = (kernel_size - inner_kernel_size)//2
    # reflect-pad
    cp = F.pad(content, (pad,)*4, mode='reflect')
    sp = F.pad(style, (pad,)*4, mode='reflect')
    out = content.clone()
    if mask is not None:
        mask = mask[:,0].bool()  # [B,H,W]
    for i in range(0, H, stride):
        for j in range(0, W, stride):
            pc = cp[:, :, i:i+kernel_size, j:j+kernel_size]  # [B,C,K,K]
            ps = sp[:, :, i:i+kernel_size, j:j+kernel_size]
            Bc = pc.reshape(B, C, -1)
            Bs = ps.reshape(B, C, -1)
            # Rank-match: each content position takes the style value of equal rank.
            matched_flat = patchwise_sort_transfer9(Bc, Bs)
            matched = matched_flat.view(B, C, kernel_size, kernel_size)
            # Central sub-window of the matched patch, in kernel coordinates.
            y0, x0 = inner_off, inner_off
            y1, x1 = y0 + inner_kernel_size, x0 + inner_kernel_size
            inner = matched[:, :, y0:y1, x0:x1]  # [B,C,inner,inner]
            # Destination rectangle in image coordinates (may overhang the borders).
            dst_y0 = i + y0 - pad
            dst_x0 = j + x0 - pad
            dst_y1 = dst_y0 + inner_kernel_size
            dst_x1 = dst_x0 + inner_kernel_size
            # Clip to the image, and advance the source window by the clipped amount.
            oy0 = max(dst_y0, 0); ox0 = max(dst_x0, 0)
            oy1 = min(dst_y1, H); ox1 = min(dst_x1, W)
            iy0 = oy0 - dst_y0; ix0 = ox0 - dst_x0
            iy1 = iy0 + (oy1 - oy0); ix1 = ix0 + (ox1 - ox0)
            if mask is None:
                out[:, :, oy0:oy1, ox0:ox1] = inner[:, :, iy0:iy1, ix0:ix1]
            else:
                ibm = mask[:, oy0:oy1, ox0:ox1]  # [B,inner,inner]
                for b in range(B):
                    sel = ibm[b]  # [inner,inner] # w/ regard to kernel
                    if sel.any():
                        # Overwrite only the masked positions for this batch item.
                        out[b:b+1, :, oy0:oy1, ox0:ox1][:, :,sel] = inner[b:b+1, :, iy0:iy1, ix0:ix1][:, :, sel]
    return out
================================================
FILE: conditioning.py
================================================
import torch
import torch.nn.functional as F
import math
from torch import Tensor
from typing import Optional, Callable, Tuple, Dict, Any, Union, TYPE_CHECKING, TypeVar, List
from dataclasses import dataclass, field
import copy
import base64
import pickle # used strictly for serializing conditioning in the ConditioningToBase64 and Base64ToConditioning nodes for API use. (Offloading T5 processing to another machine to avoid model shuffling.)
import comfy.supported_models
import node_helpers
import gc
from .sigmas import get_sigmas
from .helper import initialize_or_scale, precision_tool, get_res4lyf_scheduler_list, pad_tensor_list_to_max_len
from .latents import get_orthogonal, get_collinear
from .res4lyf import RESplain
from .beta.constants import MAX_STEPS
from .attention_masks import FullAttentionMask, FullAttentionMaskHiDream, CrossAttentionMask, SplitAttentionMask, RegionalContext
from .flux.redux import ReReduxImageEncoder
def multiply_nested_tensors(structure, scalar):
    """Recursively scale every tensor inside nested lists/dicts by `scalar`;
    non-tensor leaves are returned unchanged. Builds a new structure."""
    if isinstance(structure, torch.Tensor):
        return structure * scalar
    if isinstance(structure, list):
        return [multiply_nested_tensors(item, scalar) for item in structure]
    if isinstance(structure, dict):
        return {k: multiply_nested_tensors(v, scalar) for k, v in structure.items()}
    return structure
def pad_to_same_tokens(x1, x2, pad_value=0.0):
    """Pad the shorter of two [B, T, D] tensors along the token dim (dim 1) with
    `pad_value` so both have equal length. If lengths already match, the inputs
    are returned as-is (no copies)."""
    t1, t2 = x1.shape[1], x2.shape[1]
    if t1 == t2:
        return x1, x2
    target = max(t1, t2)
    padded_1 = F.pad(x1, (0, 0, 0, target - t1), value=pad_value)
    padded_2 = F.pad(x2, (0, 0, 0, target - t2), value=pad_value)
    return padded_1, padded_2
class ConditioningOrthoCollin:
    # Blends two conditionings by decomposing each T5 embedding into components
    # collinear with / orthogonal to the other (via get_collinear/get_orthogonal)
    # and lerping between the two collinear projections by t5_strength.
    # NOTE(review): mutates conditioning_0 in place and returns it.
    @classmethod
    def INPUT_TYPES(cls):
        return {"required": {
            "conditioning_0": ("CONDITIONING", ),
            "conditioning_1": ("CONDITIONING", ),
            "t5_strength"   : ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step":0.01}),
            "clip_strength" : ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step":0.01}),
            }}

    RETURN_TYPES = ("CONDITIONING",)
    RETURN_NAMES = ("conditioning",)
    FUNCTION = "combine"
    CATEGORY = "RES4LYF/conditioning"
    EXPERIMENTAL = True

    def combine(self, conditioning_0, conditioning_1, t5_strength, clip_strength):
        # Collinear/orthogonal split of the T5 embeddings, in both directions.
        t5_0_1_collin = get_collinear (conditioning_0[0][0], conditioning_1[0][0])
        t5_1_0_ortho  = get_orthogonal(conditioning_1[0][0], conditioning_0[0][0])
        # NOTE(review): t5_combined / t5_B_combined are unused by the active
        # path below (only the commented-out variants consumed them).
        t5_combined   = t5_0_1_collin + t5_1_0_ortho
        t5_1_0_collin = get_collinear (conditioning_1[0][0], conditioning_0[0][0])
        t5_0_1_ortho  = get_orthogonal(conditioning_0[0][0], conditioning_1[0][0])
        t5_B_combined = t5_1_0_collin + t5_0_1_ortho
        # Same split for the pooled CLIP outputs (batch dim added temporarily).
        pooled_0_1_collin = get_collinear (conditioning_0[0][1]['pooled_output'].unsqueeze(0), conditioning_1[0][1]['pooled_output'].unsqueeze(0)).squeeze(0)
        pooled_1_0_ortho  = get_orthogonal(conditioning_1[0][1]['pooled_output'].unsqueeze(0), conditioning_0[0][1]['pooled_output'].unsqueeze(0)).squeeze(0)
        pooled_combined = pooled_0_1_collin + pooled_1_0_ortho
        #conditioning_0[0][0] = conditioning_0[0][0] + t5_strength * (t5_combined - conditioning_0[0][0])
        #conditioning_0[0][0] = t5_strength * t5_combined + (1-t5_strength) * t5_B_combined
        # Lerp between the two collinear projections by t5_strength.
        conditioning_0[0][0] = t5_strength * t5_0_1_collin + (1-t5_strength) * t5_1_0_collin
        # Pooled output moves toward the combined projection by clip_strength.
        conditioning_0[0][1]['pooled_output'] = conditioning_0[0][1]['pooled_output'] + clip_strength * (pooled_combined - conditioning_0[0][1]['pooled_output'])
        return (conditioning_0, )
class CLIPTextEncodeFluxUnguided:
    # Encodes CLIP-L and T5-XXL prompts for Flux and additionally reports the
    # index of the first end-of-text token in each stream.
    @classmethod
    def INPUT_TYPES(cls):
        return {"required": {
            "clip"  : ("CLIP", ),
            "clip_l": ("STRING", {"multiline": True, "dynamicPrompts": True}),
            "t5xxl" : ("STRING", {"multiline": True, "dynamicPrompts": True}),
            }}

    RETURN_TYPES = ("CONDITIONING","INT","INT",)
    RETURN_NAMES = ("conditioning", "clip_l_end", "t5xxl_end",)
    FUNCTION = "encode"
    CATEGORY = "RES4LYF/conditioning"
    EXPERIMENTAL = True

    def encode(self, clip, clip_l, t5xxl):
        """Tokenize and encode both prompts; returns (conditioning, clip_l_end,
        t5xxl_end), where the *_end values are the positions of the first
        end-of-text token in each token stream (0 if not found). The end indices
        are also stashed in the conditioning dict under 'clip_l_end'/'t5xxl_end'."""
        tokens = clip.tokenize(clip_l)
        tokens["t5xxl"] = clip.tokenize(t5xxl)["t5xxl"]
        # First CLIP-L end-of-text token (id 49407).
        clip_l_end = 0
        for i in range(len(tokens['l'][0])):
            if tokens['l'][0][i][0] == 49407:
                clip_l_end = i
                break
        # First T5-XXL end token (id 1). Fixed: this loop previously ran over
        # the CLIP-L token count, so a T5 end token past the CLIP-L length was
        # never found (leaving t5xxl_end at 0).
        t5xxl_end = 0
        for i in range(len(tokens['t5xxl'][0])):
            if tokens['t5xxl'][0][i][0] == 1:
                t5xxl_end = i
                break
        output = clip.encode_from_tokens(tokens, return_pooled=True, return_dict=True)
        cond = output.pop("cond")
        conditioning = [[cond, output]]
        conditioning[0][1]['clip_l_end'] = clip_l_end
        conditioning[0][1]['t5xxl_end'] = t5xxl_end
        return (conditioning, clip_l_end, t5xxl_end,)
class StyleModelApplyStyle:
    # Applies a Flux Redux style model to conditioning through the model's own
    # feature_match() method (project API).
    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "conditioning": ("CONDITIONING", ),
                "style_model": ("STYLE_MODEL", ),
                "clip_vision_output": ("CLIP_VISION_OUTPUT", ),
                "strength": ("FLOAT", {"default": 1.0, "min": -10.0, "max": 10.0, "step": 0.001}),
            }
        }

    RETURN_TYPES = ("CONDITIONING",)
    RETURN_NAMES = ("conditioning",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/conditioning"
    DESCRIPTION = "Use with Flux Redux."
    EXPERIMENTAL = True

    def main(self, clip_vision_output, style_model, conditioning, strength=1.0):
        # NOTE(review): `strength` is accepted but never used by the active code
        # path — only the commented-out concat implementation below consumed it.
        # Presumably feature_match should take it; confirm against the Redux API.
        c = style_model.model.feature_match(conditioning, clip_vision_output)
        #cond = style_model.get_cond(clip_vision_output).flatten(start_dim=0, end_dim=1).unsqueeze(dim=0)
        #cond = strength * cond
        #c = []
        #for t in conditioning:
        #    n = [torch.cat((t[0], cond), dim=1), t[1].copy()]
        #    c.append(n)
        return (c, )
class ConditioningZeroAndTruncate:
    # needs updating to ensure dims are correct for arbitrary models without hardcoding.
    # vanilla ConditioningZeroOut node doesn't truncate and SD3.5M degrades badly with large embeddings, even if zeroed out, as the negative conditioning
    @classmethod
    def INPUT_TYPES(cls):
        return { "required": {"conditioning": ("CONDITIONING", )}}

    RETURN_TYPES = ("CONDITIONING",)
    RETURN_NAMES = ("conditioning",)
    FUNCTION = "zero_out"
    CATEGORY = "RES4LYF/conditioning"
    DESCRIPTION = "Use for negative conditioning with SD3.5. ConditioningZeroOut does not truncate the embedding, \
which results in severe degradation of image quality with SD3.5 when the token limit is exceeded."

    def zero_out(self, conditioning):
        """Replace each cond with zeros of shape (1, 154, 4096) — SD3.5's
        truncated embedding size — and zero the pooled output (1, 2048)."""
        out = []
        for cond, extras in conditioning:
            meta = extras.copy()
            if meta.get("pooled_output", None) is not None:
                meta["pooled_output"] = torch.zeros((1,2048), dtype=cond.dtype, device=cond.device)
            out.append([torch.zeros((1,154,4096), dtype=cond.dtype, device=cond.device), meta])
        return (out, )
class ConditioningTruncate:
    # needs updating to ensure dims are correct for arbitrary models without hardcoding.
    @classmethod
    def INPUT_TYPES(cls):
        return { "required": {"conditioning": ("CONDITIONING", )}}

    RETURN_TYPES = ("CONDITIONING",)
    RETURN_NAMES = ("conditioning",)
    FUNCTION = "zero_out"
    CATEGORY = "RES4LYF/conditioning"
    DESCRIPTION = "Use for positive conditioning with SD3.5. Tokens beyond 77 result in degradation of image quality."
    EXPERIMENTAL = True

    def zero_out(self, conditioning):
        """Truncate each cond to at most 154 tokens / 4096 channels, and each
        pooled output to 2048 channels (SD3.5's expected sizes)."""
        out = []
        for cond, extras in conditioning:
            meta = extras.copy()
            if meta.get("pooled_output", None) is not None:
                meta["pooled_output"] = meta["pooled_output"][:, :2048]
            out.append([cond[:, :154, :4096], meta])
        return (out, )
class ConditioningMultiply:
    """Scale every tensor in a conditioning structure by a constant factor."""
    @classmethod
    def INPUT_TYPES(cls):
        return {"required": {"conditioning": ("CONDITIONING", ),
                            "multiplier": ("FLOAT", {"default": 1.0, "min": -1000000000.0, "max": 1000000000.0, "step": 0.01})
                            }}

    RETURN_TYPES = ("CONDITIONING",)
    RETURN_NAMES = ("conditioning",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/conditioning"

    def main(self, conditioning, multiplier):
        # Delegates the recursion to multiply_nested_tensors (module helper).
        scaled = multiply_nested_tensors(conditioning, multiplier)
        return (scaled,)
class ConditioningAdd:
    """Add multiplier * conditioning_2 onto conditioning_1 (cond and pooled).
    Note: conditioning_1 is modified in place and returned."""
    @classmethod
    def INPUT_TYPES(cls):
        return {"required": {"conditioning_1": ("CONDITIONING", ),
                            "conditioning_2": ("CONDITIONING", ),
                            "multiplier": ("FLOAT", {"default": 1.0, "min": -1000000000.0, "max": 1000000000.0, "step": 0.01})
                            }}

    RETURN_TYPES = ("CONDITIONING",)
    RETURN_NAMES = ("conditioning",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/conditioning"

    def main(self, conditioning_1, conditioning_2, multiplier):
        addend_cond = conditioning_2[0][0]
        addend_pooled = conditioning_2[0][1]['pooled_output']
        # In-place tensor adds keep the same objects inside conditioning_1.
        conditioning_1[0][0] += multiplier * addend_cond
        conditioning_1[0][1]['pooled_output'] += multiplier * addend_pooled
        return (conditioning_1,)
class ConditioningCombine:
    """Concatenate two conditioning lists into one (both apply simultaneously)."""
    @classmethod
    def INPUT_TYPES(cls):
        return {"required": {"conditioning_1": ("CONDITIONING", ), "conditioning_2": ("CONDITIONING", )}}

    RETURN_TYPES = ("CONDITIONING",)
    RETURN_NAMES = ("conditioning",)
    FUNCTION = "combine"
    CATEGORY = "RES4LYF/conditioning"

    def combine(self, conditioning_1, conditioning_2):
        combined = [*conditioning_1, *conditioning_2]
        return (combined, )
class ConditioningAverage :
    """Weighted average of two conditionings: each cond in conditioning_to is
    blended with (a truncated/zero-padded copy of) the first cond in
    conditioning_from, weighted by conditioning_to_strength."""
    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "conditioning_to": ("CONDITIONING", ),
                "conditioning_from": ("CONDITIONING", ),
                "conditioning_to_strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01})
                }
            }

    RETURN_TYPES = ("CONDITIONING",)
    RETURN_NAMES = ("conditioning",)
    CATEGORY = "RES4LYF/conditioning"
    FUNCTION = "addWeighted"

    def addWeighted(self, conditioning_to, conditioning_from, conditioning_to_strength):
        out = []
        if len(conditioning_from) > 1:
            RESplain("Warning: ConditioningAverage conditioning_from contains more than 1 cond, only the first one will actually be applied to conditioning_to.")
        cond_from = conditioning_from[0][0]
        pooled_from = conditioning_from[0][1].get("pooled_output", None)
        for cond_to, meta_src in conditioning_to:
            pooled_to = meta_src.get("pooled_output", pooled_from)
            # Truncate 'from' to 'to' length, zero-padding when it is shorter.
            base = cond_from[:, :cond_to.shape[1]]
            if base.shape[1] < cond_to.shape[1]:
                filler = torch.zeros((1, (cond_to.shape[1] - base.shape[1]), cond_to.shape[2]))
                base = torch.cat([base, filler], dim=1)
            blended = cond_to * conditioning_to_strength + base * (1.0 - conditioning_to_strength)
            meta = meta_src.copy()
            if pooled_from is not None and pooled_to is not None:
                meta["pooled_output"] = pooled_to * conditioning_to_strength + pooled_from * (1.0 - conditioning_to_strength)
            elif pooled_from is not None:
                meta["pooled_output"] = pooled_from
            out.append([blended, meta])
        return (out, )
class ConditioningSetTimestepRange:
    """Restrict a conditioning's influence to a sampling-percentage window."""
    @classmethod
    def INPUT_TYPES(cls):
        return {"required": {"conditioning": ("CONDITIONING", ),
                            "start": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.001}),
                            "end": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.001})
                            }}

    RETURN_TYPES = ("CONDITIONING",)
    RETURN_NAMES = ("conditioning",)
    FUNCTION = "set_range"
    CATEGORY = "RES4LYF/conditioning"

    def set_range(self, conditioning, start, end):
        values = {"start_percent": start, "end_percent": end}
        return (node_helpers.conditioning_set_values(conditioning, values), )
class ConditioningAverageScheduler: # don't think this is implemented correctly. needs to be reworked
    # Schedules a per-step blend between two conditionings: for step k, the two
    # are averaged with weight ratio[k] and tagged with that step's start/end
    # percent window; all per-step conds are concatenated into one list.
    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "conditioning_0": ("CONDITIONING", ),
                "conditioning_1": ("CONDITIONING", ),
                "ratio": ("SIGMAS", ),
                }
            }

    RETURN_TYPES = ("CONDITIONING",)
    RETURN_NAMES = ("conditioning",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/conditioning"
    EXPERIMENTAL = True

    @staticmethod
    def addWeighted(conditioning_to, conditioning_from, conditioning_to_strength): #this function borrowed from comfyui
        # Weighted average of two conditionings (token-truncated/zero-padded as
        # needed); returns a plain list, unlike the node-facing variant.
        out = []
        if len(conditioning_from) > 1:
            RESplain("Warning: ConditioningAverage conditioning_from contains more than 1 cond, only the first one will actually be applied to conditioning_to.")
        cond_from = conditioning_from[0][0]
        pooled_output_from = conditioning_from[0][1].get("pooled_output", None)
        for i in range(len(conditioning_to)):
            t1 = conditioning_to[i][0]
            pooled_output_to = conditioning_to[i][1].get("pooled_output", pooled_output_from)
            # Truncate 'from' to 'to' length, zero-padding when it is shorter.
            t0 = cond_from[:,:t1.shape[1]]
            if t0.shape[1] < t1.shape[1]:
                t0 = torch.cat([t0] + [torch.zeros((1, (t1.shape[1] - t0.shape[1]), t1.shape[2]))], dim=1)
            tw = torch.mul(t1, conditioning_to_strength) + torch.mul(t0, (1.0 - conditioning_to_strength))
            t_to = conditioning_to[i][1].copy()
            if pooled_output_from is not None and pooled_output_to is not None:
                t_to["pooled_output"] = torch.mul(pooled_output_to, conditioning_to_strength) + torch.mul(pooled_output_from, (1.0 - conditioning_to_strength))
            elif pooled_output_from is not None:
                t_to["pooled_output"] = pooled_output_from
            n = [tw, t_to]
            out.append(n)
        return out

    @staticmethod
    def create_percent_array(steps):
        # Evenly partition [0, 1] into `steps` contiguous percent windows.
        step_size = 1.0 / steps
        return [{"start_percent": i * step_size, "end_percent": (i + 1) * step_size} for i in range(steps)]

    def main(self, conditioning_0, conditioning_1, ratio):
        # One blended cond per value in `ratio`, each active only in its window.
        steps = len(ratio)
        percents = self.create_percent_array(steps)
        cond = []
        for i in range(steps):
            average = self.addWeighted(conditioning_0, conditioning_1, ratio[i].item())
            cond += node_helpers.conditioning_set_values(average, {"start_percent": percents[i]["start_percent"], "end_percent": percents[i]["end_percent"]})
        return (cond,)
class StableCascade_StageB_Conditioning64:
    """Attach a Stage C latent as the 'stable_cascade_prior' of each cond,
    preparing conditioning for Stable Cascade Stage B."""
    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "conditioning": ("CONDITIONING",),
                "stage_c": ("LATENT",),
                }
            }

    RETURN_TYPES = ("CONDITIONING",)
    RETURN_NAMES = ("conditioning",)
    FUNCTION = "set_prior"
    CATEGORY = "RES4LYF/conditioning"

    @precision_tool.cast_tensor
    def set_prior(self, conditioning, stage_c):
        out = []
        for cond, meta in conditioning:
            updated = meta.copy()
            updated['stable_cascade_prior'] = stage_c['samples']
            out.append([cond, updated])
        return (out, )
class Conditioning_Recast64:
    """Upcast conditioning tensors (and pooled outputs, when present) to
    float64 in place. cond_1 is optional and passed through as None if absent."""
    @classmethod
    def INPUT_TYPES(cls):
        return {"required": { "cond_0": ("CONDITIONING",),
                },
                "optional": { "cond_1": ("CONDITIONING",),}
            }

    RETURN_TYPES = ("CONDITIONING","CONDITIONING",)
    RETURN_NAMES = ("cond_0_recast","cond_1_recast",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/precision"
    EXPERIMENTAL = True

    @precision_tool.cast_tensor
    def main(self, cond_0, cond_1 = None):
        cond_0[0][0] = cond_0[0][0].to(torch.float64)
        if 'pooled_output' in cond_0[0][1]:
            cond_0[0][1]["pooled_output"] = cond_0[0][1]["pooled_output"].to(torch.float64)
        if cond_1 is not None:
            cond_1[0][0] = cond_1[0][0].to(torch.float64)
            # Fixed: previously checked cond_0's dict here (copy-paste bug), so
            # whether cond_1's pooled_output was cast depended on cond_0.
            if 'pooled_output' in cond_1[0][1]:
                cond_1[0][1]["pooled_output"] = cond_1[0][1]["pooled_output"].to(torch.float64)
        return (cond_0, cond_1,)
class ConditioningToBase64:
    """Pickle + base64-encode conditioning into a string (for offloading text
    encoding to another machine over the API); also writes the string back into
    the workflow node's widget values when workflow metadata is available."""
    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "conditioning": ("CONDITIONING",),
            },
            "hidden": {
                "unique_id": "UNIQUE_ID",
                "extra_pnginfo": "EXTRA_PNGINFO",
            },
        }

    RETURN_TYPES = ("STRING",)
    RETURN_NAMES = ("string",)
    FUNCTION = "notify"
    OUTPUT_NODE = True
    OUTPUT_IS_LIST = (True,)
    CATEGORY = "RES4LYF/utilities"

    def notify(self, unique_id=None, extra_pnginfo=None, conditioning=None):
        encoded = base64.b64encode(pickle.dumps(conditioning)).decode('utf-8')
        text = [encoded]
        if unique_id is not None and extra_pnginfo is not None:
            if not isinstance(extra_pnginfo, list):
                RESplain("Error: extra_pnginfo is not a list")
            elif not isinstance(extra_pnginfo[0], dict) or "workflow" not in extra_pnginfo[0]:
                RESplain("Error: extra_pnginfo[0] is not a dict or missing 'workflow' key")
            else:
                workflow = extra_pnginfo[0]["workflow"]
                # Find this node in the workflow graph and mirror the output there.
                target = None
                for candidate in workflow["nodes"]:
                    if str(candidate["id"]) == str(unique_id[0]):
                        target = candidate
                        break
                if target:
                    target["widgets_values"] = [text]
        return {"ui": {"text": text}, "result": (text,)}
class Base64ToConditioning:
    """Inverse of ConditioningToBase64: base64-decode and unpickle a string back
    into a conditioning structure. Only use with trusted strings — pickle can
    execute arbitrary code on malicious data."""
    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "data": ("STRING", {"default": ""}),
            }
        }

    RETURN_TYPES = ("CONDITIONING",)
    RETURN_NAMES = ("conditioning",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/utilities"

    def main(self, data):
        raw = base64.b64decode(data)
        return (pickle.loads(raw),)
class ConditioningDownsampleT5:
    # Shrinks the T5 token sequence of a conditioning to at most token_limit
    # tokens via the module-level downsample_tokens helper.
    # NOTE(review): mutates conditioning[0][0] in place and returns the input.
    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "conditioning": ("CONDITIONING",),
                "token_limit" : ("INT", {'default': 128, 'min': 1, 'max': 16384}),
                },
            "optional": {
                }
            }

    RETURN_TYPES = ("CONDITIONING",)
    RETURN_NAMES = ("conditioning",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/conditioning"
    EXPERIMENTAL = True

    def main(self, conditioning, token_limit):
        conditioning[0][0] = downsample_tokens(conditioning[0][0], token_limit)
        return (conditioning, )
"""class ConditioningBatch4:
@classmethod
def INPUT_TYPES(cls):
return {
"required": {
"conditioning_0": ("CONDITIONING",),
},
"optional": {
"conditioning_1": ("CONDITIONING",),
"conditioning_2": ("CONDITIONING",),
"conditioning_3": ("CONDITIONING",),
}
}
RETURN_TYPES = ("CONDITIONING",)
RETURN_NAMES = ("conditioning",)
FUNCTION = "main"
CATEGORY = "RES4LYF/conditioning"
def main(self, conditioning_0, conditioning_1=None, conditioning_2=None, conditioning_3=None, ):
c = copy.deepcopy(conditioning_0)
if conditioning_1 is not None:
c.append(conditioning_1[0])
if conditioning_2 is not None:
c.append(conditioning_2[0])
if conditioning_3 is not None:
c.append(conditioning_3[0])
return (c, )"""
class ConditioningBatch4:
    """Collect up to four conditionings into a single batch list; unconnected
    optional inputs are skipped."""
    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "conditioning_0": ("CONDITIONING",),
            },
            "optional": {
                "conditioning_1": ("CONDITIONING",),
                "conditioning_2": ("CONDITIONING",),
                "conditioning_3": ("CONDITIONING",),
            }
        }

    RETURN_TYPES = ("CONDITIONING",)
    RETURN_NAMES = ("conditioning",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/conditioning"

    def main(self, conditioning_0, conditioning_1=None, conditioning_2=None, conditioning_3=None, ):
        extras = (conditioning_1, conditioning_2, conditioning_3)
        batch = [conditioning_0] + [cond for cond in extras if cond is not None]
        return (batch, )
class ConditioningBatch8:
    """Collect up to eight conditionings into a single batch list; unconnected
    optional inputs are skipped."""
    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "conditioning_0": ("CONDITIONING",),
            },
            "optional": {
                "conditioning_1": ("CONDITIONING",),
                "conditioning_2": ("CONDITIONING",),
                "conditioning_3": ("CONDITIONING",),
                "conditioning_4": ("CONDITIONING",),
                "conditioning_5": ("CONDITIONING",),
                "conditioning_6": ("CONDITIONING",),
                "conditioning_7": ("CONDITIONING",),
            }
        }

    RETURN_TYPES = ("CONDITIONING",)
    RETURN_NAMES = ("conditioning",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/conditioning"

    def main(self, conditioning_0, conditioning_1=None, conditioning_2=None, conditioning_3=None, conditioning_4=None, conditioning_5=None, conditioning_6=None, conditioning_7=None, ):
        extras = (conditioning_1, conditioning_2, conditioning_3, conditioning_4,
                  conditioning_5, conditioning_6, conditioning_7)
        batch = [conditioning_0] + [cond for cond in extras if cond is not None]
        return (batch, )
class EmptyConditioningGenerator:
    def __init__(self, model=None, conditioning=None, device=None, dtype=None):
        """ device, dtype currently unused """
        # Records the text-embedding geometry (token length, channel width,
        # pooled width) either from a known model config or from an example
        # conditioning, so correctly-shaped empty conditionings can be built.
        if model is not None:
            self.device = device
            self.dtype = dtype
            import comfy.supported_models
            self.model_config = model.model.model_config
            self.llama3_shape = None  # only set for HiDream (extra llama3 tensor)
            self.pooled_len = 0
            # Per-model-family embedding dimensions (hardcoded by family).
            if isinstance(self.model_config, comfy.supported_models.SD3):
                self.text_len_base = 154
                self.text_channels = 4096
                self.pooled_len = 2048
            elif isinstance(self.model_config, (comfy.supported_models.Flux, comfy.supported_models.FluxSchnell, comfy.supported_models.Chroma)):
                self.text_len_base = 256
                self.text_channels = 4096
                self.pooled_len = 768
            elif isinstance(self.model_config, comfy.supported_models.AuraFlow):
                self.text_len_base = 256
                self.text_channels = 2048
                #self.pooled_len = 1
            elif isinstance(self.model_config, comfy.supported_models.Stable_Cascade_C):
                self.text_len_base = 77
                self.text_channels = 1280
                self.pooled_len = 1280
            elif isinstance(self.model_config, comfy.supported_models.WAN21_T2V) or isinstance(self.model_config, comfy.supported_models.WAN21_I2V):
                self.text_len_base = 512
                self.text_channels = 5120 # sometimes needs to be 4096, like when initializing in samplers_py in shark?
                #self.pooled_len = 1
            elif isinstance(self.model_config, comfy.supported_models.HiDream):
                self.text_len_base = 128
                self.text_channels = 4096 # sometimes needs to be 4096, like when initializing in samplers_py in shark?
                self.pooled_len = 2048
                self.llama3_shape = torch.Size([1,32,128,4096])
            elif isinstance(self.model_config, comfy.supported_models.LTXV):
                self.text_len_base = 128
                self.text_channels = 4096
                #self.pooled_len = 1
            elif isinstance(self.model_config, comfy.supported_models.SD15):
                self.text_len_base = 77
                self.text_channels = 768
                self.pooled_len = 768
            elif isinstance(self.model_config, comfy.supported_models.SDXL):
                self.text_len_base = 77
                self.text_channels = 2048
                self.pooled_len = 1280
            elif isinstance(self.model_config, comfy.supported_models.HunyuanVideo) or \
                isinstance (self.model_config, comfy.supported_models.HunyuanVideoI2V) or \
                isinstance (self.model_config, comfy.supported_models.HunyuanVideoSkyreelsI2V):
                self.text_len_base = 128
                self.text_channels = 4096
                #self.pooled_len = 1
            else:
                raise ValueError(f"Unknown model config: {type(self.model_config)}")
        elif conditioning is not None:
            # No model: infer the geometry directly from an example conditioning.
            self.device = conditioning[0][0].device
            self.dtype = conditioning[0][0].dtype
            self.text_len_base = conditioning[0][0].shape[-2]
            if 'pooled_output' in conditioning[0][1]:
                self.pooled_len = conditioning[0][1]['pooled_output'].shape[-1]
            else:
                self.pooled_len = 0
            self.text_channels = conditioning[0][0].shape[-1]
def get_empty_conditioning(self):
if self.llama3_shape is not None and self.pooled_len > 0:
return [[
torch.zeros((1, self.text_len_base, self.text_channels)),
{
'pooled_output' : torch.zeros((1, self.pooled_len)),
'conditioning_llama3': torch.zeros(self.llama3_shape),
}
]]
elif self.pooled_len > 0:
return [[
torch.zeros((1, self.text_len_base, self.text_channels)),
{
'pooled_output': torch.zeros((1, self.pooled_len)),
}
]]
else:
return [[
torch.zeros((1, self.text_len_base, self.text_channels)),
]]
def get_empty_conditionings(self, count):
return [self.get_empty_conditioning() for _ in range(count)]
def zero_none_conditionings_(self, *conds):
if len(conds) == 1 and isinstance(conds[0], (list, tuple)):
conds = conds[0]
for i, cond in enumerate(conds):
conds[i] = self.get_empty_conditioning() if cond is None else cond
return conds
"""def zero_conditioning_from_list(conds):
for cond in conds:
if cond is not None:
for i in range(len(cond)):
pooled = cond[i][1].get('pooled_output')
pooled_len = pooled.shape[-1] if pooled is not None else 1 # 1 default pooled_output len for those without it
cond_zero = [[
torch.zeros_like(cond[i][0]),
{"pooled_output": torch.zeros((1,pooled_len), dtype=cond[i][0].dtype, device=cond[i][0].device)},
]]
return cond_zero"""
def zero_conditioning_from_list(conds):
    """Build a zero-filled conditioning modeled on the entries of `conds`.

    Scans every non-None conditioning in `conds` and, for each element, builds
    a zeroed token tensor plus zeroed 'pooled_output' / 'conditioning_llama3'
    extras matching that element's shapes (falling back to a pooled length of 1
    and the HiDream llama3 shape (1, 32, 128, 4096) when absent).  As in the
    original implementation, the template derived from the LAST element of the
    LAST non-None conditioning wins.

    Args:
        conds: iterable of conditionings (each `[[tokens, extras_dict], ...]`)
               in which None entries are skipped.

    Returns:
        A single-element conditioning `[[zero_tokens, zero_extras]]`.

    Raises:
        ValueError: if every entry of `conds` is None.  (Previously this case
        crashed with an UnboundLocalError on the unset local `cond_zero`.)
    """
    cond_zero = None
    for cond in conds:
        if cond is not None:
            for i in range(len(cond)):
                pooled = cond[i][1].get('pooled_output')
                llama3 = cond[i][1].get('conditioning_llama3')

                pooled_len   = pooled.shape[-1] if pooled is not None else 1
                llama3_shape = llama3.shape     if llama3 is not None else (1, 32, 128, 4096)

                # Zeros inherit dtype/device from the token tensor they mirror.
                cond_zero = [[
                    torch.zeros_like(cond[i][0]),
                    {
                        "pooled_output":       torch.zeros((1, pooled_len),  dtype=cond[i][0].dtype, device=cond[i][0].device),
                        "conditioning_llama3": torch.zeros(llama3_shape,     dtype=cond[i][0].dtype, device=cond[i][0].device),
                    },
                ]]
    if cond_zero is None:
        raise ValueError("zero_conditioning_from_list: all conditionings are None")
    return cond_zero
class TemporalMaskGenerator:
    """Make a temporal MASK that is 1.0 before `switch_frame` and 0.0 after
    (optionally inverted).  Frame counts are given in pixel frames; the latent
    time axis runs at quarter resolution."""
    @classmethod
    def INPUT_TYPES(cls):
        return {"required":
                    {
                    "switch_frame": ("INT", {"default": 33, "min": 1, "step": 4, "max": 0xffffffffffffffff}),
                    "frames":       ("INT", {"default": 65, "min": 1, "step": 4, "max": 0xffffffffffffffff}),
                    "invert_mask":  ("BOOLEAN", {"default": False}),
                    },
                "optional":
                    {
                    }
                }

    RETURN_TYPES = ("MASK",)
    RETURN_NAMES = ("temporal_mask",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/masks"
    EXPERIMENTAL = True

    def main(self,
            switch_frame = 33,
            frames       = 65,
            invert_mask  = False,
            ):
        # Convert pixel-frame indices to latent-frame indices (1/4 temporal res).
        latent_switch = switch_frame // 4
        latent_frames = frames // 4 + 1

        temporal_mask = torch.ones((latent_frames, 2, 2))
        temporal_mask[latent_switch:, ...] = 0.0

        if invert_mask:
            return (1 - temporal_mask,)
        return (temporal_mask,)
class TemporalSplitAttnMask_Midframe:
    """Make a stacked pair of temporal masks [cross-attn, self-attn], each 1.0
    before its midframe and 0.0 after, with independent inversion flags."""
    @classmethod
    def INPUT_TYPES(cls):
        return {"required":
                    {
                    "self_attn_midframe":  ("INT", {"default": 33, "min": 1, "step": 4, "max": 0xffffffffffffffff}),
                    "cross_attn_midframe": ("INT", {"default": 33, "min": 1, "step": 4, "max": 0xffffffffffffffff}),
                    "self_attn_invert":    ("BOOLEAN", {"default": False}),
                    "cross_attn_invert":   ("BOOLEAN", {"default": False}),
                    "frames":              ("INT", {"default": 65, "min": 1, "step": 4, "max": 0xffffffffffffffff}),
                    },
                "optional":
                    {
                    }
                }

    RETURN_TYPES = ("MASK",)
    RETURN_NAMES = ("temporal_mask",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/masks"
    EXPERIMENTAL = True

    def main(self,
            self_attn_midframe  = 33,
            cross_attn_midframe = 33,
            self_attn_invert    = False,
            cross_attn_invert   = False,
            frames              = 65,
            ):
        # Pixel frames -> latent frames (1/4 temporal resolution).
        frames = frames // 4 + 1

        def step_mask(midframe, invert):
            # Ones up to the (latent) midframe, zeros after; optionally flipped.
            m = torch.ones((frames, 2, 2))
            m[midframe // 4:, ...] = 0.0
            return (1 - m) if invert else m

        temporal_self_mask  = step_mask(self_attn_midframe,  self_attn_invert)
        temporal_cross_mask = step_mask(cross_attn_midframe, cross_attn_invert)

        # Downstream consumers expect the cross-attn mask first.
        return (torch.stack([temporal_cross_mask, temporal_self_mask]),)
class TemporalSplitAttnMask:
    """Make a stacked pair of temporal window masks [cross-attn, self-attn]:
    1.0 inside each [start, stop) window (latent frames), 0.0 outside.  Both
    masks share a common length (the larger of the two stops)."""
    @classmethod
    def INPUT_TYPES(cls):
        return {"required":
                    {
                    "self_attn_start":  ("INT", {"default": 1,  "min": 1, "step": 4, "max": 0xffffffffffffffff}),
                    "self_attn_stop":   ("INT", {"default": 33, "min": 1, "step": 4, "max": 0xffffffffffffffff}),
                    "cross_attn_start": ("INT", {"default": 1,  "min": 1, "step": 4, "max": 0xffffffffffffffff}),
                    "cross_attn_stop":  ("INT", {"default": 33, "min": 1, "step": 4, "max": 0xffffffffffffffff}),
                    },
                "optional":
                    {
                    }
                }

    RETURN_TYPES = ("MASK",)
    RETURN_NAMES = ("temporal_mask",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/masks"

    def main(self,
            self_attn_start  = 0,
            self_attn_stop   = 33,
            cross_attn_start = 0,
            cross_attn_stop  = 33,
            ):
        # Pixel frames -> latent frames; stops gain +1 so the window is inclusive.
        windows = [
            (cross_attn_start // 4, cross_attn_stop // 4 + 1),  # cross-attn first
            (self_attn_start  // 4, self_attn_stop  // 4 + 1),  # self-attn second
        ]
        length = max(stop for _, stop in windows)

        masks = []
        for start, stop in windows:
            m = torch.zeros((length, 1, 1))
            m[start:stop, ...] = 1.0
            masks.append(m)

        return (torch.stack(masks),)
class TemporalCrossAttnMask:
    """Make a temporal cross-attention window mask paired with an all-zero
    self-attention mask (the zero slot exists only so consumers can unstack a
    [cross, self] pair)."""
    @classmethod
    def INPUT_TYPES(cls):
        return {"required":
                    {
                    "cross_attn_start": ("INT", {"default": 1,  "min": 1, "step": 4, "max": 0xffffffffffffffff}),
                    "cross_attn_stop":  ("INT", {"default": 33, "min": 1, "step": 4, "max": 0xffffffffffffffff}),
                    },
                "optional":
                    {
                    }
                }

    RETURN_TYPES = ("MASK",)
    RETURN_NAMES = ("temporal_mask",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/masks"

    def main(self,
            cross_attn_start = 0,
            cross_attn_stop  = 33,
            ):
        # Pixel frames -> latent frames; stop gains +1 so the window is inclusive.
        start = cross_attn_start // 4
        stop  = cross_attn_stop // 4 + 1

        temporal_cross_mask = torch.zeros((stop, 1, 1))
        temporal_cross_mask[start:stop, ...] = 1.0

        # All-zero placeholder so the output stacks to the usual [cross, self] pair.
        temporal_self_mask = torch.zeros_like(temporal_cross_mask)

        return (torch.stack([temporal_cross_mask, temporal_self_mask]),)
@dataclass
class RegionalParameters:
    """Per-step schedules attached to a conditioning under the 'RegParam' key.

    In practice callers pass padded torch tensors despite the List[float]
    annotations (see the prepare_regional_cond methods below in this module).
    """
    # Regional guidance weight, indexed by sampler step.
    weights : List[float] = field(default_factory=list)
    # Region-bleed floor (lower bound) per sampler step.
    floors  : List[float] = field(default_factory=list)
# Dropdown option lists for the regional-conditioning nodes' "mask_type" input.
# "gradient*" variants keep soft (continuous) mask values; "boolean*" variants
# use hard masks.  Suffixes select which region(s) the treatment applies to.
# The actual semantics are implemented by the attention-mask classes these
# strings are passed to — these lists only populate the UI.

# Options for the 2-region (masked / unmasked) node.
REG_MASK_TYPE_2 = [
    "gradient",
    "gradient_masked",
    "gradient_unmasked",
    "boolean",
    "boolean_masked",
    "boolean_unmasked",
]

# Options for the 3-region (A / B / unmasked-remainder) node.
REG_MASK_TYPE_3 = [
    "gradient",
    "gradient_A",
    "gradient_B",
    "gradient_unmasked",
    "gradient_AB",
    "gradient_A,unmasked",
    "gradient_B,unmasked",
    "boolean",
    "boolean_A",
    "boolean_B",
    "boolean_unmasked",
    "boolean_AB",
    "boolean_A,unmasked",
    "boolean_B,unmasked",
]

# Options for the explicit two-mask (A / B) node.
REG_MASK_TYPE_AB = [
    "gradient",
    "gradient_A",
    "gradient_B",
    "boolean",
    "boolean_A",
    "boolean_B",
]

# Options for the explicit three-mask (A / B / C) node.
REG_MASK_TYPE_ABC = [
    "gradient",
    "gradient_A",
    "gradient_B",
    "gradient_C",
    "gradient_AB",
    "gradient_AC",
    "gradient_BC",
    "boolean",
    "boolean_A",
    "boolean_B",
    "boolean_C",
    "boolean_AB",
    "boolean_AC",
    "boolean_BC",
]
class ClownRegionalConditioning_AB:
    """Two-region conditioning node: conditioning_A applies under mask_A, and
    conditioning_B under mask_B.  The real work is deferred to a callback so
    the model is available when masks and attention contexts are prepared."""
    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "weight":                  ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.01}),
                "region_bleed":            ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.01}),
                "region_bleed_start_step": ("INT", {"default": 0, "min": 0, "max": 10000}),
                "weight_scheduler":        (["constant"] + get_res4lyf_scheduler_list(), {"default": "constant"},),
                "start_step":              ("INT", {"default": 0, "min": 0, "max": 10000}),
                "end_step":                ("INT", {"default": -1, "min": -1, "max": 10000}),
                "mask_type":               (REG_MASK_TYPE_AB, {"default": "boolean"}),
                "edge_width":              ("INT", {"default": 0, "min": 0, "max": 10000}),
                "invert_mask":             ("BOOLEAN", {"default": False}),
            },
            "optional": {
                "conditioning_A": ("CONDITIONING", ),
                "conditioning_B": ("CONDITIONING", ),
                "mask_A":         ("MASK", ),
                "mask_B":         ("MASK", ),
                "weights":        ("SIGMAS", ),
                "region_bleeds":  ("SIGMAS", ),
            }
        }

    RETURN_TYPES = ("CONDITIONING",)
    RETURN_NAMES = ("conditioning",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/conditioning"

    def create_callback(self, **kwargs):
        # Capture node inputs now; the sampler supplies the model later.
        def callback(model):
            kwargs["model"] = model
            pos_cond, = self.prepare_regional_cond(**kwargs)
            return pos_cond
        return callback

    def main(self,
            weight                  : float = 1.0,
            start_sigma             : float = 0.0,
            end_sigma               : float = 1.0,
            weight_scheduler                = None,
            start_step              : int   = 0,
            end_step                : int   = -1,
            conditioning_A                  = None,
            conditioning_B                  = None,
            weights                 : Tensor = None,
            region_bleeds           : Tensor = None,
            region_bleed            : float = 0.0,
            region_bleed_start_step : int   = 0,
            mask_type               : str   = "boolean",
            edge_width              : int   = 0,
            mask_A                          = None,
            mask_B                          = None,
            invert_mask             : bool  = False
            ) -> Tuple[Tensor]:
        """Return a placeholder zero conditioning that carries a
        'callback_regional' entry; the sampler invokes the callback with the
        model to get the real regional conditioning."""
        # A is treated as the "masked" region, B as the "unmasked" remainder.
        mask   = mask_A
        unmask = mask_B

        if end_step == -1:
            end_step = MAX_STEPS

        callback = self.create_callback(weight                  = weight,
                                        start_sigma             = start_sigma,
                                        end_sigma               = end_sigma,
                                        weight_scheduler        = weight_scheduler,
                                        start_step              = start_step,
                                        end_step                = end_step,
                                        weights                 = weights,
                                        region_bleeds           = region_bleeds,
                                        region_bleed            = region_bleed,
                                        region_bleed_start_step = region_bleed_start_step,
                                        mask_type               = mask_type,
                                        edge_width              = edge_width,
                                        mask                    = mask,
                                        unmask                  = unmask,
                                        invert_mask             = invert_mask,
                                        conditioning_A          = conditioning_A,
                                        conditioning_B          = conditioning_B,
                                        )

        # Zero-shaped stand-in; raises if both conditionings are None.
        cond = zero_conditioning_from_list([conditioning_A, conditioning_B])

        cond[0][1]['callback_regional'] = callback

        return (cond,)

    def prepare_regional_cond(self,
                            model,
                            weight                  : float = 1.0,
                            start_sigma             : float = 0.0,
                            end_sigma               : float = 1.0,
                            weight_scheduler                = None,
                            start_step              : int   = 0,
                            end_step                : int   = -1,
                            conditioning_A                  = None,
                            conditioning_B                  = None,
                            weights                 : Tensor = None,
                            region_bleeds           : Tensor = None,
                            region_bleed            : float = 0.0,
                            region_bleed_start_step : int   = 0,
                            mask_type               : str   = "gradient",
                            edge_width              : int   = 0,
                            mask                            = None,
                            unmask                          = None,
                            invert_mask             : bool  = False,
                            ) -> Tuple[Tensor]:
        """Build the actual regional conditioning: attention masks, regional
        context, and per-step weight/floor schedules ('RegParam')."""
        default_dtype  = torch.float64
        # NOTE(review): hard-coded CUDA device — confirm CPU-only setups are unsupported.
        default_device = torch.device("cuda")

        if end_step == -1:
            end_step = MAX_STEPS

        # Build a per-step weight schedule when none was supplied explicitly.
        if weights is None and weight_scheduler != "constant":
            total_steps = end_step - start_step
            weights     = get_sigmas(model, weight_scheduler, total_steps, 1.0).to(dtype=default_dtype, device=default_device) #/ model.inner_model.inner_model.model_sampling.sigma_max #scaling doesn't matter as this is a flux-only node
            prepend     = torch.zeros(start_step, dtype=default_dtype, device=default_device)
            weights     = torch.cat((prepend, weights), dim=0)

        if invert_mask and mask is not None:
            mask   = 1-mask
            # NOTE(review): unmask is only inverted when mask is not None, and
            # would raise if unmask is None here — confirm both masks are always
            # provided together.
            unmask = 1-unmask

        floor, floors = region_bleed, region_bleeds

        # Pad schedules out to MAX_STEPS; floors additionally get
        # region_bleed_start_step zeros prepended.
        weights = initialize_or_scale(weights, weight, end_step).to(default_dtype).to(default_device)
        weights = F.pad(weights, (0, MAX_STEPS), value=0.0)

        prepend = torch.full((region_bleed_start_step,), 0.0, dtype=default_dtype, device=default_device)
        floors  = initialize_or_scale(floors, floor, end_step).to(default_dtype).to(default_device)
        floors  = F.pad(floors, (0, MAX_STEPS), value=0.0)
        floors  = torch.cat((prepend, floors), dim=0)

        if (conditioning_A is None) and (conditioning_B is None):
            cond = None
        elif mask is not None:
            # Fill in missing conditionings with zeros of the model's shape.
            EmptyCondGen = EmptyConditioningGenerator(model)
            conditioning_A, conditioning_B = EmptyCondGen.zero_none_conditionings_([conditioning_A, conditioning_B])

            cond = copy.deepcopy(conditioning_A)

            # Pick the attention-mask flavor for the model family.
            if isinstance(model.model.model_config, (comfy.supported_models.WAN21_T2V, comfy.supported_models.WAN21_I2V)):
                if model.model.diffusion_model.blocks[0].self_attn.winderz_type != "false":
                    AttnMask = CrossAttentionMask(mask_type, edge_width)
                else:
                    AttnMask = SplitAttentionMask(mask_type, edge_width)
            elif isinstance(model.model.model_config, comfy.supported_models.HiDream):
                AttnMask = FullAttentionMaskHiDream(mask_type, edge_width)
            elif isinstance(model.model.model_config, (comfy.supported_models.SDXL, comfy.supported_models.SD15, comfy.supported_models.Stable_Cascade_C)):
                AttnMask = SplitAttentionMask(mask_type, edge_width)
            else:
                AttnMask = FullAttentionMask(mask_type, edge_width)
            RegContext = RegionalContext()

            if isinstance(model.model.model_config, comfy.supported_models.HiDream):
                # Region sizes per stream: [token len, llama3 len, llama3 len] —
                # the llama3 length appears twice; presumably one slot per llama3
                # branch — TODO confirm against FullAttentionMaskHiDream.
                AttnMask.add_region_sizes(
                    [
                        conditioning_A[0][0].shape[-2],
                        conditioning_A[0][1]['conditioning_llama3'][0,0,...].shape[-2],
                        conditioning_A[0][1]['conditioning_llama3'][0,0,...].shape[-2],
                    ],
                    mask)
                AttnMask.add_region_sizes(
                    [
                        conditioning_B[0][0].shape[-2],
                        conditioning_B[0][1]['conditioning_llama3'][0,0,...].shape[-2],
                        conditioning_B[0][1]['conditioning_llama3'][0,0,...].shape[-2],
                    ],
                    unmask)
                RegContext.add_region_llama3(conditioning_A[0][1]['conditioning_llama3'])
                RegContext.add_region_llama3(conditioning_B[0][1]['conditioning_llama3'])
            else:
                AttnMask.add_region(conditioning_A[0][0], mask)
                AttnMask.add_region(conditioning_B[0][0], unmask)
                RegContext.add_region(conditioning_A[0][0], conditioning_A[0][1].get('pooled_output'))
                RegContext.add_region(conditioning_B[0][0], conditioning_B[0][1].get('pooled_output'))

            if 'clip_vision_output' in conditioning_A[0][1]: # For WAN... dicey results
                RegContext.add_region_clip_fea(conditioning_A[0][1]['clip_vision_output'].penultimate_hidden_states)
                RegContext.add_region_clip_fea(conditioning_B[0][1]['clip_vision_output'].penultimate_hidden_states)

            if 'unclip_conditioning' in conditioning_A[0][1]:
                RegContext.add_region_clip_fea(conditioning_A[0][1]['unclip_conditioning'][0]['clip_vision_output'].image_embeds) #['penultimate_hidden_states'])
            if 'unclip_conditioning' in conditioning_B[0][1]:
                RegContext.add_region_clip_fea(conditioning_B[0][1]['unclip_conditioning'][0]['clip_vision_output'].image_embeds) #['penultimate_hidden_states'])

            cond[0][1]['AttnMask']   = AttnMask
            cond[0][1]['RegContext'] = RegContext

            # Merge region token tensors into the base, then average the pooled outputs.
            cond = merge_with_base(base=cond, others=[conditioning_A, conditioning_B])
            if 'pooled_output' in cond[0][1] and cond[0][1]['pooled_output'] is not None:
                cond[0][1]['pooled_output'] = (conditioning_A[0][1]['pooled_output'] + conditioning_B[0][1]['pooled_output']) / 2
            #if 'conditioning_llama3' in cond[0][1] and cond[0][1]['conditioning_llama3'] is not None:
            #    cond[0][1]['conditioning_llama3'] = (conditioning_A[0][1]['conditioning_llama3'] + conditioning_B[0][1]['conditioning_llama3']) / 2
            #cond[0] = list(cond[0])
            #cond[0][0] = (conditioning_A[0][0] + conditioning_B[0][0]) / 2
            #cond[0] = tuple(cond[0])
        else:
            # No mask: fall back to conditioning_A alone (regional machinery skipped).
            cond = conditioning_A

        cond[0][1]['RegParam'] = RegionalParameters(weights, floors)
        return (cond,)
class ClownRegionalConditioning_ABC:
    """Three-region conditioning node: conditionings A, B, C under masks A, B,
    C respectively (C is typically the leftover / unmasked area).  Mirrors the
    AB node; the real work happens in a deferred callback."""
    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "weight":                  ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.01}),
                "region_bleed":            ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.01}),
                "region_bleed_start_step": ("INT", {"default": 0, "min": 0, "max": 10000}),
                "weight_scheduler":        (["constant"] + get_res4lyf_scheduler_list(), {"default": "constant"},),
                "start_step":              ("INT", {"default": 0, "min": 0, "max": 10000}),
                "end_step":                ("INT", {"default": 100, "min": -1, "max": 10000}),
                "mask_type":               (REG_MASK_TYPE_ABC, {"default": "boolean"}),
                "edge_width":              ("INT", {"default": 0, "min": 0, "max": 10000}),
                "invert_mask":             ("BOOLEAN", {"default": False}),
            },
            "optional": {
                "conditioning_A": ("CONDITIONING", ),
                "conditioning_B": ("CONDITIONING", ),
                "conditioning_C": ("CONDITIONING", ),
                "mask_A":         ("MASK", ),
                "mask_B":         ("MASK", ),
                "mask_C":         ("MASK", ),
                "weights":        ("SIGMAS", ),
                "region_bleeds":  ("SIGMAS", ),
            }
        }

    RETURN_TYPES = ("CONDITIONING",)
    RETURN_NAMES = ("conditioning",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/conditioning"

    def create_callback(self, **kwargs):
        # Capture node inputs now; the sampler supplies the model later.
        def callback(model):
            kwargs["model"] = model
            pos_cond, = self.prepare_regional_cond(**kwargs)
            return pos_cond
        return callback

    def main(self,
            weight                  : float = 1.0,
            start_sigma             : float = 0.0,
            end_sigma               : float = 1.0,
            weight_scheduler                = None,
            start_step              : int   = 0,
            end_step                : int   = -1,
            conditioning_A                  = None,
            conditioning_B                  = None,
            conditioning_C                  = None,
            weights                 : Tensor = None,
            region_bleeds           : Tensor = None,
            region_bleed            : float = 0.0,
            region_bleed_start_step : int   = 0,
            mask_type               : str   = "boolean",
            edge_width              : int   = 0,
            mask_A                          = None,
            mask_B                          = None,
            mask_C                          = None,
            invert_mask             : bool  = False
            ) -> Tuple[Tensor]:
        """Return a placeholder zero conditioning carrying 'callback_regional';
        the sampler invokes the callback with the model."""
        if end_step == -1:
            end_step = MAX_STEPS

        callback = self.create_callback(weight                  = weight,
                                        start_sigma             = start_sigma,
                                        end_sigma               = end_sigma,
                                        weight_scheduler        = weight_scheduler,
                                        start_step              = start_step,
                                        end_step                = end_step,
                                        weights                 = weights,
                                        region_bleeds           = region_bleeds,
                                        region_bleed            = region_bleed,
                                        region_bleed_start_step = region_bleed_start_step,
                                        mask_type               = mask_type,
                                        edge_width              = edge_width,
                                        mask_A                  = mask_A,
                                        mask_B                  = mask_B,
                                        mask_C                  = mask_C,
                                        invert_mask             = invert_mask,
                                        conditioning_A          = conditioning_A,
                                        conditioning_B          = conditioning_B,
                                        conditioning_C          = conditioning_C,
                                        )

        # Zero-shaped stand-in; raises if all three conditionings are None.
        cond = zero_conditioning_from_list([conditioning_A, conditioning_B, conditioning_C])

        cond[0][1]['callback_regional'] = callback

        return (cond,)

    def prepare_regional_cond(self,
                            model,
                            weight                  : float = 1.0,
                            start_sigma             : float = 0.0,
                            end_sigma               : float = 1.0,
                            weight_scheduler                = None,
                            start_step              : int   = 0,
                            end_step                : int   = -1,
                            conditioning_A                  = None,
                            conditioning_B                  = None,
                            conditioning_C                  = None,
                            weights                 : Tensor = None,
                            region_bleeds           : Tensor = None,
                            region_bleed            : float = 0.0,
                            region_bleed_start_step : int   = 0,
                            mask_type               : str   = "boolean",
                            edge_width              : int   = 0,
                            mask_A                          = None,
                            mask_B                          = None,
                            mask_C                          = None,
                            invert_mask             : bool  = False,
                            ) -> Tuple[Tensor]:
        """Build the three-region conditioning: attention masks, regional
        context, and per-step weight/floor schedules ('RegParam')."""
        default_dtype  = torch.float64
        # NOTE(review): hard-coded CUDA device — confirm CPU-only setups are unsupported.
        default_device = torch.device("cuda")

        if end_step == -1:
            end_step = MAX_STEPS

        # Build a per-step weight schedule when none was supplied explicitly.
        if weights is None and weight_scheduler != "constant":
            total_steps = end_step - start_step
            weights     = get_sigmas(model, weight_scheduler, total_steps, 1.0).to(dtype=default_dtype, device=default_device) #/ model.inner_model.inner_model.model_sampling.sigma_max #scaling doesn't matter as this is a flux-only node
            prepend     = torch.zeros(start_step, dtype=default_dtype, device=default_device)
            weights     = torch.cat((prepend, weights), dim=0)

        if invert_mask and mask_A is not None:
            mask_A = 1-mask_A
        if invert_mask and mask_B is not None:
            mask_B = 1-mask_B
        # mask_C is the "everything else" (AB-inverse) region.
        mask_AB_inv = mask_C
        if invert_mask and mask_AB_inv is not None:
            mask_AB_inv = 1-mask_AB_inv

        floor, floors = region_bleed, region_bleeds

        # NOTE(review): unlike the AB node, weights is not moved with
        # .to(default_device) here — confirm this asymmetry is intentional.
        weights = initialize_or_scale(weights, weight, end_step).to(default_dtype)
        weights = F.pad(weights, (0, MAX_STEPS), value=0.0)

        # region_bleed_start_step zeros are prepended to the floor schedule only.
        prepend = torch.full((region_bleed_start_step,), 0.0, dtype=default_dtype, device=default_device)
        floors  = initialize_or_scale(floors, floor, end_step).to(default_dtype).to(default_device)
        floors  = F.pad(floors, (0, MAX_STEPS), value=0.0)
        floors  = torch.cat((prepend, floors), dim=0)

        if (conditioning_A is None) and (conditioning_B is None) and (conditioning_C is None):
            conditioning = None
        elif mask_A is not None:
            # Fill in missing conditionings with zeros of the model's shape.
            EmptyCondGen = EmptyConditioningGenerator(model)
            conditioning_A, conditioning_B, conditioning_C = EmptyCondGen.zero_none_conditionings_([conditioning_A, conditioning_B, conditioning_C])

            conditioning = copy.deepcopy(conditioning_A)

            # Pick the attention-mask flavor for the model family.
            if isinstance(model.model.model_config, (comfy.supported_models.WAN21_T2V, comfy.supported_models.WAN21_I2V)):
                if model.model.diffusion_model.blocks[0].self_attn.winderz_type != "false":
                    AttnMask = CrossAttentionMask(mask_type, edge_width)
                else:
                    AttnMask = SplitAttentionMask(mask_type, edge_width)
            elif isinstance(model.model.model_config, comfy.supported_models.HiDream):
                AttnMask = FullAttentionMaskHiDream(mask_type, edge_width)
            elif isinstance(model.model.model_config, (comfy.supported_models.SDXL, comfy.supported_models.SD15, comfy.supported_models.Stable_Cascade_C)):
                AttnMask = SplitAttentionMask(mask_type, edge_width)
            else:
                AttnMask = FullAttentionMask(mask_type, edge_width)
            RegContext = RegionalContext()

            if isinstance(model.model.model_config, comfy.supported_models.HiDream):
                # Region sizes per stream: [token len, llama3 len, llama3 len] —
                # the llama3 length appears twice; presumably one slot per llama3
                # branch — TODO confirm against FullAttentionMaskHiDream.
                AttnMask.add_region_sizes(
                    [
                        conditioning_A[0][0].shape[-2],
                        conditioning_A[0][1]['conditioning_llama3'][0,0,...].shape[-2],
                        conditioning_A[0][1]['conditioning_llama3'][0,0,...].shape[-2],
                    ],
                    mask_A)
                AttnMask.add_region_sizes(
                    [
                        conditioning_B[0][0].shape[-2],
                        conditioning_B[0][1]['conditioning_llama3'][0,0,...].shape[-2],
                        conditioning_B[0][1]['conditioning_llama3'][0,0,...].shape[-2],
                    ],
                    mask_B)
                AttnMask.add_region_sizes(
                    [
                        conditioning_C[0][0].shape[-2],
                        conditioning_C[0][1]['conditioning_llama3'][0,0,...].shape[-2],
                        conditioning_C[0][1]['conditioning_llama3'][0,0,...].shape[-2],
                    ],
                    mask_AB_inv)
                RegContext.add_region_llama3(conditioning_A[0][1]['conditioning_llama3'])
                RegContext.add_region_llama3(conditioning_B[0][1]['conditioning_llama3'])
                RegContext.add_region_llama3(conditioning_C[0][1]['conditioning_llama3'])
            else:
                AttnMask.add_region(conditioning_A[0][0], mask_A)
                AttnMask.add_region(conditioning_B[0][0], mask_B)
                AttnMask.add_region(conditioning_C[0][0], mask_AB_inv)
                RegContext.add_region(conditioning_A[0][0], conditioning_A[0][1].get('pooled_output'))
                RegContext.add_region(conditioning_B[0][0], conditioning_B[0][1].get('pooled_output'))
                RegContext.add_region(conditioning_C[0][0], conditioning_C[0][1].get('pooled_output'))

            #if 'pooled_output' in conditioning_A[0][1]:
            #    RegContext.pooled_output = conditioning_A[0][1]['pooled_output'] + conditioning_B[0][1]['pooled_output'] + conditioning_C[0][1]['pooled_output']

            conditioning[0][1]['AttnMask']   = AttnMask
            conditioning[0][1]['RegContext'] = RegContext

            # Merge region token tensors into the base, then average the pooled outputs.
            conditioning = merge_with_base(base=conditioning, others=[conditioning_A, conditioning_B, conditioning_C])
            if 'pooled_output' in conditioning[0][1] and conditioning[0][1]['pooled_output'] is not None:
                conditioning[0][1]['pooled_output'] = (conditioning_A[0][1]['pooled_output'] + conditioning_B[0][1]['pooled_output'] + conditioning_C[0][1]['pooled_output']) / 3
        else:
            # No mask_A: fall back to conditioning_A alone (regional machinery skipped).
            conditioning = conditioning_A

        conditioning[0][1]['RegParam'] = RegionalParameters(weights, floors)
        return (conditioning,)
class ClownRegionalConditioning2(ClownRegionalConditioning_AB):
    """Convenience wrapper around the A/B node: a single mask selects the
    "masked" conditioning and its complement (1 - mask) the "unmasked" one."""
    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "weight":                  ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.01}),
                "region_bleed":            ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.01}),
                "region_bleed_start_step": ("INT", {"default": 0, "min": 0, "max": 10000}),
                "weight_scheduler":        (["constant"] + get_res4lyf_scheduler_list(), {"default": "constant"},),
                "start_step":              ("INT", {"default": 0, "min": 0, "max": 10000}),
                "end_step":                ("INT", {"default": -1, "min": -1, "max": 10000}),
                "mask_type":               (REG_MASK_TYPE_2, {"default": "boolean"}),
                "edge_width":              ("INT", {"default": 0, "min": -10000, "max": 10000}),
                "invert_mask":             ("BOOLEAN", {"default": False}),
            },
            "optional": {
                "conditioning_masked":   ("CONDITIONING", ),
                "conditioning_unmasked": ("CONDITIONING", ),
                "mask":                  ("MASK", ),
                "weights":               ("SIGMAS", ),
                "region_bleeds":         ("SIGMAS", ),
            }
        }

    def main(self, conditioning_masked=None, conditioning_unmasked=None, mask=None, **kwargs):
        """Forward to ClownRegionalConditioning_AB.main with mask_B = 1 - mask.

        The three inputs are declared "optional" in INPUT_TYPES, so ComfyUI
        omits them entirely when unconnected; the None defaults (and the None
        guard on `mask`) keep that case from raising a TypeError.
        """
        return super().main(
            conditioning_A = conditioning_masked,
            conditioning_B = conditioning_unmasked,
            mask_A         = mask,
            # Complement of the mask covers the "unmasked" region; skip when no mask.
            mask_B         = (1 - mask) if mask is not None else None,
            **kwargs
        )
class ClownRegionalConditioning3(ClownRegionalConditioning_ABC):
    """Convenience wrapper around the A/B/C node: mask_C is derived as the
    leftover area not covered by mask_A or mask_B (clamped at zero)."""
    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "weight":                  ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.01}),
                "region_bleed":            ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.01}),
                "region_bleed_start_step": ("INT", {"default": 0, "min": 0, "max": 10000}),
                "weight_scheduler":        (["constant"] + get_res4lyf_scheduler_list(), {"default": "constant"},),
                "start_step":              ("INT", {"default": 0, "min": 0, "max": 10000}),
                "end_step":                ("INT", {"default": 100, "min": -1, "max": 10000}),
                "mask_type":               (REG_MASK_TYPE_3, {"default": "boolean"}),
                "edge_width":              ("INT", {"default": 0, "min": 0, "max": 10000}),
                "invert_mask":             ("BOOLEAN", {"default": False}),
            },
            "optional": {
                "conditioning_A":        ("CONDITIONING", ),
                "conditioning_B":        ("CONDITIONING", ),
                "conditioning_unmasked": ("CONDITIONING", ),
                "mask_A":                ("MASK", ),
                "mask_B":                ("MASK", ),
                "weights":               ("SIGMAS", ),
                "region_bleeds":         ("SIGMAS", ),
            }
        }

    def main(self, conditioning_unmasked=None, mask_A=None, mask_B=None, **kwargs):
        """Forward to ClownRegionalConditioning_ABC.main, computing mask_C as
        the area left uncovered by mask_A and mask_B.

        All three inputs are "optional" in INPUT_TYPES, so ComfyUI omits them
        when unconnected; the None defaults (and the guards below) keep that
        case from raising.  When both masks are supplied the result is exactly
        `clamp(1 - mask_A - mask_B, min=0)` as before.
        """
        if mask_A is None and mask_B is None:
            mask_AB_inv = None
        else:
            # Use whichever mask exists as the shape/dtype template.
            template = mask_A if mask_A is not None else mask_B
            mask_AB_inv = torch.ones_like(template)
            if mask_A is not None:
                mask_AB_inv = mask_AB_inv - mask_A
            if mask_B is not None:
                mask_AB_inv = mask_AB_inv - mask_B
            mask_AB_inv[mask_AB_inv < 0] = 0

        return super().main(
            conditioning_C = conditioning_unmasked,
            mask_A         = mask_A,
            mask_B         = mask_B,
            mask_C         = mask_AB_inv,
            **kwargs
        )
class ClownRegionalConditioning:
    """Append one region (conditioning + mask) to a COND_REGIONS list.

    When `mask` is None it defaults to all-ones shaped like the first existing
    region's mask; the masks of all existing regions are then subtracted (or
    AND-ed for boolean masks) so the new region covers only leftover area.
    """
    @classmethod
    def INPUT_TYPES(cls):
        return {"required":
                    {
                    "spineless":  ("BOOLEAN", {"default": False}),
                    "edge_width": ("INT", {"default": 0, "min": -10000, "max": 10000}),
                    },
                "optional":
                    {
                    "cond_regions": ("COND_REGIONS", ),
                    "conditioning": ("CONDITIONING", ),
                    "mask":         ("MASK", ),
                    }
                }

    RETURN_TYPES = ("COND_REGIONS",)
    RETURN_NAMES = ("cond_regions",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/conditioning"

    def main(self,
            spineless    = False,
            edge_width   = 0,
            cond_regions = None,
            conditioning = None,
            mask         = None,
            ):
        # Deep-copy so upstream region lists are never mutated.
        regions = copy.deepcopy(cond_regions) if cond_regions is not None else []

        if mask is None:
            mask = torch.ones_like(regions[0]['mask'])

        # Carve existing regions out of this one.
        for region in regions:
            other = region['mask'].to(regions[0]['mask'].dtype)
            if mask.dtype == torch.bool:
                mask &= other
            else:
                mask = mask - other
                mask[mask < 0] = 0.0

        regions.append(
            {
                'use_self_attn_mask': not spineless,
                'edge_width'        : edge_width,
                'conditioning'      : conditioning,
                'mask'              : mask,
            }
        )
        return (regions,)
class ClownRegionalConditionings:
    """N-region conditioning node driven by a COND_REGIONS list (built by
    ClownRegionalConditioning).  Same deferred-callback pattern as the AB/ABC
    nodes, but with per-region edge widths and self-attn-mask flags."""
    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "weight":                  ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.01}),
                "region_bleed":            ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.01}),
                "region_bleed_start_step": ("INT", {"default": 0, "min": 0, "max": 10000}),
                "weight_scheduler":        (["constant"] + get_res4lyf_scheduler_list(), {"default": "constant"},),
                "start_step":              ("INT", {"default": 0, "min": 0, "max": 10000}),
                "end_step":                ("INT", {"default": -1, "min": -1, "max": 10000}),
                "mask_type":               (["gradient", "boolean"], {"default": "boolean"}),
                "invert_masks":            ("BOOLEAN", {"default": False}),
            },
            "optional": {
                "cond_regions":  ("COND_REGIONS", ),
                "weights":       ("SIGMAS", ),
                "region_bleeds": ("SIGMAS", ),
            }
        }

    RETURN_TYPES = ("CONDITIONING",)
    RETURN_NAMES = ("conditioning",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/conditioning"

    def create_callback(self, **kwargs):
        # Capture node inputs now; the sampler supplies the model later.
        def callback(model):
            kwargs["model"] = model
            pos_cond, = self.prepare_regional_cond(**kwargs)
            return pos_cond
        return callback

    def main(self,
            weight                  : float = 1.0,
            start_sigma             : float = 0.0,
            end_sigma               : float = 1.0,
            weight_scheduler                = None,
            start_step              : int   = 0,
            end_step                : int   = -1,
            cond_regions                    = None,
            weights                 : Tensor = None,
            region_bleeds           : Tensor = None,
            region_bleed            : float = 0.0,
            region_bleed_start_step : int   = 0,
            mask_type               : str   = "boolean",
            invert_masks            : bool  = False
            ) -> Tuple[Tensor]:
        """Return a placeholder zero conditioning carrying 'callback_regional'.

        NOTE(review): cond_regions is declared optional but is iterated
        unconditionally below — confirm a disconnected input is impossible here.
        """
        if end_step == -1:
            end_step = MAX_STEPS

        callback = self.create_callback(weight                  = weight,
                                        start_sigma             = start_sigma,
                                        end_sigma               = end_sigma,
                                        weight_scheduler        = weight_scheduler,
                                        start_step              = start_step,
                                        end_step                = end_step,
                                        weights                 = weights,
                                        region_bleeds           = region_bleeds,
                                        region_bleed            = region_bleed,
                                        region_bleed_start_step = region_bleed_start_step,
                                        mask_type               = mask_type,
                                        invert_masks            = invert_masks,
                                        cond_regions            = cond_regions,
                                        )

        cond_list    = [region['conditioning'] for region in cond_regions]
        conditioning = zero_conditioning_from_list(cond_list)

        conditioning[0][1]['callback_regional'] = callback

        return (conditioning,)

    def prepare_regional_cond(self,
                            model,
                            weight                  : float = 1.0,
                            start_sigma             : float = 0.0,
                            end_sigma               : float = 1.0,
                            weight_scheduler                = None,
                            start_step              : int   = 0,
                            end_step                : int   = -1,
                            weights                 : Tensor = None,
                            region_bleeds           : Tensor = None,
                            region_bleed            : float = 0.0,
                            region_bleed_start_step : int   = 0,
                            mask_type               : str   = "gradient",
                            cond_regions                    = None,
                            invert_masks            : bool  = False,
                            ) -> Tuple[Tensor]:
        """Build the N-region conditioning from the cond_regions dicts."""
        default_dtype  = torch.float64
        # NOTE(review): hard-coded CUDA device — confirm CPU-only setups are unsupported.
        default_device = torch.device("cuda")

        # Unpack the per-region dicts into parallel lists.
        cond_list               = [region['conditioning']       for region in cond_regions]
        mask_list               = [region['mask']               for region in cond_regions]
        edge_width_list         = [region['edge_width']         for region in cond_regions]
        use_self_attn_mask_list = [region['use_self_attn_mask'] for region in cond_regions]

        if end_step == -1:
            end_step = MAX_STEPS

        # Build a per-step weight schedule when none was supplied explicitly.
        if weights is None and weight_scheduler != "constant":
            total_steps = end_step - start_step
            weights     = get_sigmas(model, weight_scheduler, total_steps, 1.0).to(dtype=default_dtype, device=default_device) #/ model.inner_model.inner_model.model_sampling.sigma_max #scaling doesn't matter as this is a flux-only node
            prepend     = torch.zeros(start_step, dtype=default_dtype, device=default_device)
            weights     = torch.cat((prepend, weights), dim=0)

        if invert_masks:
            for i in range(len(mask_list)):
                if mask_list[i].dtype == torch.bool:
                    mask_list[i] = ~mask_list[i]
                else:
                    mask_list[i] = 1 - mask_list[i]

        floor, floors = region_bleed, region_bleeds

        # Pad schedules out to MAX_STEPS; floors additionally get
        # region_bleed_start_step zeros prepended.
        weights = initialize_or_scale(weights, weight, end_step).to(default_dtype).to(default_device)
        weights = F.pad(weights, (0, MAX_STEPS), value=0.0)

        prepend = torch.full((region_bleed_start_step,), 0.0, dtype=default_dtype, device=default_device)
        floors  = initialize_or_scale(floors, floor, end_step).to(default_dtype).to(default_device)
        floors  = F.pad(floors, (0, MAX_STEPS), value=0.0)
        floors  = torch.cat((prepend, floors), dim=0)

        # Fill in missing conditionings with zeros of the model's shape.
        EmptyCondGen = EmptyConditioningGenerator(model)
        cond_list    = EmptyCondGen.zero_none_conditionings_(cond_list)

        conditioning = copy.deepcopy(cond_list[0])

        # Pick the attention-mask flavor for the model family.
        if isinstance(model.model.model_config, comfy.supported_models.WAN21_T2V) or isinstance(model.model.model_config, comfy.supported_models.WAN21_I2V):
            if model.model.diffusion_model.blocks[0].self_attn.winderz_type != "false":
                AttnMask = CrossAttentionMask          (mask_type, edge_width_list=edge_width_list, use_self_attn_mask_list=use_self_attn_mask_list)
            else:
                AttnMask = SplitAttentionMask          (mask_type, edge_width_list=edge_width_list, use_self_attn_mask_list=use_self_attn_mask_list)
        elif isinstance(model.model.model_config, comfy.supported_models.HiDream):
            AttnMask     = FullAttentionMaskHiDream    (mask_type, edge_width_list=edge_width_list, use_self_attn_mask_list=use_self_attn_mask_list)
        elif isinstance(model.model.model_config, comfy.supported_models.SDXL) or isinstance(model.model.model_config, comfy.supported_models.SD15):
            AttnMask     = SplitAttentionMask          (mask_type, edge_width_list=edge_width_list, use_self_attn_mask_list=use_self_attn_mask_list)
        else:
            AttnMask     = FullAttentionMask           (mask_type, edge_width_list=edge_width_list, use_self_attn_mask_list=use_self_attn_mask_list)
        RegContext = RegionalContext()

        for cond, mask in zip(cond_list, mask_list):
            if isinstance(model.model.model_config, comfy.supported_models.HiDream):
                # Region sizes per stream: [token len, llama3 len, llama3 len] —
                # llama3 length appears twice; presumably one slot per llama3
                # branch — TODO confirm against FullAttentionMaskHiDream.
                AttnMask.add_region_sizes(
                    [
                        cond[0][0].shape[-2],
                        cond[0][1]['conditioning_llama3'][0,0,...].shape[-2],
                        cond[0][1]['conditioning_llama3'][0,0,...].shape[-2],
                    ],
                    mask)
                RegContext.add_region_llama3(cond[0][1]['conditioning_llama3'])
            else:
                AttnMask.add_region(cond[0][0], mask)
                RegContext.add_region(cond[0][0])

            if 'clip_vision_output' in cond[0][1]: # For WAN... dicey results
                RegContext.add_region_clip_fea(cond[0][1]['clip_vision_output'].penultimate_hidden_states)

        conditioning[0][1]['AttnMask']   = AttnMask
        conditioning[0][1]['RegContext'] = RegContext
        conditioning[0][1]['RegParam']   = RegionalParameters(weights, floors)

        # Merge region token tensors into the base, then average pooled outputs.
        conditioning = merge_with_base(base=conditioning, others=cond_list)
        if 'pooled_output' in conditioning[0][1] and conditioning[0][1]['pooled_output'] is not None:
            conditioning[0][1]['pooled_output'] = torch.stack([cond_tmp[0][1]['pooled_output'] for cond_tmp in cond_list]).mean(dim=0)
            #conditioning[0][1]['pooled_output'] = cond_list[0][0][1]['pooled_output']

        return (conditioning,)
def merge_with_base(
    base:   List[Tuple[torch.Tensor, Dict[str, Any]]],
    others: List[List[Tuple[torch.Tensor, Dict[str, Any]]]],
    dim:    int = -2,
) -> List[Tuple[torch.Tensor, Dict[str, Any]]]:
    """
    Merge `base` plus an arbitrary list of other conditioning objects.

    NOTE: `base` (and the info dicts inside it) is mutated in place and also
    returned — it serves purely as an accumulator:
      - The base tensors are zeroed out first.
      - For each level present in `base`:
          * collect the zeroed base tensor plus every other's same-level
            tensor, pad them along `dim` to a common length, and sum them;
          * do the same pad-and-sum for every tensor-valued key in the base
            info dict at that level.
      - Non-tensor entries in the base info dicts are preserved untouched.

    Args:
        base:   conditioning list of (tokens, info) tuples; mutated in place.
        others: additional conditioning lists to fold into `base`.
        dim:    dimension along which tensors are padded to a common length.

    Returns:
        The mutated `base` list.
    """
    # BUGFIX: `max(len(base), *(len(p) for p in others))` raised
    # TypeError when `others` was empty (max() received a single int);
    # passing a list handles that case correctly.
    max_levels = max([len(base), *(len(p) for p in others)])
    for lvl in range(max_levels):
        if lvl >= len(base):  # if base lacks this level, skip entirely
            continue

        # --- tokens merge ---
        base_tokens, base_info = base[lvl]
        toks = [torch.zeros_like(base_tokens)]  # zeroed accumulator seed

        # zero out any tensor fields in base_info so they act as seeds too
        for key, val in base_info.items():
            if isinstance(val, torch.Tensor):
                base_info[key] = torch.zeros_like(val)

        # collect same-level tokens from each other conditioning
        for pos in others:
            if lvl < len(pos):
                toks.append(pos[lvl][0])

        toks = pad_tensor_list_to_max_len(toks, dim=dim)
        base[lvl] = (sum(toks), base_info)

        # --- info-dict tensor merge ---
        for key, val in list(base_info.items()):
            if not isinstance(val, torch.Tensor):
                continue
            pieces = [val]  # base entry was zeroed above
            for pos in others:
                if lvl < len(pos):
                    info_i = pos[lvl][1]
                    if key in info_i and isinstance(info_i[key], torch.Tensor):
                        pieces.append(info_i[key])
            pieces = pad_tensor_list_to_max_len(pieces, dim=dim)
            base_info[key] = sum(pieces)

    return base
def best_hw(n):
    """Return the divisor pair (h, w) of n with h <= w whose difference w - h
    is smallest, i.e. the factorization closest to a true square."""
    # Among divisors h <= sqrt(n), the gap n//h - h shrinks as h grows, so the
    # first divisor found scanning downward from the root is the best pair.
    for h in range(int(n**0.5), 0, -1):
        if n % h == 0:
            return (h, n // h)
    return (1, n)
def downsample_tokens(cond: torch.Tensor, target_tokens: int, mode="bicubic") -> torch.Tensor:
    """Resample a token sequence [B, T, D] down to target_tokens tokens.

    The sequence is zero-padded to the next perfect-square length, laid out
    as a square 2D grid of D-dim features, interpolated to the most
    square-like H x W factorization of target_tokens, then flattened back.
    """
    batch, seq_len, feat_dim = cond.shape

    # Zero-pad the sequence up to the next perfect square.
    side = math.ceil(seq_len**0.5)
    square_len = side * side
    if square_len > seq_len:
        zeros = torch.zeros(batch, square_len - seq_len, feat_dim,
                            dtype=cond.dtype, device=cond.device)
        cond = torch.cat([cond, zeros], dim=1)

    # [B, L, D] -> [B, D, side, side] so F.interpolate sees a 2D feature map.
    grid = cond.view(batch, side, side, feat_dim).permute(0, 3, 1, 2)

    # Resize to the most square-like factorization of target_tokens.
    h_out, w_out = best_hw(target_tokens)
    resized = F.interpolate(grid, size=(h_out, w_out), mode=mode)

    # Flatten back to [B, target_tokens, D]; the slice is a safeguard, since
    # h_out * w_out == target_tokens by construction it is a no-op.
    flat = resized.permute(0, 2, 3, 1).reshape(batch, -1, feat_dim)
    return flat[:, :target_tokens, :]
class CrossAttn_EraseReplace_HiDream:
    # ComfyUI node (experimental): encodes two prompt sets for HiDream
    # cross-attention erase/replace editing.  The "replace" encoding is
    # returned on the "positive" output and the "erase" encoding on the
    # "negative" output.
    @classmethod
    def INPUT_TYPES(s):
        # Eight multiline string prompts: full-prompt and single-token
        # variants of the erase/replace text, for both the T5-XXL and the
        # Llama text-encoder streams used by HiDream.
        return {"required": {
            "clip": ("CLIP", ),
            "t5xxl_erase": ("STRING", {"multiline": True, "dynamicPrompts": True}),
            "llama_erase": ("STRING", {"multiline": True, "dynamicPrompts": True}),
            "t5xxl_replace": ("STRING", {"multiline": True, "dynamicPrompts": True}),
            "llama_replace": ("STRING", {"multiline": True, "dynamicPrompts": True}),
            "t5xxl_erase_token": ("STRING", {"multiline": True, "dynamicPrompts": True}),
            "llama_erase_token": ("STRING", {"multiline": True, "dynamicPrompts": True}),
            "t5xxl_replace_token": ("STRING", {"multiline": True, "dynamicPrompts": True}),
            "llama_replace_token": ("STRING", {"multiline": True, "dynamicPrompts": True}),
        }}

    RETURN_TYPES = ("CONDITIONING","CONDITIONING",)
    RETURN_NAMES = ("positive", "negative",)
    FUNCTION = "encode"

    CATEGORY = "advanced/conditioning"
    EXPERIMENTAL = True

    def encode(self, clip, t5xxl_erase, llama_erase, t5xxl_replace, llama_replace, t5xxl_erase_token, llama_erase_token, t5xxl_replace_token, llama_replace_token):
        # Seed each token dict with empty CLIP-L tokens, then override the
        # t5xxl and llama streams with the supplied prompts.
        tokens_erase = clip.tokenize("")
        tokens_erase["l"] = clip.tokenize("")["l"]

        tokens_replace = clip.tokenize("")
        tokens_replace["l"] = clip.tokenize("")["l"]

        tokens_erase ["t5xxl"] = clip.tokenize(t5xxl_erase) ["t5xxl"]
        tokens_erase ["llama"] = clip.tokenize(llama_erase) ["llama"]
        tokens_replace["t5xxl"] = clip.tokenize(t5xxl_replace)["t5xxl"]
        tokens_replace["llama"] = clip.tokenize(llama_replace)["llama"]

        # NOTE(review): the four *_token dicts below are constructed but
        # never encoded or returned — presumably groundwork for token-level
        # erase/replace; confirm intent before removing.
        tokens_erase_token = clip.tokenize("")
        tokens_erase_token["l"] = clip.tokenize("")["l"]

        tokens_replace_token = clip.tokenize("")
        tokens_replace_token["l"] = clip.tokenize("")["l"]

        tokens_erase_token ["t5xxl"] = clip.tokenize(t5xxl_erase_token) ["t5xxl"]
        tokens_erase_token ["llama"] = clip.tokenize(llama_erase_token) ["llama"]
        tokens_replace_token["t5xxl"] = clip.tokenize(t5xxl_replace_token)["t5xxl"]
        tokens_replace_token["llama"] = clip.tokenize(llama_replace_token)["llama"]

        encoded_erase   = clip.encode_from_tokens_scheduled(tokens_erase)
        encoded_replace = clip.encode_from_tokens_scheduled(tokens_replace)

        # replace -> "positive", erase -> "negative" (see RETURN_NAMES).
        return (encoded_replace, encoded_erase, )
class CrossAttn_EraseReplace_Flux:
    # ComfyUI node (experimental): encodes two prompt sets for Flux
    # cross-attention erase/replace editing.  The "replace" encoding is
    # returned on the "positive" output and the "erase" encoding on the
    # "negative" output.
    @classmethod
    def INPUT_TYPES(s):
        # Four multiline string prompts: full-prompt and single-token
        # variants of the erase/replace text for the T5-XXL stream.
        return {"required": {
            "clip": ("CLIP", ),
            "t5xxl_erase": ("STRING", {"multiline": True, "dynamicPrompts": True}),
            "t5xxl_replace": ("STRING", {"multiline": True, "dynamicPrompts": True}),
            "t5xxl_erase_token": ("STRING", {"multiline": True, "dynamicPrompts": True}),
            "t5xxl_replace_token": ("STRING", {"multiline": True, "dynamicPrompts": True}),
        }}

    RETURN_TYPES = ("CONDITIONING","CONDITIONING",)
    RETURN_NAMES = ("positive", "negative",)
    FUNCTION = "encode"

    CATEGORY = "advanced/conditioning"
    EXPERIMENTAL = True

    def encode(self, clip, t5xxl_erase, t5xxl_replace, t5xxl_erase_token, t5xxl_replace_token):
        # BUGFIX: the previous signature also required llama_* arguments that
        # INPUT_TYPES never declares, so ComfyUI (which invokes FUNCTION with
        # the declared inputs as kwargs) could not call this node at all; the
        # body also indexed clip.tokenize(...)["llama"], a stream that does
        # not exist for Flux text encoders (copy-paste from the HiDream
        # variant).  Both are fixed here.
        tokens_erase = clip.tokenize("")
        tokens_erase["l"] = clip.tokenize("")["l"]

        tokens_replace = clip.tokenize("")
        tokens_replace["l"] = clip.tokenize("")["l"]

        tokens_erase ["t5xxl"] = clip.tokenize(t5xxl_erase) ["t5xxl"]
        tokens_replace["t5xxl"] = clip.tokenize(t5xxl_replace)["t5xxl"]

        # NOTE(review): the *_token dicts below are constructed but never
        # encoded or returned, mirroring the HiDream variant — presumably
        # groundwork for token-level erase/replace; confirm before removing.
        tokens_erase_token = clip.tokenize("")
        tokens_erase_token["l"] = clip.tokenize("")["l"]

        tokens_replace_token = clip.tokenize("")
        tokens_replace_token["l"] = clip.tokenize("")["l"]

        tokens_erase_token ["t5xxl"] = clip.tokenize(t5xxl_erase_token) ["t5xxl"]
        tokens_replace_token["t5xxl"] = clip.tokenize(t5xxl_replace_token)["t5xxl"]

        encoded_erase   = clip.encode_from_tokens_scheduled(tokens_erase)
        encoded_replace = clip.encode_from_tokens_scheduled(tokens_replace)

        # replace -> "positive", erase -> "negative" (see RETURN_NAMES).
        return (encoded_replace, encoded_erase, )
================================================
FILE: example_workflows/chroma regional antiblur.json
================================================
{"last_node_id":726,"last_link_id":2104,"nodes":[{"id":13,"type":"Reroute","pos":[1280,-650],"size":[75,26],"flags":{},"order":12,"mode":0,"inputs":[{"name":"","type":"*","link":2098}],"outputs":[{"name":"","type":"MODEL","links":[1967],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":490,"type":"Reroute","pos":[1280,-610],"size":[75,26],"flags":{},"order":9,"mode":0,"inputs":[{"name":"","type":"*","link":2099}],"outputs":[{"name":"","type":"CLIP","links":[1939,2092,2101],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":14,"type":"Reroute","pos":[1280,-570],"size":[75,26],"flags":{},"order":10,"mode":0,"inputs":[{"name":"","type":"*","link":2100}],"outputs":[{"name":"","type":"VAE","links":[18,1328],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":398,"type":"SaveImage","pos":[1379.9996337890625,-267.2835998535156],"size":[341.7508850097656,561.0067749023438],"flags":{},"order":21,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":1329}],"outputs":[],"properties":{"Node name for S&R":"SaveImage","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["ComfyUI"]},{"id":701,"type":"Note","pos":[80,-520],"size":[342.05950927734375,88],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["I usually just lazily draw masks in Load Image nodes (with some random image loaded), but for the sake of reproducibility, here's another approach."],"color":"#432","bgcolor":"#653"},{"id":712,"type":"Note","pos":[-210,-520],"size":[245.76409912109375,91.6677017211914],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["So long as these masks are all the same size, the regional conditioning nodes will handle resizing to the image size for 
you."],"color":"#432","bgcolor":"#653"},{"id":676,"type":"InvertMask","pos":[20,-370],"size":[142.42074584960938,26],"flags":{},"order":7,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":2073}],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[2083],"slot_index":0}],"properties":{"Node name for S&R":"InvertMask"},"widgets_values":[]},{"id":7,"type":"VAEEncodeAdvanced","pos":[719.6110229492188,16.752899169921875],"size":[261.2217712402344,279.3136901855469],"flags":{},"order":16,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":null},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":18}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":[],"slot_index":1},{"name":"mask","localized_name":"mask","type":"MASK","links":[],"slot_index":2},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[1399],"slot_index":3},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"VAEEncodeAdvanced","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":["false",1024,1024,"red",false,"16_channels"]},{"id":710,"type":"MaskPreview","pos":[180,-190],"size":[210,246],"flags":{},"order":17,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":2054}],"outputs":[],"properties":{"Node name for 
S&R":"MaskPreview"},"widgets_values":[]},{"id":397,"type":"VAEDecode","pos":[1382.3662109375,-374.17059326171875],"size":[210,46],"flags":{},"order":20,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":2096},{"name":"vae","localized_name":"vae","type":"VAE","link":1328}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[1329],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[]},{"id":715,"type":"SolidMask","pos":[-220,-370],"size":[210,106],"flags":{},"order":2,"mode":0,"inputs":[],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[2073],"slot_index":0}],"properties":{"Node name for S&R":"SolidMask"},"widgets_values":[1,1024,1024]},{"id":716,"type":"SolidMask","pos":[-220,-220],"size":[210,106],"flags":{},"order":3,"mode":0,"inputs":[],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[2065],"slot_index":0}],"properties":{"Node name for S&R":"SolidMask"},"widgets_values":[1,384,864]},{"id":709,"type":"MaskComposite","pos":[190,-370],"size":[210,126],"flags":{},"order":11,"mode":0,"inputs":[{"name":"destination","localized_name":"destination","type":"MASK","link":2083},{"name":"source","localized_name":"source","type":"MASK","link":2065}],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[2054,2091],"slot_index":0}],"properties":{"Node name for S&R":"MaskComposite"},"widgets_values":[256,160,"add"]},{"id":704,"type":"Note","pos":[101.74818420410156,112.67951965332031],"size":[290.7107238769531,155.35317993164062],"flags":{},"order":4,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["ClownRegionalConditionings:\n\nTry raising or lowering weight, and changing the weight scheduler from beta57 to Karras (weakens more quickly), or to linear quadratic (stronger late).\n\nTry changing region_bleed_start_step (earlier will make the image blend together more), 
and end_step."],"color":"#432","bgcolor":"#653"},{"id":703,"type":"Note","pos":[423.10699462890625,-96.14085388183594],"size":[241.9689483642578,386.7543640136719],"flags":{},"order":5,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["edge_width also creates some overlap around the edges of the mask.\n\nboolean_masked means that the masked area can \"see\" the rest of the image, but the unmasked area cannot. \"boolean\" would mean neither area could see the rest of the image.\n\nTry setting to boolean_unmasked and see what happens!\n\nIf you still have blur, try reducing edge_width (and if you have seams, try increasing it, or setting end_step to something like 20). \n\nAlso verify that you can generate the background prompt alone without blur (if you can't, this won't work). And don't get stuck on one seed.\n\nVaguely human-shaped masks also tend to work better than the blocky one used here."],"color":"#432","bgcolor":"#653"},{"id":401,"type":"ClownsharKSampler_Beta","pos":[1010,-370],"size":[340.55120849609375,666.8208618164062],"flags":{},"order":19,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":1967},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":2104},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":2102},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1399},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2096],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharKSampler_Beta","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":[0.5,"multistep/res_2m","bong_tangent",30,-1,1,4,3,"fixed","standard",true]},{"id":723,"type":"CLIPTextEncode","pos":[460,-240],"size":[210,88],"flags":{"collapsed":false},"order":14,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":2092}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2093],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["a college campus"]},{"id":662,"type":"CLIPTextEncode","pos":[460,-370],"size":[210,88],"flags":{"collapsed":false},"order":13,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":1939}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2094],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["a woman wearing a red flannel shirt and a cute shark plush blue hat"]},{"id":724,"type":"ClownModelLoader","pos":[615.2467651367188,-699.0204467773438],"size":[361.6804504394531,266],"flags":{},"order":6,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2097],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[2099],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[2100],"slot_index":2}],"properties":{"Node name for 
S&R":"ClownModelLoader"},"widgets_values":["chroma-unlocked-v29.5.safetensors","fp8_e4m3fn_fast","t5xxl_fp8_e4m3fn_scaled.safetensors",".none",".none",".none","chroma","ae.sft"]},{"id":725,"type":"ReChromaPatcher","pos":[1030.2850341796875,-698.6190795898438],"size":[210,82],"flags":{},"order":8,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":2097}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2098],"slot_index":0}],"properties":{"Node name for S&R":"ReChromaPatcher"},"widgets_values":["float64",true]},{"id":726,"type":"CLIPTextEncode","pos":[772.4685668945312,350.9657897949219],"size":[210,88],"flags":{"collapsed":false},"order":15,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":2101}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2102],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["low quality, bad quality, mutated, low detail, blurry, out of focus, jpeg artifacts"]},{"id":722,"type":"ClownRegionalConditioning2","pos":[690,-370],"size":[287.75750732421875,330],"flags":{},"order":18,"mode":0,"inputs":[{"name":"conditioning_masked","localized_name":"conditioning_masked","type":"CONDITIONING","shape":7,"link":2094},{"name":"conditioning_unmasked","localized_name":"conditioning_unmasked","type":"CONDITIONING","shape":7,"link":2093},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":2091},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"region_bleeds","localized_name":"region_bleeds","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","links":[2104],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownRegionalConditioning2"},"widgets_values":[1,0,0,"constant",0,10,"boolean_masked",32,false]}],"links":[[18,14,0,7,4,"VAE"],[1328,14,0,397,1,"VAE"],[1329,397,0,398,0,"IMAGE"],[1399,7,3,401,3,"LATENT"],[1939,490,0,662,0,"CLIP"],[1967,13,0,401,0,"MODEL"],[2054,709,0,710,0,"MASK"],[2065,716,0,709,1,"MASK"],[2073,715,0,676,0,"MASK"],[2083,676,0,709,0,"MASK"],[2091,709,0,722,2,"MASK"],[2092,490,0,723,0,"CLIP"],[2093,723,0,722,1,"CONDITIONING"],[2094,662,0,722,0,"CONDITIONING"],[2096,401,0,397,0,"LATENT"],[2097,724,0,725,0,"MODEL"],[2098,725,0,13,0,"*"],[2099,724,1,490,0,"*"],[2100,724,2,14,0,"*"],[2101,490,0,726,0,"CLIP"],[2102,726,0,401,2,"CONDITIONING"],[2104,722,0,401,1,"CONDITIONING"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.5863092971715371,"offset":[2215.7489179851177,830.3089944212893]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0,"ue_links":[],"VHS_MetadataImage":true,"VHS_KeepIntermediate":true},"version":0.4}
================================================
FILE: example_workflows/chroma txt2img.json
================================================
{"last_node_id":727,"last_link_id":2113,"nodes":[{"id":398,"type":"SaveImage","pos":[1379.9996337890625,-267.2835998535156],"size":[341.7508850097656,561.0067749023438],"flags":{},"order":6,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":1329}],"outputs":[],"properties":{"Node name for S&R":"SaveImage","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["ComfyUI"]},{"id":397,"type":"VAEDecode","pos":[1382.3662109375,-374.17059326171875],"size":[210,46],"flags":{},"order":5,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":2096},{"name":"vae","localized_name":"vae","type":"VAE","link":2112}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[1329],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[]},{"id":401,"type":"ClownsharKSampler_Beta","pos":[1010,-370],"size":[340.55120849609375,666.8208618164062],"flags":{},"order":4,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":2108},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":2107},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":2102},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":2113},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2096],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharKSampler_Beta","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":[0.5,"multistep/res_2m","bong_tangent",30,-1,1,4,3,"fixed","standard",true]},{"id":662,"type":"CLIPTextEncode","pos":[770.2921752929688,-373.6678771972656],"size":[210,88],"flags":{"collapsed":false},"order":2,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":2109}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2107],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["a woman wearing a red flannel shirt and a cute shark plush blue hat"]},{"id":726,"type":"CLIPTextEncode","pos":[772.46923828125,-238.8079376220703],"size":[210,88],"flags":{"collapsed":false},"order":3,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":2110}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2102],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["low quality, bad quality, mutated, low detail, blurry, out of focus, jpeg artifacts"]},{"id":727,"type":"EmptyLatentImage","pos":[771.9976196289062,-98.32988739013672],"size":[213.03683471679688,106],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"LATENT","localized_name":"LATENT","type":"LATENT","links":[2113],"slot_index":0}],"properties":{"Node name for 
S&R":"EmptyLatentImage"},"widgets_values":[1024,1024,1]},{"id":724,"type":"ClownModelLoader","pos":[380.5105285644531,-376.99224853515625],"size":[361.6804504394531,266],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2108],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[2109,2110],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[2112],"slot_index":2}],"properties":{"Node name for S&R":"ClownModelLoader"},"widgets_values":["chroma-unlocked-v37-detail-calibrated.safetensors","fp8_e4m3fn_fast","t5xxl_fp8_e4m3fn_scaled.safetensors",".none",".none",".none","chroma","ae.sft"]}],"links":[[1329,397,0,398,0,"IMAGE"],[2096,401,0,397,0,"LATENT"],[2102,726,0,401,2,"CONDITIONING"],[2107,662,0,401,1,"CONDITIONING"],[2108,724,0,401,0,"MODEL"],[2109,724,1,662,0,"CLIP"],[2110,724,1,726,0,"CLIP"],[2112,724,2,397,1,"VAE"],[2113,727,0,401,3,"LATENT"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.5863092971715371,"offset":[1675.8567061174099,917.6014919421251]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0,"ue_links":[],"VHS_MetadataImage":true,"VHS_KeepIntermediate":true},"version":0.4}
================================================
FILE: example_workflows/comparison ksampler vs csksampler chain workflows.json
================================================
{"last_node_id":1423,"last_link_id":3992,"nodes":[{"id":13,"type":"Reroute","pos":[17750,830],"size":[75,26],"flags":{},"order":9,"mode":0,"inputs":[{"name":"","type":"*","link":3988}],"outputs":[{"name":"","type":"MODEL","links":[1395],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":402,"type":"QuadrupleCLIPLoader","pos":[17300,870],"size":[407.7720031738281,130],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"CLIP","localized_name":"CLIP","type":"CLIP","links":[1552],"slot_index":0}],"properties":{"Node name for S&R":"QuadrupleCLIPLoader","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["clip_l_hidream.safetensors","clip_g_hidream.safetensors","t5xxl_fp8_e4m3fn_scaled.safetensors","llama_3.1_8b_instruct_fp8_scaled.safetensors"]},{"id":403,"type":"UNETLoader","pos":[17390,740],"size":[320.7802429199219,82],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[{"name":"MODEL","localized_name":"MODEL","type":"MODEL","links":[3988],"slot_index":0}],"properties":{"Node name for S&R":"UNETLoader","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["hidream_i1_full_fp8.safetensors","fp8_e4m3fn"]},{"id":404,"type":"VAELoader","pos":[17500,1060],"size":[210,58],"flags":{},"order":2,"mode":0,"inputs":[],"outputs":[{"name":"VAE","localized_name":"VAE","type":"VAE","links":[1344],"slot_index":0}],"properties":{"Node name for 
S&R":"VAELoader","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["ae.sft"]},{"id":1381,"type":"Reroute","pos":[18770,-310],"size":[75,26],"flags":{},"order":23,"mode":0,"inputs":[{"name":"","type":"*","link":3961}],"outputs":[{"name":"","type":"CONDITIONING","links":[3881]}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1383,"type":"Reroute","pos":[18770,-420],"size":[75,26],"flags":{},"order":27,"mode":0,"inputs":[{"name":"","type":"*","link":3877}],"outputs":[{"name":"","type":"MODEL","links":[3879],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1388,"type":"Reroute","pos":[18750,410],"size":[75,26],"flags":{},"order":28,"mode":0,"inputs":[{"name":"","type":"*","link":3886}],"outputs":[{"name":"","type":"MODEL","links":[3887,3891,3896,3901],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1393,"type":"SaveImage","pos":[20400,450],"size":[457.3382263183594,422.2065124511719],"flags":{},"order":51,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":3908}],"outputs":[],"properties":{"Node name for 
S&R":"SaveImage","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["ComfyUI"]},{"id":1399,"type":"Reroute","pos":[18790,1920],"size":[75,26],"flags":{},"order":22,"mode":0,"inputs":[{"name":"","type":"*","link":3967}],"outputs":[{"name":"","type":"CONDITIONING","links":[3925,3933]}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1401,"type":"Reroute","pos":[18780,1870],"size":[75,26],"flags":{},"order":30,"mode":0,"inputs":[{"name":"","type":"*","link":3916}],"outputs":[{"name":"","type":"MODEL","links":[3924,3931,3932],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1408,"type":"FlipSigmas","pos":[19150,2270],"size":[140,26],"flags":{},"order":42,"mode":0,"inputs":[{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","link":3941}],"outputs":[{"name":"SIGMAS","localized_name":"SIGMAS","type":"SIGMAS","links":[3929]}],"properties":{"Node name for S&R":"FlipSigmas"},"widgets_values":[]},{"id":1394,"type":"SamplerCustom","pos":[18940,1910],"size":[253.52972412109375,230],"flags":{},"order":46,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":3924},{"name":"positive","localized_name":"positive","type":"CONDITIONING","link":3925},{"name":"negative","localized_name":"negative","type":"CONDITIONING","link":3926},{"name":"sampler","localized_name":"sampler","type":"SAMPLER","link":3928},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","link":3929},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":3979}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[3938],"slot_index":0},{"name":"denoised_output","localized_name":"denoised_output","type":"LATENT","links":null}],"properties":{"Node name for 
S&R":"SamplerCustom"},"widgets_values":[false,0,"fixed",1]},{"id":1411,"type":"SplitSigmas","pos":[19030,2350],"size":[210,78],"flags":{},"order":38,"mode":0,"inputs":[{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","link":3940}],"outputs":[{"name":"high_sigmas","localized_name":"high_sigmas","type":"SIGMAS","links":null},{"name":"low_sigmas","localized_name":"low_sigmas","type":"SIGMAS","links":[3941,3942],"slot_index":1}],"properties":{"Node name for S&R":"SplitSigmas"},"widgets_values":[15]},{"id":1409,"type":"BetaSamplingScheduler","pos":[18780,2360],"size":[210,106],"flags":{},"order":34,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":3931}],"outputs":[{"name":"SIGMAS","localized_name":"SIGMAS","type":"SIGMAS","links":[3940],"slot_index":0}],"properties":{"Node name for S&R":"BetaSamplingScheduler"},"widgets_values":[30,0.5,0.7]},{"id":1407,"type":"KSamplerSelect","pos":[18720,2210],"size":[210,58],"flags":{},"order":3,"mode":0,"inputs":[],"outputs":[{"name":"SAMPLER","localized_name":"SAMPLER","type":"SAMPLER","links":[3928,3935]}],"properties":{"Node name for S&R":"KSamplerSelect"},"widgets_values":["euler"]},{"id":1395,"type":"Reroute","pos":[18750,1110],"size":[75,26],"flags":{},"order":21,"mode":0,"inputs":[{"name":"","type":"*","link":3965}],"outputs":[{"name":"","type":"CONDITIONING","links":[3949],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1405,"type":"VAEDecode","pos":[19650,1810],"size":[210,46],"flags":{},"order":52,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":3992},{"name":"vae","localized_name":"vae","type":"VAE","link":3922}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3923],"slot_index":0}],"properties":{"Node name for 
S&R":"VAEDecode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[]},{"id":1403,"type":"VAEDecode","pos":[19650,990],"size":[210,46],"flags":{},"order":41,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":3991},{"name":"vae","localized_name":"vae","type":"VAE","link":3919}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3920],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[]},{"id":1263,"type":"VAEDecode","pos":[20410,-500],"size":[210,46],"flags":{},"order":47,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":3989},{"name":"vae","localized_name":"vae","type":"VAE","link":3429}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3430],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[]},{"id":490,"type":"Reroute","pos":[17750,870],"size":[75,26],"flags":{},"order":8,"mode":0,"inputs":[{"name":"","type":"*","link":1552}],"outputs":[{"name":"","type":"CLIP","links":[3959,3960],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1385,"type":"Reroute","pos":[18750,520],"size":[75,26],"flags":{},"order":24,"mode":0,"inputs":[{"name":"","type":"*","link":3964}],"outputs":[{"name":"","type":"CONDITIONING","links":[3889,3893,3898,3903],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1415,"type":"CLIPTextEncode","pos":[17860,1070],"size":[261.8798522949219,111.21334838867188],"flags":{},"order":15,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":3960}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[3961,3964,3966,3968],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["blurry, out of focus, 
shallow depth of field, low quality, bad quality, low detail, mutated, jpeg artifacts, compression artifacts,"]},{"id":1414,"type":"CLIPTextEncode","pos":[17860,870],"size":[271.3465270996094,126.98572540283203],"flags":{},"order":14,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":3959}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[3962,3963,3965,3967],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["a photo of a doghead cannibal holding a sign that says \"the clown jumped the shark\" in a landfill at night"]},{"id":1397,"type":"Reroute","pos":[18750,1060],"size":[75,26],"flags":{},"order":29,"mode":0,"inputs":[{"name":"","type":"*","link":3912}],"outputs":[{"name":"","type":"MODEL","links":[3948],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1402,"type":"Reroute","pos":[18780,1980],"size":[75,26],"flags":{},"order":26,"mode":0,"inputs":[{"name":"","type":"*","link":3968}],"outputs":[{"name":"","type":"CONDITIONING","links":[3926,3934],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1417,"type":"LoadImage","pos":[18263.712890625,1364.093017578125],"size":[315,314],"flags":{},"order":4,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3973]},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["00107-496528661.png","image"]},{"id":1420,"type":"VAEEncode","pos":[18710,2080],"size":[140,46],"flags":{},"order":18,"mode":0,"inputs":[{"name":"pixels","localized_name":"pixels","type":"IMAGE","link":3977},{"name":"vae","localized_name":"vae","type":"VAE","link":3980}],"outputs":[{"name":"LATENT","localized_name":"LATENT","type":"LATENT","links":[3979],"slot_index":0}],"properties":{"Node name for 
S&R":"VAEEncode"},"widgets_values":[]},{"id":1419,"type":"ImageResize+","pos":[18460,2080],"size":[210,218],"flags":{},"order":11,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","link":3976}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3977],"slot_index":0},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"ImageResize+"},"widgets_values":[1024,1024,"bicubic","stretch","always",0]},{"id":14,"type":"Reroute","pos":[17750,910],"size":[75,26],"flags":{},"order":10,"mode":0,"inputs":[{"name":"","type":"*","link":1344}],"outputs":[{"name":"","type":"VAE","links":[3429,3907,3919,3922,3969,3980],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1418,"type":"LoadImage","pos":[18120,2080],"size":[315,314],"flags":{},"order":5,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3976],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for 
S&R":"LoadImage"},"widgets_values":["00107-496528661.png","image"]},{"id":1398,"type":"Reroute","pos":[18750,1160],"size":[75,26],"flags":{},"order":25,"mode":0,"inputs":[{"name":"","type":"*","link":3966}],"outputs":[{"name":"","type":"CONDITIONING","links":[3950],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1416,"type":"VAEEncodeAdvanced","pos":[18620,1370],"size":[253.78292846679688,278],"flags":{},"order":17,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":3973},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":3969}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[3975],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":null},{"name":"mask","localized_name":"mask","type":"MASK","links":null},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[],"slot_index":3},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for 
S&R":"VAEEncodeAdvanced"},"widgets_values":["false",1024,1024,"red",false,"16_channels"]},{"id":1423,"type":"FluxLoader","pos":[16942.298828125,795.814208984375],"size":[315,282],"flags":{},"order":6,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":null},{"name":"clip","localized_name":"clip","type":"CLIP","links":null},{"name":"vae","localized_name":"vae","type":"VAE","links":null},{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","links":null},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","links":null}],"properties":{"Node name for S&R":"FluxLoader"},"widgets_values":["colossusProjectFlux_v42AIO.safetensors","default",".use_ckpt_clip",".none",".use_ckpt_vae",".none",".none"]},{"id":431,"type":"ModelSamplingAdvancedResolution","pos":[17868.26953125,666.623046875],"size":[260.3999938964844,126],"flags":{},"order":16,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1395},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":3987}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[3877,3886,3912,3916],"slot_index":0}],"properties":{"Node name for S&R":"ModelSamplingAdvancedResolution","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":["exponential",1.35,0.85]},{"id":1422,"type":"EmptyLatentImage","pos":[17486.916015625,540.6340942382812],"size":[315,106],"flags":{},"order":7,"mode":0,"inputs":[],"outputs":[{"name":"LATENT","localized_name":"LATENT","type":"LATENT","links":[3985,3986,3987],"slot_index":0}],"properties":{"Node name for 
S&R":"EmptyLatentImage"},"widgets_values":[1024,1024,1]},{"id":1380,"type":"Reroute","pos":[18768.1875,-255.9905242919922],"size":[75,26],"flags":{},"order":12,"mode":0,"inputs":[{"name":"","type":"*","link":3985}],"outputs":[{"name":"","type":"LATENT","links":[3882],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1382,"type":"Reroute","pos":[18769.365234375,-367.63720703125],"size":[75,26],"flags":{},"order":19,"mode":0,"inputs":[{"name":"","type":"*","link":3962}],"outputs":[{"name":"","type":"CONDITIONING","links":[3880]}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1386,"type":"Reroute","pos":[18750.548828125,467.08831787109375],"size":[75,26],"flags":{},"order":20,"mode":0,"inputs":[{"name":"","type":"*","link":3963}],"outputs":[{"name":"","type":"CONDITIONING","links":[3888,3892,3897,3902]}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1387,"type":"Reroute","pos":[18747.00390625,569.2838745117188],"size":[75,26],"flags":{},"order":13,"mode":0,"inputs":[{"name":"","type":"*","link":3986}],"outputs":[{"name":"","type":"LATENT","links":[3890]}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1264,"type":"SaveImage","pos":[20410,-410],"size":[457.3382263183594,422.2065124511719],"flags":{},"order":50,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":3430}],"outputs":[],"properties":{"Node name for S&R":"SaveImage","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["ComfyUI"]},{"id":1392,"type":"VAEDecode","pos":[20400,360],"size":[210,46],"flags":{},"order":48,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":3990},{"name":"vae","localized_name":"vae","type":"VAE","link":3907}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3908],"slot_index":0}],"properties":{"Node name for 
S&R":"VAEDecode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[]},{"id":1410,"type":"SamplerCustom","pos":[19300,1900],"size":[272.0888977050781,230],"flags":{},"order":49,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":3932},{"name":"positive","localized_name":"positive","type":"CONDITIONING","link":3933},{"name":"negative","localized_name":"negative","type":"CONDITIONING","link":3934},{"name":"sampler","localized_name":"sampler","type":"SAMPLER","link":3935},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","link":3942},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":3938}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[3992],"slot_index":0},{"name":"denoised_output","localized_name":"denoised_output","type":"LATENT","links":null}],"properties":{"Node name for S&R":"SamplerCustom"},"widgets_values":[false,0,"fixed",4]},{"id":1261,"type":"ClownsharKSampler_Beta","pos":[18944.17578125,-390],"size":[283.8435974121094,418],"flags":{},"order":31,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":3879},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":3880},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":3881},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":3882},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[3427],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharKSampler_Beta","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":[0.5,"multistep/res_2m","beta57",30,5,1,4,0,"fixed","standard",true]},{"id":1262,"type":"ClownsharkChainsampler_Beta","pos":[19310.083984375,-402.36279296875],"size":[285.8560485839844,298],"flags":{},"order":35,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":3427},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[3435],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"multistep/res_2m",5,4,"resample",true]},{"id":1266,"type":"ClownsharkChainsampler_Beta","pos":[19679.115234375,-407.62518310546875],"size":[269.3165283203125,298],"flags":{},"order":39,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":3435},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[3436],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"multistep/res_2m",5,4,"resample",true]},{"id":1265,"type":"ClownsharkChainsampler_Beta","pos":[20054.2421875,-408.6135559082031],"size":[271.6801452636719,298],"flags":{},"order":43,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":3436},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[3989],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"multistep/res_2m",-1,4,"resample",true]},{"id":1384,"type":"KSamplerAdvanced","pos":[18936.240234375,444.8757019042969],"size":[278.3764343261719,334],"flags":{},"order":32,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":3887},{"name":"positive","localized_name":"positive","type":"CONDITIONING","link":3888},{"name":"negative","localized_name":"negative","type":"CONDITIONING","link":3889},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":3890}],"outputs":[{"name":"LATENT","localized_name":"LATENT","type":"LATENT","links":[3895],"slot_index":0}],"properties":{"Node name for 
S&R":"KSamplerAdvanced"},"widgets_values":["enable",0,"fixed",30,4,"euler","beta57",0,5,"enable"]},{"id":1391,"type":"KSamplerAdvanced","pos":[20044.978515625,449.22869873046875],"size":[278.3769226074219,334],"flags":{},"order":44,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":3901},{"name":"positive","localized_name":"positive","type":"CONDITIONING","link":3902},{"name":"negative","localized_name":"negative","type":"CONDITIONING","link":3903},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":3905}],"outputs":[{"name":"LATENT","localized_name":"LATENT","type":"LATENT","links":[3990],"slot_index":0}],"properties":{"Node name for S&R":"KSamplerAdvanced"},"widgets_values":["disable",15,"fixed",30,4,"euler","beta57",15,10000,"disable"]},{"id":1390,"type":"KSamplerAdvanced","pos":[19672.99609375,448.818603515625],"size":[273.651123046875,334],"flags":{},"order":40,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":3896},{"name":"positive","localized_name":"positive","type":"CONDITIONING","link":3897},{"name":"negative","localized_name":"negative","type":"CONDITIONING","link":3898},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":3900}],"outputs":[{"name":"LATENT","localized_name":"LATENT","type":"LATENT","links":[3905],"slot_index":0}],"properties":{"Node name for 
S&R":"KSamplerAdvanced"},"widgets_values":["disable",10,"fixed",30,4,"euler","beta57",10,15,"enable"]},{"id":1389,"type":"KSamplerAdvanced","pos":[19308.921875,451.14801025390625],"size":[273.652099609375,334],"flags":{},"order":36,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":3891},{"name":"positive","localized_name":"positive","type":"CONDITIONING","link":3892},{"name":"negative","localized_name":"negative","type":"CONDITIONING","link":3893},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":3895}],"outputs":[{"name":"LATENT","localized_name":"LATENT","type":"LATENT","links":[3900],"slot_index":0}],"properties":{"Node name for S&R":"KSamplerAdvanced"},"widgets_values":["disable",5,"fixed",30,4,"euler","beta57",5,10,"enable"]},{"id":1413,"type":"ClownsharkChainsampler_Beta","pos":[19294.095703125,1089.451171875],"size":[275.2236328125,298],"flags":{},"order":37,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":3947},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[3991],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"exponential/res_2s",-1,4,"resample",true]},{"id":1412,"type":"ClownsharKSampler_Beta","pos":[18922.447265625,1091.1812744140625],"size":[281.48095703125,418],"flags":{},"order":33,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":3948},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":3949},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":3950},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":3975},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[3947],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":[0.5,"exponential/res_2s","beta57",30,15,1,1,0,"fixed","unsample",true]},{"id":1406,"type":"SaveImage","pos":[19650,1900],"size":[457.3382263183594,422.2065124511719],"flags":{},"order":53,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":3923}],"outputs":[],"properties":{"Node name for S&R":"SaveImage","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["ComfyUI"]},{"id":1404,"type":"SaveImage","pos":[19650,1080],"size":[457.3382263183594,422.2065124511719],"flags":{},"order":45,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":3920}],"outputs":[],"properties":{"Node name for 
S&R":"SaveImage","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["ComfyUI"]}],"links":[[1344,404,0,14,0,"*"],[1395,13,0,431,0,"MODEL"],[1552,402,0,490,0,"*"],[3427,1261,0,1262,4,"LATENT"],[3429,14,0,1263,1,"VAE"],[3430,1263,0,1264,0,"IMAGE"],[3435,1262,0,1266,4,"LATENT"],[3436,1266,0,1265,4,"LATENT"],[3877,431,0,1383,0,"*"],[3879,1383,0,1261,0,"MODEL"],[3880,1382,0,1261,1,"CONDITIONING"],[3881,1381,0,1261,2,"CONDITIONING"],[3882,1380,0,1261,3,"LATENT"],[3886,431,0,1388,0,"*"],[3887,1388,0,1384,0,"MODEL"],[3888,1386,0,1384,1,"CONDITIONING"],[3889,1385,0,1384,2,"CONDITIONING"],[3890,1387,0,1384,3,"LATENT"],[3891,1388,0,1389,0,"MODEL"],[3892,1386,0,1389,1,"CONDITIONING"],[3893,1385,0,1389,2,"CONDITIONING"],[3895,1384,0,1389,3,"LATENT"],[3896,1388,0,1390,0,"MODEL"],[3897,1386,0,1390,1,"CONDITIONING"],[3898,1385,0,1390,2,"CONDITIONING"],[3900,1389,0,1390,3,"LATENT"],[3901,1388,0,1391,0,"MODEL"],[3902,1386,0,1391,1,"CONDITIONING"],[3903,1385,0,1391,2,"CONDITIONING"],[3905,1390,0,1391,3,"LATENT"],[3907,14,0,1392,1,"VAE"],[3908,1392,0,1393,0,"IMAGE"],[3912,431,0,1397,0,"*"],[3916,431,0,1401,0,"*"],[3919,14,0,1403,1,"VAE"],[3920,1403,0,1404,0,"IMAGE"],[3922,14,0,1405,1,"VAE"],[3923,1405,0,1406,0,"IMAGE"],[3924,1401,0,1394,0,"MODEL"],[3925,1399,0,1394,1,"CONDITIONING"],[3926,1402,0,1394,2,"CONDITIONING"],[3928,1407,0,1394,3,"SAMPLER"],[3929,1408,0,1394,4,"SIGMAS"],[3931,1401,0,1409,0,"MODEL"],[3932,1401,0,1410,0,"MODEL"],[3933,1399,0,1410,1,"CONDITIONING"],[3934,1402,0,1410,2,"CONDITIONING"],[3935,1407,0,1410,3,"SAMPLER"],[3938,1394,0,1410,5,"LATENT"],[3940,1409,0,1411,0,"SIGMAS"],[3941,1411,1,1408,0,"SIGMAS"],[3942,1411,1,1410,4,"SIGMAS"],[3947,1412,0,1413,4,"LATENT"],[3948,1397,0,1412,0,"MODEL"],[3949,1395,0,1412,1,"CONDITIONING"],[3950,1398,0,1412,2,"CONDITIONING"],[3959,490,0,1414,0,"CLIP"],[3960,490,0,1415,0,"CLIP"],[3961,1415,0,1381,0,"*"],[3962,1414,0,1382,0,"*"],[3963,1414,0,1386,0,"*"],[3964,1415,0,1385,0,"*"],[3965,1414,0,1395,0,"*"],[3966,1415,0,1398,0,"*"]
,[3967,1414,0,1399,0,"*"],[3968,1415,0,1402,0,"*"],[3969,14,0,1416,4,"VAE"],[3973,1417,0,1416,0,"IMAGE"],[3975,1416,0,1412,3,"LATENT"],[3976,1418,0,1419,0,"IMAGE"],[3977,1419,0,1420,0,"IMAGE"],[3979,1420,0,1394,5,"LATENT"],[3980,14,0,1420,1,"VAE"],[3985,1422,0,1380,0,"*"],[3986,1422,0,1387,0,"*"],[3987,1422,0,431,1,"LATENT"],[3988,403,0,13,0,"*"],[3989,1265,0,1263,0,"LATENT"],[3990,1391,0,1392,0,"LATENT"],[3991,1413,0,1403,0,"LATENT"],[3992,1410,0,1405,0,"LATENT"]],"groups":[],"config":{},"extra":{"ds":{"scale":0.9849732675807865,"offset":[-14560.618477888858,-446.28944651783576]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0,"ue_links":[],"VHS_MetadataImage":true,"VHS_KeepIntermediate":true},"version":0.4}
================================================
FILE: example_workflows/flux faceswap sync pulid.json
================================================
{"last_node_id":1741,"last_link_id":6622,"nodes":[{"id":490,"type":"Reroute","pos":[-1346.8087158203125,-823.3269653320312],"size":[75,26],"flags":{},"order":39,"mode":0,"inputs":[{"name":"","type":"*","link":6398}],"outputs":[{"name":"","type":"CLIP","links":[4157,6103],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1162,"type":"Reroute","pos":[1930.0975341796875,-817.45556640625],"size":[75,26],"flags":{},"order":78,"mode":0,"inputs":[{"name":"","type":"*","link":4185}],"outputs":[{"name":"","type":"IMAGE","links":[4186],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":744,"type":"SaveImage","pos":[1276.456787109375,-719.9273681640625],"size":[424.53594970703125,455.0760192871094],"flags":{},"order":72,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2241}],"outputs":[],"title":"Save Patch","properties":{"Node name for S&R":"SaveImage","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["ComfyUI"],"color":"#332922","bgcolor":"#593930"},{"id":1022,"type":"ImageBlend","pos":[2313.7607421875,-792.44091796875],"size":[210,102],"flags":{"collapsed":true},"order":73,"mode":0,"inputs":[{"name":"image1","localized_name":"image1","type":"IMAGE","link":3568},{"name":"image2","localized_name":"image2","type":"IMAGE","link":3570}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3569],"slot_index":0}],"properties":{"Node name for S&R":"ImageBlend"},"widgets_values":[0.5,"multiply"]},{"id":729,"type":"SetImageSize","pos":[-812.6932373046875,-86.24114227294922],"size":[210,102],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"width","localized_name":"width","type":"INT","links":[2104,2108,4998],"slot_index":0},{"name":"height","localized_name":"height","type":"INT","links":[2105,2109,4999],"slot_index":1}],"title":"Inpaint Tile Size","properties":{"Node name for 
S&R":"SetImageSize"},"widgets_values":[1024,1024]},{"id":1161,"type":"Image Save","pos":[2186.75634765625,-722.2388916015625],"size":[351.4677734375,796.8805541992188],"flags":{},"order":79,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":4186}],"outputs":[{"name":"images","localized_name":"images","type":"IMAGE","links":null},{"name":"files","localized_name":"files","type":"STRING","links":null}],"properties":{"Node name for S&R":"Image Save"},"widgets_values":["[time(%Y-%m-%d)]","ComfyUI","_",4,"false","jpeg",300,100,"true","false","false","false","true","true","true"],"color":"#232","bgcolor":"#353"},{"id":1024,"type":"PreviewImage","pos":[1286.05859375,-198.6599884033203],"size":[413.7582092285156,445.8081359863281],"flags":{},"order":76,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":3569}],"outputs":[],"properties":{"Node name for S&R":"PreviewImage"},"widgets_values":[],"color":"#332922","bgcolor":"#593930"},{"id":758,"type":"ImageResize+","pos":[1468.4384765625,-790.391845703125],"size":[210,218],"flags":{"collapsed":true},"order":71,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","link":2201},{"name":"width","type":"INT","pos":[10,76],"widget":{"name":"width"},"link":2204},{"name":"height","type":"INT","pos":[10,100],"widget":{"name":"height"},"link":2205}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2198],"slot_index":0},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for 
S&R":"ImageResize+"},"widgets_values":[512,512,"lanczos","stretch","always",0]},{"id":1369,"type":"ImageResize+","pos":[2183.37109375,151.09762573242188],"size":[210,218],"flags":{"collapsed":true},"order":44,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","link":4996},{"name":"width","type":"INT","pos":[10,76],"widget":{"name":"width"},"link":4998},{"name":"height","type":"INT","pos":[10,100],"widget":{"name":"height"},"link":4999}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[5000],"slot_index":0},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"ImageResize+"},"widgets_values":[512,512,"lanczos","stretch","always",0]},{"id":1407,"type":"Reroute","pos":[-914.50390625,-361.0196533203125],"size":[75,26],"flags":{},"order":37,"mode":0,"inputs":[{"name":"","type":"*","link":6620}],"outputs":[{"name":"","type":"MASK","links":[5021],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":725,"type":"Reroute","pos":[-914.8554077148438,-440.6482238769531],"size":[75,26],"flags":{},"order":36,"mode":0,"inputs":[{"name":"","type":"*","link":6619}],"outputs":[{"name":"","type":"IMAGE","links":[2210,2211,5054],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":14,"type":"Reroute","pos":[-1346.8087158203125,-783.3269653320312],"size":[75,26],"flags":{},"order":35,"mode":0,"inputs":[{"name":"","type":"*","link":5447}],"outputs":[{"name":"","type":"VAE","links":[2153,3508],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1667,"type":"GrowMask","pos":[-302.060302734375,-164.22067260742188],"size":[210,82],"flags":{},"order":53,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":6360}],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[6361],"slot_index":0}],"propertie
s":{"Node name for S&R":"GrowMask"},"widgets_values":[-10,false]},{"id":1039,"type":"ImageBlend","pos":[-769.9498901367188,220.86917114257812],"size":[210,102],"flags":{"collapsed":true},"order":50,"mode":0,"inputs":[{"name":"image1","localized_name":"image1","type":"IMAGE","link":3606},{"name":"image2","localized_name":"image2","type":"IMAGE","link":3605}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3607],"slot_index":0}],"properties":{"Node name for S&R":"ImageBlend"},"widgets_values":[0.5,"multiply"]},{"id":731,"type":"SimpleMath+","pos":[-776.4415893554688,126.82145690917969],"size":[315,98],"flags":{"collapsed":true},"order":33,"mode":0,"inputs":[{"name":"a","localized_name":"a","type":"*","shape":7,"link":2108},{"name":"b","localized_name":"b","type":"*","shape":7,"link":2109},{"name":"c","localized_name":"c","type":"*","shape":7,"link":null}],"outputs":[{"name":"INT","localized_name":"INT","type":"INT","links":null},{"name":"FLOAT","localized_name":"FLOAT","type":"FLOAT","links":[2100],"slot_index":1}],"properties":{"Node name for S&R":"SimpleMath+"},"widgets_values":["a/b"]},{"id":728,"type":"MaskToImage","pos":[-791.0198364257812,176.82147216796875],"size":[176.39999389648438,26],"flags":{"collapsed":true},"order":45,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":2106}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2103,3605],"slot_index":0}],"properties":{"Node name for S&R":"MaskToImage"},"widgets_values":[]},{"id":765,"type":"MaskToImage","pos":[2080.868896484375,-792.6943359375],"size":[182.28543090820312,26],"flags":{"collapsed":true},"order":46,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":5529}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3570],"slot_index":0}],"properties":{"Node name for S&R":"MaskToImage"},"widgets_values":[]},{"id":761,"type":"Image Comparer 
(rgthree)","pos":[1747.432373046875,-712.1251220703125],"size":[410.4466247558594,447.8973388671875],"flags":{},"order":77,"mode":0,"inputs":[{"name":"image_a","type":"IMAGE","dir":3,"link":2210},{"name":"image_b","type":"IMAGE","dir":3,"link":2200}],"outputs":[],"title":"Compare Output","properties":{"comparer_mode":"Slide"},"widgets_values":[[{"name":"A","selected":true,"url":"/api/view?filename=rgthree.compare._temp_lonqd_00061_.png&type=temp&subfolder=&rand=0.1196562401371497"},{"name":"B","selected":true,"url":"/api/view?filename=rgthree.compare._temp_lonqd_00062_.png&type=temp&subfolder=&rand=0.958614793318614"}]],"color":"#232","bgcolor":"#353"},{"id":1569,"type":"ClownGuides_Sync_Advanced","pos":[261.355224609375,-1000.5784912109375],"size":[315,1938],"flags":{"collapsed":true},"order":56,"mode":0,"inputs":[{"name":"guide_masked","localized_name":"guide_masked","type":"LATENT","shape":7,"link":6201},{"name":"guide_unmasked","localized_name":"guide_unmasked","type":"LATENT","shape":7,"link":6202},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":6223},{"name":"mask_sync","localized_name":"mask_sync","type":"MASK","shape":7,"link":6224},{"name":"mask_drift_x","localized_name":"mask_drift_x","type":"MASK","shape":7,"link":6225},{"name":"mask_drift_y","localized_name":"mask_drift_y","type":"MASK","shape":7,"link":6226},{"name":"mask_lure_x","localized_name":"mask_lure_x","type":"MASK","shape":7,"link":6227},{"name":"mask_lure_y","localized_name":"mask_lure_y","type":"MASK","shape":7,"link":6228},{"name":"weights_masked","localized_name":"weights_masked","type":"SIGMAS","shape":7,"link":null},{"name":"weights_unmasked","localized_name":"weights_unmasked","type":"SIGMAS","shape":7,"link":null},{"name":"syncs_masked","localized_name":"syncs_masked","type":"SIGMAS","shape":7,"link":null},{"name":"syncs_unmasked","localized_name":"syncs_unmasked","type":"SIGMAS","shape":7,"link":null},{"name":"drift_xs_masked","localized_name":"drift_xs_masked","t
ype":"SIGMAS","shape":7,"link":null},{"name":"drift_xs_unmasked","localized_name":"drift_xs_unmasked","type":"SIGMAS","shape":7,"link":null},{"name":"drift_ys_masked","localized_name":"drift_ys_masked","type":"SIGMAS","shape":7,"link":null},{"name":"drift_ys_unmasked","localized_name":"drift_ys_unmasked","type":"SIGMAS","shape":7,"link":null},{"name":"lure_xs_masked","localized_name":"lure_xs_masked","type":"SIGMAS","shape":7,"link":null},{"name":"lure_xs_unmasked","localized_name":"lure_xs_unmasked","type":"SIGMAS","shape":7,"link":null},{"name":"lure_ys_masked","localized_name":"lure_ys_masked","type":"SIGMAS","shape":7,"link":null},{"name":"lure_ys_unmasked","localized_name":"lure_ys_unmasked","type":"SIGMAS","shape":7,"link":null},{"name":"drift_x_data","type":"FLOAT","pos":[10,800],"widget":{"name":"drift_x_data"},"link":6239},{"name":"drift_y_guide","type":"FLOAT","pos":[10,1088],"widget":{"name":"drift_y_guide"},"link":6240},{"name":"sync_masked","type":"FLOAT","pos":[10,608],"widget":{"name":"sync_masked"},"link":6241}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[6411],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuides_Sync_Advanced"},"widgets_values":[1,1,"constant","constant",0,0,-1,-1,0,1,"constant","constant",0,0,-1,-1,0.2,0,1,0,"constant","constant",0,0,-1,-1,0,0,0.2,1,0,"constant","constant",0,0,-1,-1,0,0,"constant","constant",0,0,-1,-1,0,0,"constant","constant",0,0,-1,-1,0,"y -> x",false,false,false,false,false,false]},{"id":1571,"type":"Reroute","pos":[141.35520935058594,-1030.5784912109375],"size":[75,26],"flags":{},"order":52,"mode":0,"inputs":[{"name":"","type":"*","link":6222}],"outputs":[{"name":"","type":"MASK","links":[6223,6224,6225,6226,6227,6228,6342,6584],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1368,"type":"Image Comparer 
(rgthree)","pos":[1744.9150390625,-199.16920471191406],"size":[410.4466247558594,447.8973388671875],"flags":{},"order":74,"mode":0,"inputs":[{"name":"image_a","type":"IMAGE","dir":3,"link":4997},{"name":"image_b","type":"IMAGE","dir":3,"link":5000}],"outputs":[],"title":"Compare Patch","properties":{"comparer_mode":"Slide"},"widgets_values":[[{"name":"A","selected":true,"url":"/api/view?filename=rgthree.compare._temp_fyekd_00061_.png&type=temp&subfolder=&rand=0.6117808776963016"},{"name":"B","selected":true,"url":"/api/view?filename=rgthree.compare._temp_fyekd_00062_.png&type=temp&subfolder=&rand=0.2735573488508416"}]],"color":"#232","bgcolor":"#353"},{"id":1673,"type":"Note","pos":[1824.9287109375,-1010.687744140625],"size":[322.34954833984375,88],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Preview of first stage output: sometimes it can be worth manually (or automatically, using DINO, etc.) adjusting your mask for the second stage, based on this output."],"color":"#432","bgcolor":"#653"},{"id":1539,"type":"GrowMask","pos":[573.4215698242188,-1145.86767578125],"size":[214.5684051513672,82],"flags":{},"order":57,"mode":4,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":6342}],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[6343,6344,6345,6346,6347,6348],"slot_index":0}],"properties":{"Node name for S&R":"GrowMask"},"widgets_values":[10,false]},{"id":1383,"type":"Note","pos":[216.7359161376953,340.25775146484375],"size":[291.67218017578125,232.2296142578125],"flags":{},"order":2,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["eta > 0.0 means you are using SDE/ancestral sampling. With this guide mode you will generally want to use bongmath = true.\n\nSamplers such as res_2s and res_3s will be very accurate. Try res_5s and res_8s if you really want to go crazy with it. 
They run 2x (2s), 3x (3s), etc slower than Euler.\n\nres_2m and 3m will be fast and also good, and run at the same speed as Euler.\n\neta_substep will increase the power of bongmath. If it is set to 0.0, you can turn bongmath off without any effect."],"color":"#432","bgcolor":"#653"},{"id":1380,"type":"Note","pos":[544.9375610351562,342.0576477050781],"size":[290.1026611328125,231.5842742919922],"flags":{},"order":3,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Setting denoise to a negative value is equivalent to just scaling it. For example:\n\nDenoise = -0.90 is the same as multiplying every sigma value in the entire schedule by 0.9.\n\nI find this is a lot easier to control than the regular denoise scale. The difference between -0.95 and -0.9 is much more predictable than with 0.95 and 0.9. Most of us have seen how different denoise 0.8 might be with Karras vs. exponential. \n\nTry a denoise between -0.95 and -0.85. "],"color":"#432","bgcolor":"#653"},{"id":759,"type":"ImageCompositeMasked","pos":[1697.19140625,-790.8740844726562],"size":[210,186],"flags":{"collapsed":true},"order":75,"mode":0,"inputs":[{"name":"destination","localized_name":"destination","type":"IMAGE","link":2211},{"name":"source","localized_name":"source","type":"IMAGE","link":2198},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":6447},{"name":"x","type":"INT","pos":[10,76],"widget":{"name":"x"},"link":2206},{"name":"y","type":"INT","pos":[10,100],"widget":{"name":"y"},"link":2207}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2200,4185],"slot_index":0}],"properties":{"Node name for S&R":"ImageCompositeMasked"},"widgets_values":[712,800,false]},{"id":1687,"type":"Note","pos":[-101.33948516845703,339.7750244140625],"size":[286.97723388671875,180.28128051757812],"flags":{},"order":4,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["The cycles node causes the connected sampler to loop between sampling 
and unsampling steps. (Unsampling is running the sampler backwards, where it predicts the noise that would lead to a given output).\n\nWhen unsample_eta is set to -1, it simply uses the same settings for eta as in the connected node. "],"color":"#432","bgcolor":"#653"},{"id":745,"type":"VAEDecode","pos":[1297.53369140625,-791.137939453125],"size":[140,46],"flags":{"collapsed":true},"order":70,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":6478},{"name":"vae","localized_name":"vae","type":"VAE","link":2153}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2201,2241,3568,4997],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[]},{"id":1678,"type":"Note","pos":[-422.92510986328125,-333.6911926269531],"size":[324.0018005371094,113.63665771484375],"flags":{},"order":5,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["ReduxAdvanced is used to help get things on track. Bypass if you're having problems with it disrupting character likeness.\n\nThe SDE Mask ensures SDE noise is used only in the masked area, limiting change in unmasked areas that could lead to seams. 
"],"color":"#432","bgcolor":"#653"},{"id":1572,"type":"ClownGuides_Sync_Advanced","pos":[581.355224609375,-1000.5784912109375],"size":[315,1878],"flags":{"collapsed":true},"order":62,"mode":0,"inputs":[{"name":"guide_masked","localized_name":"guide_masked","type":"LATENT","shape":7,"link":6229},{"name":"guide_unmasked","localized_name":"guide_unmasked","type":"LATENT","shape":7,"link":6230},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":6343},{"name":"mask_sync","localized_name":"mask_sync","type":"MASK","shape":7,"link":6344},{"name":"mask_drift_x","localized_name":"mask_drift_x","type":"MASK","shape":7,"link":6345},{"name":"mask_drift_y","localized_name":"mask_drift_y","type":"MASK","shape":7,"link":6346},{"name":"mask_lure_x","localized_name":"mask_lure_x","type":"MASK","shape":7,"link":6347},{"name":"mask_lure_y","localized_name":"mask_lure_y","type":"MASK","shape":7,"link":6348},{"name":"weights_masked","localized_name":"weights_masked","type":"SIGMAS","shape":7,"link":null},{"name":"weights_unmasked","localized_name":"weights_unmasked","type":"SIGMAS","shape":7,"link":null},{"name":"syncs_masked","localized_name":"syncs_masked","type":"SIGMAS","shape":7,"link":null},{"name":"syncs_unmasked","localized_name":"syncs_unmasked","type":"SIGMAS","shape":7,"link":null},{"name":"drift_xs_masked","localized_name":"drift_xs_masked","type":"SIGMAS","shape":7,"link":null},{"name":"drift_xs_unmasked","localized_name":"drift_xs_unmasked","type":"SIGMAS","shape":7,"link":null},{"name":"drift_ys_masked","localized_name":"drift_ys_masked","type":"SIGMAS","shape":7,"link":null},{"name":"drift_ys_unmasked","localized_name":"drift_ys_unmasked","type":"SIGMAS","shape":7,"link":null},{"name":"lure_xs_masked","localized_name":"lure_xs_masked","type":"SIGMAS","shape":7,"link":null},{"name":"lure_xs_unmasked","localized_name":"lure_xs_unmasked","type":"SIGMAS","shape":7,"link":null},{"name":"lure_ys_masked","localized_name":"lure_ys_masked","type":"SIGMAS","shap
e":7,"link":null},{"name":"lure_ys_unmasked","localized_name":"lure_ys_unmasked","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[6414],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuides_Sync_Advanced"},"widgets_values":[0,1,"constant","constant",0,0,-1,-1,0,1,"constant","constant",0,0,-1,-1,0,0,1,0,"constant","constant",0,0,-1,-1,0,0,0,1,0,"constant","constant",0,0,-1,-1,0,0,"constant","constant",0,0,-1,-1,0,0,"constant","constant",0,0,-1,-1,0,"y -> x",false,false,false,false,false,false]},{"id":1693,"type":"Note","pos":[-1535.57666015625,-641.8590087890625],"size":[276.7918701171875,88],"flags":{},"order":6,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Padding can be very important. Some models/loras/IPadapter embeds etc. are going to respond very differently if the shot is close up vs. farther away."],"color":"#432","bgcolor":"#653"},{"id":1694,"type":"Note","pos":[-441.5133056640625,-999.14990234375],"size":[291.2616882324219,189.98562622070312],"flags":{},"order":7,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Increase character likeness by: \n\nDecreasing \"Similarity\"\nIncreasing \"Drift Toward Target\"\nIncreasing cycles\nIncreasing eta (max 1.0)\nIncreasing denoise\n\nIncrease adherence to the input image by:\n\nDoing the opposite of any of the above\nIncreasing \"Drift Toward Guide\"\nEnabling the ReduxAdvanced 
node\n"],"color":"#432","bgcolor":"#653"},{"id":1277,"type":"SharkOptions_GuideCond_Beta","pos":[575.9444580078125,221.88970947265625],"size":[315,98],"flags":{"collapsed":true},"order":51,"mode":0,"inputs":[{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":5653},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":4650},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[5493],"slot_index":0}],"properties":{"Node name for S&R":"SharkOptions_GuideCond_Beta"},"widgets_values":[1]},{"id":1040,"type":"PreviewImage","pos":[-1267.6248779296875,-30.252229690551758],"size":[304.98114013671875,265.58380126953125],"flags":{},"order":55,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":3607}],"outputs":[],"properties":{"Node name for S&R":"PreviewImage"},"widgets_values":[]},{"id":1698,"type":"Note","pos":[-1623.859375,-355.951416015625],"size":[276.7918701171875,88],"flags":{},"order":8,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Draw a mask over the face in the Load Image node. 
Ideally, try stopping precisely at the hairline, and just above or just below the chin."],"color":"#432","bgcolor":"#653"},{"id":1477,"type":"LoraLoader","pos":[-1684.5245361328125,-845.994140625],"size":[315,126],"flags":{},"order":34,"mode":4,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":5439},{"name":"clip","localized_name":"clip","type":"CLIP","link":5440}],"outputs":[{"name":"MODEL","localized_name":"MODEL","type":"MODEL","links":[6397],"slot_index":0},{"name":"CLIP","localized_name":"CLIP","type":"CLIP","links":[6398],"slot_index":1}],"properties":{"Node name for S&R":"LoraLoader"},"widgets_values":["FLUX/Kirsten_Dunst_Flux_V1.safetensors",1,1]},{"id":1279,"type":"TorchCompileModels","pos":[-2086.55322265625,-1090.6181640625],"size":[285.9945068359375,179.0001983642578],"flags":{},"order":38,"mode":4,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":6397}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[6396],"slot_index":0}],"properties":{"Node name for S&R":"TorchCompileModels"},"widgets_values":["inductor",false,"default",false,64,0]},{"id":1478,"type":"ModelSamplingAdvancedResolution","pos":[-1773.91259765625,-1030.6773681640625],"size":[260.3999938964844,126],"flags":{},"order":54,"mode":4,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":6396},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":5442}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[6383],"slot_index":0}],"properties":{"Node name for 
S&R":"ModelSamplingAdvancedResolution"},"widgets_values":["exponential",1.35,0.85]},{"id":1454,"type":"ClownOptions_Cycles_Beta","pos":[-74.8967514038086,24.043270111083984],"size":[261.7955627441406,202],"flags":{},"order":9,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[6402],"slot_index":0}],"properties":{"Node name for S&R":"ClownOptions_Cycles_Beta"},"widgets_values":[20,1,-1,"none",-1,1,true]},{"id":726,"type":"Mask Bounding Box Aspect Ratio","pos":[-828.6614990234375,-412.50946044921875],"size":[252,250],"flags":{"collapsed":false},"order":40,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","shape":7,"link":5054},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":5021},{"name":"aspect_ratio","type":"FLOAT","pos":[10,204],"widget":{"name":"aspect_ratio"},"link":2100}],"outputs":[{"name":"image","localized_name":"image","type":"IMAGE","links":[2101,2102,3606,3721,4996,6543],"slot_index":0},{"name":"mask","localized_name":"mask","type":"MASK","links":[2106,5529],"slot_index":1},{"name":"mask_blurred","localized_name":"mask_blurred","type":"MASK","links":[6447],"slot_index":2},{"name":"x","localized_name":"x","type":"INT","links":[2206],"slot_index":3},{"name":"y","localized_name":"y","type":"INT","links":[2207],"slot_index":4},{"name":"width","localized_name":"width","type":"INT","links":[2204],"slot_index":5},{"name":"height","localized_name":"height","type":"INT","links":[2205],"slot_index":6}],"properties":{"Node name for S&R":"Mask Bounding Box Aspect 
Ratio"},"widgets_values":[100,40,1.75,false]},{"id":1702,"type":"PulidFluxInsightFaceLoader","pos":[-1150,-1080],"size":[365.4000244140625,58],"flags":{"collapsed":true},"order":10,"mode":0,"inputs":[],"outputs":[{"name":"FACEANALYSIS","localized_name":"FACEANALYSIS","type":"FACEANALYSIS","shape":3,"links":[6526],"slot_index":0}],"properties":{"Node name for S&R":"PulidFluxInsightFaceLoader"},"widgets_values":["CPU"]},{"id":1524,"type":"ReFluxPatcher","pos":[-1486.33251953125,-986.468505859375],"size":[210,82],"flags":{},"order":60,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":6383}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[6547],"slot_index":0}],"properties":{"Node name for S&R":"ReFluxPatcher"},"widgets_values":["float64",true]},{"id":13,"type":"Reroute","pos":[-1346.8087158203125,-863.3270874023438],"size":[75,26],"flags":{},"order":64,"mode":0,"inputs":[{"name":"","type":"*","link":6547}],"outputs":[{"name":"","type":"MODEL","links":[6548],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1703,"type":"PulidFluxModelLoader","pos":[-1140,-970],"size":[315,58],"flags":{"collapsed":true},"order":11,"mode":0,"inputs":[],"outputs":[{"name":"PULIDFLUX","localized_name":"PULIDFLUX","type":"PULIDFLUX","shape":3,"links":[6524],"slot_index":0}],"properties":{"Node name for S&R":"PulidFluxModelLoader"},"widgets_values":["pulid_flux_v0.9.0.safetensors"]},{"id":1688,"type":"Note","pos":[-1527.4205322265625,-1311.8199462890625],"size":[274.47601318359375,104.34856414794922],"flags":{},"order":12,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["ReFluxPatcher is required to use the \"Style\" nodes. 
Different \"Re...Patcher\" nodes are available for many other models, from SD1.5/SDXL to SD3.5, HiDream, AuraFlow, Chroma, WAN, and LTXV."],"color":"#432","bgcolor":"#653"},{"id":1071,"type":"CLIPVisionEncode","pos":[586.1533203125,119.24115753173828],"size":[253.60000610351562,78],"flags":{"collapsed":true},"order":43,"mode":0,"inputs":[{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","link":6552},{"name":"image","localized_name":"image","type":"IMAGE","link":3721}],"outputs":[{"name":"CLIP_VISION_OUTPUT","localized_name":"CLIP_VISION_OUTPUT","type":"CLIP_VISION_OUTPUT","links":[3720],"slot_index":0}],"properties":{"Node name for S&R":"CLIPVisionEncode"},"widgets_values":["center"]},{"id":1073,"type":"CLIPTextEncode","pos":[575.77001953125,186.9269256591797],"size":[263.280517578125,88.73566436767578],"flags":{"collapsed":true},"order":41,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":4157}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[4650,4980],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[""]},{"id":1476,"type":"FluxLoader","pos":[-2094.3544921875,-847.2406005859375],"size":[385.17449951171875,282],"flags":{},"order":13,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[5439],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[5440],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[5447],"slot_index":2},{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","links":[6550,6552],"slot_index":3},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","links":[6551,6553],"slot_index":4}],"properties":{"Node name for 
S&R":"FluxLoader"},"widgets_values":["colossusProjectFlux_v42AIO.safetensors","fp8_e4m3fn_fast",".use_ckpt_clip",".none",".use_ckpt_vae","siglip2-so400m-patch16-512.safetensors","flex1_redux_siglip2_512.safetensors"]},{"id":1716,"type":"Note","pos":[-2101.239013671875,-463.0836486816406],"size":[395.2708740234375,177.91754150390625],"flags":{},"order":14,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["To use the 512x512 Redux models, download and place in the following paths:\n\ncomfy/models/style_models:\nhttps://huggingface.co/ostris/Flex.1-alpha-Redux/blob/main/flex1_redux_siglip2_512.safetensors\n\ncomfy/models/clip_vision:\nhttps://huggingface.co/google/siglip2-so400m-patch16-512/blob/main/model.safetensors\n\nRename the latter as siglip2-so400m-patch16-512.safetensors"],"color":"#432","bgcolor":"#653"},{"id":1701,"type":"PulidFluxEvaClipLoader","pos":[-1145.7685546875,-1024.2314453125],"size":[327.5999755859375,26],"flags":{"collapsed":true},"order":15,"mode":0,"inputs":[],"outputs":[{"name":"EVA_CLIP","localized_name":"EVA_CLIP","type":"EVA_CLIP","shape":3,"links":[6525],"slot_index":0}],"properties":{"Node name for 
S&R":"PulidFluxEvaClipLoader"},"widgets_values":[]},{"id":1548,"type":"ReduxAdvanced","pos":[-69.81456756591797,-498.3502502441406],"size":[248.6250457763672,234],"flags":{},"order":47,"mode":4,"inputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","link":6422},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","link":6551},{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","link":6550},{"name":"image","localized_name":"image","type":"IMAGE","link":6543},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[6421],"slot_index":0},{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":null},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"ReduxAdvanced"},"widgets_values":[3,"area","center crop (square)",1,0.1]},{"id":1072,"type":"StyleModelApply","pos":[596.4773559570312,153.7720947265625],"size":[262,122],"flags":{"collapsed":true},"order":48,"mode":0,"inputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","link":4980},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","link":6553},{"name":"clip_vision_output","localized_name":"clip_vision_output","type":"CLIP_VISION_OUTPUT","link":3720}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[5653],"slot_index":0}],"properties":{"Node name for S&R":"StyleModelApply"},"widgets_values":[1,"multiply"]},{"id":1714,"type":"Note","pos":[-816.8351440429688,-725.0016479492188],"size":[252.3572998046875,162.81890869140625],"flags":{},"order":16,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["The repo for PuLID Flux is currently broken, but the ReFluxPatcher node will repair the issues and make it usable. You must have ReFluxPatcher enabled to use this. 
Aside from that, install as instructed:\n\nhttps://github.com/balazik/ComfyUI-PuLID-Flux\n\n"],"color":"#432","bgcolor":"#653"},{"id":1575,"type":"PrimitiveFloat","pos":[11.355203628540039,-940.5784912109375],"size":[210,58],"flags":{},"order":17,"mode":0,"inputs":[],"outputs":[{"name":"FLOAT","localized_name":"FLOAT","type":"FLOAT","links":[6241],"slot_index":0}],"title":"Similarity","properties":{"Node name for S&R":"PrimitiveFloat"},"widgets_values":[1]},{"id":1573,"type":"PrimitiveFloat","pos":[10.393571853637695,-834.4251708984375],"size":[210,58],"flags":{},"order":18,"mode":0,"inputs":[],"outputs":[{"name":"FLOAT","localized_name":"FLOAT","type":"FLOAT","links":[6239],"slot_index":0}],"title":"Drift Toward Target","properties":{"Node name for S&R":"PrimitiveFloat"},"widgets_values":[0.2]},{"id":1574,"type":"PrimitiveFloat","pos":[11.355203628540039,-720.5784912109375],"size":[210,58],"flags":{},"order":19,"mode":0,"inputs":[],"outputs":[{"name":"FLOAT","localized_name":"FLOAT","type":"FLOAT","links":[6240],"slot_index":0}],"title":"Drift Toward Guide","properties":{"Node name for 
S&R":"PrimitiveFloat"},"widgets_values":[0.2]},{"id":727,"type":"VAEEncodeAdvanced","pos":[-789.0958862304688,67.53204345703125],"size":[262.4812927246094,298],"flags":{"collapsed":true},"order":49,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":2101},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":2102},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":2103},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":3508},{"name":"width","type":"INT","pos":[10,160],"widget":{"name":"width"},"link":2104},{"name":"height","type":"INT","pos":[10,184],"widget":{"name":"height"},"link":2105}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[5373,5715,6201,6202,6229,6230,6412],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":[],"slot_index":1},{"name":"mask","localized_name":"mask","type":"MASK","links":[6222,6360,6569,6570],"slot_index":2},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[5442],"slot_index":3},{"name":"width","localized_name":"width","type":"INT","links":[],"slot_index":4},{"name":"height","localized_name":"height","type":"INT","links":[]}],"properties":{"Node name for S&R":"VAEEncodeAdvanced"},"widgets_values":["false",1344,768,"red",false,"16_channels"]},{"id":1674,"type":"Note","pos":[170.8737030029297,-1390.4803466796875],"size":[322.6287841796875,128.15802001953125],"flags":{},"order":20,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Activate the style nodes if you are having issues with color, detail, light, blurriness or pixelation drifting too far from your source input.\n\nIf end_step is too high, you may get faint halos and an oversharpened 
look."],"color":"#432","bgcolor":"#653"},{"id":1689,"type":"Note","pos":[525.9268798828125,-1349.89794921875],"size":[263.00439453125,88],"flags":{},"order":21,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Expanding the mask for the second pass can sometimes help prevent seams."],"color":"#432","bgcolor":"#653"},{"id":1525,"type":"ClownGuide_Style_Beta","pos":[251.35520935058594,-950.5784912109375],"size":[252.0535430908203,286],"flags":{},"order":61,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":5715},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":6569},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":6411}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[6051],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuide_Style_Beta"},"widgets_values":["positive","scattersort",1,1,"constant",0,-1,false]},{"id":1672,"type":"ClownGuide_Style_Beta","pos":[561.355224609375,-950.5784912109375],"size":[252.0535430908203,286],"flags":{},"order":65,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":6412},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":6570},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":6414}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[6415,6476],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownGuide_Style_Beta"},"widgets_values":["positive","scattersort",1,1,"constant",0,-1,false]},{"id":1516,"type":"ClownOptions_SDE_Mask_Beta","pos":[-68.4439468383789,-163.1180877685547],"size":[252.8383331298828,126],"flags":{},"order":59,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":6361},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[5776],"slot_index":0}],"properties":{"Node name for S&R":"ClownOptions_SDE_Mask_Beta"},"widgets_values":[1,0,false]},{"id":1731,"type":"ClownOptions_SDE_Mask_Beta","pos":[898.4906005859375,-756.2548217773438],"size":[252.8383331298828,126],"flags":{},"order":63,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":6586},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[6585,6587],"slot_index":0}],"properties":{"Node name for S&R":"ClownOptions_SDE_Mask_Beta"},"widgets_values":[1,0,false]},{"id":1730,"type":"MaskEdge","pos":[903.2994384765625,-949.55322265625],"size":[248.64459228515625,130],"flags":{},"order":58,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":6584}],"outputs":[{"name":"edge_mask","localized_name":"edge_mask","type":"MASK","links":[6586],"slot_index":0}],"properties":{"Node name for S&R":"MaskEdge"},"widgets_values":[10,"percent",1,1]},{"id":1677,"type":"Note","pos":[-439.5185241699219,-738.3756713867188],"size":[290.3874816894531,88],"flags":{},"order":22,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Try setting both drift values to 0.0 or 0.2 as a starting 
point.\n"],"color":"#432","bgcolor":"#653"},{"id":1552,"type":"ClownOptions_SDE_Beta","pos":[-271.7193603515625,259.6875915527344],"size":[315,266],"flags":{"collapsed":true},"order":23,"mode":0,"inputs":[{"name":"etas","localized_name":"etas","type":"SIGMAS","shape":7,"link":null},{"name":"etas_substep","localized_name":"etas_substep","type":"SIGMAS","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[],"slot_index":0}],"properties":{"Node name for S&R":"ClownOptions_SDE_Beta"},"widgets_values":["gaussian","gaussian","hard","hard",1,1,-1,"fixed"]},{"id":1726,"type":"ClownOptions_ImplicitSteps_Beta","pos":[-493.06549072265625,258.3205871582031],"size":[300.7710876464844,130],"flags":{"collapsed":true},"order":24,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownOptions_ImplicitSteps_Beta"},"widgets_values":["bongmath","bongmath",10,0]},{"id":1722,"type":"ClownOptions_DetailBoost_Beta","pos":[-302.6524963378906,-24.413410186767578],"size":[210.1761016845703,218],"flags":{"collapsed":false},"order":25,"mode":0,"inputs":[{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"etas","localized_name":"etas","type":"SIGMAS","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[6589,6590,6591],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownOptions_DetailBoost_Beta"},"widgets_values":[1,"model","hard",0.5,3,10]},{"id":1732,"type":"Note","pos":[890.6793823242188,-1148.8226318359375],"size":[290.3854675292969,122.62060546875],"flags":{},"order":26,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["The mask below allows the SDE/ancestral noise used in the last two samplers to only hit the seams around the inpainted area.\n\nTry bypassing the SDE mask and see if you like the results - it lets the entire face be affected by noise."],"color":"#432","bgcolor":"#653"},{"id":1727,"type":"Note","pos":[-453.12371826171875,343.8135681152344],"size":[296.5935363769531,187.9747314453125],"flags":{},"order":27,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["ClownOptions Detail gives a boost to detail a lot like the \"Detail Daemon\" node, though I think with somewhat less risk of mutations and loss of saturation. Change \"weight\", \"eta\", or \"end_step\" to control strength.\n\nImplicit steps can be used in place of \"Cycles\". Try setting steps_to_run to 3 or 4 if you use it.\n\nClownOptions SDE contains extra settings for noise, so you can change the type, amount, etc. 
with more precision."],"color":"#432","bgcolor":"#653"},{"id":1733,"type":"Note","pos":[-819.1915893554688,-1111.3170166015625],"size":[251.92019653320312,88],"flags":{},"order":28,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Try changing the weight or end_at if results look plastic."],"color":"#432","bgcolor":"#653"},{"id":1704,"type":"ApplyPulidFlux","pos":[-805.7684326171875,-986.1819458007812],"size":[219.79336547851562,206],"flags":{},"order":66,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":6548},{"name":"pulid_flux","localized_name":"pulid_flux","type":"PULIDFLUX","link":6524},{"name":"eva_clip","localized_name":"eva_clip","type":"EVA_CLIP","link":6525},{"name":"face_analysis","localized_name":"face_analysis","type":"FACEANALYSIS","link":6526},{"name":"image","localized_name":"image","type":"IMAGE","link":null},{"name":"attn_mask","localized_name":"attn_mask","type":"MASK","shape":7,"link":null}],"outputs":[{"name":"MODEL","localized_name":"MODEL","type":"MODEL","shape":3,"links":[6549],"slot_index":0}],"properties":{"Node name for S&R":"ApplyPulidFlux"},"widgets_values":[1,0,1]},{"id":1737,"type":"Note","pos":[-1184.4395751953125,-1304.4234619140625],"size":[251.92019653320312,88],"flags":{},"order":29,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["The image you choose is very important. 
The face should have its proportions clearly distinguishable."],"color":"#432","bgcolor":"#653"},{"id":1717,"type":"LoadImage","pos":[-603.783203125,-1602.01904296875],"size":[315,314],"flags":{},"order":30,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["pasted/image (812).png","image"]},{"id":1446,"type":"ClownsharKSampler_Beta","pos":[214.812255859375,-508.00537109375],"size":[277.5089111328125,735.1378784179688],"flags":{},"order":67,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":6549},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":6421},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":5373},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":6051},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":5493},{"name":"options 2","type":"OPTIONS","link":5776},{"name":"options 3","type":"OPTIONS","link":6402},{"name":"options 4","type":"OPTIONS","link":6589},{"name":"options 5","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[6380],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":[],"slot_index":1},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharKSampler_Beta","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":[1,"exponential/res_2s","bong_tangent",30,1,0.65,1,100,"fixed","standard",true],"color":"#332922","bgcolor":"#593930"},{"id":1556,"type":"CLIPTextEncode","pos":[-392.6881408691406,-498.2940979003906],"size":[289.0962829589844,113.79679870605469],"flags":{"collapsed":false},"order":42,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":6103}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[6422],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[""],"color":"#2a363b","bgcolor":"#3f5159"},{"id":1707,"type":"LoadImage","pos":[-1272.3699951171875,-406.4196472167969],"size":[315,314],"flags":{},"order":31,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[6619],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":[6620],"slot_index":1}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["clipspace/clipspace-mask-18464655.700000048.png [input]","image"]},{"id":1740,"type":"Note","pos":[-892.4718627929688,-1299.925048828125],"size":[251.92019653320312,88],"flags":{},"order":32,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["PuLID will copy much of the lighting and especially position/angle of the face. 
Keep this in mind."],"color":"#432","bgcolor":"#653"},{"id":1690,"type":"ClownsharkChainsampler_Beta","pos":[865.4187622070312,-518.0064086914062],"size":[281.7781677246094,571.74853515625],"flags":{},"order":69,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":6566},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":6476},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":6587},{"name":"options 2","type":"OPTIONS","link":6591},{"name":"options 3","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[6478],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0,"multistep/res_3m",-1,1,"resample",false]},{"id":1479,"type":"ClownsharkChainsampler_Beta","pos":[536.1533203125,-510.75872802734375],"size":[288.1370544433594,571.74853515625],"flags":{},"order":68,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":6380},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":6415},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":6585},{"name":"options 2","type":"OPTIONS","link":6590},{"name":"options 3","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[6566],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0,"exponential/res_2s",2,1,"resample",true]}],"links":[[2100,731,1,726,2,"FLOAT"],[2101,726,0,727,0,"IMAGE"],[2102,726,0,727,1,"IMAGE"],[2103,728,0,727,2,"IMAGE"],[2104,729,0,727,5,"INT"],[2105,729,1,727,6,"INT"],[2106,726,1,728,0,"MASK"],[2108,729,0,731,0,"*"],[2109,729,1,731,1,"*"],[2153,14,0,745,1,"VAE"],[2198,758,0,759,1,"IMAGE"],[2200,759,0,761,1,"IMAGE"],[2201,745,0,758,0,"IMAGE"],[2204,726,5,758,1,"INT"],[2205,726,6,758,2,"INT"],[2206,726,3,759,3,"INT"],[2207,726,4,759,4,"INT"],[2210,725,0,761,0,"IMAGE"],[2211,725,0,759,0,"IMAGE"],[2241,745,0,744,0,"IMAGE"],[3508,14,0,727,4,"VAE"],[3568,745,0,1022,0,"IMAGE"],[3569,1022,0,1024,0,"IMAGE"],[3570,765,0,1022,1,"IMAGE"],[3605,728,0,1039,1,"IMAGE"],[3606,726,0,1039,0,"IMAGE"],[3607,1039,0,1040,0,"IMAGE"],[3720,1071,0,1072,2,"CLIP_VISION_OUTPUT"],[3721,726,0,1071,1,"IMAGE"],[4157,490,0,1073,0,"CLIP"],[4185,759,0,1162,0,"*"],[4186,1162,0,1161,0,"IMAGE"],[4650,1073,0,1277,1,"CONDITIONING"],[4980,1073,0,1072,0,"CONDITIONING"],[4996,726,0,1369,0,"IMAGE"],[4997,745,0,1368,0,"IMAGE"],[4998,729,0,1369,1,"INT"],[4999,729,1,1369,2,"INT"],[5000,1369,0,1368,1,"IMAGE"],[5021,1407,0,726,1,"MASK"],[5054,725,0,726,0,"IMAGE"],[5373,727,0,1446,3,"LATENT"],[5439,1476,0,1477,0,"MODEL"],[5440,1476,1,1477,1,"CLIP"],[5442,727,3,1478,1,"LATENT"],[5447,1476,2,14,0,"*"],[5493,1277,0,1446,6,"OPTIONS"],[5529,726,1,765,0,"MASK"],[5653,1072,0,1277,0,"CONDITIONING"],[5715,727,0,1525,0,"LATENT"],[5776,1516,0,1446,7,"OPTIONS"],[6051,1525,0,1446,5,"GUIDES"],[6103,490,0,1556,0,"CLIP"],[6201,727,0,1569,0,"LATENT"],[6202,727,0,1569,1,"LATENT"],[6222,727,2,1571,0,"*"],[6223,1571,0,1569,2,"MASK"],[6224,1571,0,1569,3,"MASK"],[6225,1571,0,1569,4,"MASK"],[6226,1571,0,1569,5,"MASK"],[6227,1571,0,1569,6,"MASK"],[6228,1571,0,1569,7,"MASK"],[6229,727,0,1572,0,"LATENT"],[6230,727,0,1572,1,"LATENT"],[6239,1573,0,1569,20,"FLOAT"],[6240,1574,0,1569,21,"FLOAT"],[6241,1575,0,1569,22,"FLOAT"],[6342,1571,0,1539,0,"M
ASK"],[6343,1539,0,1572,2,"MASK"],[6344,1539,0,1572,3,"MASK"],[6345,1539,0,1572,4,"MASK"],[6346,1539,0,1572,5,"MASK"],[6347,1539,0,1572,6,"MASK"],[6348,1539,0,1572,7,"MASK"],[6360,727,2,1667,0,"MASK"],[6361,1667,0,1516,0,"MASK"],[6380,1446,0,1479,4,"LATENT"],[6383,1478,0,1524,0,"MODEL"],[6396,1279,0,1478,0,"MODEL"],[6397,1477,0,1279,0,"MODEL"],[6398,1477,1,490,0,"*"],[6402,1454,0,1446,8,"OPTIONS"],[6411,1569,0,1525,3,"GUIDES"],[6412,727,0,1672,0,"LATENT"],[6414,1572,0,1672,3,"GUIDES"],[6415,1672,0,1479,5,"GUIDES"],[6421,1548,0,1446,1,"CONDITIONING"],[6422,1556,0,1548,0,"CONDITIONING"],[6447,726,2,759,2,"MASK"],[6476,1672,0,1690,5,"GUIDES"],[6478,1690,0,745,0,"LATENT"],[6524,1703,0,1704,1,"PULIDFLUX"],[6525,1701,0,1704,2,"EVA_CLIP"],[6526,1702,0,1704,3,"FACEANALYSIS"],[6543,726,0,1548,3,"IMAGE"],[6547,1524,0,13,0,"*"],[6548,13,0,1704,0,"MODEL"],[6549,1704,0,1446,0,"MODEL"],[6550,1476,3,1548,2,"CLIP_VISION"],[6551,1476,4,1548,1,"STYLE_MODEL"],[6552,1476,3,1071,0,"CLIP_VISION"],[6553,1476,4,1072,1,"STYLE_MODEL"],[6566,1479,0,1690,4,"LATENT"],[6569,727,2,1525,1,"MASK"],[6570,727,2,1672,1,"MASK"],[6584,1571,0,1730,0,"MASK"],[6585,1731,0,1479,6,"OPTIONS"],[6586,1730,0,1731,0,"MASK"],[6587,1731,0,1690,6,"OPTIONS"],[6589,1722,0,1446,9,"OPTIONS"],[6590,1722,0,1479,7,"OPTIONS"],[6591,1722,0,1690,7,"OPTIONS"],[6619,1707,0,725,0,"*"],[6620,1707,1,1407,0,"*"]],"groups":[{"id":1,"title":"Prepare Input","bounding":[-1310.92529296875,-489.52618408203125,755.7755737304688,762.867431640625],"color":"#3f789e","font_size":24,"flags":{}},{"id":2,"title":"Patch and 
Stitch","bounding":[1250.695068359375,-877.5091552734375,1320.4892578125,1148.6859130859375],"color":"#3f789e","font_size":24,"flags":{}},{"id":3,"title":"Loaders","bounding":[-2115.099853515625,-1180.8953857421875,881.3677368164062,646.2952880859375],"color":"#3f789e","font_size":24,"flags":{}},{"id":5,"title":"Sampling","bounding":[-510.548828125,-602.9613037109375,1686.064208984375,874.1248168945312],"color":"#3f789e","font_size":24,"flags":{}},{"id":6,"title":"Guides","bounding":[-37.0714225769043,-1229.123046875,888.9586791992188,587.7683715820312],"color":"#3f789e","font_size":24,"flags":{}},{"id":7,"title":"PuLID","bounding":[-1191.9031982421875,-1177.2020263671875,649.8841552734375,641.718994140625],"color":"#3f789e","font_size":24,"flags":{}}],"config":{},"extra":{"ds":{"scale":1.3310000000000006,"offset":[4741.826990245036,1361.8744550803772]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0,"ue_links":[],"VHS_MetadataImage":true,"VHS_KeepIntermediate":true},"version":0.4}
================================================
FILE: example_workflows/flux faceswap sync.json
================================================
{"last_node_id":1698,"last_link_id":6519,"nodes":[{"id":490,"type":"Reroute","pos":[-669.7835083007812,-822.2691040039062],"size":[75,26],"flags":{},"order":28,"mode":0,"inputs":[{"name":"","type":"*","link":6398}],"outputs":[{"name":"","type":"CLIP","links":[4157,6103],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1162,"type":"Reroute","pos":[1930.0975341796875,-817.45556640625],"size":[75,26],"flags":{},"order":66,"mode":0,"inputs":[{"name":"","type":"*","link":4185}],"outputs":[{"name":"","type":"IMAGE","links":[4186],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":744,"type":"SaveImage","pos":[1276.456787109375,-719.9273681640625],"size":[424.53594970703125,455.0760192871094],"flags":{},"order":60,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2241}],"outputs":[],"title":"Save Patch","properties":{"Node name for S&R":"SaveImage","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["ComfyUI"],"color":"#332922","bgcolor":"#593930"},{"id":1022,"type":"ImageBlend","pos":[2313.7607421875,-792.44091796875],"size":[210,102],"flags":{"collapsed":true},"order":61,"mode":0,"inputs":[{"name":"image1","localized_name":"image1","type":"IMAGE","link":3568},{"name":"image2","localized_name":"image2","type":"IMAGE","link":3570}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3569],"slot_index":0}],"properties":{"Node name for S&R":"ImageBlend"},"widgets_values":[0.5,"multiply"]},{"id":729,"type":"SetImageSize","pos":[-812.6932373046875,-86.24114227294922],"size":[210,102],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"width","localized_name":"width","type":"INT","links":[2104,2108,4998],"slot_index":0},{"name":"height","localized_name":"height","type":"INT","links":[2105,2109,4999],"slot_index":1}],"title":"Inpaint Tile Size","properties":{"Node name for 
S&R":"SetImageSize"},"widgets_values":[1024,1024]},{"id":1161,"type":"Image Save","pos":[2186.75634765625,-722.2388916015625],"size":[351.4677734375,796.8805541992188],"flags":{},"order":67,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":4186}],"outputs":[{"name":"images","localized_name":"images","type":"IMAGE","links":null},{"name":"files","localized_name":"files","type":"STRING","links":null}],"properties":{"Node name for S&R":"Image Save"},"widgets_values":["[time(%Y-%m-%d)]","ComfyUI","_",4,"false","jpeg",300,100,"true","false","false","false","true","true","true"],"color":"#232","bgcolor":"#353"},{"id":1024,"type":"PreviewImage","pos":[1286.05859375,-198.6599884033203],"size":[413.7582092285156,445.8081359863281],"flags":{},"order":64,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":3569}],"outputs":[],"properties":{"Node name for S&R":"PreviewImage"},"widgets_values":[],"color":"#332922","bgcolor":"#593930"},{"id":758,"type":"ImageResize+","pos":[1468.4384765625,-790.391845703125],"size":[210,218],"flags":{"collapsed":true},"order":59,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","link":2201},{"name":"width","type":"INT","pos":[10,76],"widget":{"name":"width"},"link":2204},{"name":"height","type":"INT","pos":[10,100],"widget":{"name":"height"},"link":2205}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2198],"slot_index":0},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for 
S&R":"ImageResize+"},"widgets_values":[512,512,"lanczos","stretch","always",0]},{"id":1369,"type":"ImageResize+","pos":[2183.37109375,151.09762573242188],"size":[210,218],"flags":{"collapsed":true},"order":33,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","link":4996},{"name":"width","type":"INT","pos":[10,76],"widget":{"name":"width"},"link":4998},{"name":"height","type":"INT","pos":[10,100],"widget":{"name":"height"},"link":4999}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[5000],"slot_index":0},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"ImageResize+"},"widgets_values":[512,512,"lanczos","stretch","always",0]},{"id":1407,"type":"Reroute","pos":[-914.50390625,-361.0196533203125],"size":[75,26],"flags":{},"order":26,"mode":0,"inputs":[{"name":"","type":"*","link":6519}],"outputs":[{"name":"","type":"MASK","links":[5021],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":725,"type":"Reroute","pos":[-914.8554077148438,-440.6482238769531],"size":[75,26],"flags":{},"order":25,"mode":0,"inputs":[{"name":"","type":"*","link":6518}],"outputs":[{"name":"","type":"IMAGE","links":[2210,2211,5054],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1071,"type":"CLIPVisionEncode","pos":[586.1533203125,119.24115753173828],"size":[253.60000610351562,78],"flags":{"collapsed":true},"order":32,"mode":0,"inputs":[{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","link":5443},{"name":"image","localized_name":"image","type":"IMAGE","link":3721}],"outputs":[{"name":"CLIP_VISION_OUTPUT","localized_name":"CLIP_VISION_OUTPUT","type":"CLIP_VISION_OUTPUT","links":[3720],"slot_index":0}],"properties":{"Node name for 
S&R":"CLIPVisionEncode"},"widgets_values":["center"]},{"id":1575,"type":"PrimitiveFloat","pos":[11.355203628540039,-940.5784912109375],"size":[210,58],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[{"name":"FLOAT","localized_name":"FLOAT","type":"FLOAT","links":[6241],"slot_index":0}],"title":"Similarity","properties":{"Node name for S&R":"PrimitiveFloat"},"widgets_values":[1]},{"id":1654,"type":"LoadImage","pos":[773.8897705078125,1813.0185546875],"size":[315,314],"flags":{},"order":2,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":null},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["7c2a2a772675a224-photo.JPG","image"]},{"id":1478,"type":"ModelSamplingAdvancedResolution","pos":[-1096.887451171875,-1029.6195068359375],"size":[260.3999938964844,126],"flags":{},"order":43,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":6396},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":5442}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[6383],"slot_index":0}],"properties":{"Node name for S&R":"ModelSamplingAdvancedResolution"},"widgets_values":["exponential",1.35,0.85]},{"id":1279,"type":"TorchCompileModels","pos":[-1409.527587890625,-1089.560302734375],"size":[285.9945068359375,179.0001983642578],"flags":{},"order":27,"mode":4,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":6397}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[6396],"slot_index":0}],"properties":{"Node name for 
S&R":"TorchCompileModels"},"widgets_values":["inductor",false,"default",false,64,0]},{"id":14,"type":"Reroute","pos":[-669.7835083007812,-782.2691040039062],"size":[75,26],"flags":{},"order":24,"mode":0,"inputs":[{"name":"","type":"*","link":5447}],"outputs":[{"name":"","type":"VAE","links":[2153,3508,6353],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":13,"type":"Reroute","pos":[-669.7835083007812,-862.2692260742188],"size":[75,26],"flags":{},"order":51,"mode":0,"inputs":[{"name":"","type":"*","link":5845}],"outputs":[{"name":"","type":"MODEL","links":[5846],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1516,"type":"ClownOptions_SDE_Mask_Beta","pos":[-68.4439468383789,-163.1180877685547],"size":[252.8383331298828,126],"flags":{},"order":47,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":6361},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[5776,6016,6477],"slot_index":0}],"properties":{"Node name for S&R":"ClownOptions_SDE_Mask_Beta"},"widgets_values":[1,0,false]},{"id":1667,"type":"GrowMask","pos":[-302.060302734375,-164.22067260742188],"size":[210,82],"flags":{},"order":42,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":6360}],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[6361],"slot_index":0}],"properties":{"Node name for S&R":"GrowMask"},"widgets_values":[-10,false]},{"id":1039,"type":"ImageBlend","pos":[-769.9498901367188,220.86917114257812],"size":[210,102],"flags":{"collapsed":true},"order":39,"mode":0,"inputs":[{"name":"image1","localized_name":"image1","type":"IMAGE","link":3606},{"name":"image2","localized_name":"image2","type":"IMAGE","link":3605}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3607],"slot_index":0}],"properties":{"Node name 
for S&R":"ImageBlend"},"widgets_values":[0.5,"multiply"]},{"id":727,"type":"VAEEncodeAdvanced","pos":[-789.0958862304688,67.53204345703125],"size":[262.4812927246094,298],"flags":{"collapsed":true},"order":38,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":2101},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":2102},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":2103},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":3508},{"name":"width","type":"INT","pos":[10,160],"widget":{"name":"width"},"link":2104},{"name":"height","type":"INT","pos":[10,184],"widget":{"name":"height"},"link":2105}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[5373,5715,6201,6202,6229,6230,6412],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":[],"slot_index":1},{"name":"mask","localized_name":"mask","type":"MASK","links":[6222,6360],"slot_index":2},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[5442],"slot_index":3},{"name":"width","localized_name":"width","type":"INT","links":[],"slot_index":4},{"name":"height","localized_name":"height","type":"INT","links":[]}],"properties":{"Node name for 
S&R":"VAEEncodeAdvanced"},"widgets_values":["false",1344,768,"red",false,"16_channels"]},{"id":731,"type":"SimpleMath+","pos":[-776.4415893554688,126.82145690917969],"size":[315,98],"flags":{"collapsed":true},"order":22,"mode":0,"inputs":[{"name":"a","localized_name":"a","type":"*","shape":7,"link":2108},{"name":"b","localized_name":"b","type":"*","shape":7,"link":2109},{"name":"c","localized_name":"c","type":"*","shape":7,"link":null}],"outputs":[{"name":"INT","localized_name":"INT","type":"INT","links":null},{"name":"FLOAT","localized_name":"FLOAT","type":"FLOAT","links":[2100],"slot_index":1}],"properties":{"Node name for S&R":"SimpleMath+"},"widgets_values":["a/b"]},{"id":728,"type":"MaskToImage","pos":[-791.0198364257812,176.82147216796875],"size":[176.39999389648438,26],"flags":{"collapsed":true},"order":34,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":2106}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2103,3605],"slot_index":0}],"properties":{"Node name for S&R":"MaskToImage"},"widgets_values":[]},{"id":765,"type":"MaskToImage","pos":[2080.868896484375,-792.6943359375],"size":[182.28543090820312,26],"flags":{"collapsed":true},"order":35,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":5529}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3570],"slot_index":0}],"properties":{"Node name for S&R":"MaskToImage"},"widgets_values":[]},{"id":761,"type":"Image Comparer (rgthree)","pos":[1747.432373046875,-712.1251220703125],"size":[410.4466247558594,447.8973388671875],"flags":{},"order":65,"mode":0,"inputs":[{"name":"image_a","type":"IMAGE","dir":3,"link":2210},{"name":"image_b","type":"IMAGE","dir":3,"link":2200}],"outputs":[],"title":"Compare 
Output","properties":{"comparer_mode":"Slide"},"widgets_values":[[{"name":"A","selected":true,"url":"/api/view?filename=rgthree.compare._temp_udooi_00119_.png&type=temp&subfolder=&rand=0.4602348825653009"},{"name":"B","selected":true,"url":"/api/view?filename=rgthree.compare._temp_udooi_00120_.png&type=temp&subfolder=&rand=0.24695456359911838"}]],"color":"#232","bgcolor":"#353"},{"id":1072,"type":"StyleModelApply","pos":[591.9240112304688,151.93089294433594],"size":[262,122],"flags":{"collapsed":true},"order":37,"mode":0,"inputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","link":4980},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","link":5444},{"name":"clip_vision_output","localized_name":"clip_vision_output","type":"CLIP_VISION_OUTPUT","link":3720}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[5653],"slot_index":0}],"properties":{"Node name for S&R":"StyleModelApply"},"widgets_values":[1,"multiply"]},{"id":1073,"type":"CLIPTextEncode","pos":[575.77001953125,186.9269256591797],"size":[263.280517578125,88.73566436767578],"flags":{"collapsed":true},"order":30,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":4157}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[4650,4980],"slot_index":0}],"properties":{"Node name for 
S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[""]},{"id":1569,"type":"ClownGuides_Sync_Advanced","pos":[261.355224609375,-1000.5784912109375],"size":[315,1938],"flags":{"collapsed":true},"order":45,"mode":0,"inputs":[{"name":"guide_masked","localized_name":"guide_masked","type":"LATENT","shape":7,"link":6201},{"name":"guide_unmasked","localized_name":"guide_unmasked","type":"LATENT","shape":7,"link":6202},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":6223},{"name":"mask_sync","localized_name":"mask_sync","type":"MASK","shape":7,"link":6224},{"name":"mask_drift_x","localized_name":"mask_drift_x","type":"MASK","shape":7,"link":6225},{"name":"mask_drift_y","localized_name":"mask_drift_y","type":"MASK","shape":7,"link":6226},{"name":"mask_lure_x","localized_name":"mask_lure_x","type":"MASK","shape":7,"link":6227},{"name":"mask_lure_y","localized_name":"mask_lure_y","type":"MASK","shape":7,"link":6228},{"name":"weights_masked","localized_name":"weights_masked","type":"SIGMAS","shape":7,"link":null},{"name":"weights_unmasked","localized_name":"weights_unmasked","type":"SIGMAS","shape":7,"link":null},{"name":"syncs_masked","localized_name":"syncs_masked","type":"SIGMAS","shape":7,"link":null},{"name":"syncs_unmasked","localized_name":"syncs_unmasked","type":"SIGMAS","shape":7,"link":null},{"name":"drift_xs_masked","localized_name":"drift_xs_masked","type":"SIGMAS","shape":7,"link":null},{"name":"drift_xs_unmasked","localized_name":"drift_xs_unmasked","type":"SIGMAS","shape":7,"link":null},{"name":"drift_ys_masked","localized_name":"drift_ys_masked","type":"SIGMAS","shape":7,"link":null},{"name":"drift_ys_unmasked","localized_name":"drift_ys_unmasked","type":"SIGMAS","shape":7,"link":null},{"name":"lure_xs_masked","localized_name":"lure_xs_masked","type":"SIGMAS","shape":7,"link":null},{"name":"lure_xs_unmasked","localized_name":"lure_xs_unmasked","type":"SIGMAS","shape":7,"link":null},{"name":"lure_ys_masked","localiz
ed_name":"lure_ys_masked","type":"SIGMAS","shape":7,"link":null},{"name":"lure_ys_unmasked","localized_name":"lure_ys_unmasked","type":"SIGMAS","shape":7,"link":null},{"name":"drift_x_data","type":"FLOAT","pos":[10,800],"widget":{"name":"drift_x_data"},"link":6239},{"name":"drift_y_guide","type":"FLOAT","pos":[10,1088],"widget":{"name":"drift_y_guide"},"link":6240},{"name":"sync_masked","type":"FLOAT","pos":[10,608],"widget":{"name":"sync_masked"},"link":6241}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[6411],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuides_Sync_Advanced"},"widgets_values":[1,1,"constant","constant",0,0,-1,-1,0,1,"constant","constant",0,0,-1,-1,0.2,0,1,0,"constant","constant",0,0,-1,-1,0,0,0.2,1,0,"constant","constant",0,0,-1,-1,0,0,"constant","constant",0,0,-1,-1,0,0,"constant","constant",0,0,-1,-1,0,"y -> x",false,false,false,false,false,false]},{"id":1571,"type":"Reroute","pos":[141.35520935058594,-1030.5784912109375],"size":[75,26],"flags":{},"order":41,"mode":0,"inputs":[{"name":"","type":"*","link":6222}],"outputs":[{"name":"","type":"MASK","links":[6223,6224,6225,6226,6227,6228,6342],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1664,"type":"VAEDecode","pos":[1440,-1320],"size":[140,46],"flags":{"collapsed":true},"order":55,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":6354},{"name":"vae","localized_name":"vae","type":"VAE","link":6353}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[6355],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[]},{"id":1368,"type":"Image Comparer 
(rgthree)","pos":[1744.9150390625,-199.16920471191406],"size":[410.4466247558594,447.8973388671875],"flags":{},"order":62,"mode":0,"inputs":[{"name":"image_a","type":"IMAGE","dir":3,"link":4997},{"name":"image_b","type":"IMAGE","dir":3,"link":5000}],"outputs":[],"title":"Compare Patch","properties":{"comparer_mode":"Slide"},"widgets_values":[[{"name":"A","selected":true,"url":"/api/view?filename=rgthree.compare._temp_sgbfj_00119_.png&type=temp&subfolder=&rand=0.4913573783056806"},{"name":"B","selected":true,"url":"/api/view?filename=rgthree.compare._temp_sgbfj_00120_.png&type=temp&subfolder=&rand=0.2366457814945162"}]],"color":"#232","bgcolor":"#353"},{"id":1665,"type":"PreviewImage","pos":[1430,-1270],"size":[343.7617492675781,360.52777099609375],"flags":{},"order":57,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":6355}],"outputs":[],"properties":{"Node name for S&R":"PreviewImage"},"widgets_values":[]},{"id":1673,"type":"Note","pos":[1824.9287109375,-1010.687744140625],"size":[322.34954833984375,88],"flags":{},"order":3,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Preview of first stage output: sometimes it can be worth manually (or automatically, using DINO, etc.) 
adjusting your mask for the second stage, based on this output."],"color":"#432","bgcolor":"#653"},{"id":1539,"type":"GrowMask","pos":[573.4215698242188,-1145.86767578125],"size":[214.5684051513672,82],"flags":{},"order":46,"mode":4,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":6342}],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[6343,6344,6345,6346,6347,6348],"slot_index":0}],"properties":{"Node name for S&R":"GrowMask"},"widgets_values":[10,false]},{"id":1383,"type":"Note","pos":[216.7359161376953,340.25775146484375],"size":[291.67218017578125,232.2296142578125],"flags":{},"order":4,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["eta > 0.0 means you are using SDE/ancestral sampling. With this guide mode you will generally want to use bongmath = true.\n\nSamplers such as res_2s and res_3s will be very accurate. Try res_5s and res_8s if you really want to go crazy with it. They run 2x (2s), 3x (3s), etc slower than Euler.\n\nres_2m and 3m will be fast and also good, and run at the same speed as Euler.\n\neta_substep will increase the power of bongmath. If it is set to 0.0, you can turn bongmath off without any effect."],"color":"#432","bgcolor":"#653"},{"id":1380,"type":"Note","pos":[544.9375610351562,342.0576477050781],"size":[290.1026611328125,231.5842742919922],"flags":{},"order":5,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Setting denoise to a negative value is equivalent to just scaling it. For example:\n\nDenoise = -0.90 is the same as multiplying every sigma value in the entire schedule by 0.9.\n\nI find this is a lot easier to control than the regular denoise scale. The difference between -0.95 and -0.9 is much more predictable than with 0.95 and 0.9. Most of us have seen how different denoise 0.8 might be with Karras vs. exponential. \n\nTry a denoise between -0.95 and -0.85. 
"],"color":"#432","bgcolor":"#653"},{"id":759,"type":"ImageCompositeMasked","pos":[1697.19140625,-790.8740844726562],"size":[210,186],"flags":{"collapsed":true},"order":63,"mode":0,"inputs":[{"name":"destination","localized_name":"destination","type":"IMAGE","link":2211},{"name":"source","localized_name":"source","type":"IMAGE","link":2198},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":6447},{"name":"x","type":"INT","pos":[10,76],"widget":{"name":"x"},"link":2206},{"name":"y","type":"INT","pos":[10,100],"widget":{"name":"y"},"link":2207}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2200,4185],"slot_index":0}],"properties":{"Node name for S&R":"ImageCompositeMasked"},"widgets_values":[712,800,false]},{"id":1552,"type":"ClownOptions_SDE_Beta","pos":[-275.5662841796875,211.60325622558594],"size":[315,266],"flags":{"collapsed":true},"order":6,"mode":0,"inputs":[{"name":"etas","localized_name":"etas","type":"SIGMAS","shape":7,"link":null},{"name":"etas_substep","localized_name":"etas_substep","type":"SIGMAS","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[],"slot_index":0}],"properties":{"Node name for S&R":"ClownOptions_SDE_Beta"},"widgets_values":["gaussian","gaussian","hard","hard",1,1,-1,"fixed"]},{"id":1619,"type":"LoadImage","pos":[79.17283630371094,1820.8131103515625],"size":[315,314],"flags":{},"order":7,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":null},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for 
S&R":"LoadImage"},"widgets_values":["9319202660b0e794-photo.JPG","image"]},{"id":1476,"type":"FluxLoader","pos":[-1417.3287353515625,-846.1827392578125],"size":[385.17449951171875,282],"flags":{},"order":8,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[5439],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[5440],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[5447],"slot_index":2},{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","links":[5443,5993],"slot_index":3},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","links":[5444,5994],"slot_index":4}],"properties":{"Node name for S&R":"FluxLoader"},"widgets_values":["flux1-dev.sft","fp8_e4m3fn_fast","clip_l_flux.safetensors","t5xxl_fp8_e4m3fn_scaled.safetensors","ae.sft","sigclip_vision_patch14_384.safetensors","flux1-redux-dev.safetensors"]},{"id":1687,"type":"Note","pos":[-101.33948516845703,339.7750244140625],"size":[286.97723388671875,180.28128051757812],"flags":{},"order":9,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["The cycles node causes the connected sampler to loop between sampling and unsampling steps. (Unsampling is running the sampler backwards, where it predicts the noise that would lead to a given output).\n\nWhen unsample_eta is set to -1, it simply uses the same settings for eta as in the connected node. 
"],"color":"#432","bgcolor":"#653"},{"id":745,"type":"VAEDecode","pos":[1297.53369140625,-791.137939453125],"size":[140,46],"flags":{"collapsed":true},"order":58,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":6478},{"name":"vae","localized_name":"vae","type":"VAE","link":2153}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2201,2241,3568,4997],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[]},{"id":1689,"type":"Note","pos":[525.9268798828125,-1349.89794921875],"size":[263.00439453125,88],"flags":{},"order":10,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Expanding the mask for the second pass can sometimes help prevent seams."],"color":"#432","bgcolor":"#653"},{"id":1688,"type":"Note","pos":[-838.7593994140625,-1316.05126953125],"size":[274.47601318359375,104.34856414794922],"flags":{},"order":11,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["ReFluxPatcher is required to use the \"Style\" nodes. Different \"Re...Patcher\" nodes are available for many other models, from SD1.5/SDXL to SD3.5, HiDream, AuraFlow, Chroma, WAN, and LTXV."],"color":"#432","bgcolor":"#653"},{"id":1678,"type":"Note","pos":[-422.92510986328125,-333.6911926269531],"size":[324.0018005371094,113.63665771484375],"flags":{},"order":12,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["ReduxAdvanced is used to help get things on track. Bypass if you're having problems with it disrupting character likeness.\n\nThe SDE Mask ensures SDE noise is used only in the masked area, limiting change in unmasked areas that could lead to seams. 
"],"color":"#432","bgcolor":"#653"},{"id":1572,"type":"ClownGuides_Sync_Advanced","pos":[581.355224609375,-1000.5784912109375],"size":[315,1878],"flags":{"collapsed":true},"order":50,"mode":0,"inputs":[{"name":"guide_masked","localized_name":"guide_masked","type":"LATENT","shape":7,"link":6229},{"name":"guide_unmasked","localized_name":"guide_unmasked","type":"LATENT","shape":7,"link":6230},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":6343},{"name":"mask_sync","localized_name":"mask_sync","type":"MASK","shape":7,"link":6344},{"name":"mask_drift_x","localized_name":"mask_drift_x","type":"MASK","shape":7,"link":6345},{"name":"mask_drift_y","localized_name":"mask_drift_y","type":"MASK","shape":7,"link":6346},{"name":"mask_lure_x","localized_name":"mask_lure_x","type":"MASK","shape":7,"link":6347},{"name":"mask_lure_y","localized_name":"mask_lure_y","type":"MASK","shape":7,"link":6348},{"name":"weights_masked","localized_name":"weights_masked","type":"SIGMAS","shape":7,"link":null},{"name":"weights_unmasked","localized_name":"weights_unmasked","type":"SIGMAS","shape":7,"link":null},{"name":"syncs_masked","localized_name":"syncs_masked","type":"SIGMAS","shape":7,"link":null},{"name":"syncs_unmasked","localized_name":"syncs_unmasked","type":"SIGMAS","shape":7,"link":null},{"name":"drift_xs_masked","localized_name":"drift_xs_masked","type":"SIGMAS","shape":7,"link":null},{"name":"drift_xs_unmasked","localized_name":"drift_xs_unmasked","type":"SIGMAS","shape":7,"link":null},{"name":"drift_ys_masked","localized_name":"drift_ys_masked","type":"SIGMAS","shape":7,"link":null},{"name":"drift_ys_unmasked","localized_name":"drift_ys_unmasked","type":"SIGMAS","shape":7,"link":null},{"name":"lure_xs_masked","localized_name":"lure_xs_masked","type":"SIGMAS","shape":7,"link":null},{"name":"lure_xs_unmasked","localized_name":"lure_xs_unmasked","type":"SIGMAS","shape":7,"link":null},{"name":"lure_ys_masked","localized_name":"lure_ys_masked","type":"SIGMAS","shap
e":7,"link":null},{"name":"lure_ys_unmasked","localized_name":"lure_ys_unmasked","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[6414],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuides_Sync_Advanced"},"widgets_values":[0,1,"constant","constant",0,0,-1,-1,0,1,"constant","constant",0,0,-1,-1,0,0,1,0,"constant","constant",0,0,-1,-1,0,0,0,1,0,"constant","constant",0,0,-1,-1,0,0,"constant","constant",0,0,-1,-1,0,0,"constant","constant",0,0,-1,-1,0,"y -> x",false,false,false,false,false,false]},{"id":1674,"type":"Note","pos":[170.8737030029297,-1390.4803466796875],"size":[322.6287841796875,128.15802001953125],"flags":{},"order":13,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Activate the style nodes if you are having issues with color, detail, light, blurriness or pixelation drifting too far from your source input.\n\nIf end_step is too high, you may get faint halos and an oversharpened look."],"color":"#432","bgcolor":"#653"},{"id":1524,"type":"ReFluxPatcher","pos":[-809.3073120117188,-985.41064453125],"size":[210,82],"flags":{},"order":48,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":6383}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[5845],"slot_index":0}],"properties":{"Node name for 
S&R":"ReFluxPatcher"},"widgets_values":["float64",true]},{"id":1690,"type":"ClownsharkChainsampler_Beta","pos":[865.4187622070312,-518.0064086914062],"size":[281.7781677246094,571.74853515625],"flags":{},"order":56,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":6479},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":6476},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":6477},{"name":"options 2","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[6478],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0,"multistep/res_3m",-1,1,"resample",false]},{"id":1479,"type":"ClownsharkChainsampler_Beta","pos":[536.1533203125,-510.75872802734375],"size":[288.1370544433594,571.74853515625],"flags":{},"order":54,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":6380},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":6415},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":6016},{"name":"options 2","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[6479],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0,"exponential/res_2s",2,1,"resample",true]},{"id":1693,"type":"Note","pos":[-858.5514526367188,-640.8011474609375],"size":[276.7918701171875,88],"flags":{},"order":14,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Padding can be very important. Some models/loras/IPadapter embeds etc. are going to respond very differently if the shot is close up vs. 
farther away."],"color":"#432","bgcolor":"#653"},{"id":1525,"type":"ClownGuide_Style_Beta","pos":[251.35520935058594,-950.5784912109375],"size":[252.0535430908203,286],"flags":{},"order":49,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":5715},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":6411}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[6051],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuide_Style_Beta"},"widgets_values":["positive","WCT",1,1,"constant",0,-1,false]},{"id":1672,"type":"ClownGuide_Style_Beta","pos":[561.355224609375,-950.5784912109375],"size":[252.0535430908203,286],"flags":{},"order":52,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":6412},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":6414}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[6415,6476],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuide_Style_Beta"},"widgets_values":["positive","WCT",1,1,"constant",0,5,false]},{"id":726,"type":"Mask Bounding Box Aspect 
Ratio","pos":[-828.6614990234375,-412.50946044921875],"size":[252,250],"flags":{"collapsed":false},"order":29,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","shape":7,"link":5054},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":5021},{"name":"aspect_ratio","type":"FLOAT","pos":[10,204],"widget":{"name":"aspect_ratio"},"link":2100}],"outputs":[{"name":"image","localized_name":"image","type":"IMAGE","links":[2101,2102,3606,3721,4996,5995],"slot_index":0},{"name":"mask","localized_name":"mask","type":"MASK","links":[2106,5529],"slot_index":1},{"name":"mask_blurred","localized_name":"mask_blurred","type":"MASK","links":[6447],"slot_index":2},{"name":"x","localized_name":"x","type":"INT","links":[2206],"slot_index":3},{"name":"y","localized_name":"y","type":"INT","links":[2207],"slot_index":4},{"name":"width","localized_name":"width","type":"INT","links":[2204],"slot_index":5},{"name":"height","localized_name":"height","type":"INT","links":[2205],"slot_index":6}],"properties":{"Node name for S&R":"Mask Bounding Box Aspect Ratio"},"widgets_values":[100,40,1.75,false]},{"id":1677,"type":"Note","pos":[-439.5185241699219,-738.3756713867188],"size":[290.3874816894531,88],"flags":{},"order":15,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Try setting both drift values to 0.0 or 0.2 as a starting point.\n"],"color":"#432","bgcolor":"#653"},{"id":1694,"type":"Note","pos":[-441.5133056640625,-999.14990234375],"size":[291.2616882324219,189.98562622070312],"flags":{},"order":16,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Increase character likeness by: \n\nDecreasing \"Similarity\"\nIncreasing \"Drift Toward Target\"\nIncreasing cycles\nIncreasing eta (max 1.0)\nIncreasing denoise\n\nIncrease adherence to the input image by:\n\nDoing the opposite of any of the above\nIncreasing \"Drift Toward Guide\"\nEnabling the ReduxAdvanced 
node\n"],"color":"#432","bgcolor":"#653"},{"id":1277,"type":"SharkOptions_GuideCond_Beta","pos":[575.9444580078125,221.88970947265625],"size":[315,98],"flags":{"collapsed":true},"order":40,"mode":0,"inputs":[{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":5653},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":4650},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[5493],"slot_index":0}],"properties":{"Node name for S&R":"SharkOptions_GuideCond_Beta"},"widgets_values":[1]},{"id":1548,"type":"ReduxAdvanced","pos":[-69.81456756591797,-498.3502502441406],"size":[248.6250457763672,234],"flags":{},"order":36,"mode":4,"inputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","link":6422},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","link":5994},{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","link":5993},{"name":"image","localized_name":"image","type":"IMAGE","link":5995},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[6421],"slot_index":0},{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":null},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"ReduxAdvanced"},"widgets_values":[3,"area","center crop 
(square)",1,0.1]},{"id":1446,"type":"ClownsharKSampler_Beta","pos":[214.812255859375,-508.00537109375],"size":[277.5089111328125,735.1378784179688],"flags":{},"order":53,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":5846},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":6421},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":5373},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":6051},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":5493},{"name":"options 2","type":"OPTIONS","link":5776},{"name":"options 3","type":"OPTIONS","link":6402},{"name":"options 4","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[6380],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":[6354],"slot_index":1},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":[1,"exponential/res_2s","bong_tangent",30,1,0.55,1,100,"fixed","standard",true],"color":"#332922","bgcolor":"#593930"},{"id":1454,"type":"ClownOptions_Cycles_Beta","pos":[-74.8967514038086,24.043270111083984],"size":[261.7955627441406,202],"flags":{},"order":17,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[6402],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownOptions_Cycles_Beta"},"widgets_values":[20,1,-1,"none",-1,1,true]},{"id":1573,"type":"PrimitiveFloat","pos":[10.393571853637695,-834.4251708984375],"size":[210,58],"flags":{},"order":18,"mode":0,"inputs":[],"outputs":[{"name":"FLOAT","localized_name":"FLOAT","type":"FLOAT","links":[6239],"slot_index":0}],"title":"Drift Toward Target","properties":{"Node name for S&R":"PrimitiveFloat"},"widgets_values":[0.2]},{"id":1574,"type":"PrimitiveFloat","pos":[11.355203628540039,-720.5784912109375],"size":[210,58],"flags":{},"order":19,"mode":0,"inputs":[],"outputs":[{"name":"FLOAT","localized_name":"FLOAT","type":"FLOAT","links":[6240],"slot_index":0}],"title":"Drift Toward Guide","properties":{"Node name for S&R":"PrimitiveFloat"},"widgets_values":[0.2]},{"id":1477,"type":"LoraLoader","pos":[-1007.4993896484375,-844.936279296875],"size":[315,126],"flags":{},"order":23,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":5439},{"name":"clip","localized_name":"clip","type":"CLIP","link":5440}],"outputs":[{"name":"MODEL","localized_name":"MODEL","type":"MODEL","links":[6397],"slot_index":0},{"name":"CLIP","localized_name":"CLIP","type":"CLIP","links":[6398],"slot_index":1}],"properties":{"Node name for S&R":"LoraLoader"},"widgets_values":["FLUX/Kirsten_Dunst_Flux_V1.safetensors",1,1]},{"id":1556,"type":"CLIPTextEncode","pos":[-392.6881408691406,-498.2940979003906],"size":[289.0962829589844,113.79679870605469],"flags":{"collapsed":false},"order":31,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":6103}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[6422],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["kirsten 
dunst"],"color":"#2a363b","bgcolor":"#3f5159"},{"id":1451,"type":"LoadImage","pos":[-1267.7357177734375,-412.5631103515625],"size":[315,314],"flags":{},"order":20,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[6518],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":[6519],"slot_index":1}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["clipspace/clipspace-mask-54212258.30000001.png [input]","image"]},{"id":1040,"type":"PreviewImage","pos":[-1267.6248779296875,-30.252229690551758],"size":[304.98114013671875,265.58380126953125],"flags":{},"order":44,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":3607}],"outputs":[],"properties":{"Node name for S&R":"PreviewImage"},"widgets_values":[]},{"id":1698,"type":"Note","pos":[-1623.859375,-355.951416015625],"size":[276.7918701171875,88],"flags":{},"order":21,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Draw a mask over the face in the Load Image node. 
Ideally, try stopping precisely at the hairline, and just above or just below the chin."],"color":"#432","bgcolor":"#653"}],"links":[[2100,731,1,726,2,"FLOAT"],[2101,726,0,727,0,"IMAGE"],[2102,726,0,727,1,"IMAGE"],[2103,728,0,727,2,"IMAGE"],[2104,729,0,727,5,"INT"],[2105,729,1,727,6,"INT"],[2106,726,1,728,0,"MASK"],[2108,729,0,731,0,"*"],[2109,729,1,731,1,"*"],[2153,14,0,745,1,"VAE"],[2198,758,0,759,1,"IMAGE"],[2200,759,0,761,1,"IMAGE"],[2201,745,0,758,0,"IMAGE"],[2204,726,5,758,1,"INT"],[2205,726,6,758,2,"INT"],[2206,726,3,759,3,"INT"],[2207,726,4,759,4,"INT"],[2210,725,0,761,0,"IMAGE"],[2211,725,0,759,0,"IMAGE"],[2241,745,0,744,0,"IMAGE"],[3508,14,0,727,4,"VAE"],[3568,745,0,1022,0,"IMAGE"],[3569,1022,0,1024,0,"IMAGE"],[3570,765,0,1022,1,"IMAGE"],[3605,728,0,1039,1,"IMAGE"],[3606,726,0,1039,0,"IMAGE"],[3607,1039,0,1040,0,"IMAGE"],[3720,1071,0,1072,2,"CLIP_VISION_OUTPUT"],[3721,726,0,1071,1,"IMAGE"],[4157,490,0,1073,0,"CLIP"],[4185,759,0,1162,0,"*"],[4186,1162,0,1161,0,"IMAGE"],[4650,1073,0,1277,1,"CONDITIONING"],[4980,1073,0,1072,0,"CONDITIONING"],[4996,726,0,1369,0,"IMAGE"],[4997,745,0,1368,0,"IMAGE"],[4998,729,0,1369,1,"INT"],[4999,729,1,1369,2,"INT"],[5000,1369,0,1368,1,"IMAGE"],[5021,1407,0,726,1,"MASK"],[5054,725,0,726,0,"IMAGE"],[5373,727,0,1446,3,"LATENT"],[5439,1476,0,1477,0,"MODEL"],[5440,1476,1,1477,1,"CLIP"],[5442,727,3,1478,1,"LATENT"],[5443,1476,3,1071,0,"CLIP_VISION"],[5444,1476,4,1072,1,"STYLE_MODEL"],[5447,1476,2,14,0,"*"],[5493,1277,0,1446,6,"OPTIONS"],[5529,726,1,765,0,"MASK"],[5653,1072,0,1277,0,"CONDITIONING"],[5715,727,0,1525,0,"LATENT"],[5776,1516,0,1446,7,"OPTIONS"],[5845,1524,0,13,0,"*"],[5846,13,0,1446,0,"MODEL"],[5993,1476,3,1548,2,"CLIP_VISION"],[5994,1476,4,1548,1,"STYLE_MODEL"],[5995,726,0,1548,3,"IMAGE"],[6016,1516,0,1479,6,"OPTIONS"],[6051,1525,0,1446,5,"GUIDES"],[6103,490,0,1556,0,"CLIP"],[6201,727,0,1569,0,"LATENT"],[6202,727,0,1569,1,"LATENT"],[6222,727,2,1571,0,"*"],[6223,1571,0,1569,2,"MASK"],[6224,1571,0,1569,3,"MASK"],[6225,157
1,0,1569,4,"MASK"],[6226,1571,0,1569,5,"MASK"],[6227,1571,0,1569,6,"MASK"],[6228,1571,0,1569,7,"MASK"],[6229,727,0,1572,0,"LATENT"],[6230,727,0,1572,1,"LATENT"],[6239,1573,0,1569,20,"FLOAT"],[6240,1574,0,1569,21,"FLOAT"],[6241,1575,0,1569,22,"FLOAT"],[6342,1571,0,1539,0,"MASK"],[6343,1539,0,1572,2,"MASK"],[6344,1539,0,1572,3,"MASK"],[6345,1539,0,1572,4,"MASK"],[6346,1539,0,1572,5,"MASK"],[6347,1539,0,1572,6,"MASK"],[6348,1539,0,1572,7,"MASK"],[6353,14,0,1664,1,"VAE"],[6354,1446,1,1664,0,"LATENT"],[6355,1664,0,1665,0,"IMAGE"],[6360,727,2,1667,0,"MASK"],[6361,1667,0,1516,0,"MASK"],[6380,1446,0,1479,4,"LATENT"],[6383,1478,0,1524,0,"MODEL"],[6396,1279,0,1478,0,"MODEL"],[6397,1477,0,1279,0,"MODEL"],[6398,1477,1,490,0,"*"],[6402,1454,0,1446,8,"OPTIONS"],[6411,1569,0,1525,3,"GUIDES"],[6412,727,0,1672,0,"LATENT"],[6414,1572,0,1672,3,"GUIDES"],[6415,1672,0,1479,5,"GUIDES"],[6421,1548,0,1446,1,"CONDITIONING"],[6422,1556,0,1548,0,"CONDITIONING"],[6447,726,2,759,2,"MASK"],[6476,1672,0,1690,5,"GUIDES"],[6477,1516,0,1690,6,"OPTIONS"],[6478,1690,0,745,0,"LATENT"],[6479,1479,0,1690,4,"LATENT"],[6518,1451,0,725,0,"*"],[6519,1451,1,1407,0,"*"]],"groups":[{"id":1,"title":"Prepare Input","bounding":[-1310.92529296875,-489.52618408203125,755.7755737304688,762.867431640625],"color":"#3f789e","font_size":24,"flags":{}},{"id":2,"title":"Patch and 
Stitch","bounding":[1250.695068359375,-877.5091552734375,1320.4892578125,1148.6859130859375],"color":"#3f789e","font_size":24,"flags":{}},{"id":3,"title":"Loaders","bounding":[-1438.07421875,-1179.8375244140625,881.3677368164062,646.2952880859375],"color":"#3f789e","font_size":24,"flags":{}},{"id":5,"title":"Sampling","bounding":[-510.548828125,-602.9613037109375,1686.064208984375,874.1248168945312],"color":"#3f789e","font_size":24,"flags":{}},{"id":6,"title":"Guides","bounding":[-37.0714225769043,-1229.123046875,888.9586791992188,587.7683715820312],"color":"#3f789e","font_size":24,"flags":{}}],"config":{},"extra":{"ds":{"scale":1.2100000000000002,"offset":[4241.572246240033,1450.4856076460571]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0,"ue_links":[],"VHS_MetadataImage":true,"VHS_KeepIntermediate":true},"version":0.4}
================================================
FILE: example_workflows/flux faceswap.json
================================================
{"last_node_id":1153,"last_link_id":4163,"nodes":[{"id":758,"type":"ImageResize+","pos":[1987.2191162109375,-351.3092041015625],"size":[210,218],"flags":{"collapsed":true},"order":31,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","link":2201},{"name":"width","type":"INT","pos":[10,76],"widget":{"name":"width"},"link":2204},{"name":"height","type":"INT","pos":[10,100],"widget":{"name":"height"},"link":2205}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2198],"slot_index":0},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"ImageResize+"},"widgets_values":[512,512,"lanczos","stretch","always",0]},{"id":490,"type":"Reroute","pos":[-693.37158203125,-93.71382904052734],"size":[75,26],"flags":{},"order":10,"mode":0,"inputs":[{"name":"","type":"*","link":4149}],"outputs":[{"name":"","type":"CLIP","links":[4157],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":728,"type":"MaskToImage","pos":[219.2652130126953,854.9601440429688],"size":[176.39999389648438,26],"flags":{"collapsed":true},"order":14,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":2106}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2103,3605],"slot_index":0}],"properties":{"Node name for S&R":"MaskToImage"},"widgets_values":[]},{"id":765,"type":"MaskToImage","pos":[2707.509765625,226.7833709716797],"size":[182.28543090820312,26],"flags":{"collapsed":true},"order":19,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":2233}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3570],"slot_index":0}],"properties":{"Node name for 
S&R":"MaskToImage"},"widgets_values":[]},{"id":1024,"type":"PreviewImage","pos":[2707.52197265625,-277.8296203613281],"size":[413.7582092285156,445.8081359863281],"flags":{},"order":36,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":3569}],"outputs":[],"properties":{"Node name for S&R":"PreviewImage"},"widgets_values":[],"color":"#332922","bgcolor":"#593930"},{"id":744,"type":"SaveImage","pos":[1807.2188720703125,-291.30926513671875],"size":[424.53594970703125,455.0760192871094],"flags":{},"order":33,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2241}],"outputs":[],"title":"Save Patch","properties":{"Node name for S&R":"SaveImage","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["ComfyUI"],"color":"#332922","bgcolor":"#593930"},{"id":1040,"type":"PreviewImage","pos":[-195.9951934814453,694.224609375],"size":[304.98114013671875,265.58380126953125],"flags":{},"order":23,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":3607}],"outputs":[],"properties":{"Node name for S&R":"PreviewImage"},"widgets_values":[]},{"id":731,"type":"SimpleMath+","pos":[219.2652130126953,804.9601440429688],"size":[315,98],"flags":{"collapsed":true},"order":5,"mode":0,"inputs":[{"name":"a","localized_name":"a","type":"*","shape":7,"link":2108},{"name":"b","localized_name":"b","type":"*","shape":7,"link":2109},{"name":"c","localized_name":"c","type":"*","shape":7,"link":null}],"outputs":[{"name":"INT","localized_name":"INT","type":"INT","links":null},{"name":"FLOAT","localized_name":"FLOAT","type":"FLOAT","links":[2100],"slot_index":1}],"properties":{"Node name for 
S&R":"SimpleMath+"},"widgets_values":["a/b"]},{"id":14,"type":"Reroute","pos":[-693.37158203125,-53.713836669921875],"size":[75,26],"flags":{},"order":7,"mode":0,"inputs":[{"name":"","type":"*","link":4146}],"outputs":[{"name":"","type":"VAE","links":[2153,3508],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1039,"type":"ImageBlend","pos":[219.2652130126953,954.9601440429688],"size":[210,102],"flags":{"collapsed":true},"order":17,"mode":0,"inputs":[{"name":"image1","localized_name":"image1","type":"IMAGE","link":3606},{"name":"image2","localized_name":"image2","type":"IMAGE","link":3605}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3607],"slot_index":0}],"properties":{"Node name for S&R":"ImageBlend"},"widgets_values":[0.5,"multiply"]},{"id":1022,"type":"ImageBlend","pos":[2710.7275390625,275.91143798828125],"size":[210,102],"flags":{"collapsed":true},"order":34,"mode":0,"inputs":[{"name":"image1","localized_name":"image1","type":"IMAGE","link":3568},{"name":"image2","localized_name":"image2","type":"IMAGE","link":3570}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3569],"slot_index":0}],"properties":{"Node name for S&R":"ImageBlend"},"widgets_values":[0.5,"multiply"]},{"id":726,"type":"Mask Bounding Box Aspect 
Ratio","pos":[216.9475860595703,323.4888610839844],"size":[252,250],"flags":{"collapsed":false},"order":11,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","shape":7,"link":2338},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":4158},{"name":"aspect_ratio","type":"FLOAT","pos":[10,204],"widget":{"name":"aspect_ratio"},"link":2100}],"outputs":[{"name":"image","localized_name":"image","type":"IMAGE","links":[2101,2102,2209,3606,3721],"slot_index":0},{"name":"mask","localized_name":"mask","type":"MASK","links":[2106],"slot_index":1},{"name":"mask_blurred","localized_name":"mask_blurred","type":"MASK","links":[3884],"slot_index":2},{"name":"x","localized_name":"x","type":"INT","links":[2206],"slot_index":3},{"name":"y","localized_name":"y","type":"INT","links":[2207],"slot_index":4},{"name":"width","localized_name":"width","type":"INT","links":[2204],"slot_index":5},{"name":"height","localized_name":"height","type":"INT","links":[2205],"slot_index":6}],"properties":{"Node name for S&R":"Mask Bounding Box Aspect Ratio"},"widgets_values":[100,40,1.75,false]},{"id":760,"type":"SaveImage","pos":[1807.2188720703125,218.6908721923828],"size":[418.26055908203125,456.04608154296875],"flags":{},"order":37,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2199}],"outputs":[],"title":"Save Output","properties":{},"widgets_values":["ComfyUI"],"color":"#232","bgcolor":"#353"},{"id":761,"type":"Image Comparer (rgthree)","pos":[2257.2197265625,228.6908416748047],"size":[410.4466247558594,447.8973388671875],"flags":{},"order":38,"mode":0,"inputs":[{"name":"image_a","type":"IMAGE","dir":3,"link":2210},{"name":"image_b","type":"IMAGE","dir":3,"link":2200}],"outputs":[],"title":"Compare 
Output","properties":{"comparer_mode":"Slide"},"widgets_values":[[{"name":"A","selected":true,"url":"/api/view?filename=rgthree.compare._temp_dluyj_00015_.png&type=temp&subfolder=&rand=0.8734695511873163"},{"name":"B","selected":true,"url":"/api/view?filename=rgthree.compare._temp_dluyj_00016_.png&type=temp&subfolder=&rand=0.23774072803641766"}]],"color":"#232","bgcolor":"#353"},{"id":1074,"type":"ClownOptions_SDE_Beta","pos":[790.0368041992188,-161.93728637695312],"size":[315,266],"flags":{"collapsed":true},"order":0,"mode":0,"inputs":[{"name":"etas","localized_name":"etas","type":"SIGMAS","shape":7,"link":null},{"name":"etas_substep","localized_name":"etas_substep","type":"SIGMAS","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[],"slot_index":0}],"properties":{"Node name for S&R":"ClownOptions_SDE_Beta"},"widgets_values":["gaussian","gaussian","hard","hard",0.5,0.75,-1,"fixed"]},{"id":13,"type":"Reroute","pos":[-693.37158203125,-133.7138214111328],"size":[75,26],"flags":{},"order":26,"mode":0,"inputs":[{"name":"","type":"*","link":4163}],"outputs":[{"name":"","type":"MODEL","links":[3812],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":745,"type":"VAEDecode","pos":[1818.999755859375,-349.32073974609375],"size":[140,46],"flags":{"collapsed":true},"order":30,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":4031},{"name":"vae","localized_name":"vae","type":"VAE","link":2153}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2201,2208,2241,3568],"slot_index":0}],"properties":{"Node name for 
S&R":"VAEDecode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[]},{"id":727,"type":"VAEEncodeAdvanced","pos":[219.2652130126953,904.9601440429688],"size":[262.4812927246094,298],"flags":{"collapsed":true},"order":16,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":2101},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":2102},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":2103},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":3508},{"name":"width","type":"INT","pos":[10,160],"widget":{"name":"width"},"link":2104},{"name":"height","type":"INT","pos":[10,184],"widget":{"name":"height"},"link":2105}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[3602,3603,3700,3785,3786,4097],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":[],"slot_index":1},{"name":"mask","localized_name":"mask","type":"MASK","links":[2233,3604,3901],"slot_index":2},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[2125],"slot_index":3},{"name":"width","localized_name":"width","type":"INT","links":[],"slot_index":4},{"name":"height","localized_name":"height","type":"INT","links":[]}],"properties":{"Node name for S&R":"VAEEncodeAdvanced"},"widgets_values":["false",1344,768,"red",false,"16_channels"]},{"id":729,"type":"SetImageSize","pos":[257.9150695800781,633.8616333007812],"size":[210,102],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[{"name":"width","localized_name":"width","type":"INT","links":[2104,2108],"slot_index":0},{"name":"height","localized_name":"height","type":"INT","links":[2105,2109],"slot_index":1}],"title":"Inpaint Tile Size","properties":{"Node name for 
S&R":"SetImageSize"},"widgets_values":[1024,1024]},{"id":1072,"type":"StyleModelApply","pos":[618.7158813476562,-201.9373016357422],"size":[262,122],"flags":{"collapsed":true},"order":15,"mode":0,"inputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","link":3724},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","link":4151},{"name":"clip_vision_output","localized_name":"clip_vision_output","type":"CLIP_VISION_OUTPUT","link":3720}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[4088,4102],"slot_index":0}],"properties":{"Node name for S&R":"StyleModelApply"},"widgets_values":[1,"multiply"]},{"id":1071,"type":"CLIPVisionEncode","pos":[618.708251953125,-160.76882934570312],"size":[253.60000610351562,78],"flags":{"collapsed":true},"order":13,"mode":0,"inputs":[{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","link":4152},{"name":"image","localized_name":"image","type":"IMAGE","link":3721}],"outputs":[{"name":"CLIP_VISION_OUTPUT","localized_name":"CLIP_VISION_OUTPUT","type":"CLIP_VISION_OUTPUT","links":[3720],"slot_index":0}],"properties":{"Node name for S&R":"CLIPVisionEncode"},"widgets_values":["center"]},{"id":1152,"type":"FluxLoader","pos":[-1424.1221923828125,-136.28652954101562],"size":[315,282],"flags":{},"order":2,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[4144],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[4150],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[4146],"slot_index":2},{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","links":[4152],"slot_index":3},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","links":[4151],"slot_index":4}],"properties":{"Node name for 
S&R":"FluxLoader"},"widgets_values":["flux1-dev.sft","fp8_e4m3fn_fast","clip_l_flux.safetensors","t5xxl_fp8_e4m3fn_scaled.safetensors","ae.sft","sigclip_vision_patch14_384.safetensors","flux1-redux-dev.safetensors"]},{"id":1145,"type":"SharkOptions_GuideCond_Beta","pos":[623.8969116210938,-288.85443115234375],"size":[315,98],"flags":{"collapsed":true},"order":18,"mode":0,"inputs":[{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":4088},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":4087},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[4086,4089],"slot_index":0}],"properties":{"Node name for S&R":"SharkOptions_GuideCond_Beta"},"widgets_values":[1]},{"id":762,"type":"Image Comparer (rgthree)","pos":[2254.142822265625,-285.88934326171875],"size":[402.1800842285156,455.1059875488281],"flags":{},"order":32,"mode":0,"inputs":[{"name":"image_a","type":"IMAGE","dir":3,"link":2208},{"name":"image_b","type":"IMAGE","dir":3,"link":2209}],"outputs":[],"title":"Compare Inpaint Patch","properties":{"comparer_mode":"Slide"},"widgets_values":[[{"name":"A","selected":true,"url":"/api/view?filename=rgthree.compare._temp_glyrv_00015_.png&type=temp&subfolder=&rand=0.6304345035966803"},{"name":"B","selected":true,"url":"/api/view?filename=rgthree.compare._temp_glyrv_00016_.png&type=temp&subfolder=&rand=0.03317535764596258"}]],"color":"#332922","bgcolor":"#593930"},{"id":1073,"type":"CLIPTextEncode","pos":[618.718017578125,-243.58985900878906],"size":[263.280517578125,88.73566436767578],"flags":{"collapsed":true},"order":12,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":4157}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[3724,4087],"slot_index":0}],"properties":{"Node name for 
S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[""]},{"id":759,"type":"ImageCompositeMasked","pos":[2182.82080078125,-351.82415771484375],"size":[210,186],"flags":{"collapsed":true},"order":35,"mode":0,"inputs":[{"name":"destination","localized_name":"destination","type":"IMAGE","link":2211},{"name":"source","localized_name":"source","type":"IMAGE","link":2198},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":3884},{"name":"x","type":"INT","pos":[10,76],"widget":{"name":"x"},"link":2206},{"name":"y","type":"INT","pos":[10,100],"widget":{"name":"y"},"link":2207}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2199,2200],"slot_index":0}],"properties":{"Node name for S&R":"ImageCompositeMasked"},"widgets_values":[712,800,false]},{"id":1102,"type":"LoadImage","pos":[-205.95057678222656,316.025390625],"size":[315,314],"flags":{},"order":3,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[4156],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":[4158],"slot_index":1}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["clipspace/clipspace-mask-67304674.png 
[input]","image"]},{"id":725,"type":"Reroute","pos":[126.9476318359375,319.6999206542969],"size":[75,26],"flags":{},"order":8,"mode":0,"inputs":[{"name":"","type":"*","link":4156}],"outputs":[{"name":"","type":"IMAGE","links":[2210,2211,2338],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1038,"type":"ClownGuides_Beta","pos":[-491.9494934082031,-334.2093505859375],"size":[315,450],"flags":{},"order":20,"mode":0,"inputs":[{"name":"guide_masked","localized_name":"guide_masked","type":"LATENT","shape":7,"link":3602},{"name":"guide_unmasked","localized_name":"guide_unmasked","type":"LATENT","shape":7,"link":3603},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":3604},{"name":"weights_masked","localized_name":"weights_masked","type":"SIGMAS","shape":7,"link":null},{"name":"weights_unmasked","localized_name":"weights_unmasked","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[4095],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuides_Beta"},"widgets_values":["flow",false,false,1,1,1,1,"constant","constant",0,0,8,8,false],"color":"#2a363b","bgcolor":"#3f5159"},{"id":1069,"type":"ClownsharkChainsampler_Beta","pos":[1011.0429077148438,-95.05850219726562],"size":[315,570],"flags":{},"order":28,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":3711},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":4155},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":4089},{"name":"options 
2","type":"OPTIONS","link":4159},{"name":"options 3","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[4104],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0,"exponential/res_3s",1,1,"resample",true],"color":"#2a363b","bgcolor":"#3f5159"},{"id":1066,"type":"ClownsharKSampler_Beta","pos":[620.0368041992188,-101.93733215332031],"size":[340.55120849609375,730],"flags":{},"order":27,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":3812},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":4102},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":3700},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":4096},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":4086},{"name":"options 2","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[3711],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":[],"slot_index":1},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharKSampler_Beta","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":[0,"exponential/res_3s","beta57",30,7,1,1,0,"fixed","standard",false],"color":"#2a363b","bgcolor":"#3f5159"},{"id":1070,"type":"ClownsharkChainsampler_Beta","pos":[1361.5435791015625,-100.98193359375],"size":[315,570],"flags":{},"order":29,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":4104},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":3832},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[4031],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":[],"slot_index":1},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0,"exponential/res_3s",-1,1,"resample",true],"color":"#232","bgcolor":"#353"},{"id":1143,"type":"ClownOptions_Cycles_Beta","pos":[1023.6978149414062,-356.53753662109375],"size":[282.6300964355469,202],"flags":{},"order":4,"mode":4,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[4159],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownOptions_Cycles_Beta"},"widgets_values":[5,1,0,"none",1,1,false],"color":"#2a363b","bgcolor":"#3f5159"},{"id":1153,"type":"LoraLoader","pos":[-1079.3297119140625,-135.3394012451172],"size":[315,126],"flags":{},"order":6,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":4144},{"name":"clip","localized_name":"clip","type":"CLIP","link":4150}],"outputs":[{"name":"MODEL","localized_name":"MODEL","type":"MODEL","links":[4160],"slot_index":0},{"name":"CLIP","localized_name":"CLIP","type":"CLIP","links":[4149],"slot_index":1}],"properties":{"Node name for S&R":"LoraLoader"},"widgets_values":["FLUX/Raura.safetensors",1,1]},{"id":737,"type":"ModelSamplingAdvancedResolution","pos":[-1125.156005859375,-356.12274169921875],"size":[260.3999938964844,126],"flags":{},"order":22,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":4161},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":2125}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[4162],"slot_index":0}],"properties":{"Node name for S&R":"ModelSamplingAdvancedResolution"},"widgets_values":["exponential",1.35,0.85]},{"id":1149,"type":"ReFluxPatcher","pos":[-828.3265380859375,-352.2313232421875],"size":[210,82],"flags":{},"order":25,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":4162}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[4163],"slot_index":0}],"properties":{"Node name for S&R":"ReFluxPatcher"},"widgets_values":["float64",true],"color":"#223","bgcolor":"#335"},{"id":1142,"type":"TorchCompileModels","pos":[-1416.9853515625,-362.2281799316406],"size":[210,178],"flags":{},"order":9,"mode":4,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":4160}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[4161],"slot_index":0}],"properties":{"Node name for 
S&R":"TorchCompileModels"},"widgets_values":["inductor",false,"default",false,64,0]},{"id":1150,"type":"ClownGuide_Style_Beta","pos":[-140.6088409423828,-331.50213623046875],"size":[248.69369506835938,286],"flags":{"collapsed":false},"order":24,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":4097},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":4095}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[4096,4155],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuide_Style_Beta"},"widgets_values":["positive","WCT",1,1,"constant",0,-1,false],"color":"#223","bgcolor":"#335"},{"id":1088,"type":"ClownGuides_Beta","pos":[145.70831298828125,-329.6731872558594],"size":[315,450],"flags":{},"order":21,"mode":0,"inputs":[{"name":"guide_masked","localized_name":"guide_masked","type":"LATENT","shape":7,"link":3785},{"name":"guide_unmasked","localized_name":"guide_unmasked","type":"LATENT","shape":7,"link":3786},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":3901},{"name":"weights_masked","localized_name":"weights_masked","type":"SIGMAS","shape":7,"link":null},{"name":"weights_unmasked","localized_name":"weights_unmasked","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[3832],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownGuides_Beta"},"widgets_values":["inversion",false,false,0,1,1,1,"constant","constant",0,0,30,30,false],"color":"#232","bgcolor":"#353"}],"links":[[2100,731,1,726,2,"FLOAT"],[2101,726,0,727,0,"IMAGE"],[2102,726,0,727,1,"IMAGE"],[2103,728,0,727,2,"IMAGE"],[2104,729,0,727,5,"INT"],[2105,729,1,727,6,"INT"],[2106,726,1,728,0,"MASK"],[2108,729,0,731,0,"*"],[2109,729,1,731,1,"*"],[2125,727,3,737,1,"LATENT"],[2153,14,0,745,1,"VAE"],[2198,758,0,759,1,"IMAGE"],[2199,759,0,760,0,"IMAGE"],[2200,759,0,761,1,"IMAGE"],[2201,745,0,758,0,"IMAGE"],[2204,726,5,758,1,"INT"],[2205,726,6,758,2,"INT"],[2206,726,3,759,3,"INT"],[2207,726,4,759,4,"INT"],[2208,745,0,762,0,"IMAGE"],[2209,726,0,762,1,"IMAGE"],[2210,725,0,761,0,"IMAGE"],[2211,725,0,759,0,"IMAGE"],[2233,727,2,765,0,"MASK"],[2241,745,0,744,0,"IMAGE"],[2338,725,0,726,0,"IMAGE"],[3508,14,0,727,4,"VAE"],[3568,745,0,1022,0,"IMAGE"],[3569,1022,0,1024,0,"IMAGE"],[3570,765,0,1022,1,"IMAGE"],[3602,727,0,1038,0,"LATENT"],[3603,727,0,1038,1,"LATENT"],[3604,727,2,1038,2,"MASK"],[3605,728,0,1039,1,"IMAGE"],[3606,726,0,1039,0,"IMAGE"],[3607,1039,0,1040,0,"IMAGE"],[3700,727,0,1066,3,"LATENT"],[3711,1066,0,1069,4,"LATENT"],[3720,1071,0,1072,2,"CLIP_VISION_OUTPUT"],[3721,726,0,1071,1,"IMAGE"],[3724,1073,0,1072,0,"CONDITIONING"],[3785,727,0,1088,0,"LATENT"],[3786,727,0,1088,1,"LATENT"],[3812,13,0,1066,0,"MODEL"],[3832,1088,0,1070,5,"GUIDES"],[3884,726,2,759,2,"MASK"],[3901,727,2,1088,2,"MASK"],[4031,1070,0,745,0,"LATENT"],[4086,1145,0,1066,6,"OPTIONS"],[4087,1073,0,1145,1,"CONDITIONING"],[4088,1072,0,1145,0,"CONDITIONING"],[4089,1145,0,1069,6,"OPTIONS"],[4095,1038,0,1150,3,"GUIDES"],[4096,1150,0,1066,5,"GUIDES"],[4097,727,0,1150,0,"LATENT"],[4102,1072,0,1066,1,"CONDITIONING"],[4104,1069,0,1070,4,"LATENT"],[4144,1152,0,1153,0,"MODEL"],[4146,1152,2,14,0,"*"],[4149,1153,1,490,0,"*"],[4150,1152,1,1153,1,"CLIP"],[4151,1152,4,1072,1,"STYLE_MODEL"],[4152,1152,3,1071,0,"CLIP_VISION"],[4155,1150,0,1069,5,"GUIDES"],[4156,1102,0,725,0,"*"],[4157,4
90,0,1073,0,"CLIP"],[4158,1102,1,726,1,"MASK"],[4159,1143,0,1069,7,"OPTIONS"],[4160,1153,0,1142,0,"MODEL"],[4161,1142,0,737,0,"MODEL"],[4162,737,0,1149,0,"MODEL"],[4163,1149,0,13,0,"*"]],"groups":[{"id":1,"title":"Prepare Input","bounding":[-240.3173828125,230.5765838623047,755.7755737304688,762.867431640625],"color":"#3f789e","font_size":24,"flags":{}},{"id":2,"title":"Patch and Stitch","bounding":[1762.0626220703125,-449.59136962890625,1387.1339111328125,1156.21923828125],"color":"#3f789e","font_size":24,"flags":{}},{"id":3,"title":"Loaders","bounding":[-1451.647216796875,-453.5611877441406,862.5447998046875,635.2009887695312],"color":"#3f789e","font_size":24,"flags":{}},{"id":5,"title":"Sampling","bounding":[565.7752685546875,-449.1409606933594,1147.30712890625,1118.83447265625],"color":"#3f789e","font_size":24,"flags":{}},{"id":6,"title":"Guides","bounding":[-538.6279296875,-451.06854248046875,1052.895263671875,634.7589721679688],"color":"#3f789e","font_size":24,"flags":{}}],"config":{},"extra":{"ds":{"scale":1.351305709310398,"offset":[2774.203337270875,600.0170992273368]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0,"ue_links":[],"VHS_MetadataImage":true,"VHS_KeepIntermediate":true},"version":0.4}
================================================
FILE: example_workflows/flux inpaint area.json
================================================
{"last_node_id":698,"last_link_id":1968,"nodes":[{"id":670,"type":"SaveImage","pos":[5481.20751953125,763.7216186523438],"size":[315,270],"flags":{},"order":21,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":1883}],"outputs":[],"properties":{},"widgets_values":["ComfyUI"]},{"id":663,"type":"VAEEncodeAdvanced","pos":[4030,1370],"size":[262.4812927246094,278],"flags":{},"order":10,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":1957},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":1968}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[1885,1886],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":[],"slot_index":1},{"name":"mask","localized_name":"mask","type":"MASK","links":[],"slot_index":2},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[1854,1869],"slot_index":3},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"VAEEncodeAdvanced"},"widgets_values":["false",1024,1024,"red",false,"16_channels"]},{"id":651,"type":"PreviewImage","pos":[4060,1710],"size":[210,246],"flags":{},"order":11,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":1963}],"outputs":[],"properties":{"Node name for 
S&R":"PreviewImage"},"widgets_values":[]},{"id":624,"type":"CLIPTextEncode","pos":[4329.92578125,1015.7978515625],"size":[306.2455749511719,162.64158630371094],"flags":{},"order":9,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":1966}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[1860],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["a close up shot of a red coffee mug on a wooden table"]},{"id":346,"type":"ModelSamplingAdvancedResolution","pos":[4034.77978515625,820.2175903320312],"size":[260.3999938964844,126],"flags":{},"order":14,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1965},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":1869}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1870],"slot_index":0}],"properties":{"Node name for S&R":"ModelSamplingAdvancedResolution"},"widgets_values":["exponential",1.35,0.85]},{"id":674,"type":"Note","pos":[4999.462890625,1603.108642578125],"size":[378.7174377441406,179.35989379882812],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["eta is the amount of noise added after each step. It allows the model to change things more aggressively. Try comparing 0.0 vs 0.75.\n\nres_2m and res_3m will be sufficient quality samplers in most cases. Try res_2s and res_3s (which are 2x and 3x slower) if you want an extra quality boost.\n\nYou can get away with fewer than 40 steps in most cases, but 40 gives the model more time to correct any errors. 
Mileage may vary, experiment!"],"color":"#432","bgcolor":"#653"},{"id":677,"type":"Note","pos":[3783.440185546875,820.546142578125],"size":[210.66668701171875,91.33430480957031],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["I have found these values often work quite well with img2img work with the beta57 scheduler.\n\n"],"color":"#432","bgcolor":"#653"},{"id":678,"type":"Note","pos":[3748.428466796875,1012.2677612304688],"size":[210,107.33900451660156],"flags":{},"order":2,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["If you wish to inpaint with another model, just replace the model loader and be sure to change CFG to whatever is appropriate for that model.\n\n"],"color":"#432","bgcolor":"#653"},{"id":672,"type":"Note","pos":[3747.097412109375,1187.65576171875],"size":[210,104.00474548339844],"flags":{},"order":3,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Padding will increase or decrease the amount of area included around your mask that will give the model more context."],"color":"#432","bgcolor":"#653"},{"id":637,"type":"Note","pos":[3412.000732421875,1202.6614990234375],"size":[280.681884765625,88],"flags":{},"order":4,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Draw your mask on your image for the area you would like to inpaint."],"color":"#432","bgcolor":"#653"},{"id":658,"type":"Image Comparer 
(rgthree)","pos":[5007.734375,1021.2513427734375],"size":[450.5037841796875,521.7816162109375],"flags":{},"order":20,"mode":0,"inputs":[{"name":"image_a","type":"IMAGE","dir":3,"link":1829},{"name":"image_b","type":"IMAGE","dir":3,"link":1823}],"outputs":[],"properties":{"comparer_mode":"Slide"},"widgets_values":[[{"name":"A","selected":true,"url":"/api/view?filename=rgthree.compare._temp_sxifa_00003_.png&type=temp&subfolder=&rand=0.14849022700275727"},{"name":"B","selected":true,"url":"/api/view?filename=rgthree.compare._temp_sxifa_00004_.png&type=temp&subfolder=&rand=0.8022985498723256"}]]},{"id":673,"type":"Note","pos":[4330.9345703125,1766.158203125],"size":[488.01611328125,234.97633361816406],"flags":{},"order":5,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["The parameters for \"masked\" will affect your inpainting area. \n\nTry changing weight_masked and end_step_masked. Lower values will allow the model to inpaint more aggressively. Higher will use more information from the original image. \n\n *** You can think of these like a \"denoise\" slider! *** \n\n(With lower weight, lower end_step acting like higher denoise).\n\nweight_scheduler_masked will change how quickly the value in weight_masked drops to zero. \"constant\" will never drop. Try linear_quadratic (drops very gradually, then suddenly at the end) or beta57 (drops earlier). 
These can make the inpainting process a bit smoother.\n\nHaving some information from the original image helps the model place objects more accurately, if you are replacing something that is already there."],"color":"#432","bgcolor":"#653"},{"id":617,"type":"ClownsharKSampler_Beta","pos":[4660,1020],"size":[315,690],"flags":{},"order":15,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":1870},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":1860},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1854},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":1884},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[1936],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"multistep/res_3m","bong_tangent",40,-1,1,1,17,"fixed","standard",true]},{"id":619,"type":"VAEDecode","pos":[4830.248046875,919.5529174804688],"size":[140,46],"flags":{},"order":16,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":1936},{"name":"vae","localized_name":"vae","type":"VAE","link":1967}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[1882,1902],"slot_index":0}],"properties":{"Node name for 
S&R":"VAEDecode"},"widgets_values":[]},{"id":638,"type":"LoadImage","pos":[3390,1370],"size":[315,314],"flags":{},"order":6,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[1828,1829,1955],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":[1956],"slot_index":1}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["clipspace/clipspace-mask-147694527.20000002.png [input]","image"]},{"id":667,"type":"ImageResize+","pos":[5008.23974609375,755.5714111328125],"size":[210,218],"flags":{},"order":17,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","link":1882},{"name":"width","type":"INT","pos":[10,76],"widget":{"name":"width"},"link":1949},{"name":"height","type":"INT","pos":[10,100],"widget":{"name":"height"},"link":1950}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[1876],"slot_index":0},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"ImageResize+"},"widgets_values":[512,512,"lanczos","stretch","always",0]},{"id":657,"type":"ImageCompositeMasked","pos":[5242.94482421875,761.7905883789062],"size":[210,186],"flags":{},"order":19,"mode":0,"inputs":[{"name":"destination","localized_name":"destination","type":"IMAGE","link":1828},{"name":"source","localized_name":"source","type":"IMAGE","link":1876},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":1953},{"name":"x","type":"INT","pos":[10,76],"widget":{"name":"x"},"link":1952},{"name":"y","type":"INT","pos":[10,100],"widget":{"name":"y"},"link":1951}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[1823,1883],"slot_index":0}],"properties":{"Node name for 
S&R":"ImageCompositeMasked"},"widgets_values":[712,800,false]},{"id":650,"type":"MaskPreview","pos":[3778.59765625,1707.707763671875],"size":[181.5970001220703,246],"flags":{},"order":12,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":1962}],"outputs":[],"properties":{"Node name for S&R":"MaskPreview"},"widgets_values":[]},{"id":671,"type":"ClownGuides_Beta","pos":[4331.12109375,1240.1927490234375],"size":[303.2622985839844,450],"flags":{},"order":13,"mode":0,"inputs":[{"name":"guide_masked","localized_name":"guide_masked","type":"LATENT","shape":7,"link":1885},{"name":"guide_unmasked","localized_name":"guide_unmasked","type":"LATENT","shape":7,"link":1886},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":1954},{"name":"weights_masked","localized_name":"weights_masked","type":"SIGMAS","shape":7,"link":null},{"name":"weights_unmasked","localized_name":"weights_unmasked","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[1884],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuides_Beta"},"widgets_values":["epsilon",false,true,0.5,1,1,1,"beta57","constant",0,0,10,-1,false]},{"id":679,"type":"Image Comparer (rgthree)","pos":[5488.171875,1085.4603271484375],"size":[402.1800842285156,455.1059875488281],"flags":{},"order":18,"mode":0,"inputs":[{"name":"image_a","type":"IMAGE","dir":3,"link":1964},{"name":"image_b","type":"IMAGE","dir":3,"link":1902}],"outputs":[],"properties":{"comparer_mode":"Slide"},"widgets_values":[[{"name":"A","selected":true,"url":"/api/view?filename=rgthree.compare._temp_ejvlo_00001_.png&type=temp&subfolder=&rand=0.5455521700112449"},{"name":"B","selected":true,"url":"/api/view?filename=rgthree.compare._temp_ejvlo_00002_.png&type=temp&subfolder=&rand=0.8898066636829509"}]]},{"id":676,"type":"Mask Bounding Box Aspect 
Ratio","pos":[3742.82421875,1383.6278076171875],"size":[252,250],"flags":{},"order":8,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","shape":7,"link":1955},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":1956}],"outputs":[{"name":"image","localized_name":"image","type":"IMAGE","links":[1957,1963,1964],"slot_index":0},{"name":"mask","localized_name":"mask","type":"MASK","links":[1954,1962],"slot_index":1},{"name":"mask_blurred","localized_name":"mask_blurred","type":"MASK","links":[1953],"slot_index":2},{"name":"x","localized_name":"x","type":"INT","links":[1952],"slot_index":3},{"name":"y","localized_name":"y","type":"INT","links":[1951],"slot_index":4},{"name":"width","localized_name":"width","type":"INT","links":[1949],"slot_index":5},{"name":"height","localized_name":"height","type":"INT","links":[1950],"slot_index":6}],"properties":{"Node name for S&R":"Mask Bounding Box Aspect Ratio"},"widgets_values":[20,20,1,false]},{"id":615,"type":"FluxLoader","pos":[3992.056396484375,1016.4193725585938],"size":[315,282],"flags":{},"order":7,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1965],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[1966],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[1967,1968],"slot_index":2},{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","links":null},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","links":null}],"properties":{"Node name for 
S&R":"FluxLoader"},"widgets_values":["colossusProjectFlux_v42AIO.safetensors","default",".use_ckpt_clip",".none",".use_ckpt_vae",".none",".none"]}],"links":[[1823,657,0,658,1,"IMAGE"],[1828,638,0,657,0,"IMAGE"],[1829,638,0,658,0,"IMAGE"],[1854,663,3,617,3,"LATENT"],[1860,624,0,617,1,"CONDITIONING"],[1869,663,3,346,1,"LATENT"],[1870,346,0,617,0,"MODEL"],[1876,667,0,657,1,"IMAGE"],[1882,619,0,667,0,"IMAGE"],[1883,657,0,670,0,"IMAGE"],[1884,671,0,617,5,"GUIDES"],[1885,663,0,671,0,"LATENT"],[1886,663,0,671,1,"LATENT"],[1902,619,0,679,1,"IMAGE"],[1936,617,0,619,0,"LATENT"],[1949,676,5,667,1,"INT"],[1950,676,6,667,2,"INT"],[1951,676,4,657,4,"INT"],[1952,676,3,657,3,"INT"],[1953,676,2,657,2,"MASK"],[1954,676,1,671,2,"MASK"],[1955,638,0,676,0,"IMAGE"],[1956,638,1,676,1,"MASK"],[1957,676,0,663,0,"IMAGE"],[1962,676,1,650,0,"MASK"],[1963,676,0,651,0,"IMAGE"],[1964,676,0,679,0,"IMAGE"],[1965,615,0,346,0,"MODEL"],[1966,615,1,624,0,"CLIP"],[1967,615,2,619,1,"VAE"],[1968,615,2,663,4,"VAE"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.4864362802414468,"offset":[-1333.621998147027,-469.4579733585599]},"node_versions":{"comfy-core":"0.3.26","comfyui_controlnet_aux":"1e9eac6377c882da8bb360c7544607036904362c","ComfyUI-VideoHelperSuite":"c36626c6028faca912eafcedbc71f1d342fb4d2a"},"VHS_latentpreview":false,"VHS_latentpreviewrate":0,"VHS_MetadataImage":true,"VHS_KeepIntermediate":true},"version":0.4}
================================================
FILE: example_workflows/flux inpaint bongmath.json
================================================
{"last_node_id":1057,"last_link_id":3666,"nodes":[{"id":758,"type":"ImageResize+","pos":[1304.9573974609375,-352.7953796386719],"size":[210,218],"flags":{"collapsed":true},"order":24,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","link":2201},{"name":"width","type":"INT","pos":[10,76],"widget":{"name":"width"},"link":2204},{"name":"height","type":"INT","pos":[10,100],"widget":{"name":"height"},"link":2205}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2198],"slot_index":0},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"ImageResize+"},"widgets_values":[512,512,"lanczos","stretch","always",0]},{"id":759,"type":"ImageCompositeMasked","pos":[1494.957763671875,-352.7953796386719],"size":[210,186],"flags":{"collapsed":true},"order":28,"mode":0,"inputs":[{"name":"destination","localized_name":"destination","type":"IMAGE","link":2211},{"name":"source","localized_name":"source","type":"IMAGE","link":2198},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":2301},{"name":"x","type":"INT","pos":[10,76],"widget":{"name":"x"},"link":2206},{"name":"y","type":"INT","pos":[10,100],"widget":{"name":"y"},"link":2207}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2199,2200],"slot_index":0}],"properties":{"Node name for 
S&R":"ImageCompositeMasked"},"widgets_values":[712,800,false]},{"id":13,"type":"Reroute","pos":[-792.117919921875,-60.3060188293457],"size":[75,26],"flags":{},"order":10,"mode":0,"inputs":[{"name":"","type":"*","link":1964}],"outputs":[{"name":"","type":"MODEL","links":[2317],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":490,"type":"Reroute","pos":[-792.117919921875,-20.30602264404297],"size":[75,26],"flags":{},"order":6,"mode":0,"inputs":[{"name":"","type":"*","link":1965}],"outputs":[{"name":"","type":"CLIP","links":[3656],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":737,"type":"ModelSamplingAdvancedResolution","pos":[-972.117919921875,-330.3060302734375],"size":[260.3999938964844,126],"flags":{},"order":19,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":2318},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":2125}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[3661],"slot_index":0}],"properties":{"Node name for S&R":"ModelSamplingAdvancedResolution"},"widgets_values":["exponential",1.35,0.85]},{"id":786,"type":"TorchCompileModels","pos":[-1262.117919921875,-360.3060302734375],"size":[256.248779296875,178],"flags":{},"order":13,"mode":4,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":2317}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2318],"slot_index":0}],"properties":{"Node name for S&R":"TorchCompileModels"},"widgets_values":["inductor",false,"default",false,64,0]},{"id":664,"type":"ReFluxPatcher","pos":[-857.81005859375,-103.69645690917969],"size":[210,82],"flags":{"collapsed":true},"order":5,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1963}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1964],"slot_index":0}],"properties":{"Node name for 
S&R":"ReFluxPatcher"},"widgets_values":["float64",true]},{"id":14,"type":"Reroute","pos":[-792.117919921875,19.69397735595703],"size":[75,26],"flags":{},"order":7,"mode":0,"inputs":[{"name":"","type":"*","link":1966}],"outputs":[{"name":"","type":"VAE","links":[2153,3508],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":745,"type":"VAEDecode","pos":[1136.7379150390625,-350.8069152832031],"size":[140,46],"flags":{"collapsed":true},"order":23,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":3665},{"name":"vae","localized_name":"vae","type":"VAE","link":2153}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2201,2208,2241,3568],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[]},{"id":663,"type":"FluxLoader","pos":[-1262.117919921875,-130.3060302734375],"size":[374.41741943359375,282],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1963],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[1965],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[1966],"slot_index":2},{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","links":[],"slot_index":3},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","links":[],"slot_index":4}],"properties":{"Node name for S&R":"FluxLoader"},"widgets_values":["colossusProjectFlux_v42AIO.safetensors","fp8_e4m3fn_fast",".use_ckpt_clip",".none",".use_ckpt_vae","sigclip_vision_patch14_384.safetensors","flux1-redux-dev.safetensors"]},{"id":726,"type":"Mask Bounding Box Aspect 
Ratio","pos":[-153.93637084960938,317.7193298339844],"size":[252,250],"flags":{"collapsed":false},"order":12,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","shape":7,"link":2338},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":3659},{"name":"aspect_ratio","type":"FLOAT","pos":[10,204],"widget":{"name":"aspect_ratio"},"link":2100}],"outputs":[{"name":"image","localized_name":"image","type":"IMAGE","links":[2101,2102,2209,3606],"slot_index":0},{"name":"mask","localized_name":"mask","type":"MASK","links":[2106],"slot_index":1},{"name":"mask_blurred","localized_name":"mask_blurred","type":"MASK","links":[2301],"slot_index":2},{"name":"x","localized_name":"x","type":"INT","links":[2206],"slot_index":3},{"name":"y","localized_name":"y","type":"INT","links":[2207],"slot_index":4},{"name":"width","localized_name":"width","type":"INT","links":[2204],"slot_index":5},{"name":"height","localized_name":"height","type":"INT","links":[2205],"slot_index":6}],"properties":{"Node name for S&R":"Mask Bounding Box Aspect Ratio"},"widgets_values":[300,40,1.75,false]},{"id":728,"type":"MaskToImage","pos":[-151.61874389648438,849.1907348632812],"size":[176.39999389648438,26],"flags":{"collapsed":true},"order":14,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":2106}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2103,3605],"slot_index":0}],"properties":{"Node name for S&R":"MaskToImage"},"widgets_values":[]},{"id":762,"type":"Image Comparer (rgthree)","pos":[1584.957763671875,-282.7954406738281],"size":[402.1800842285156,455.1059875488281],"flags":{},"order":25,"mode":0,"inputs":[{"name":"image_a","type":"IMAGE","dir":3,"link":2208},{"name":"image_b","type":"IMAGE","dir":3,"link":2209}],"outputs":[],"title":"Compare Inpaint 
Patch","properties":{"comparer_mode":"Slide"},"widgets_values":[[{"name":"A","selected":true,"url":"/api/view?filename=rgthree.compare._temp_hkrer_00001_.png&type=temp&subfolder=&rand=0.04538135261092524"},{"name":"B","selected":true,"url":"/api/view?filename=rgthree.compare._temp_hkrer_00002_.png&type=temp&subfolder=&rand=0.5206493331921973"}]],"color":"#332922","bgcolor":"#593930"},{"id":765,"type":"MaskToImage","pos":[2025.24755859375,225.29702758789062],"size":[182.28543090820312,26],"flags":{"collapsed":true},"order":17,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":2233}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3570],"slot_index":0}],"properties":{"Node name for S&R":"MaskToImage"},"widgets_values":[]},{"id":1022,"type":"ImageBlend","pos":[2028.46533203125,274.42523193359375],"size":[210,102],"flags":{"collapsed":true},"order":27,"mode":0,"inputs":[{"name":"image1","localized_name":"image1","type":"IMAGE","link":3568},{"name":"image2","localized_name":"image2","type":"IMAGE","link":3570}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3569],"slot_index":0}],"properties":{"Node name for S&R":"ImageBlend"},"widgets_values":[0.5,"overlay"]},{"id":1024,"type":"PreviewImage","pos":[2025.259765625,-279.3157958984375],"size":[413.7582092285156,445.8081359863281],"flags":{},"order":29,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":3569}],"outputs":[],"properties":{"Node name for S&R":"PreviewImage"},"widgets_values":[],"color":"#332922","bgcolor":"#593930"},{"id":760,"type":"SaveImage","pos":[1124.9569091796875,217.20452880859375],"size":[418.26055908203125,456.04608154296875],"flags":{},"order":30,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2199}],"outputs":[],"title":"Save Output","properties":{},"widgets_values":["ComfyUI"],"color":"#232","bgcolor":"#353"},{"id":761,"type":"Image Comparer 
(rgthree)","pos":[1574.957763671875,227.20449829101562],"size":[410.4466247558594,447.8973388671875],"flags":{},"order":31,"mode":0,"inputs":[{"name":"image_a","type":"IMAGE","dir":3,"link":2210},{"name":"image_b","type":"IMAGE","dir":3,"link":2200}],"outputs":[],"title":"Compare Output","properties":{"comparer_mode":"Slide"},"widgets_values":[[{"name":"A","selected":true,"url":"/api/view?filename=rgthree.compare._temp_eoplx_00001_.png&type=temp&subfolder=&rand=0.7495673665351654"},{"name":"B","selected":true,"url":"/api/view?filename=rgthree.compare._temp_eoplx_00002_.png&type=temp&subfolder=&rand=0.17529967707052396"}]],"color":"#232","bgcolor":"#353"},{"id":744,"type":"SaveImage","pos":[1124.9569091796875,-292.7954406738281],"size":[424.53594970703125,455.0760192871094],"flags":{},"order":26,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2241}],"outputs":[],"title":"Save Patch","properties":{"Node name for S&R":"SaveImage","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["ComfyUI"],"color":"#332922","bgcolor":"#593930"},{"id":725,"type":"Reroute","pos":[-243.93637084960938,317.7193298339844],"size":[75,26],"flags":{},"order":9,"mode":0,"inputs":[{"name":"","type":"*","link":3658}],"outputs":[{"name":"","type":"IMAGE","links":[2210,2211,2338],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1040,"type":"PreviewImage","pos":[-566.879150390625,688.4552001953125],"size":[304.98114013671875,265.58380126953125],"flags":{},"order":20,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":3607}],"outputs":[],"properties":{"Node name for 
S&R":"PreviewImage"},"widgets_values":[]},{"id":1039,"type":"ImageBlend","pos":[-151.61874389648438,949.1907348632812],"size":[210,102],"flags":{"collapsed":true},"order":16,"mode":0,"inputs":[{"name":"image1","localized_name":"image1","type":"IMAGE","link":3606},{"name":"image2","localized_name":"image2","type":"IMAGE","link":3605}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3607],"slot_index":0}],"properties":{"Node name for S&R":"ImageBlend"},"widgets_values":[0.5,"overlay"]},{"id":731,"type":"SimpleMath+","pos":[-151.61874389648438,799.1907348632812],"size":[315,98],"flags":{"collapsed":true},"order":8,"mode":0,"inputs":[{"name":"a","localized_name":"a","type":"*","shape":7,"link":2108},{"name":"b","localized_name":"b","type":"*","shape":7,"link":2109},{"name":"c","localized_name":"c","type":"*","shape":7,"link":null}],"outputs":[{"name":"INT","localized_name":"INT","type":"INT","links":null},{"name":"FLOAT","localized_name":"FLOAT","type":"FLOAT","links":[2100],"slot_index":1}],"properties":{"Node name for S&R":"SimpleMath+"},"widgets_values":["a/b"]},{"id":729,"type":"SetImageSize","pos":[-152.42828369140625,628.09228515625],"size":[210,102],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[{"name":"width","localized_name":"width","type":"INT","links":[2104,2108],"slot_index":0},{"name":"height","localized_name":"height","type":"INT","links":[2105,2109],"slot_index":1}],"title":"Inpaint Tile Size","properties":{"Node name for 
S&R":"SetImageSize"},"widgets_values":[1024,1024]},{"id":727,"type":"VAEEncodeAdvanced","pos":[-151.61874389648438,899.1907348632812],"size":[262.4812927246094,298],"flags":{"collapsed":true},"order":15,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":2101},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":2102},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":2103},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":3508},{"name":"width","type":"INT","pos":[10,160],"widget":{"name":"width"},"link":2104},{"name":"height","type":"INT","pos":[10,184],"widget":{"name":"height"},"link":2105}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[3602,3603,3611],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":[],"slot_index":1},{"name":"mask","localized_name":"mask","type":"MASK","links":[2233,3604],"slot_index":2},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[2125,3660],"slot_index":3},{"name":"width","localized_name":"width","type":"INT","links":[],"slot_index":4},{"name":"height","localized_name":"height","type":"INT","links":[]}],"properties":{"Node name for 
S&R":"VAEEncodeAdvanced"},"widgets_values":["false",1344,768,"red",false,"16_channels"]},{"id":1038,"type":"ClownGuides_Beta","pos":[-570,-350],"size":[315,450],"flags":{},"order":18,"mode":0,"inputs":[{"name":"guide_masked","localized_name":"guide_masked","type":"LATENT","shape":7,"link":3602},{"name":"guide_unmasked","localized_name":"guide_unmasked","type":"LATENT","shape":7,"link":3603},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":3604},{"name":"weights_masked","localized_name":"weights_masked","type":"SIGMAS","shape":7,"link":null},{"name":"weights_unmasked","localized_name":"weights_unmasked","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[3609,3641],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuides_Beta"},"widgets_values":["inversion",false,false,0,1,1,1,"constant","constant",0,0,1,-1,false]},{"id":1055,"type":"LoadImage","pos":[-588.5657958984375,310.53521728515625],"size":[315,314],"flags":{},"order":2,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3658],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":[3659],"slot_index":1}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["clipspace/clipspace-mask-264573735.png [input]","image"]},{"id":1041,"type":"ClownGuide_Style_Beta","pos":[-210,-350],"size":[315,286],"flags":{},"order":21,"mode":4,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":3611},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":3609}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownGuide_Style_Beta"},"widgets_values":["positive","WCT",1,1,"constant",0,10,false]},{"id":1056,"type":"CLIPTextEncode","pos":[251.24851989746094,-166.23118591308594],"size":[311.10028076171875,154.46998596191406],"flags":{"collapsed":false},"order":11,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":3656}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[3662],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["a soviet T72 tank driving down the middle of a road in a city, crossing over the crosswalk, aiming its gun at the camera"]},{"id":1043,"type":"ClownOptions_SDE_Beta","pos":[249.46791076660156,47.537593841552734],"size":[315,266],"flags":{},"order":3,"mode":0,"inputs":[{"name":"etas","localized_name":"etas","type":"SIGMAS","shape":7,"link":null},{"name":"etas_substep","localized_name":"etas_substep","type":"SIGMAS","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[3643],"slot_index":0}],"properties":{"Node name for S&R":"ClownOptions_SDE_Beta"},"widgets_values":["gaussian","gaussian","hard","hard",0.5,0.75,-1,"fixed"]},{"id":1018,"type":"ClownOptions_ImplicitSteps_Beta","pos":[611.24853515625,-371.7803649902344],"size":[340.20001220703125,130],"flags":{},"order":4,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[3664],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownOptions_ImplicitSteps_Beta"},"widgets_values":["bongmath","bongmath",2,0]},{"id":1053,"type":"ClownsharKSampler_Beta","pos":[611.24853515625,-181.78033447265625],"size":[340.55120849609375,730],"flags":{},"order":22,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":3661},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":3662},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":3660},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":3641},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":3664},{"name":"options 2","type":"OPTIONS","link":3643},{"name":"options 3","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[3665],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":[],"slot_index":1},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharKSampler_Beta","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":[0.5,"exponential/res_2s","beta57",30,-1,1,1,0,"fixed","standard",true]}],"links":[[1963,663,0,664,0,"MODEL"],[1964,664,0,13,0,"*"],[1965,663,1,490,0,"*"],[1966,663,2,14,0,"*"],[2100,731,1,726,2,"FLOAT"],[2101,726,0,727,0,"IMAGE"],[2102,726,0,727,1,"IMAGE"],[2103,728,0,727,2,"IMAGE"],[2104,729,0,727,5,"INT"],[2105,729,1,727,6,"INT"],[2106,726,1,728,0,"MASK"],[2108,729,0,731,0,"*"],[2109,729,1,731,1,"*"],[2125,727,3,737,1,"LATENT"],[2153,14,0,745,1,"VAE"],[2198,758,0,759,1,"IMAGE"],[2199,759,0,760,0,"IMAGE"],[2200,759,0,761,1,"IMAGE"],[2201,745,0,758,0,"IMAGE"],[2204,726,5,758,1,"INT"],[2205,726,6,758,2,"INT"],[2206,726,3,759,3,"INT"],[2207,726,4,759,4,"INT"],[2208,745,0,762,0,"IMAGE"],[2209,726,0,762,1,"IMAGE"],[2210,725,0,761,0,"IMAGE"],[2211,725,0,759,0,"IMAGE"],[2233,727,2,765,0,"MASK"],[2241,745,0,744,0,"IMAGE"],[2301,726,2,759,2,"MASK"],[2317,13,0,786,0,"MODEL"],[2318,786,0,737,0,"MODEL"],[2338,725,0,726,0,"IMAGE"],[3508,14,0,727,4,"VAE"],[3568,745,0,1022,0,"IMAGE"],[3569,1022,0,1024,0,"IMAGE"],[3570,765,0,1022,1,"IMAGE"],[3602,727,0,1038,0,"LATENT"],[3603,727,0,1038,1,"LATENT"],[3604,727,2,1038,2,"MASK"],[3605,728,0,1039,1,"IMAGE"],[3606,726,0,1039,0,"IMAGE"],[3607,1039,0,1040,0,"IMAGE"],[3609,1038,0,1041,3,"GUIDES"],[3611,727,0,1041,0,"LATENT"],[3641,1038,0,1053,5,"GUIDES"],[3643,1043,0,1053,7,"OPTIONS"],[3656,490,0,1056,0,"CLIP"],[3658,1055,0,725,0,"*"],[3659,1055,1,726,1,"MASK"],[3660,727,3,1053,3,"LATENT"],[3661,737,0,1053,0,"MODEL"],[3662,1056,0,1053,1,"CONDITIONING"],[3664,1018,0,1053,6,"OPTIONS"],[3665,1053,0,745,0,"LATENT"]],"groups":[{"id":1,"title":"Prepare Input","bounding":[-611.2013549804688,224.80706787109375,755.7755737304688,762.867431640625],"color":"#3f789e","font_size":24,"flags":{}},{"id":2,"title":"Patch and 
Stitch","bounding":[1079.80078125,-451.0775451660156,1387.1339111328125,1156.21923828125],"color":"#3f789e","font_size":24,"flags":{}},{"id":3,"title":"Loaders","bounding":[-1311.103515625,-459.84735107421875,645.1646118164062,640.0969848632812],"color":"#3f789e","font_size":24,"flags":{}},{"id":5,"title":"Sampling","bounding":[204.55885314941406,-455.63134765625,812.3118896484375,1071.2481689453125],"color":"#3f789e","font_size":24,"flags":{}},{"id":6,"title":"Guides","bounding":[-611.8231811523438,-457.95751953125,755.8380737304688,634.3353271484375],"color":"#3f789e","font_size":24,"flags":{}}],"config":{},"extra":{"ds":{"scale":1.3072020475058177,"offset":[3303.9392897394673,741.4045019633804]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0,"ue_links":[],"VHS_MetadataImage":true,"VHS_KeepIntermediate":true},"version":0.4}
================================================
FILE: example_workflows/flux inpainting.json
================================================
{"last_node_id":637,"last_link_id":1778,"nodes":[{"id":617,"type":"ClownsharKSampler_Beta","pos":[4647.0654296875,1012.7097778320312],"size":[315,690],"flags":{},"order":9,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":1730},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":1754},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1733},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":1744},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[1756],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"multistep/res_3m","beta57",40,30,1,1,15,"fixed","standard",true]},{"id":619,"type":"VAEDecode","pos":[5354.6103515625,907.4140014648438],"size":[210,46],"flags":{},"order":11,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":1771},{"name":"vae","localized_name":"vae","type":"VAE","link":1740}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[1765],"slot_index":0}],"properties":{"Node name for 
S&R":"VAEDecode"},"widgets_values":[]},{"id":631,"type":"SaveImage","pos":[5357.8349609375,1012.29443359375],"size":[315,270],"flags":{},"order":12,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":1765}],"outputs":[],"properties":{},"widgets_values":["ComfyUI"]},{"id":624,"type":"CLIPTextEncode","pos":[4233.03955078125,1015.2553100585938],"size":[380.6268615722656,114.73346710205078],"flags":{},"order":3,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":1753}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[1754],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["a weird alien tripod with a purple woman's head on top "]},{"id":615,"type":"FluxLoader","pos":[3883.31982421875,1018.0260620117188],"size":[315,282],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1766],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[1753],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[1723,1740],"slot_index":2},{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","links":null},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","links":null}],"properties":{"Node name for S&R":"FluxLoader"},"widgets_values":["colossusProjectFlux_v42AIO.safetensors","default",".use_ckpt_clip",".none",".use_ckpt_vae",".none",".none"]},{"id":346,"type":"ModelSamplingAdvancedResolution","pos":[3940.993408203125,831.2357177734375],"size":[260.3999938964844,126],"flags":{},"order":7,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1766},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":1721}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1730],"slot_index":0}],"properties":{"Node name for 
S&R":"ModelSamplingAdvancedResolution"},"widgets_values":["exponential",1.35,0.85]},{"id":620,"type":"ClownGuide_Beta","pos":[4355.02392578125,1383.0733642578125],"size":[264.49530029296875,290],"flags":{},"order":6,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":1767},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":1745},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[1744],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuide_Beta"},"widgets_values":["flow",false,false,1,1,"constant",0,40,false]},{"id":626,"type":"ClownsharkChainsampler_Beta","pos":[4988.4580078125,1015.6370239257812],"size":[340.20001220703125,509.99993896484375],"flags":{},"order":10,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1756},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":1770},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[1771],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":[],"slot_index":1},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"multistep/res_3m",-1,1,"resample",true]},{"id":422,"type":"VAEEncodeAdvanced","pos":[4080.7021484375,1383.7640380859375],"size":[240.29074096679688,278],"flags":{},"order":4,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":1777},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":1723}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[1767,1772],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":[],"slot_index":1},{"name":"mask","localized_name":"mask","type":"MASK","links":[],"slot_index":2},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[1721,1733],"slot_index":3},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"VAEEncodeAdvanced"},"widgets_values":["false",1024,1024,"red",false,"16_channels"]},{"id":627,"type":"ClownGuide_Beta","pos":[4701.61572265625,1776.4569091796875],"size":[264.49530029296875,290],"flags":{},"order":8,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":1772},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":1778},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[1770],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownGuide_Beta"},"widgets_values":["flow",false,false,1,1,"constant",0,40,false]},{"id":634,"type":"GrowMask","pos":[4102.16650390625,1794.78857421875],"size":[210,82],"flags":{},"order":5,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":1774}],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[1778],"slot_index":0}],"properties":{"Node name for S&R":"GrowMask"},"widgets_values":[20,false]},{"id":621,"type":"LoadImage","pos":[3718.762939453125,1384.687255859375],"size":[319.33538818359375,313.277587890625],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[1777],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":[1745,1774],"slot_index":1}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["clipspace/clipspace-mask-150185841.8.png [input]","image"]},{"id":637,"type":"Note","pos":[3731.639892578125,1771.010009765625],"size":[282.0154113769531,88],"flags":{},"order":2,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Draw your mask on your image for the area you would like to 
inpaint."],"color":"#432","bgcolor":"#653"}],"links":[[1721,422,3,346,1,"LATENT"],[1723,615,2,422,4,"VAE"],[1730,346,0,617,0,"MODEL"],[1733,422,3,617,3,"LATENT"],[1740,615,2,619,1,"VAE"],[1744,620,0,617,5,"GUIDES"],[1745,621,1,620,1,"MASK"],[1753,615,1,624,0,"CLIP"],[1754,624,0,617,1,"CONDITIONING"],[1756,617,0,626,4,"LATENT"],[1765,619,0,631,0,"IMAGE"],[1766,615,0,346,0,"MODEL"],[1767,422,0,620,0,"LATENT"],[1770,627,0,626,5,"GUIDES"],[1771,626,0,619,0,"LATENT"],[1772,422,0,627,0,"LATENT"],[1774,621,1,634,0,"MASK"],[1777,621,0,422,0,"IMAGE"],[1778,634,0,627,1,"MASK"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.3109994191500227,"offset":[-1810.8840558767379,-650.1028379746496]},"node_versions":{"comfy-core":"0.3.26","comfyui_controlnet_aux":"1e9eac6377c882da8bb360c7544607036904362c","ComfyUI-VideoHelperSuite":"c36626c6028faca912eafcedbc71f1d342fb4d2a"},"VHS_latentpreview":false,"VHS_latentpreviewrate":0,"VHS_MetadataImage":true,"VHS_KeepIntermediate":true},"version":0.4}
================================================
FILE: example_workflows/flux regional antiblur.json
================================================
{"last_node_id":723,"last_link_id":2096,"nodes":[{"id":13,"type":"Reroute","pos":[1280,-650],"size":[75,26],"flags":{},"order":11,"mode":0,"inputs":[{"name":"","type":"*","link":1964}],"outputs":[{"name":"","type":"MODEL","links":[1967],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":490,"type":"Reroute","pos":[1280,-610],"size":[75,26],"flags":{},"order":8,"mode":0,"inputs":[{"name":"","type":"*","link":1965}],"outputs":[{"name":"","type":"CLIP","links":[1939,2092],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":14,"type":"Reroute","pos":[1280,-570],"size":[75,26],"flags":{},"order":9,"mode":0,"inputs":[{"name":"","type":"*","link":1966}],"outputs":[{"name":"","type":"VAE","links":[18,1328],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":398,"type":"SaveImage","pos":[1379.9996337890625,-267.2835998535156],"size":[341.7508850097656,561.0067749023438],"flags":{},"order":20,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":1329}],"outputs":[],"properties":{"Node name for S&R":"SaveImage","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["ComfyUI"]},{"id":701,"type":"Note","pos":[80,-520],"size":[342.05950927734375,88],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["I usually just lazily draw masks in Load Image nodes (with some random image loaded), but for the sake of reproducibility, here's another approach."],"color":"#432","bgcolor":"#653"},{"id":712,"type":"Note","pos":[-210,-520],"size":[245.76409912109375,91.6677017211914],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["So long as these masks are all the same size, the regional conditioning nodes will handle resizing to the image size for 
you."],"color":"#432","bgcolor":"#653"},{"id":676,"type":"InvertMask","pos":[20,-370],"size":[142.42074584960938,26],"flags":{},"order":10,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":2073}],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[2083],"slot_index":0}],"properties":{"Node name for S&R":"InvertMask"},"widgets_values":[]},{"id":663,"type":"FluxLoader","pos":[630,-720],"size":[374.41741943359375,282],"flags":{},"order":2,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1963],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[1965],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[1966],"slot_index":2},{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","links":[],"slot_index":3},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","links":[],"slot_index":4}],"properties":{"Node name for S&R":"FluxLoader"},"widgets_values":["colossusProjectFlux_v42AIO.safetensors","fp8_e4m3fn_fast",".use_ckpt_clip",".none",".use_ckpt_vae",".none",".none"]},{"id":662,"type":"CLIPTextEncode","pos":[460,-370],"size":[210,88],"flags":{"collapsed":false},"order":12,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":1939}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2094],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["a woman wearing a red flannel shirt and a cute shark plush blue hat"]},{"id":723,"type":"CLIPTextEncode","pos":[460,-240],"size":[210,88],"flags":{"collapsed":false},"order":13,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":2092}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2093],"slot_index":0}],"properties":{"Node name for 
S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["a college campus"]},{"id":7,"type":"VAEEncodeAdvanced","pos":[719.6110229492188,16.752899169921875],"size":[261.2217712402344,279.3136901855469],"flags":{},"order":14,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":null},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":18}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":[],"slot_index":1},{"name":"mask","localized_name":"mask","type":"MASK","links":[],"slot_index":2},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[1399],"slot_index":3},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"VAEEncodeAdvanced","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":["false",1024,1024,"red",false,"16_channels"]},{"id":710,"type":"MaskPreview","pos":[180,-190],"size":[210,246],"flags":{},"order":16,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":2054}],"outputs":[],"properties":{"Node name for S&R":"MaskPreview"},"widgets_values":[]},{"id":664,"type":"ReFluxPatcher","pos":[1040,-720],"size":[210,82],"flags":{},"order":7,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1963}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1964],"slot_index":0}],"properties":{"Node name for 
S&R":"ReFluxPatcher"},"widgets_values":["float64",true]},{"id":397,"type":"VAEDecode","pos":[1382.3662109375,-374.17059326171875],"size":[210,46],"flags":{},"order":19,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":2096},{"name":"vae","localized_name":"vae","type":"VAE","link":1328}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[1329],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[]},{"id":715,"type":"SolidMask","pos":[-220,-370],"size":[210,106],"flags":{},"order":3,"mode":0,"inputs":[],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[2073],"slot_index":0}],"properties":{"Node name for S&R":"SolidMask"},"widgets_values":[1,1024,1024]},{"id":716,"type":"SolidMask","pos":[-220,-220],"size":[210,106],"flags":{},"order":4,"mode":0,"inputs":[],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[2065],"slot_index":0}],"properties":{"Node name for S&R":"SolidMask"},"widgets_values":[1,384,864]},{"id":709,"type":"MaskComposite","pos":[190,-370],"size":[210,126],"flags":{},"order":15,"mode":0,"inputs":[{"name":"destination","localized_name":"destination","type":"MASK","link":2083},{"name":"source","localized_name":"source","type":"MASK","link":2065}],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[2054,2091],"slot_index":0}],"properties":{"Node name for S&R":"MaskComposite"},"widgets_values":[256,160,"add"]},{"id":704,"type":"Note","pos":[101.74818420410156,112.67951965332031],"size":[290.7107238769531,155.35317993164062],"flags":{},"order":5,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["ClownRegionalConditionings:\n\nTry raising or lowering weight, and changing the weight scheduler from beta57 to Karras (weakens more quickly), or to linear quadratic (stronger late).\n\nTry changing region_bleed_start_step (earlier will make the image blend 
together more), and end_step."],"color":"#432","bgcolor":"#653"},{"id":401,"type":"ClownsharKSampler_Beta","pos":[1010,-370],"size":[340.55120849609375,666.8208618164062],"flags":{},"order":18,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":1967},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":2095},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1399},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2096],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharKSampler_Beta","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":[0.5,"multistep/res_2m","bong_tangent",30,-1,1,1,3,"fixed","standard",true]},{"id":722,"type":"ClownRegionalConditioning2","pos":[690,-370],"size":[287.75750732421875,330],"flags":{},"order":17,"mode":0,"inputs":[{"name":"conditioning_masked","localized_name":"conditioning_masked","type":"CONDITIONING","shape":7,"link":2094},{"name":"conditioning_unmasked","localized_name":"conditioning_unmasked","type":"CONDITIONING","shape":7,"link":2093},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":2091},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"region_bleeds","localized_name":"region_bleeds","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","links":[2095],"slot_index":0}],"properties":{"Node name for S&R":"ClownRegionalConditioning2"},"widgets_values":[1,0,0,"constant",0,-1,"boolean_masked",32,false]},{"id":703,"type":"Note","pos":[423.10699462890625,-96.14085388183594],"size":[241.9689483642578,386.7543640136719],"flags":{},"order":6,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["edge_width also creates some overlap around the edges of the mask.\n\nboolean_masked means that the masked area can \"see\" the rest of the image, but the unmasked area cannot. \"boolean\" would mean neither area could see the rest of the image.\n\nTry setting to boolean_unmasked and see what happens!\n\nIf you still have blur, try reducing edge_width (and if you have seams, try increasing it, or setting end_step to something like 20). \n\nAlso verify that you can generate the background prompt alone without blur (if you can't, this won't work). 
And don't get stuck on one seed.\n\nVaguely human-shaped masks also tend to work better than the blocky one used here."],"color":"#432","bgcolor":"#653"}],"links":[[18,14,0,7,4,"VAE"],[1328,14,0,397,1,"VAE"],[1329,397,0,398,0,"IMAGE"],[1399,7,3,401,3,"LATENT"],[1939,490,0,662,0,"CLIP"],[1963,663,0,664,0,"MODEL"],[1964,664,0,13,0,"*"],[1965,663,1,490,0,"*"],[1966,663,2,14,0,"*"],[1967,13,0,401,0,"MODEL"],[2054,709,0,710,0,"MASK"],[2065,716,0,709,1,"MASK"],[2073,715,0,676,0,"MASK"],[2083,676,0,709,0,"MASK"],[2091,709,0,722,2,"MASK"],[2092,490,0,723,0,"CLIP"],[2093,723,0,722,1,"CONDITIONING"],[2094,662,0,722,0,"CONDITIONING"],[2095,722,0,401,1,"CONDITIONING"],[2096,401,0,397,0,"LATENT"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.91943424957756,"offset":[1680.6010824178522,841.7668875984083]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0,"ue_links":[],"VHS_MetadataImage":true,"VHS_KeepIntermediate":true},"version":0.4}
================================================
FILE: example_workflows/flux regional redux (2 zone).json
================================================
{"last_node_id":704,"last_link_id":2042,"nodes":[{"id":13,"type":"Reroute","pos":[1300,-790],"size":[75,26],"flags":{},"order":16,"mode":0,"inputs":[{"name":"","type":"*","link":1964}],"outputs":[{"name":"","type":"MODEL","links":[1967],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":490,"type":"Reroute","pos":[1300,-750],"size":[75,26],"flags":{},"order":11,"mode":0,"inputs":[{"name":"","type":"*","link":1965}],"outputs":[{"name":"","type":"CLIP","links":[1706,1939],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":541,"type":"CLIPTextEncode","pos":[692.1508178710938,183.7528839111328],"size":[265.775390625,113.01970672607422],"flags":{},"order":17,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":1706}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[1732],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["blurry, out of focus, shallow depth of field, low quality, bad quality, low detail, mutated, jpeg artifacts, compression artifacts,"]},{"id":14,"type":"Reroute","pos":[1300,-710],"size":[75,26],"flags":{},"order":12,"mode":0,"inputs":[{"name":"","type":"*","link":1966}],"outputs":[{"name":"","type":"VAE","links":[18,1328],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":397,"type":"VAEDecode","pos":[1403.6392822265625,-371.9699401855469],"size":[210,46],"flags":{},"order":31,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":1988},{"name":"vae","localized_name":"vae","type":"VAE","link":1328}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[1329],"slot_index":0}],"properties":{"Node name for 
S&R":"VAEDecode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[]},{"id":680,"type":"Reroute","pos":[1310,-660],"size":[75,26],"flags":{},"order":13,"mode":0,"inputs":[{"name":"","type":"*","link":2001}],"outputs":[{"name":"","type":"CLIP_VISION","links":[2004,2009]}],"properties":{"showOutputText":false,"horizontal":false}},{"id":678,"type":"StyleModelApply","pos":[101.3630142211914,-560.2020874023438],"size":[262,122],"flags":{},"order":24,"mode":0,"inputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","link":2005},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","link":1999},{"name":"clip_vision_output","localized_name":"clip_vision_output","type":"CLIP_VISION_OUTPUT","link":2003}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2002],"slot_index":0}],"properties":{"Node name for S&R":"StyleModelApply"},"widgets_values":[1,"multiply"]},{"id":683,"type":"CLIPVisionEncode","pos":[-170,-220],"size":[253.60000610351562,78],"flags":{},"order":21,"mode":0,"inputs":[{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","link":2009},{"name":"image","localized_name":"image","type":"IMAGE","link":2035}],"outputs":[{"name":"CLIP_VISION_OUTPUT","localized_name":"CLIP_VISION_OUTPUT","type":"CLIP_VISION_OUTPUT","links":[2008]}],"properties":{"Node name for S&R":"CLIPVisionEncode"},"widgets_values":["center"]},{"id":682,"type":"StyleModelApply","pos":[100,-250],"size":[262,122],"flags":{},"order":25,"mode":0,"inputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","link":2006},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","link":2007},{"name":"clip_vision_output","localized_name":"clip_vision_output","type":"CLIP_VISION_OUTPUT","link":2008}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2020],"slot_index":0}],"properties":{"Node name for 
S&R":"StyleModelApply"},"widgets_values":[1,"multiply"]},{"id":681,"type":"CLIPVisionEncode","pos":[-173.92124938964844,-524.1537475585938],"size":[253.60000610351562,78],"flags":{},"order":20,"mode":0,"inputs":[{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","link":2004},{"name":"image","localized_name":"image","type":"IMAGE","link":2028}],"outputs":[{"name":"CLIP_VISION_OUTPUT","localized_name":"CLIP_VISION_OUTPUT","type":"CLIP_VISION_OUTPUT","links":[2003]}],"properties":{"Node name for S&R":"CLIPVisionEncode"},"widgets_values":["center"]},{"id":694,"type":"LoadImage","pos":[-536.0714111328125,-640.6544189453125],"size":[315,314],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2028],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["ChatGPT Image Apr 29, 2025, 07_47_12 PM.png","image"]},{"id":7,"type":"VAEEncodeAdvanced","pos":[696.7778930664062,-164.97328186035156],"size":[261.2217712402344,279.3136901855469],"flags":{},"order":19,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":null},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":18}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":[],"slot_index":1},{"name":"mask","localized_name":"mask","type":"MASK","links":[],"slot_index":2},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[1399],"slot_index":3},{"name":"width","localized_name":"width","type":"INT","links":null}
,{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"VAEEncodeAdvanced","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":["false",768,1344,"red",false,"16_channels"]},{"id":596,"type":"ClownRegionalConditioning","pos":[425.9762268066406,-243.12513732910156],"size":[211.60000610351562,122],"flags":{},"order":27,"mode":0,"inputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","shape":7,"link":null},{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","shape":7,"link":2020},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":2042}],"outputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","links":[1937],"slot_index":0}],"properties":{"Node name for S&R":"ClownRegionalConditioning"},"widgets_values":[false,256]},{"id":401,"type":"ClownsharKSampler_Beta","pos":[1010,-370],"size":[340.55120849609375,666.8208618164062],"flags":{},"order":30,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":1967},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":1735},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":1732},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1399},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[1988],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharKSampler_Beta","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":[0.5,"exponential/res_2s","bong_tangent",20,-1,1,1,109,"fixed","standard",true]},{"id":560,"type":"ClownRegionalConditionings","pos":[676.1644897460938,-499.31219482421875],"size":[278.4758605957031,266],"flags":{},"order":29,"mode":0,"inputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","shape":7,"link":1938},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"region_bleeds","localized_name":"region_bleeds","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","links":[1735],"slot_index":0}],"properties":{"Node name for S&R":"ClownRegionalConditionings"},"widgets_values":[0.5,1,14,"beta57",0,20,"boolean",false]},{"id":690,"type":"LoadImage","pos":[-531.4011840820312,-234.04151916503906],"size":[315,314],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2035],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["ComfyUI_00452_.png","image"]},{"id":676,"type":"InvertMask","pos":[-1270,-450],"size":[140,26],"flags":{},"order":9,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":1990}],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[1991],"slot_index":0}],"properties":{"Node name for S&R":"InvertMask"},"widgets_values":[]},{"id":666,"type":"SolidMask","pos":[-1500,-450],"size":[210,106],"flags":{},"order":2,"mode":0,"inputs":[],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[1990],"slot_index":0}],"properties":{"Node name for 
S&R":"SolidMask"},"widgets_values":[1,1344,768]},{"id":667,"type":"MaskPreview","pos":[-840,-570],"size":[210,246],"flags":{},"order":22,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":1969}],"outputs":[],"properties":{"Node name for S&R":"MaskPreview"},"widgets_values":[]},{"id":670,"type":"MaskPreview","pos":[-840,-280],"size":[210,246],"flags":{},"order":26,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":2041}],"outputs":[],"properties":{"Node name for S&R":"MaskPreview"},"widgets_values":[]},{"id":661,"type":"ClownRegionalConditioning","pos":[411.9298095703125,-539.053955078125],"size":[211.60000610351562,122],"flags":{},"order":28,"mode":0,"inputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","shape":7,"link":1937},{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","shape":7,"link":2002},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":2036}],"outputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","links":[1938],"slot_index":0}],"properties":{"Node name for S&R":"ClownRegionalConditioning"},"widgets_values":[false,256]},{"id":665,"type":"MaskComposite","pos":[-1100,-450],"size":[210,126],"flags":{},"order":15,"mode":0,"inputs":[{"name":"destination","localized_name":"destination","type":"MASK","link":1991},{"name":"source","localized_name":"source","type":"MASK","link":1995}],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[1969,2036,2038],"slot_index":0}],"properties":{"Node name for S&R":"MaskComposite"},"widgets_values":[0,0,"add"]},{"id":700,"type":"MaskFlip+","pos":[-1098.6136474609375,-267.628173828125],"size":[210,58],"flags":{},"order":23,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":2038}],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[2041,2042],"slot_index":0}],"properties":{"Node name for 
S&R":"MaskFlip+"},"widgets_values":["x"]},{"id":668,"type":"SolidMask","pos":[-1502.6644287109375,-289.3330993652344],"size":[210,106],"flags":{},"order":3,"mode":0,"inputs":[],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[1995],"slot_index":0}],"properties":{"Node name for S&R":"SolidMask"},"widgets_values":[1,768,768]},{"id":701,"type":"Note","pos":[-1378.6959228515625,-637.0702514648438],"size":[342.05950927734375,88],"flags":{},"order":4,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["I usually just lazily draw masks in Load Image nodes (with some random image loaded), but for the sake of reproducibility, here's another approach."],"color":"#432","bgcolor":"#653"},{"id":663,"type":"FluxLoader","pos":[654.6221923828125,-858.3792724609375],"size":[374.41741943359375,282],"flags":{},"order":5,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1963],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[1965],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[1966],"slot_index":2},{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","links":[2001],"slot_index":3},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","links":[2000],"slot_index":4}],"properties":{"Node name for S&R":"FluxLoader"},"widgets_values":["colossusProjectFlux_v42AIO.safetensors","fp8_e4m3fn_fast",".use_ckpt_clip",".none",".use_ckpt_vae","sigclip_vision_patch14_384.safetensors","flux1-redux-dev.safetensors"]},{"id":664,"type":"ReFluxPatcher","pos":[1064.7325439453125,-863.0516967773438],"size":[210,82],"flags":{},"order":10,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1963}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1964],"slot_index":0}],"properties":{"Node name for 
S&R":"ReFluxPatcher"},"widgets_values":["float64",true]},{"id":679,"type":"Reroute","pos":[1300,-610],"size":[75,26],"flags":{},"order":14,"mode":0,"inputs":[{"name":"","type":"*","link":2000}],"outputs":[{"name":"","type":"STYLE_MODEL","links":[1999,2007]}],"properties":{"showOutputText":false,"horizontal":false}},{"id":662,"type":"CLIPTextEncode","pos":[-140.3179168701172,-670.337158203125],"size":[210,88],"flags":{"collapsed":false},"order":18,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":1939}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2005,2006],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[""]},{"id":702,"type":"Note","pos":[-1222.3177490234375,-134.59034729003906],"size":[278.04071044921875,88],"flags":{},"order":6,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Note that these masks are overlapping."],"color":"#432","bgcolor":"#653"},{"id":703,"type":"Note","pos":[358.4803466796875,-41.564422607421875],"size":[278.04071044921875,88],"flags":{},"order":7,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["edge_width also creates some overlap around the edges of the mask."],"color":"#432","bgcolor":"#653"},{"id":704,"type":"Note","pos":[324.8023986816406,-781.4505004882812],"size":[290.7107238769531,155.35317993164062],"flags":{},"order":8,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["ClownRegionalConditionings:\n\nTry raising or lowering weight, and changing the weight scheduler from beta57 to Karras (weakens more quickly), or to linear quadratic (stronger late).\n\nTry changing region_bleed_start_step, and 
end_step."],"color":"#432","bgcolor":"#653"},{"id":398,"type":"SaveImage","pos":[1379.9996337890625,-267.2835998535156],"size":[341.7508850097656,561.0067749023438],"flags":{},"order":32,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":1329}],"outputs":[],"properties":{"Node name for S&R":"SaveImage","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["ComfyUI"]}],"links":[[18,14,0,7,4,"VAE"],[1328,14,0,397,1,"VAE"],[1329,397,0,398,0,"IMAGE"],[1399,7,3,401,3,"LATENT"],[1706,490,0,541,0,"CLIP"],[1732,541,0,401,2,"CONDITIONING"],[1735,560,0,401,1,"CONDITIONING"],[1937,596,0,661,0,"COND_REGIONS"],[1938,661,0,560,0,"COND_REGIONS"],[1939,490,0,662,0,"CLIP"],[1963,663,0,664,0,"MODEL"],[1964,664,0,13,0,"*"],[1965,663,1,490,0,"*"],[1966,663,2,14,0,"*"],[1967,13,0,401,0,"MODEL"],[1969,665,0,667,0,"MASK"],[1988,401,0,397,0,"LATENT"],[1990,666,0,676,0,"MASK"],[1991,676,0,665,0,"MASK"],[1995,668,0,665,1,"MASK"],[1999,679,0,678,1,"STYLE_MODEL"],[2000,663,4,679,0,"*"],[2001,663,3,680,0,"*"],[2002,678,0,661,1,"CONDITIONING"],[2003,681,0,678,2,"CLIP_VISION_OUTPUT"],[2004,680,0,681,0,"CLIP_VISION"],[2005,662,0,678,0,"CONDITIONING"],[2006,662,0,682,0,"CONDITIONING"],[2007,679,0,682,1,"STYLE_MODEL"],[2008,683,0,682,2,"CLIP_VISION_OUTPUT"],[2009,680,0,683,0,"CLIP_VISION"],[2020,682,0,596,1,"CONDITIONING"],[2028,694,0,681,1,"IMAGE"],[2035,690,0,683,1,"IMAGE"],[2036,665,0,661,2,"MASK"],[2038,665,0,700,0,"MASK"],[2041,700,0,670,0,"MASK"],[2042,700,0,596,2,"MASK"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.7449402268886907,"offset":[2753.5015634091214,978.5823037629943]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0,"ue_links":[],"VHS_MetadataImage":true,"VHS_KeepIntermediate":true},"version":0.4}
================================================
FILE: example_workflows/flux regional redux (3 zone, nested).json
================================================
{"last_node_id":720,"last_link_id":2082,"nodes":[{"id":13,"type":"Reroute","pos":[1300,-790],"size":[75,26],"flags":{},"order":18,"mode":0,"inputs":[{"name":"","type":"*","link":1964}],"outputs":[{"name":"","type":"MODEL","links":[1967],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":490,"type":"Reroute","pos":[1300,-750],"size":[75,26],"flags":{},"order":12,"mode":0,"inputs":[{"name":"","type":"*","link":1965}],"outputs":[{"name":"","type":"CLIP","links":[1706,1939],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":541,"type":"CLIPTextEncode","pos":[692.1508178710938,183.7528839111328],"size":[265.775390625,113.01970672607422],"flags":{},"order":19,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":1706}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[1732],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["blurry, out of focus, shallow depth of field, low quality, bad quality, low detail, mutated, jpeg artifacts, compression 
artifacts,"]},{"id":14,"type":"Reroute","pos":[1300,-710],"size":[75,26],"flags":{},"order":13,"mode":0,"inputs":[{"name":"","type":"*","link":1966}],"outputs":[{"name":"","type":"VAE","links":[18,1328],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":680,"type":"Reroute","pos":[1310,-660],"size":[75,26],"flags":{},"order":14,"mode":0,"inputs":[{"name":"","type":"*","link":2001}],"outputs":[{"name":"","type":"CLIP_VISION","links":[2004,2009,2043]}],"properties":{"showOutputText":false,"horizontal":false}},{"id":678,"type":"StyleModelApply","pos":[101.3630142211914,-560.2020874023438],"size":[262,122],"flags":{},"order":28,"mode":0,"inputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","link":2005},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","link":1999},{"name":"clip_vision_output","localized_name":"clip_vision_output","type":"CLIP_VISION_OUTPUT","link":2003}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2002],"slot_index":0}],"properties":{"Node name for S&R":"StyleModelApply"},"widgets_values":[1,"multiply"]},{"id":681,"type":"CLIPVisionEncode","pos":[-173.92124938964844,-524.1537475585938],"size":[253.60000610351562,78],"flags":{},"order":22,"mode":0,"inputs":[{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","link":2004},{"name":"image","localized_name":"image","type":"IMAGE","link":2082}],"outputs":[{"name":"CLIP_VISION_OUTPUT","localized_name":"CLIP_VISION_OUTPUT","type":"CLIP_VISION_OUTPUT","links":[2003]}],"properties":{"Node name for 
S&R":"CLIPVisionEncode"},"widgets_values":["center"]},{"id":663,"type":"FluxLoader","pos":[654.6221923828125,-858.3792724609375],"size":[374.41741943359375,282],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1963],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[1965],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[1966],"slot_index":2},{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","links":[2001],"slot_index":3},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","links":[2000],"slot_index":4}],"properties":{"Node name for S&R":"FluxLoader"},"widgets_values":["colossusProjectFlux_v42AIO.safetensors","fp8_e4m3fn_fast",".use_ckpt_clip",".none",".use_ckpt_vae","sigclip_vision_patch14_384.safetensors","flux1-redux-dev.safetensors"]},{"id":664,"type":"ReFluxPatcher","pos":[1064.7325439453125,-863.0516967773438],"size":[210,82],"flags":{},"order":11,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1963}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1964],"slot_index":0}],"properties":{"Node name for S&R":"ReFluxPatcher"},"widgets_values":["float64",true]},{"id":679,"type":"Reroute","pos":[1300,-610],"size":[75,26],"flags":{},"order":15,"mode":0,"inputs":[{"name":"","type":"*","link":2000}],"outputs":[{"name":"","type":"STYLE_MODEL","links":[1999,2007,2046]}],"properties":{"showOutputText":false,"horizontal":false}},{"id":662,"type":"CLIPTextEncode","pos":[-140.3179168701172,-670.337158203125],"size":[210,88],"flags":{"collapsed":false},"order":20,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":1939}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2005,2006,2045],"slot_index":0}],"properties":{"Node name for 
S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[""]},{"id":398,"type":"SaveImage","pos":[1379.9996337890625,-267.2835998535156],"size":[341.7508850097656,561.0067749023438],"flags":{},"order":40,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":1329}],"outputs":[],"properties":{"Node name for S&R":"SaveImage","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["ComfyUI"]},{"id":683,"type":"CLIPVisionEncode","pos":[-170,-220],"size":[253.60000610351562,78],"flags":{},"order":23,"mode":0,"inputs":[{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","link":2009},{"name":"image","localized_name":"image","type":"IMAGE","link":2062}],"outputs":[{"name":"CLIP_VISION_OUTPUT","localized_name":"CLIP_VISION_OUTPUT","type":"CLIP_VISION_OUTPUT","links":[2008]}],"properties":{"Node name for S&R":"CLIPVisionEncode"},"widgets_values":["center"]},{"id":682,"type":"StyleModelApply","pos":[100,-250],"size":[262,122],"flags":{},"order":29,"mode":0,"inputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","link":2006},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","link":2007},{"name":"clip_vision_output","localized_name":"clip_vision_output","type":"CLIP_VISION_OUTPUT","link":2008}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2020],"slot_index":0}],"properties":{"Node name for S&R":"StyleModelApply"},"widgets_values":[1,"multiply"]},{"id":706,"type":"CLIPVisionEncode","pos":[-180,180],"size":[253.60000610351562,78],"flags":{},"order":24,"mode":0,"inputs":[{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","link":2043},{"name":"image","localized_name":"image","type":"IMAGE","link":2081}],"outputs":[{"name":"CLIP_VISION_OUTPUT","localized_name":"CLIP_VISION_OUTPUT","type":"CLIP_VISION_OUTPUT","links":[2047]}],"properties":{"Node name for 
S&R":"CLIPVisionEncode"},"widgets_values":["center"]},{"id":7,"type":"VAEEncodeAdvanced","pos":[696.7778930664062,-164.97328186035156],"size":[261.2217712402344,279.3136901855469],"flags":{},"order":21,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":null},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":18}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":[],"slot_index":1},{"name":"mask","localized_name":"mask","type":"MASK","links":[],"slot_index":2},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[1399],"slot_index":3},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"VAEEncodeAdvanced","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":["false",1344,768,"red",false,"16_channels"]},{"id":690,"type":"LoadImage","pos":[-549.7396240234375,-227.43971252441406],"size":[315,314],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2062],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["ComfyUI_00464_.png","image"]},{"id":704,"type":"Note","pos":[324.8023986816406,-781.4505004882812],"size":[290.7107238769531,155.35317993164062],"flags":{},"order":2,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["ClownRegionalConditionings:\n\nTry raising or lowering weight, and changing the 
weight scheduler from beta57 to Karras (weakens more quickly), or to linear quadratic (stronger late).\n\nTry changing region_bleed_start_step (earlier will make the image blend together more), and end_step."],"color":"#432","bgcolor":"#653"},{"id":703,"type":"Note","pos":[384.9622802734375,346.1895751953125],"size":[278.04071044921875,88],"flags":{},"order":3,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["edge_width also creates some overlap around the edges of the mask."],"color":"#432","bgcolor":"#653"},{"id":397,"type":"VAEDecode","pos":[1403.6392822265625,-371.9699401855469],"size":[210,46],"flags":{},"order":39,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":2077},{"name":"vae","localized_name":"vae","type":"VAE","link":1328}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[1329],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[]},{"id":710,"type":"MaskPreview","pos":[-809.6506958007812,-582.2230834960938],"size":[210,246],"flags":{},"order":26,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":2054}],"outputs":[],"properties":{"Node name for S&R":"MaskPreview"},"widgets_values":[]},{"id":715,"type":"SolidMask","pos":[-1501.8455810546875,-483.931884765625],"size":[210,106],"flags":{},"order":4,"mode":0,"inputs":[],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[2064,2073],"slot_index":0}],"properties":{"Node name for S&R":"SolidMask"},"widgets_values":[1,1536,1536]},{"id":667,"type":"MaskPreview","pos":[-800.4617309570312,225.60794067382812],"size":[210,246],"flags":{},"order":31,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":1969}],"outputs":[],"properties":{"Node name for 
S&R":"MaskPreview"},"widgets_values":[]},{"id":676,"type":"InvertMask","pos":[-1225.793212890625,220.8433380126953],"size":[140,26],"flags":{},"order":16,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":2073}],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[1991],"slot_index":0}],"properties":{"Node name for S&R":"InvertMask"},"widgets_values":[]},{"id":719,"type":"MaskPreview","pos":[-806.2830810546875,-181.18017578125],"size":[210,246],"flags":{},"order":34,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":2072}],"outputs":[],"properties":{"Node name for S&R":"MaskPreview"},"widgets_values":[]},{"id":717,"type":"MaskComposite","pos":[-1232.8262939453125,-171.98712158203125],"size":[210,126],"flags":{},"order":27,"mode":0,"inputs":[{"name":"destination","localized_name":"destination","type":"MASK","link":2068},{"name":"source","localized_name":"source","type":"MASK","link":2069}],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[2071],"slot_index":0}],"properties":{"Node name for S&R":"MaskComposite"},"widgets_values":[512,512,"add"]},{"id":718,"type":"SolidMask","pos":[-1510.0887451171875,-5.13049840927124],"size":[210,106],"flags":{},"order":5,"mode":0,"inputs":[],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[2069,2076],"slot_index":0}],"properties":{"Node name for S&R":"SolidMask"},"widgets_values":[1,512,512]},{"id":716,"type":"SolidMask","pos":[-1504.66015625,-322.68243408203125],"size":[210,106],"flags":{},"order":6,"mode":0,"inputs":[],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[2065],"slot_index":0}],"properties":{"Node name for S&R":"SolidMask"},"widgets_values":[1,1024,1024]},{"id":701,"type":"Note","pos":[-1262.5018310546875,-634.6495971679688],"size":[342.05950927734375,88],"flags":{},"order":7,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["I usually just lazily draw 
masks in Load Image nodes (with some random image loaded), but for the sake of reproducibility, here's another approach."],"color":"#432","bgcolor":"#653"},{"id":712,"type":"Note","pos":[-1551.669921875,-639.0407104492188],"size":[245.76409912109375,91.6677017211914],"flags":{},"order":8,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["So long as these masks are all the same size, the regional conditioning nodes will handle resizing to the image size for you."],"color":"#432","bgcolor":"#653"},{"id":720,"type":"InvertMask","pos":[-989.771240234375,-173.28375244140625],"size":[140,26],"flags":{},"order":32,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":2071}],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[2072,2078],"slot_index":0}],"properties":{"Node name for S&R":"InvertMask"},"widgets_values":[]},{"id":709,"type":"MaskComposite","pos":[-1250.3681640625,-473.0709228515625],"size":[210,126],"flags":{},"order":17,"mode":0,"inputs":[{"name":"destination","localized_name":"destination","type":"MASK","link":2064},{"name":"source","localized_name":"source","type":"MASK","link":2065}],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[2054,2068,2079],"slot_index":0}],"properties":{"Node name for S&R":"MaskComposite"},"widgets_values":[256,256,"subtract"]},{"id":665,"type":"MaskComposite","pos":[-1049.337646484375,223.26406860351562],"size":[210,126],"flags":{},"order":25,"mode":0,"inputs":[{"name":"destination","localized_name":"destination","type":"MASK","link":1991},{"name":"source","localized_name":"source","type":"MASK","link":2076}],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[1969,2080],"slot_index":0}],"properties":{"Node name for 
S&R":"MaskComposite"},"widgets_values":[512,512,"add"]},{"id":705,"type":"LoadImage","pos":[-548.5830688476562,-622.7470092773438],"size":[315,314],"flags":{},"order":9,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2082],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["ComfyUI_00479_.png","image"]},{"id":694,"type":"LoadImage","pos":[-545.7549438476562,175.12576293945312],"size":[315,314],"flags":{},"order":10,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2081],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["ChatGPT Image Apr 29, 2025, 08_07_01 PM.png","image"]},{"id":401,"type":"ClownsharKSampler_Beta","pos":[1010,-370],"size":[340.55120849609375,666.8208618164062],"flags":{},"order":38,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":1967},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":1735},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":1732},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1399},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2077],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharKSampler_Beta","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":[0.5,"exponential/res_2s","bong_tangent",30,-1,1,1,109,"fixed","standard",true]},{"id":560,"type":"ClownRegionalConditionings","pos":[676.1644897460938,-499.31219482421875],"size":[278.4758605957031,266],"flags":{},"order":37,"mode":0,"inputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","shape":7,"link":1938},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"region_bleeds","localized_name":"region_bleeds","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","links":[1735],"slot_index":0}],"properties":{"Node name for S&R":"ClownRegionalConditionings"},"widgets_values":[0.5,1,15,"beta57",0,30,"boolean",false]},{"id":707,"type":"StyleModelApply","pos":[95.6487045288086,150],"size":[262,122],"flags":{},"order":30,"mode":0,"inputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","link":2045},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","link":2046},{"name":"clip_vision_output","localized_name":"clip_vision_output","type":"CLIP_VISION_OUTPUT","link":2047}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2048],"slot_index":0}],"properties":{"Node name for 
S&R":"StyleModelApply"},"widgets_values":[1,"multiply"]},{"id":708,"type":"ClownRegionalConditioning","pos":[404.6683044433594,155.1585693359375],"size":[211.60000610351562,122],"flags":{},"order":33,"mode":0,"inputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","shape":7,"link":null},{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","shape":7,"link":2048},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":2080}],"outputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","links":[2050],"slot_index":0}],"properties":{"Node name for S&R":"ClownRegionalConditioning"},"widgets_values":[false,128]},{"id":661,"type":"ClownRegionalConditioning","pos":[409.5088806152344,-556.8058471679688],"size":[211.60000610351562,122],"flags":{},"order":36,"mode":0,"inputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","shape":7,"link":1937},{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","shape":7,"link":2002},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":2079}],"outputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","links":[1938],"slot_index":0}],"properties":{"Node name for S&R":"ClownRegionalConditioning"},"widgets_values":[false,128]},{"id":596,"type":"ClownRegionalConditioning","pos":[407.416748046875,-245.54579162597656],"size":[211.60000610351562,122],"flags":{},"order":35,"mode":0,"inputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","shape":7,"link":2050},{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","shape":7,"link":2020},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":2078}],"outputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","links":[1937],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownRegionalConditioning"},"widgets_values":[false,128]}],"links":[[18,14,0,7,4,"VAE"],[1328,14,0,397,1,"VAE"],[1329,397,0,398,0,"IMAGE"],[1399,7,3,401,3,"LATENT"],[1706,490,0,541,0,"CLIP"],[1732,541,0,401,2,"CONDITIONING"],[1735,560,0,401,1,"CONDITIONING"],[1937,596,0,661,0,"COND_REGIONS"],[1938,661,0,560,0,"COND_REGIONS"],[1939,490,0,662,0,"CLIP"],[1963,663,0,664,0,"MODEL"],[1964,664,0,13,0,"*"],[1965,663,1,490,0,"*"],[1966,663,2,14,0,"*"],[1967,13,0,401,0,"MODEL"],[1969,665,0,667,0,"MASK"],[1991,676,0,665,0,"MASK"],[1999,679,0,678,1,"STYLE_MODEL"],[2000,663,4,679,0,"*"],[2001,663,3,680,0,"*"],[2002,678,0,661,1,"CONDITIONING"],[2003,681,0,678,2,"CLIP_VISION_OUTPUT"],[2004,680,0,681,0,"CLIP_VISION"],[2005,662,0,678,0,"CONDITIONING"],[2006,662,0,682,0,"CONDITIONING"],[2007,679,0,682,1,"STYLE_MODEL"],[2008,683,0,682,2,"CLIP_VISION_OUTPUT"],[2009,680,0,683,0,"CLIP_VISION"],[2020,682,0,596,1,"CONDITIONING"],[2043,680,0,706,0,"CLIP_VISION"],[2045,662,0,707,0,"CONDITIONING"],[2046,679,0,707,1,"STYLE_MODEL"],[2047,706,0,707,2,"CLIP_VISION_OUTPUT"],[2048,707,0,708,1,"CONDITIONING"],[2050,708,0,596,0,"COND_REGIONS"],[2054,709,0,710,0,"MASK"],[2062,690,0,683,1,"IMAGE"],[2064,715,0,709,0,"MASK"],[2065,716,0,709,1,"MASK"],[2068,709,0,717,0,"MASK"],[2069,718,0,717,1,"MASK"],[2071,717,0,720,0,"MASK"],[2072,720,0,719,0,"MASK"],[2073,715,0,676,0,"MASK"],[2076,718,0,665,1,"MASK"],[2077,401,0,397,0,"LATENT"],[2078,720,0,596,2,"MASK"],[2079,709,0,661,2,"MASK"],[2080,665,0,708,2,"MASK"],[2081,694,0,706,1,"IMAGE"],[2082,705,0,681,1,"IMAGE"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.4420993610650337,"offset":[3089.9291694729854,951.347346350063]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0,"ue_links":[],"VHS_MetadataImage":true,"VHS_KeepIntermediate":true},"version":0.4}
================================================
FILE: example_workflows/flux regional redux (3 zone, overlapping).json
================================================
{"last_node_id":715,"last_link_id":2063,"nodes":[{"id":13,"type":"Reroute","pos":[1300,-790],"size":[75,26],"flags":{},"order":17,"mode":0,"inputs":[{"name":"","type":"*","link":1964}],"outputs":[{"name":"","type":"MODEL","links":[1967],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":490,"type":"Reroute","pos":[1300,-750],"size":[75,26],"flags":{},"order":12,"mode":0,"inputs":[{"name":"","type":"*","link":1965}],"outputs":[{"name":"","type":"CLIP","links":[1706,1939],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":541,"type":"CLIPTextEncode","pos":[692.1508178710938,183.7528839111328],"size":[265.775390625,113.01970672607422],"flags":{},"order":18,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":1706}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[1732],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["blurry, out of focus, shallow depth of field, low quality, bad quality, low detail, mutated, jpeg artifacts, compression 
artifacts,"]},{"id":14,"type":"Reroute","pos":[1300,-710],"size":[75,26],"flags":{},"order":13,"mode":0,"inputs":[{"name":"","type":"*","link":1966}],"outputs":[{"name":"","type":"VAE","links":[18,1328],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":680,"type":"Reroute","pos":[1310,-660],"size":[75,26],"flags":{},"order":14,"mode":0,"inputs":[{"name":"","type":"*","link":2001}],"outputs":[{"name":"","type":"CLIP_VISION","links":[2004,2009,2043]}],"properties":{"showOutputText":false,"horizontal":false}},{"id":678,"type":"StyleModelApply","pos":[101.3630142211914,-560.2020874023438],"size":[262,122],"flags":{},"order":26,"mode":0,"inputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","link":2005},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","link":1999},{"name":"clip_vision_output","localized_name":"clip_vision_output","type":"CLIP_VISION_OUTPUT","link":2003}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2002],"slot_index":0}],"properties":{"Node name for S&R":"StyleModelApply"},"widgets_values":[1,"multiply"]},{"id":681,"type":"CLIPVisionEncode","pos":[-173.92124938964844,-524.1537475585938],"size":[253.60000610351562,78],"flags":{},"order":21,"mode":0,"inputs":[{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","link":2004},{"name":"image","localized_name":"image","type":"IMAGE","link":2028}],"outputs":[{"name":"CLIP_VISION_OUTPUT","localized_name":"CLIP_VISION_OUTPUT","type":"CLIP_VISION_OUTPUT","links":[2003]}],"properties":{"Node name for S&R":"CLIPVisionEncode"},"widgets_values":["center"]},{"id":676,"type":"InvertMask","pos":[-1270,-450],"size":[140,26],"flags":{},"order":16,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":1990}],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[1991,2051],"slot_index":0}],"properties":{"Node name for 
S&R":"InvertMask"},"widgets_values":[]},{"id":667,"type":"MaskPreview","pos":[-840,-570],"size":[210,246],"flags":{},"order":29,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":1969}],"outputs":[],"properties":{"Node name for S&R":"MaskPreview"},"widgets_values":[]},{"id":661,"type":"ClownRegionalConditioning","pos":[411.9298095703125,-539.053955078125],"size":[211.60000610351562,122],"flags":{},"order":35,"mode":0,"inputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","shape":7,"link":1937},{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","shape":7,"link":2002},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":2036}],"outputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","links":[1938],"slot_index":0}],"properties":{"Node name for S&R":"ClownRegionalConditioning"},"widgets_values":[false,256]},{"id":701,"type":"Note","pos":[-1378.6959228515625,-637.0702514648438],"size":[342.05950927734375,88],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["I usually just lazily draw masks in Load Image nodes (with some random image loaded), but for the sake of reproducibility, here's another approach."],"color":"#432","bgcolor":"#653"},{"id":663,"type":"FluxLoader","pos":[654.6221923828125,-858.3792724609375],"size":[374.41741943359375,282],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1963],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[1965],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[1966],"slot_index":2},{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","links":[2001],"slot_index":3},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","links":[2000],"slot_index":4}],"properties":{"Node name for 
S&R":"FluxLoader"},"widgets_values":["colossusProjectFlux_v42AIO.safetensors","fp8_e4m3fn_fast",".use_ckpt_clip",".none",".use_ckpt_vae","sigclip_vision_patch14_384.safetensors","flux1-redux-dev.safetensors"]},{"id":664,"type":"ReFluxPatcher","pos":[1064.7325439453125,-863.0516967773438],"size":[210,82],"flags":{},"order":11,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1963}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1964],"slot_index":0}],"properties":{"Node name for S&R":"ReFluxPatcher"},"widgets_values":["float64",true]},{"id":679,"type":"Reroute","pos":[1300,-610],"size":[75,26],"flags":{},"order":15,"mode":0,"inputs":[{"name":"","type":"*","link":2000}],"outputs":[{"name":"","type":"STYLE_MODEL","links":[1999,2007,2046]}],"properties":{"showOutputText":false,"horizontal":false}},{"id":662,"type":"CLIPTextEncode","pos":[-140.3179168701172,-670.337158203125],"size":[210,88],"flags":{"collapsed":false},"order":19,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":1939}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2005,2006,2045],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[""]},{"id":398,"type":"SaveImage","pos":[1379.9996337890625,-267.2835998535156],"size":[341.7508850097656,561.0067749023438],"flags":{},"order":39,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":1329}],"outputs":[],"properties":{"Node name for 
S&R":"SaveImage","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["ComfyUI"]},{"id":683,"type":"CLIPVisionEncode","pos":[-170,-220],"size":[253.60000610351562,78],"flags":{},"order":22,"mode":0,"inputs":[{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","link":2009},{"name":"image","localized_name":"image","type":"IMAGE","link":2062}],"outputs":[{"name":"CLIP_VISION_OUTPUT","localized_name":"CLIP_VISION_OUTPUT","type":"CLIP_VISION_OUTPUT","links":[2008]}],"properties":{"Node name for S&R":"CLIPVisionEncode"},"widgets_values":["center"]},{"id":682,"type":"StyleModelApply","pos":[100,-250],"size":[262,122],"flags":{},"order":27,"mode":0,"inputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","link":2006},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","link":2007},{"name":"clip_vision_output","localized_name":"clip_vision_output","type":"CLIP_VISION_OUTPUT","link":2008}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2020],"slot_index":0}],"properties":{"Node name for S&R":"StyleModelApply"},"widgets_values":[1,"multiply"]},{"id":596,"type":"ClownRegionalConditioning","pos":[425.9762268066406,-243.12513732910156],"size":[211.60000610351562,122],"flags":{},"order":34,"mode":0,"inputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","shape":7,"link":2050},{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","shape":7,"link":2020},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":2042}],"outputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","links":[1937],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownRegionalConditioning"},"widgets_values":[false,256]},{"id":706,"type":"CLIPVisionEncode","pos":[-180,180],"size":[253.60000610351562,78],"flags":{},"order":23,"mode":0,"inputs":[{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","link":2043},{"name":"image","localized_name":"image","type":"IMAGE","link":2061}],"outputs":[{"name":"CLIP_VISION_OUTPUT","localized_name":"CLIP_VISION_OUTPUT","type":"CLIP_VISION_OUTPUT","links":[2047]}],"properties":{"Node name for S&R":"CLIPVisionEncode"},"widgets_values":["center"]},{"id":707,"type":"StyleModelApply","pos":[90,150],"size":[262,122],"flags":{},"order":28,"mode":0,"inputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","link":2045},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","link":2046},{"name":"clip_vision_output","localized_name":"clip_vision_output","type":"CLIP_VISION_OUTPUT","link":2047}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2048],"slot_index":0}],"properties":{"Node name for S&R":"StyleModelApply"},"widgets_values":[1,"multiply"]},{"id":708,"type":"ClownRegionalConditioning","pos":[420,160],"size":[211.60000610351562,122],"flags":{},"order":32,"mode":0,"inputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","shape":7,"link":null},{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","shape":7,"link":2048},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":2057}],"outputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","links":[2050],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownRegionalConditioning"},"widgets_values":[false,256]},{"id":665,"type":"MaskComposite","pos":[-1100,-450],"size":[210,126],"flags":{},"order":24,"mode":0,"inputs":[{"name":"destination","localized_name":"destination","type":"MASK","link":1991},{"name":"source","localized_name":"source","type":"MASK","link":1995}],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[1969,2036,2038],"slot_index":0}],"properties":{"Node name for S&R":"MaskComposite"},"widgets_values":[0,0,"add"]},{"id":670,"type":"MaskPreview","pos":[-840.8076782226562,-235.62042236328125],"size":[210,246],"flags":{},"order":33,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":2041}],"outputs":[],"properties":{"Node name for S&R":"MaskPreview"},"widgets_values":[]},{"id":700,"type":"MaskFlip+","pos":[-1099.420166015625,-236.15890502929688],"size":[210,58],"flags":{},"order":30,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":2038}],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[2041,2042],"slot_index":0}],"properties":{"Node name for S&R":"MaskFlip+"},"widgets_values":["x"]},{"id":710,"type":"MaskPreview","pos":[-847.5751953125,166.58413696289062],"size":[210,246],"flags":{},"order":31,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":2054}],"outputs":[],"properties":{"Node name for S&R":"MaskPreview"},"widgets_values":[]},{"id":397,"type":"VAEDecode","pos":[1403.6392822265625,-371.9699401855469],"size":[210,46],"flags":{},"order":38,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":2056},{"name":"vae","localized_name":"vae","type":"VAE","link":1328}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[1329],"slot_index":0}],"properties":{"Node name for 
S&R":"VAEDecode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[]},{"id":401,"type":"ClownsharKSampler_Beta","pos":[1010,-370],"size":[340.55120849609375,666.8208618164062],"flags":{},"order":37,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":1967},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":1735},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":1732},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1399},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2056],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharKSampler_Beta","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":[0.5,"exponential/res_2s","bong_tangent",20,-1,1,1,109,"fixed","standard",true]},{"id":7,"type":"VAEEncodeAdvanced","pos":[696.7778930664062,-164.97328186035156],"size":[261.2217712402344,279.3136901855469],"flags":{},"order":20,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":null},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":18}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":[],"slot_index":1},{"name":"mask","localized_name":"mask","type":"MASK","links":[],"slot_index":2},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[1399],"slot_index":3},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"VAEEncodeAdvanced","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":["false",1344,768,"red",false,"16_channels"]},{"id":694,"type":"LoadImage","pos":[-536.0714111328125,-640.6544189453125],"size":[315,314],"flags":{},"order":3,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2028],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["ChatGPT Image Apr 29, 2025, 08_07_01 
PM.png","image"]},{"id":666,"type":"SolidMask","pos":[-1500,-450],"size":[210,106],"flags":{},"order":4,"mode":0,"inputs":[],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[1990],"slot_index":0}],"properties":{"Node name for S&R":"SolidMask"},"widgets_values":[1,1536,512]},{"id":712,"type":"Note","pos":[-1511.985107421875,-66.87181854248047],"size":[245.76409912109375,91.6677017211914],"flags":{},"order":5,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["So long as these masks are all the same size, the regional conditioning nodes will handle resizing to the image size for you."],"color":"#432","bgcolor":"#653"},{"id":668,"type":"SolidMask","pos":[-1502.6644287109375,-289.3330993652344],"size":[210,106],"flags":{},"order":6,"mode":0,"inputs":[],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[1995],"slot_index":0}],"properties":{"Node name for S&R":"SolidMask"},"widgets_values":[1,512,512]},{"id":690,"type":"LoadImage","pos":[-549.7396240234375,-227.43971252441406],"size":[315,314],"flags":{},"order":7,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2062],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["ComfyUI_00464_.png","image"]},{"id":705,"type":"LoadImage","pos":[-551.003662109375,157.5296173095703],"size":[315,314],"flags":{},"order":8,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2061],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for 
S&R":"LoadImage"},"widgets_values":["ComfyUI_00479_.png","image"]},{"id":560,"type":"ClownRegionalConditionings","pos":[676.1644897460938,-499.31219482421875],"size":[278.4758605957031,266],"flags":{},"order":36,"mode":0,"inputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","shape":7,"link":1938},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"region_bleeds","localized_name":"region_bleeds","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","links":[1735],"slot_index":0}],"properties":{"Node name for S&R":"ClownRegionalConditionings"},"widgets_values":[0.5,1,10,"beta57",0,20,"boolean",false]},{"id":704,"type":"Note","pos":[324.8023986816406,-781.4505004882812],"size":[290.7107238769531,155.35317993164062],"flags":{},"order":9,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["ClownRegionalConditionings:\n\nTry raising or lowering weight, and changing the weight scheduler from beta57 to Karras (weakens more quickly), or to linear quadratic (stronger late).\n\nTry changing region_bleed_start_step (earlier will make the image blend together more), and end_step."],"color":"#432","bgcolor":"#653"},{"id":715,"type":"SolidMask","pos":[-1486.6612548828125,192.47415161132812],"size":[210,106],"flags":{},"order":10,"mode":0,"inputs":[],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[2063],"slot_index":0}],"properties":{"Node name for 
S&R":"SolidMask"},"widgets_values":[1,1280,512]},{"id":709,"type":"MaskComposite","pos":[-1104.1712646484375,170.6186981201172],"size":[210,126],"flags":{},"order":25,"mode":0,"inputs":[{"name":"destination","localized_name":"destination","type":"MASK","link":2051},{"name":"source","localized_name":"source","type":"MASK","link":2063}],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[2054,2057],"slot_index":0}],"properties":{"Node name for S&R":"MaskComposite"},"widgets_values":[128,0,"add"]},{"id":703,"type":"Note","pos":[384.9622802734375,346.1895751953125],"size":[278.04071044921875,88],"flags":{},"order":2,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["edge_width also creates some overlap around the edges of the mask."],"color":"#432","bgcolor":"#653"}],"links":[[18,14,0,7,4,"VAE"],[1328,14,0,397,1,"VAE"],[1329,397,0,398,0,"IMAGE"],[1399,7,3,401,3,"LATENT"],[1706,490,0,541,0,"CLIP"],[1732,541,0,401,2,"CONDITIONING"],[1735,560,0,401,1,"CONDITIONING"],[1937,596,0,661,0,"COND_REGIONS"],[1938,661,0,560,0,"COND_REGIONS"],[1939,490,0,662,0,"CLIP"],[1963,663,0,664,0,"MODEL"],[1964,664,0,13,0,"*"],[1965,663,1,490,0,"*"],[1966,663,2,14,0,"*"],[1967,13,0,401,0,"MODEL"],[1969,665,0,667,0,"MASK"],[1990,666,0,676,0,"MASK"],[1991,676,0,665,0,"MASK"],[1995,668,0,665,1,"MASK"],[1999,679,0,678,1,"STYLE_MODEL"],[2000,663,4,679,0,"*"],[2001,663,3,680,0,"*"],[2002,678,0,661,1,"CONDITIONING"],[2003,681,0,678,2,"CLIP_VISION_OUTPUT"],[2004,680,0,681,0,"CLIP_VISION"],[2005,662,0,678,0,"CONDITIONING"],[2006,662,0,682,0,"CONDITIONING"],[2007,679,0,682,1,"STYLE_MODEL"],[2008,683,0,682,2,"CLIP_VISION_OUTPUT"],[2009,680,0,683,0,"CLIP_VISION"],[2020,682,0,596,1,"CONDITIONING"],[2028,694,0,681,1,"IMAGE"],[2036,665,0,661,2,"MASK"],[2038,665,0,700,0,"MASK"],[2041,700,0,670,0,"MASK"],[2042,700,0,596,2,"MASK"],[2043,680,0,706,0,"CLIP_VISION"],[2045,662,0,707,0,"CONDITIONING"],[2046,679,0,707,1,"STYLE_MODEL"],[2047,706,0,707,2,"CLIP_VISION_OUTPUT"],[204
8,707,0,708,1,"CONDITIONING"],[2050,708,0,596,0,"COND_REGIONS"],[2051,676,0,709,0,"MASK"],[2054,709,0,710,0,"MASK"],[2056,401,0,397,0,"LATENT"],[2057,709,0,708,2,"MASK"],[2061,705,0,706,1,"IMAGE"],[2062,690,0,683,1,"IMAGE"],[2063,715,0,709,1,"MASK"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.5863092971715371,"offset":[2841.6279889989714,922.4028503570233]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0,"ue_links":[],"VHS_MetadataImage":true,"VHS_KeepIntermediate":true},"version":0.4}
================================================
FILE: example_workflows/flux regional redux (3 zones).json
================================================
{"last_node_id":714,"last_link_id":2062,"nodes":[{"id":13,"type":"Reroute","pos":[1300,-790],"size":[75,26],"flags":{},"order":16,"mode":0,"inputs":[{"name":"","type":"*","link":1964}],"outputs":[{"name":"","type":"MODEL","links":[1967],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":490,"type":"Reroute","pos":[1300,-750],"size":[75,26],"flags":{},"order":11,"mode":0,"inputs":[{"name":"","type":"*","link":1965}],"outputs":[{"name":"","type":"CLIP","links":[1706,1939],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":541,"type":"CLIPTextEncode","pos":[692.1508178710938,183.7528839111328],"size":[265.775390625,113.01970672607422],"flags":{},"order":17,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":1706}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[1732],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["blurry, out of focus, shallow depth of field, low quality, bad quality, low detail, mutated, jpeg artifacts, compression 
artifacts,"]},{"id":14,"type":"Reroute","pos":[1300,-710],"size":[75,26],"flags":{},"order":12,"mode":0,"inputs":[{"name":"","type":"*","link":1966}],"outputs":[{"name":"","type":"VAE","links":[18,1328],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":680,"type":"Reroute","pos":[1310,-660],"size":[75,26],"flags":{},"order":13,"mode":0,"inputs":[{"name":"","type":"*","link":2001}],"outputs":[{"name":"","type":"CLIP_VISION","links":[2004,2009,2043]}],"properties":{"showOutputText":false,"horizontal":false}},{"id":678,"type":"StyleModelApply","pos":[101.3630142211914,-560.2020874023438],"size":[262,122],"flags":{},"order":25,"mode":0,"inputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","link":2005},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","link":1999},{"name":"clip_vision_output","localized_name":"clip_vision_output","type":"CLIP_VISION_OUTPUT","link":2003}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2002],"slot_index":0}],"properties":{"Node name for S&R":"StyleModelApply"},"widgets_values":[1,"multiply"]},{"id":681,"type":"CLIPVisionEncode","pos":[-173.92124938964844,-524.1537475585938],"size":[253.60000610351562,78],"flags":{},"order":20,"mode":0,"inputs":[{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","link":2004},{"name":"image","localized_name":"image","type":"IMAGE","link":2028}],"outputs":[{"name":"CLIP_VISION_OUTPUT","localized_name":"CLIP_VISION_OUTPUT","type":"CLIP_VISION_OUTPUT","links":[2003]}],"properties":{"Node name for 
S&R":"CLIPVisionEncode"},"widgets_values":["center"]},{"id":560,"type":"ClownRegionalConditionings","pos":[676.1644897460938,-499.31219482421875],"size":[278.4758605957031,266],"flags":{},"order":35,"mode":0,"inputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","shape":7,"link":1938},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"region_bleeds","localized_name":"region_bleeds","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","links":[1735],"slot_index":0}],"properties":{"Node name for S&R":"ClownRegionalConditionings"},"widgets_values":[0.5,1,14,"beta57",0,20,"boolean",false]},{"id":676,"type":"InvertMask","pos":[-1270,-450],"size":[140,26],"flags":{},"order":15,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":1990}],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[1991,2051],"slot_index":0}],"properties":{"Node name for S&R":"InvertMask"},"widgets_values":[]},{"id":667,"type":"MaskPreview","pos":[-840,-570],"size":[210,246],"flags":{},"order":28,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":1969}],"outputs":[],"properties":{"Node name for S&R":"MaskPreview"},"widgets_values":[]},{"id":661,"type":"ClownRegionalConditioning","pos":[411.9298095703125,-539.053955078125],"size":[211.60000610351562,122],"flags":{},"order":34,"mode":0,"inputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","shape":7,"link":1937},{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","shape":7,"link":2002},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":2036}],"outputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","links":[1938],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownRegionalConditioning"},"widgets_values":[false,256]},{"id":701,"type":"Note","pos":[-1378.6959228515625,-637.0702514648438],"size":[342.05950927734375,88],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["I usually just lazily draw masks in Load Image nodes (with some random image loaded), but for the sake of reproducibility, here's another approach."],"color":"#432","bgcolor":"#653"},{"id":663,"type":"FluxLoader","pos":[654.6221923828125,-858.3792724609375],"size":[374.41741943359375,282],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1963],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[1965],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[1966],"slot_index":2},{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","links":[2001],"slot_index":3},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","links":[2000],"slot_index":4}],"properties":{"Node name for S&R":"FluxLoader"},"widgets_values":["colossusProjectFlux_v42AIO.safetensors","fp8_e4m3fn_fast",".use_ckpt_clip",".none",".use_ckpt_vae","sigclip_vision_patch14_384.safetensors","flux1-redux-dev.safetensors"]},{"id":664,"type":"ReFluxPatcher","pos":[1064.7325439453125,-863.0516967773438],"size":[210,82],"flags":{},"order":10,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1963}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1964],"slot_index":0}],"properties":{"Node name for 
S&R":"ReFluxPatcher"},"widgets_values":["float64",true]},{"id":679,"type":"Reroute","pos":[1300,-610],"size":[75,26],"flags":{},"order":14,"mode":0,"inputs":[{"name":"","type":"*","link":2000}],"outputs":[{"name":"","type":"STYLE_MODEL","links":[1999,2007,2046]}],"properties":{"showOutputText":false,"horizontal":false}},{"id":662,"type":"CLIPTextEncode","pos":[-140.3179168701172,-670.337158203125],"size":[210,88],"flags":{"collapsed":false},"order":18,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":1939}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2005,2006,2045],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[""]},{"id":704,"type":"Note","pos":[324.8023986816406,-781.4505004882812],"size":[290.7107238769531,155.35317993164062],"flags":{},"order":2,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["ClownRegionalConditionings:\n\nTry raising or lowering weight, and changing the weight scheduler from beta57 to Karras (weakens more quickly), or to linear quadratic (stronger late).\n\nTry changing region_bleed_start_step, and end_step."],"color":"#432","bgcolor":"#653"},{"id":398,"type":"SaveImage","pos":[1379.9996337890625,-267.2835998535156],"size":[341.7508850097656,561.0067749023438],"flags":{},"order":38,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":1329}],"outputs":[],"properties":{"Node name for S&R":"SaveImage","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["ComfyUI"]},{"id":703,"type":"Note","pos":[-84.50921630859375,-859.7656860351562],"size":[278.04071044921875,88],"flags":{},"order":3,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["edge_width also creates some overlap around the edges of the 
mask."],"color":"#432","bgcolor":"#653"},{"id":683,"type":"CLIPVisionEncode","pos":[-170,-220],"size":[253.60000610351562,78],"flags":{},"order":21,"mode":0,"inputs":[{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","link":2009},{"name":"image","localized_name":"image","type":"IMAGE","link":2062}],"outputs":[{"name":"CLIP_VISION_OUTPUT","localized_name":"CLIP_VISION_OUTPUT","type":"CLIP_VISION_OUTPUT","links":[2008]}],"properties":{"Node name for S&R":"CLIPVisionEncode"},"widgets_values":["center"]},{"id":682,"type":"StyleModelApply","pos":[100,-250],"size":[262,122],"flags":{},"order":26,"mode":0,"inputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","link":2006},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","link":2007},{"name":"clip_vision_output","localized_name":"clip_vision_output","type":"CLIP_VISION_OUTPUT","link":2008}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2020],"slot_index":0}],"properties":{"Node name for S&R":"StyleModelApply"},"widgets_values":[1,"multiply"]},{"id":596,"type":"ClownRegionalConditioning","pos":[425.9762268066406,-243.12513732910156],"size":[211.60000610351562,122],"flags":{},"order":33,"mode":0,"inputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","shape":7,"link":2050},{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","shape":7,"link":2020},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":2042}],"outputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","links":[1937],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownRegionalConditioning"},"widgets_values":[false,256]},{"id":706,"type":"CLIPVisionEncode","pos":[-180,180],"size":[253.60000610351562,78],"flags":{},"order":22,"mode":0,"inputs":[{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","link":2043},{"name":"image","localized_name":"image","type":"IMAGE","link":2061}],"outputs":[{"name":"CLIP_VISION_OUTPUT","localized_name":"CLIP_VISION_OUTPUT","type":"CLIP_VISION_OUTPUT","links":[2047]}],"properties":{"Node name for S&R":"CLIPVisionEncode"},"widgets_values":["center"]},{"id":707,"type":"StyleModelApply","pos":[90,150],"size":[262,122],"flags":{},"order":27,"mode":0,"inputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","link":2045},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","link":2046},{"name":"clip_vision_output","localized_name":"clip_vision_output","type":"CLIP_VISION_OUTPUT","link":2047}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2048],"slot_index":0}],"properties":{"Node name for S&R":"StyleModelApply"},"widgets_values":[1,"multiply"]},{"id":708,"type":"ClownRegionalConditioning","pos":[420,160],"size":[211.60000610351562,122],"flags":{},"order":31,"mode":0,"inputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","shape":7,"link":null},{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","shape":7,"link":2048},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":2057}],"outputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","links":[2050],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownRegionalConditioning"},"widgets_values":[false,256]},{"id":665,"type":"MaskComposite","pos":[-1100,-450],"size":[210,126],"flags":{},"order":23,"mode":0,"inputs":[{"name":"destination","localized_name":"destination","type":"MASK","link":1991},{"name":"source","localized_name":"source","type":"MASK","link":1995}],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[1969,2036,2038],"slot_index":0}],"properties":{"Node name for S&R":"MaskComposite"},"widgets_values":[0,0,"add"]},{"id":670,"type":"MaskPreview","pos":[-840.8076782226562,-235.62042236328125],"size":[210,246],"flags":{},"order":32,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":2041}],"outputs":[],"properties":{"Node name for S&R":"MaskPreview"},"widgets_values":[]},{"id":700,"type":"MaskFlip+","pos":[-1099.420166015625,-236.15890502929688],"size":[210,58],"flags":{},"order":29,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":2038}],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[2041,2042],"slot_index":0}],"properties":{"Node name for S&R":"MaskFlip+"},"widgets_values":["x"]},{"id":710,"type":"MaskPreview","pos":[-847.5751953125,166.58413696289062],"size":[210,246],"flags":{},"order":30,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":2054}],"outputs":[],"properties":{"Node name for S&R":"MaskPreview"},"widgets_values":[]},{"id":397,"type":"VAEDecode","pos":[1403.6392822265625,-371.9699401855469],"size":[210,46],"flags":{},"order":37,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":2056},{"name":"vae","localized_name":"vae","type":"VAE","link":1328}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[1329],"slot_index":0}],"properties":{"Node name for 
S&R":"VAEDecode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[]},{"id":401,"type":"ClownsharKSampler_Beta","pos":[1010,-370],"size":[340.55120849609375,666.8208618164062],"flags":{},"order":36,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":1967},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":1735},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":1732},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1399},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2056],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharKSampler_Beta","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":[0.5,"exponential/res_2s","bong_tangent",20,-1,1,1,109,"fixed","standard",true]},{"id":7,"type":"VAEEncodeAdvanced","pos":[696.7778930664062,-164.97328186035156],"size":[261.2217712402344,279.3136901855469],"flags":{},"order":19,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":null},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":18}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":[],"slot_index":1},{"name":"mask","localized_name":"mask","type":"MASK","links":[],"slot_index":2},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[1399],"slot_index":3},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"VAEEncodeAdvanced","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":["false",1344,768,"red",false,"16_channels"]},{"id":694,"type":"LoadImage","pos":[-536.0714111328125,-640.6544189453125],"size":[315,314],"flags":{},"order":4,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2028],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["ChatGPT Image Apr 29, 2025, 08_07_01 
PM.png","image"]},{"id":666,"type":"SolidMask","pos":[-1500,-450],"size":[210,106],"flags":{},"order":5,"mode":0,"inputs":[],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[1990],"slot_index":0}],"properties":{"Node name for S&R":"SolidMask"},"widgets_values":[1,1536,512]},{"id":712,"type":"Note","pos":[-1511.985107421875,-66.87181854248047],"size":[245.76409912109375,91.6677017211914],"flags":{},"order":6,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["So long as these masks are all the same size, the regional conditioning nodes will handle resizing to the image size for you."],"color":"#432","bgcolor":"#653"},{"id":709,"type":"MaskComposite","pos":[-1104.1712646484375,170.6186981201172],"size":[210,126],"flags":{},"order":24,"mode":0,"inputs":[{"name":"destination","localized_name":"destination","type":"MASK","link":2051},{"name":"source","localized_name":"source","type":"MASK","link":2060}],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[2054,2057],"slot_index":0}],"properties":{"Node name for S&R":"MaskComposite"},"widgets_values":[512,0,"add"]},{"id":668,"type":"SolidMask","pos":[-1502.6644287109375,-289.3330993652344],"size":[210,106],"flags":{},"order":7,"mode":0,"inputs":[],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[1995,2060],"slot_index":0}],"properties":{"Node name for S&R":"SolidMask"},"widgets_values":[1,512,512]},{"id":690,"type":"LoadImage","pos":[-549.7396240234375,-227.43971252441406],"size":[315,314],"flags":{},"order":8,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2062],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for 
S&R":"LoadImage"},"widgets_values":["ComfyUI_00464_.png","image"]},{"id":705,"type":"LoadImage","pos":[-551.003662109375,157.5296173095703],"size":[315,314],"flags":{},"order":9,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2061],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["ComfyUI_00479_.png","image"]}],"links":[[18,14,0,7,4,"VAE"],[1328,14,0,397,1,"VAE"],[1329,397,0,398,0,"IMAGE"],[1399,7,3,401,3,"LATENT"],[1706,490,0,541,0,"CLIP"],[1732,541,0,401,2,"CONDITIONING"],[1735,560,0,401,1,"CONDITIONING"],[1937,596,0,661,0,"COND_REGIONS"],[1938,661,0,560,0,"COND_REGIONS"],[1939,490,0,662,0,"CLIP"],[1963,663,0,664,0,"MODEL"],[1964,664,0,13,0,"*"],[1965,663,1,490,0,"*"],[1966,663,2,14,0,"*"],[1967,13,0,401,0,"MODEL"],[1969,665,0,667,0,"MASK"],[1990,666,0,676,0,"MASK"],[1991,676,0,665,0,"MASK"],[1995,668,0,665,1,"MASK"],[1999,679,0,678,1,"STYLE_MODEL"],[2000,663,4,679,0,"*"],[2001,663,3,680,0,"*"],[2002,678,0,661,1,"CONDITIONING"],[2003,681,0,678,2,"CLIP_VISION_OUTPUT"],[2004,680,0,681,0,"CLIP_VISION"],[2005,662,0,678,0,"CONDITIONING"],[2006,662,0,682,0,"CONDITIONING"],[2007,679,0,682,1,"STYLE_MODEL"],[2008,683,0,682,2,"CLIP_VISION_OUTPUT"],[2009,680,0,683,0,"CLIP_VISION"],[2020,682,0,596,1,"CONDITIONING"],[2028,694,0,681,1,"IMAGE"],[2036,665,0,661,2,"MASK"],[2038,665,0,700,0,"MASK"],[2041,700,0,670,0,"MASK"],[2042,700,0,596,2,"MASK"],[2043,680,0,706,0,"CLIP_VISION"],[2045,662,0,707,0,"CONDITIONING"],[2046,679,0,707,1,"STYLE_MODEL"],[2047,706,0,707,2,"CLIP_VISION_OUTPUT"],[2048,707,0,708,1,"CONDITIONING"],[2050,708,0,596,0,"COND_REGIONS"],[2051,676,0,709,0,"MASK"],[2054,709,0,710,0,"MASK"],[2056,401,0,397,0,"LATENT"],[2057,709,0,708,2,"MASK"],[2060,668,0,709,1,"MASK"],[2061,705,0,706,1,"IMAGE"],[2062,690,0,683,1,"IMAGE"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.586309297171537,"offset":[2736.1731738476205,939.95
77246808323]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0,"ue_links":[],"VHS_MetadataImage":true,"VHS_KeepIntermediate":true},"version":0.4}
================================================
FILE: example_workflows/flux style antiblur.json
================================================
{"last_node_id":739,"last_link_id":2113,"nodes":[{"id":13,"type":"Reroute","pos":[1280,-650],"size":[75,26],"flags":{},"order":7,"mode":0,"inputs":[{"name":"","type":"*","link":1964}],"outputs":[{"name":"","type":"MODEL","links":[1967],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":490,"type":"Reroute","pos":[1280,-610],"size":[75,26],"flags":{},"order":5,"mode":0,"inputs":[{"name":"","type":"*","link":1965}],"outputs":[{"name":"","type":"CLIP","links":[1939],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":14,"type":"Reroute","pos":[1280,-570],"size":[75,26],"flags":{},"order":6,"mode":0,"inputs":[{"name":"","type":"*","link":1966}],"outputs":[{"name":"","type":"VAE","links":[18,1328],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":398,"type":"SaveImage","pos":[1379.9996337890625,-267.2835998535156],"size":[341.7508850097656,561.0067749023438],"flags":{},"order":13,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":1329}],"outputs":[],"properties":{"Node name for S&R":"SaveImage","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["ComfyUI"]},{"id":663,"type":"FluxLoader","pos":[630,-720],"size":[374.41741943359375,282],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1963],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[1965],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[1966],"slot_index":2},{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","links":[],"slot_index":3},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","links":[],"slot_index":4}],"properties":{"Node name for 
S&R":"FluxLoader"},"widgets_values":["colossusProjectFlux_v42AIO.safetensors","fp8_e4m3fn_fast",".use_ckpt_clip",".none",".use_ckpt_vae",".none",".none"]},{"id":664,"type":"ReFluxPatcher","pos":[1040,-720],"size":[210,82],"flags":{},"order":4,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1963}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1964],"slot_index":0}],"properties":{"Node name for S&R":"ReFluxPatcher"},"widgets_values":["float64",true]},{"id":397,"type":"VAEDecode","pos":[1382.3662109375,-374.17059326171875],"size":[210,46],"flags":{},"order":12,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":2096},{"name":"vae","localized_name":"vae","type":"VAE","link":1328}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[1329],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[]},{"id":7,"type":"VAEEncodeAdvanced","pos":[412.2475280761719,-199.0681915283203],"size":[261.2217712402344,279.3136901855469],"flags":{},"order":9,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":2113},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":18}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[2100],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":[],"slot_index":1},{"name":"mask","localized_name":"mask","type":"MASK","links":[],"slot_index":2},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[1399],"slot_index":3},{"name":"width","localized_name":"width","type":"INT","links":null},{"na
me":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"VAEEncodeAdvanced","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":["false",1024,1024,"red",false,"16_channels"]},{"id":662,"type":"CLIPTextEncode","pos":[761.3005981445312,-357.2689208984375],"size":[210,102.54972839355469],"flags":{"collapsed":false},"order":8,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":1939}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2098],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["a woman wearing a red flannel shirt and a cute shark plush blue hat, a college campus, brick buildings"]},{"id":727,"type":"Note","pos":[412.8926086425781,-351.8606872558594],"size":[272.4425048828125,88],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["This approach can be combined with the regional conditioning anti-blur approach for an even more powerful 
effect."],"color":"#432","bgcolor":"#653"},{"id":401,"type":"ClownsharKSampler_Beta","pos":[1010,-370],"size":[340.55120849609375,666.8208618164062],"flags":{},"order":11,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":1967},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":2098},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1399},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":2099},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2096],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":[0.5,"multistep/res_2m","bong_tangent",30,-1,1,1,7,"fixed","standard",true]},{"id":724,"type":"ClownGuide_Style_Beta","pos":[703.7374267578125,-198.63233947753906],"size":[262.8634033203125,286],"flags":{},"order":10,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":2100},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[2099],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownGuide_Style_Beta"},"widgets_values":["positive","WCT",1,1,"constant",0,10,false]},{"id":739,"type":"LoadImage","pos":[70.82455444335938,-201.66342163085938],"size":[315,314],"flags":{},"order":3,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2113],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["pasted/image (655).png","image"]},{"id":726,"type":"Note","pos":[415.7740478515625,153.59271240234375],"size":[364.5906677246094,164.38613891601562],"flags":{},"order":2,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["The best style guide images will share the lighting and color composition of your desired scene. Some are just inexplicably ineffective at killing blur. Just gather up a bunch of images to try, you'll find some good ones that can be reused for many things. I'm including the one used here in the example_workflows directory, be sure to check for it.\n\nAnd don't forget to change seeds. Don't optimize for one seed only. Don't get stuck on one seed! Sometimes one is just not going to work out for whatever you're doing."],"color":"#432","bgcolor":"#653"}],"links":[[18,14,0,7,4,"VAE"],[1328,14,0,397,1,"VAE"],[1329,397,0,398,0,"IMAGE"],[1399,7,3,401,3,"LATENT"],[1939,490,0,662,0,"CLIP"],[1963,663,0,664,0,"MODEL"],[1964,664,0,13,0,"*"],[1965,663,1,490,0,"*"],[1966,663,2,14,0,"*"],[1967,13,0,401,0,"MODEL"],[2096,401,0,397,0,"LATENT"],[2098,662,0,401,1,"CONDITIONING"],[2099,724,0,401,5,"GUIDES"],[2100,7,0,724,0,"LATENT"],[2113,739,0,7,0,"IMAGE"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.91943424957756,"offset":[1140.4413839969193,798.117449447068]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0,"ue_links":[],"VHS_MetadataImage":true,"VHS_KeepIntermediate":true},"version":0.4}
================================================
FILE: example_workflows/flux style transfer gguf.json
================================================
{"last_node_id":1392,"last_link_id":3739,"nodes":[{"id":13,"type":"Reroute","pos":[13508.9013671875,-109.2831802368164],"size":[75,26],"flags":{},"order":20,"mode":0,"inputs":[{"name":"","type":"*","link":3737}],"outputs":[{"name":"","type":"MODEL","links":[1395],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":14,"type":"Reroute","pos":[13508.9013671875,-29.283178329467773],"size":[75,26],"flags":{},"order":16,"mode":0,"inputs":[{"name":"","type":"*","link":3739}],"outputs":[{"name":"","type":"VAE","links":[18,2696],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":490,"type":"Reroute","pos":[13508.9013671875,-69.28317260742188],"size":[75,26],"flags":{},"order":17,"mode":0,"inputs":[{"name":"","type":"*","link":3738}],"outputs":[{"name":"","type":"CLIP","links":[2881,3581],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1308,"type":"ClownGuide_Style_Beta","pos":[14108.255859375,675.60693359375],"size":[246.31312561035156,286],"flags":{},"order":29,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":3709},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":3699}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[3604],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownGuide_Style_Beta"},"widgets_values":["positive","WCT",1,1,"constant",0,-1,false]},{"id":431,"type":"ModelSamplingAdvancedResolution","pos":[13218.9013671875,-309.28314208984375],"size":[260.3999938964844,126],"flags":{},"order":28,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1395},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":1398}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2692],"slot_index":0}],"properties":{"Node name for S&R":"ModelSamplingAdvancedResolution","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":["exponential",1.35,0.85]},{"id":970,"type":"CLIPTextEncode","pos":[13688.255859375,165.60690307617188],"size":[281.9206848144531,109.87118530273438],"flags":{},"order":21,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":2881}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2882,3627],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["blurry, out of focus, shallow depth of field, jpeg artifacts, low quality, bad quality, 
unsharp"]},{"id":1378,"type":"Reroute","pos":[13184.07421875,533.128662109375],"size":[75,26],"flags":{},"order":19,"mode":0,"inputs":[{"name":"","type":"*","link":3721}],"outputs":[{"name":"","type":"IMAGE","links":[3724,3729],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1379,"type":"Reroute","pos":[13185.853515625,168.15780639648438],"size":[75,26],"flags":{},"order":18,"mode":0,"inputs":[{"name":"","type":"*","link":3725}],"outputs":[{"name":"","type":"IMAGE","links":[3726],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":909,"type":"SaveImage","pos":[15220,-259.5838928222656],"size":[457.3382263183594,422.2065124511719],"flags":{},"order":34,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2697}],"outputs":[],"properties":{"Node name for S&R":"SaveImage","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["ComfyUI"]},{"id":7,"type":"VAEEncodeAdvanced","pos":[13400,560],"size":[261.2217712402344,298],"flags":{"collapsed":true},"order":26,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":3688},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":3727},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":18},{"name":"width","type":"INT","pos":[10,160.00003051757812],"widget":{"name":"width"},"link":3732},{"name":"height","type":"INT","pos":[10,184.00003051757812],"widget":{"name":"height"},"link":3733}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[2983,3710],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":[3709],"slot_index":1},{"name":"mask","localized_name":"mask","type":"MASK","links":[],"slot_index":2},{"name":"empty_latent","localized_name":
"empty_latent","type":"LATENT","links":[1398],"slot_index":3},{"name":"width","localized_name":"width","type":"INT","links":[],"slot_index":4},{"name":"height","localized_name":"height","type":"INT","links":[],"slot_index":5}],"properties":{"Node name for S&R":"VAEEncodeAdvanced","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":["false",1344,768,"red",false,"16_channels"]},{"id":1371,"type":"Image Repeat Tile To Size","pos":[13390,500],"size":[210,146],"flags":{"collapsed":true},"order":23,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","link":3726},{"name":"width","type":"INT","pos":[10,36],"widget":{"name":"width"},"link":3730},{"name":"height","type":"INT","pos":[10,60],"widget":{"name":"height"},"link":3731}],"outputs":[{"name":"image","localized_name":"image","type":"IMAGE","links":[3727,3728],"slot_index":0}],"properties":{"Node name for S&R":"Image Repeat Tile To Size"},"widgets_values":[1024,1024,true]},{"id":1380,"type":"SetImageSize","pos":[13380,320],"size":[210,102],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"width","localized_name":"width","type":"INT","links":[3730,3732],"slot_index":0},{"name":"height","localized_name":"height","type":"INT","links":[3731,3733],"slot_index":1}],"properties":{"Node name for S&R":"SetImageSize"},"widgets_values":[1344,768]},{"id":1377,"type":"Image Comparer 
(rgthree)","pos":[15742.4619140625,-253.3526153564453],"size":[461.9190368652344,413.5953369140625],"flags":{},"order":35,"mode":0,"inputs":[{"name":"image_a","type":"IMAGE","dir":3,"link":3720},{"name":"image_b","type":"IMAGE","dir":3,"link":3729}],"outputs":[],"properties":{"comparer_mode":"Slide"},"widgets_values":[[{"name":"A","selected":true,"url":"/api/view?filename=rgthree.compare._temp_clqis_00009_.png&type=temp&subfolder=&rand=0.8606788093916207"},{"name":"B","selected":true,"url":"/api/view?filename=rgthree.compare._temp_clqis_00010_.png&type=temp&subfolder=&rand=0.7775594190958295"}]]},{"id":908,"type":"VAEDecode","pos":[15217.7802734375,-312.1965637207031],"size":[210,46],"flags":{"collapsed":true},"order":33,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":3469},{"name":"vae","localized_name":"vae","type":"VAE","link":2696}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2697,3720],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[]},{"id":1383,"type":"Note","pos":[14428.40234375,580.1749877929688],"size":[261.9539489746094,88],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Samplers like res_2s in this cycling node will also work and are faster. 
res_2m and res_3m are even faster, but sometimes the effect takes longer in wall time to fully kick in."],"color":"#432","bgcolor":"#653"},{"id":1384,"type":"Note","pos":[14793.0322265625,518.4120483398438],"size":[261.9539489746094,88],"flags":{},"order":2,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["res_2m or res_3m can be used here instead and are faster, but are less likely to fully clean up lingering artifacts."],"color":"#432","bgcolor":"#653"},{"id":1385,"type":"Note","pos":[14398.345703125,768.2096557617188],"size":[261.9539489746094,88],"flags":{},"order":3,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["method = AdaIN is faster and uses less memory, but is less accurate. Some prefer the effect."],"color":"#432","bgcolor":"#653"},{"id":1328,"type":"ClownOptions_SDE_Beta","pos":[14186.4755859375,-132.6126251220703],"size":[315,266],"flags":{"collapsed":true},"order":4,"mode":0,"inputs":[{"name":"etas","localized_name":"etas","type":"SIGMAS","shape":7,"link":null},{"name":"etas_substep","localized_name":"etas_substep","type":"SIGMAS","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[3707],"slot_index":0}],"properties":{"Node name for S&R":"ClownOptions_SDE_Beta"},"widgets_values":["gaussian","gaussian","hard","hard",0.5,0.75,-1,"fixed"]},{"id":1381,"type":"Note","pos":[13881.6279296875,-217.62835693359375],"size":[261.9539489746094,88],"flags":{},"order":5,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Increase or decrease \"steps_to_run\" in ClownsharKSampler to change the effective denoise level."],"color":"#432","bgcolor":"#653"},{"id":1382,"type":"Note","pos":[14718.0498046875,-295.4144592285156],"size":[268.1851806640625,124.49711608886719],"flags":{},"order":6,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Increasing cycles 
will increase the amount of change, but take longer.\n\nCycles will rerun the same step over and over, forwards and backwards, iteratively refining an image at a controlled noise level."],"color":"#432","bgcolor":"#653"},{"id":1387,"type":"ReFluxPatcher","pos":[13262.294921875,-130.79653930664062],"size":[210,82],"flags":{},"order":15,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":3736}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[3737],"slot_index":0}],"properties":{"Node name for S&R":"ReFluxPatcher"},"widgets_values":["float64",true]},{"id":1386,"type":"UnetLoaderGGUF","pos":[12817.208984375,-323.9640808105469],"size":[315,58],"flags":{},"order":7,"mode":0,"inputs":[],"outputs":[{"name":"MODEL","localized_name":"MODEL","type":"MODEL","links":[3736],"slot_index":0}],"properties":{"Node name for S&R":"UnetLoaderGGUF"},"widgets_values":["flux1-dev-Q4_K_S.gguf"]},{"id":1389,"type":"VAELoader","pos":[12824.330078125,-56.021827697753906],"size":[315,58],"flags":{},"order":8,"mode":0,"inputs":[],"outputs":[{"name":"VAE","localized_name":"VAE","type":"VAE","links":[3739],"slot_index":0}],"properties":{"Node name for 
S&R":"VAELoader"},"widgets_values":["ae.sft"]},{"id":980,"type":"ClownsharkChainsampler_Beta","pos":[14378.255859375,-64.39308166503906],"size":[340.20001220703125,570],"flags":{},"order":31,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":3626},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":3627},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":3578},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":3604},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":3533},{"name":"options 2","type":"OPTIONS","link":3707},{"name":"options 3","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[3698],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"exponential/res_2s",1,1,"resample",true]},{"id":981,"type":"ClownsharkChainsampler_Beta","pos":[14758.255859375,-64.39308166503906],"size":[340.20001220703125,510],"flags":{},"order":32,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":3698},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[3469],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"exponential/res_2s",-1,1,"resample",true]},{"id":1388,"type":"DualCLIPLoaderGGUF","pos":[12819.8798828125,-213.58253479003906],"size":[315,106],"flags":{},"order":9,"mode":0,"inputs":[],"outputs":[{"name":"CLIP","localized_name":"CLIP","type":"CLIP","links":[3738],"slot_index":0}],"properties":{"Node name for 
S&R":"DualCLIPLoaderGGUF"},"widgets_values":["clip_l_flux.safetensors","t5xxl_fp16.safetensors","flux"]},{"id":907,"type":"ClownsharKSampler_Beta","pos":[14008.255859375,-64.39308166503906],"size":[340.55120849609375,666.8208618164062],"flags":{},"order":30,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":2692},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":3602},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":2882},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":2983},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":3708},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[3578],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":[0.5,"multistep/res_2m","beta57",20,14,1,1,201,"fixed","unsample",true]},{"id":1333,"type":"CLIPTextEncode","pos":[13688.255859375,-44.393089294433594],"size":[280.6252746582031,164.06936645507812],"flags":{"collapsed":false},"order":22,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":3581}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[3602,3626],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["black and white anime cartoon of the inside of a car driving down a creepy 
road"]},{"id":1374,"type":"LoadImage","pos":[12805.896484375,167.56053161621094],"size":[315,314],"flags":{},"order":10,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3725],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"title":"Load Image (Style Guide)","properties":{"Node name for S&R":"LoadImage"},"widgets_values":["ComfyUI_14651_.png","image"]},{"id":1373,"type":"LoadImage","pos":[12810.2314453125,534.0346069335938],"size":[315,314],"flags":{},"order":11,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3721],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"title":"Load Image (Composition)","properties":{"Node name for S&R":"LoadImage"},"widgets_values":["pasted/image (476).png","image"]},{"id":1362,"type":"PreviewImage","pos":[13380,620],"size":[210,246],"flags":{},"order":25,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":3682}],"outputs":[],"properties":{"Node name for S&R":"PreviewImage"},"widgets_values":[]},{"id":1390,"type":"Note","pos":[13148.0439453125,257.643310546875],"size":[210,88],"flags":{},"order":12,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Color Match SOMETIMES helps accelerate style transfer.\n"],"color":"#432","bgcolor":"#653"},{"id":1318,"type":"ClownGuide_Beta","pos":[13828.255859375,675.60693359375],"size":[263.102783203125,290],"flags":{},"order":27,"mode":4,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":3710},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[3699,3708],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownGuide_Beta"},"widgets_values":["inversion",false,false,0.7,1,"constant",0,-1,false]},{"id":1376,"type":"Note","pos":[13710.3271484375,473.56817626953125],"size":[265.1909484863281,137.36415100097656],"flags":{},"order":13,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Increase or decrease weight in ClownGuide to alter adherence to the input image.\n\nFor now, set to low weights or bypass if using any model except HiDream. The HiDream code was adapted so that this composition guide doesn't fight the style guide. Others will be added soon."],"color":"#432","bgcolor":"#653"},{"id":1317,"type":"ClownOptions_Cycles_Beta","pos":[14418.0478515625,-325.06365966796875],"size":[265.2884826660156,202],"flags":{},"order":14,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[3533],"slot_index":0}],"properties":{"Node name for S&R":"ClownOptions_Cycles_Beta"},"widgets_values":[10,1,-1,"none",-1,1,false]},{"id":1350,"type":"ColorMatch","pos":[13380,160],"size":[210,102],"flags":{"collapsed":false},"order":24,"mode":0,"inputs":[{"name":"image_ref","localized_name":"image_ref","type":"IMAGE","link":3728},{"name":"image_target","localized_name":"image_target","type":"IMAGE","link":3724}],"outputs":[{"name":"image","localized_name":"image","type":"IMAGE","links":[3682,3688],"slot_index":0}],"properties":{"Node name for 
S&R":"ColorMatch"},"widgets_values":["mkl",0]}],"links":[[18,14,0,7,4,"VAE"],[1395,13,0,431,0,"MODEL"],[1398,7,3,431,1,"LATENT"],[2692,431,0,907,0,"MODEL"],[2696,14,0,908,1,"VAE"],[2697,908,0,909,0,"IMAGE"],[2881,490,0,970,0,"CLIP"],[2882,970,0,907,2,"CONDITIONING"],[2983,7,0,907,3,"LATENT"],[3469,981,0,908,0,"LATENT"],[3533,1317,0,980,6,"OPTIONS"],[3578,907,0,980,4,"LATENT"],[3581,490,0,1333,0,"CLIP"],[3602,1333,0,907,1,"CONDITIONING"],[3604,1308,0,980,5,"GUIDES"],[3626,1333,0,980,1,"CONDITIONING"],[3627,970,0,980,2,"CONDITIONING"],[3682,1350,0,1362,0,"IMAGE"],[3688,1350,0,7,0,"IMAGE"],[3698,980,0,981,4,"LATENT"],[3699,1318,0,1308,3,"GUIDES"],[3707,1328,0,980,7,"OPTIONS"],[3708,1318,0,907,5,"GUIDES"],[3709,7,1,1308,0,"LATENT"],[3710,7,0,1318,0,"LATENT"],[3720,908,0,1377,0,"IMAGE"],[3721,1373,0,1378,0,"*"],[3724,1378,0,1350,1,"IMAGE"],[3725,1374,0,1379,0,"*"],[3726,1379,0,1371,0,"IMAGE"],[3727,1371,0,7,1,"IMAGE"],[3728,1371,0,1350,0,"IMAGE"],[3729,1378,0,1377,1,"IMAGE"],[3730,1380,0,1371,1,"INT"],[3731,1380,1,1371,2,"INT"],[3732,1380,0,7,5,"INT"],[3733,1380,1,7,6,"INT"],[3736,1386,0,1387,0,"MODEL"],[3737,1387,0,13,0,"*"],[3738,1388,0,490,0,"*"],[3739,1389,0,14,0,"*"]],"groups":[{"id":1,"title":"Model Loaders","bounding":[12796.72265625,-401.9004211425781,822.762451171875,436.0693359375],"color":"#3f789e","font_size":24,"flags":{}},{"id":2,"title":"Sampling","bounding":[13652.6533203125,-402.70721435546875,1470.8076171875,1409.0289306640625],"color":"#3f789e","font_size":24,"flags":{}},{"id":3,"title":"Input Prep","bounding":[12797.1396484375,77.69412231445312,817.4218139648438,820.6239624023438],"color":"#3f789e","font_size":24,"flags":{}},{"id":4,"title":"Save and 
Compare","bounding":[15180.705078125,-399.09112548828125,1050.6468505859375,615.8845825195312],"color":"#3f789e","font_size":24,"flags":{}}],"config":{},"extra":{"ds":{"scale":1.4379222522564015,"offset":[-11124.689104031433,546.0824398349012]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0,"ue_links":[],"VHS_MetadataImage":true,"VHS_KeepIntermediate":true},"version":0.4}
================================================
FILE: example_workflows/flux upscale thumbnail large multistage.json
================================================
{"last_node_id":431,"last_link_id":1176,"nodes":[{"id":361,"type":"CLIPVisionEncode","pos":[860,820],"size":[253.60000610351562,78],"flags":{},"order":17,"mode":0,"inputs":[{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","link":1004},{"name":"image","localized_name":"image","type":"IMAGE","link":1107}],"outputs":[{"name":"CLIP_VISION_OUTPUT","localized_name":"CLIP_VISION_OUTPUT","type":"CLIP_VISION_OUTPUT","links":[1006],"slot_index":0}],"properties":{"Node name for S&R":"CLIPVisionEncode"},"widgets_values":["center"]},{"id":364,"type":"CLIPTextEncode","pos":[899.5093383789062,952.8309936523438],"size":[210,88],"flags":{},"order":14,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":1007}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[1008,1055],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":[""]},{"id":369,"type":"ClownGuide_Style_Beta","pos":[1138.06640625,1574.328857421875],"size":[231.30213928222656,286],"flags":{},"order":25,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":1101},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[1099],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownGuide_Style_Beta"},"widgets_values":["positive","WCT",1,1,"constant",0,-1,false]},{"id":374,"type":"ClownsharkChainsampler_Beta","pos":[2403.98583984375,1081.333740234375],"size":[274.9878234863281,528.6721801757812],"flags":{},"order":30,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":1134},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1097},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[1088],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"multistep/res_3m",-1,1,"resample",true]},{"id":372,"type":"SaveImage","pos":[2740,1080],"size":[442.38494873046875,530.0809936523438],"flags":{},"order":32,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":1030}],"outputs":[],"properties":{},"widgets_values":["ComfyUI"]},{"id":355,"type":"ModelSamplingAdvancedResolution","pos":[1134.0809326171875,1057.9874267578125],"size":[260.3999938964844,126],"flags":{},"order":24,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1047},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":1111}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1024],"slot_index":0}],"properties":{"Node name for 
S&R":"ModelSamplingAdvancedResolution"},"widgets_values":["exponential",1.35,0.85]},{"id":368,"type":"ReFluxPatcher","pos":[897.4150390625,1095.9840087890625],"size":[210,82],"flags":{},"order":12,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1022}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1047],"slot_index":0}],"properties":{"Node name for S&R":"ReFluxPatcher"},"widgets_values":["float32",true]},{"id":349,"type":"FluxLoader","pos":[554.6767578125,1099.277099609375],"size":[315,282],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1022,1144],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[1007,1137],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[1029,1038,1058,1155,1164,1168],"slot_index":2},{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","links":[1004,1135],"slot_index":3},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","links":[1009,1172]}],"properties":{"Node name for 
S&R":"FluxLoader"},"widgets_values":["colossusProjectFlux_v42AIO.safetensors","default",".use_ckpt_clip",".none",".use_ckpt_vae","sigclip_vision_patch14_384.safetensors","flux1-redux-dev.safetensors"]},{"id":373,"type":"ClownsharkChainsampler_Beta","pos":[1740,1080],"size":[272.9876403808594,526.665771484375],"flags":{},"order":28,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":1118},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1031},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":1099},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":1044},{"name":"options 2","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[1053],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"multistep/res_3m",1,1,"resample",true]},{"id":370,"type":"ClownsharKSampler_Beta","pos":[1417.3414306640625,1078.0023193359375],"size":[277.65570068359375,627.99951171875],"flags":{},"order":27,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":1024},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":1117},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1102},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[1031],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"multistep/res_3m","beta57",30,14,1,1,0,"fixed","unsample",true]},{"id":380,"type":"ClownsharkChainsampler_Beta","pos":[2078.66015625,1080.6669921875],"size":[263.6514892578125,527.99951171875],"flags":{},"order":29,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1053},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":1051},{"name":"options 2","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[1097],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"multistep/res_3m",1,1,"resample",true]},{"id":403,"type":"Note","pos":[2098.053466796875,680.7237548828125],"size":[215.7804412841797,88],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Raise cycles here if you see halos. It doesn't hurt to go as high as 20. 
(About 20 seconds on a 4090 at 1024x1024)."],"color":"#432","bgcolor":"#653"},{"id":402,"type":"Note","pos":[1755.3779296875,678.1484985351562],"size":[241.524658203125,132.7487030029297],"flags":{},"order":2,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Lower cycles here if you see halos.\n\nThese step(s)/cycle(s) (that use the ClownGuide Style node) are needed to prevent blurring when upscaling tiny thumbnail images."],"color":"#432","bgcolor":"#653"},{"id":382,"type":"ControlNetApplyAdvanced","pos":[1440,830],"size":[210,186],"flags":{},"order":23,"mode":0,"inputs":[{"name":"positive","localized_name":"positive","type":"CONDITIONING","link":1108},{"name":"negative","localized_name":"negative","type":"CONDITIONING","link":1055},{"name":"control_net","localized_name":"control_net","type":"CONTROL_NET","link":1056},{"name":"image","localized_name":"image","type":"IMAGE","link":1112},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":1058}],"outputs":[{"name":"positive","localized_name":"positive","type":"CONDITIONING","links":[1118],"slot_index":0},{"name":"negative","localized_name":"negative","type":"CONDITIONING","links":null}],"properties":{"Node name for S&R":"ControlNetApplyAdvanced"},"widgets_values":[1,0,1]},{"id":404,"type":"Image Repeat Tile To Size","pos":[899.620361328125,1259.9044189453125],"size":[210,106],"flags":{},"order":18,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","link":1123}],"outputs":[{"name":"image","localized_name":"image","type":"IMAGE","links":[1124],"slot_index":0}],"properties":{"Node name for S&R":"Image Repeat Tile To 
Size"},"widgets_values":[1024,1024,false]},{"id":375,"type":"VAEEncodeAdvanced","pos":[1140,1240],"size":[228.90342712402344,278],"flags":{},"order":21,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":1113},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":1124},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":1038}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[1102,1111],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":[1101],"slot_index":1},{"name":"mask","localized_name":"mask","type":"MASK","links":null},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":null,"slot_index":3},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"VAEEncodeAdvanced"},"widgets_values":["false",1024,1024,"red",false,"16_channels"]},{"id":401,"type":"LoadImage","pos":[608.10400390625,1453.0382080078125],"size":[315,314],"flags":{},"order":3,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[1122],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["pasted/image (579).png","image"]},{"id":359,"type":"ControlNetLoader","pos":[596.1650390625,977.5371704101562],"size":[270.0880432128906,58],"flags":{},"order":4,"mode":0,"inputs":[],"outputs":[{"name":"CONTROL_NET","localized_name":"CONTROL_NET","type":"CONTROL_NET","links":[1056,1162],"slot_index":0}],"properties":{"Node name for 
S&R":"ControlNetLoader"},"widgets_values":["flux_tile.safetensors"]},{"id":362,"type":"StyleModelApply","pos":[1138.0474853515625,827.8412475585938],"size":[270.06890869140625,122],"flags":{},"order":20,"mode":0,"inputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","link":1008},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","link":1009},{"name":"clip_vision_output","localized_name":"clip_vision_output","type":"CLIP_VISION_OUTPUT","link":1006}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[1108,1117,1134],"slot_index":0}],"properties":{"Node name for S&R":"StyleModelApply"},"widgets_values":[1,"multiply"]},{"id":408,"type":"CLIPVisionEncode","pos":[3300,810],"size":[253.60000610351562,78],"flags":{},"order":19,"mode":0,"inputs":[{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","link":1135},{"name":"image","localized_name":"image","type":"IMAGE","link":1176}],"outputs":[{"name":"CLIP_VISION_OUTPUT","localized_name":"CLIP_VISION_OUTPUT","type":"CLIP_VISION_OUTPUT","links":[1173],"slot_index":0}],"properties":{"Node name for S&R":"CLIPVisionEncode"},"widgets_values":["center"]},{"id":409,"type":"CLIPTextEncode","pos":[3340,940],"size":[210,88],"flags":{},"order":15,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":1137}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[1161,1171],"slot_index":0}],"properties":{"Node name for 
S&R":"CLIPTextEncode"},"widgets_values":[""]},{"id":410,"type":"ClownGuide_Style_Beta","pos":[3570,1560],"size":[231.30213928222656,286],"flags":{},"order":38,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":1138},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[1147],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuide_Style_Beta"},"widgets_values":["positive","WCT",1,1,"constant",0,-1,false]},{"id":411,"type":"ClownsharkChainsampler_Beta","pos":[4840,1070],"size":[274.9878234863281,528.6721801757812],"flags":{},"order":42,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":1139},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1140},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[1154],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"multistep/res_3m",-1,1,"resample",true]},{"id":412,"type":"SaveImage","pos":[5180,1070],"size":[442.38494873046875,530.0809936523438],"flags":{},"order":44,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":1141}],"outputs":[],"properties":{},"widgets_values":["ComfyUI"]},{"id":413,"type":"ModelSamplingAdvancedResolution","pos":[3570,1050],"size":[260.3999938964844,126],"flags":{},"order":37,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1142},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":1143}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1149],"slot_index":0}],"properties":{"Node name for S&R":"ModelSamplingAdvancedResolution"},"widgets_values":["exponential",1.35,0.85]},{"id":414,"type":"ReFluxPatcher","pos":[3330,1080],"size":[210,82],"flags":{},"order":13,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1144}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1142],"slot_index":0}],"properties":{"Node name for 
S&R":"ReFluxPatcher"},"widgets_values":["float32",true]},{"id":415,"type":"ClownsharkChainsampler_Beta","pos":[4180,1070],"size":[272.9876403808594,526.665771484375],"flags":{},"order":40,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":1145},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1146},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":1147},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":1148},{"name":"options 2","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[1152],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"multistep/res_3m",1,1,"resample",true]},{"id":417,"type":"ClownsharkChainsampler_Beta","pos":[4510,1070],"size":[263.6514892578125,527.99951171875],"flags":{},"order":41,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1152},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":1153},{"name":"options 2","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[1140],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"multistep/res_3m",1,1,"resample",true]},{"id":418,"type":"Note","pos":[4530,670],"size":[215.7804412841797,88],"flags":{},"order":5,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Raise cycles here if you see halos. It doesn't hurt to go as high as 20. 
(About 20 seconds on a 4090 at 1024x1024)."],"color":"#432","bgcolor":"#653"},{"id":419,"type":"Note","pos":[4190,670],"size":[241.524658203125,132.7487030029297],"flags":{},"order":6,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Lower cycles here if you see halos.\n\nThese step(s)/cycle(s) (that use the ClownGuide Style node) are needed to prevent blurring when upscaling tiny thumbnail images."],"color":"#432","bgcolor":"#653"},{"id":420,"type":"VAEDecode","pos":[5180,960],"size":[140,46],"flags":{},"order":43,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":1154},{"name":"vae","localized_name":"vae","type":"VAE","link":1155}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[1141,1169],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode","cnr_id":"comfy-core","ver":"0.3.26","widget_ue_connectable":{}},"widgets_values":[]},{"id":421,"type":"Reroute","pos":[3470,1450],"size":[75,26],"flags":{},"order":34,"mode":0,"inputs":[{"name":"","type":"*","link":1174}],"outputs":[{"name":"","type":"IMAGE","links":[1165,1166,1170]}],"properties":{"showOutputText":false,"horizontal":false}},{"id":425,"type":"ControlNetApplyAdvanced","pos":[3880,820],"size":[210,186],"flags":{},"order":26,"mode":0,"inputs":[{"name":"positive","localized_name":"positive","type":"CONDITIONING","link":1160},{"name":"negative","localized_name":"negative","type":"CONDITIONING","link":1161},{"name":"control_net","localized_name":"control_net","type":"CONTROL_NET","link":1162},{"name":"image","localized_name":"image","type":"IMAGE","link":1175},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":1164}],"outputs":[{"name":"positive","localized_name":"positive","type":"CONDITIONING","links":[1145],"slot_index":0},{"name":"negative","localized_name":"negative","type":"CONDITIONING","links":null}],"properties":{"Node name for 
S&R":"ControlNetApplyAdvanced"},"widgets_values":[1,0,1]},{"id":429,"type":"Image Comparer (rgthree)","pos":[5170,1650],"size":[446.2193603515625,494.8704528808594],"flags":{},"order":45,"mode":0,"inputs":[{"name":"image_a","type":"IMAGE","dir":3,"link":1169},{"name":"image_b","type":"IMAGE","dir":3,"link":1170}],"outputs":[],"properties":{"comparer_mode":"Slide"},"widgets_values":[[{"name":"A","selected":true,"url":"/api/view?filename=rgthree.compare._temp_txgkm_00005_.png&type=temp&subfolder=&rand=0.44944358112719196"},{"name":"B","selected":true,"url":"/api/view?filename=rgthree.compare._temp_txgkm_00006_.png&type=temp&subfolder=&rand=0.15903319456700227"}]]},{"id":430,"type":"StyleModelApply","pos":[3570,820],"size":[270.06890869140625,122],"flags":{},"order":22,"mode":0,"inputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","link":1171},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","link":1172},{"name":"clip_vision_output","localized_name":"clip_vision_output","type":"CLIP_VISION_OUTPUT","link":1173}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[1139,1150,1160],"slot_index":0}],"properties":{"Node name for S&R":"StyleModelApply"},"widgets_values":[1,"multiply"]},{"id":387,"type":"Image Comparer 
(rgthree)","pos":[2732.6875,1661.954833984375],"size":[446.2193603515625,494.8704528808594],"flags":{},"order":33,"mode":0,"inputs":[{"name":"image_a","type":"IMAGE","dir":3,"link":1068},{"name":"image_b","type":"IMAGE","dir":3,"link":1115}],"outputs":[],"properties":{"comparer_mode":"Slide"},"widgets_values":[[{"name":"A","selected":true,"url":"/api/view?filename=rgthree.compare._temp_lvxiv_00017_.png&type=temp&subfolder=&rand=0.23193425033461956"},{"name":"B","selected":true,"url":"/api/view?filename=rgthree.compare._temp_lvxiv_00018_.png&type=temp&subfolder=&rand=0.4600603671403143"}]]},{"id":416,"type":"ClownsharKSampler_Beta","pos":[3850,1070],"size":[277.65570068359375,627.99951171875],"flags":{},"order":39,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":1149},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":1150},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1151},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[1146],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"multistep/res_3m","beta57",30,14,1,1,0,"fixed","unsample",true]},{"id":427,"type":"Image Repeat Tile To 
Size","pos":[3340,1250],"size":[210,106],"flags":{},"order":35,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","link":1165}],"outputs":[{"name":"image","localized_name":"image","type":"IMAGE","links":[1167],"slot_index":0}],"properties":{"Node name for S&R":"Image Repeat Tile To Size"},"widgets_values":[1536,1536,false]},{"id":428,"type":"VAEEncodeAdvanced","pos":[3580,1230],"size":[228.90342712402344,278],"flags":{},"order":36,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":1166},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":1167},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":1168}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[1143,1151],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":[1138],"slot_index":1},{"name":"mask","localized_name":"mask","type":"MASK","links":null},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":null,"slot_index":3},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"VAEEncodeAdvanced"},"widgets_values":["false",1536,1536,"red",false,"16_channels"]},{"id":371,"type":"VAEDecode","pos":[2741.197265625,974.4011840820312],"size":[140,46],"flags":{},"order":31,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":1088},{"name":"vae","localized_name":"vae","type":"VAE","link":1029}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[1030,1068,1174],"slot_index":0}],"properties":{"Node name for 
S&R":"VAEDecode","cnr_id":"comfy-core","ver":"0.3.26","widget_ue_connectable":{}},"widgets_values":[]},{"id":378,"type":"ClownOptions_Cycles_Beta","pos":[1768.675537109375,881.3336791992188],"size":[210,130],"flags":{},"order":7,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[1044]}],"properties":{"Node name for S&R":"ClownOptions_Cycles_Beta"},"widgets_values":[5,1,0.5,1]},{"id":381,"type":"ClownOptions_Cycles_Beta","pos":[2103.203857421875,881.467041015625],"size":[210,130],"flags":{},"order":8,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[1051]}],"properties":{"Node name for S&R":"ClownOptions_Cycles_Beta"},"widgets_values":[5,1,0.5,1]},{"id":426,"type":"ClownOptions_Cycles_Beta","pos":[4200,870],"size":[210,130],"flags":{},"order":9,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[1148]}],"properties":{"Node name for S&R":"ClownOptions_Cycles_Beta"},"widgets_values":[5,1,0.5,1]},{"id":424,"type":"ClownOptions_Cycles_Beta","pos":[4540,870],"size":[210,130],"flags":{},"order":10,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[1153]}],"properties":{"Node name for 
S&R":"ClownOptions_Cycles_Beta"},"widgets_values":[5,1,0.5,1]},{"id":398,"type":"Reroute","pos":[1034.667724609375,1458.654541015625],"size":[75,26],"flags":{},"order":16,"mode":0,"inputs":[{"name":"","type":"*","link":1122}],"outputs":[{"name":"","type":"IMAGE","links":[1107,1112,1113,1115,1123,1175,1176],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":431,"type":"Note","pos":[356.2033386230469,1583.169677734375],"size":[210,88],"flags":{},"order":11,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Used a 384x384 image.\n\nAny size will work."],"color":"#432","bgcolor":"#653"}],"links":[[141,151,0,8,1,"VAE"],[142,151,0,72,1,"VAE"],[143,151,0,35,1,"VAE"],[144,151,0,154,7,"VAE"],[159,151,0,72,1,"VAE"],[160,151,0,157,7,"VAE"],[161,151,0,8,1,"VAE"],[162,151,0,154,7,"VAE"],[163,151,0,72,1,"VAE"],[164,151,0,8,1,"VAE"],[165,151,0,154,7,"VAE"],[171,151,0,8,1,"VAE"],[172,151,0,72,1,"VAE"],[173,151,0,154,7,"VAE"],[174,151,0,157,7,"VAE"],[176,151,0,8,1,"VAE"],[177,151,0,72,1,"VAE"],[178,151,0,154,7,"VAE"],[179,151,0,157,7,"VAE"],[195,151,0,8,1,"VAE"],[196,151,0,72,1,"VAE"],[197,151,0,154,7,"VAE"],[198,151,0,157,7,"VAE"],[199,151,0,160,7,"VAE"],[200,151,0,8,1,"VAE"],[201,151,0,72,1,"VAE"],[202,151,0,154,7,"VAE"],[203,151,0,157,7,"VAE"],[204,151,0,160,7,"VAE"],[217,151,0,8,1,"VAE"],[218,151,0,72,1,"VAE"],[219,151,0,154,7,"VAE"],[220,151,0,157,7,"VAE"],[221,151,0,160,7,"VAE"],[222,151,0,8,1,"VAE"],[223,151,0,72,1,"VAE"],[224,151,0,157,7,"VAE"],[225,151,0,8,1,"VAE"],[226,151,0,72,1,"VAE"],[227,151,0,157,7,"VAE"],[250,151,0,62,1,"VAE"],[251,151,0,157,7,"VAE"],[252,151,0,8,1,"VAE"],[253,151,0,72,1,"VAE"],[254,151,0,62,1,"VAE"],[255,151,0,157,7,"VAE"],[256,151,0,8,1,"VAE"],[257,151,0,72,1,"VAE"],[258,151,0,160,7,"VAE"],[271,151,0,62,1,"VAE"],[272,151,0,157,7,"VAE"],[273,151,0,8,1,"VAE"],[274,151,0,72,1,"VAE"],[275,151,0,160,7,"VAE"],[276,151,0,154,7,"VAE"],[277,151,0,62,1,"VAE"],[278,151,0,157,7,"VAE"],[279,151,0,8,1,"VAE"],[280,1
51,0,72,1,"VAE"],[281,151,0,160,7,"VAE"],[282,151,0,154,7,"VAE"],[294,151,0,157,7,"VAE"],[295,151,0,72,1,"VAE"],[296,151,0,160,7,"VAE"],[297,151,0,154,7,"VAE"],[298,151,0,8,1,"VAE"],[299,151,0,313,1,"VAE"],[300,151,0,62,1,"VAE"],[301,151,0,157,7,"VAE"],[302,151,0,72,1,"VAE"],[303,151,0,160,7,"VAE"],[304,151,0,8,1,"VAE"],[305,151,0,313,1,"VAE"],[306,151,0,62,1,"VAE"],[307,151,0,154,7,"VAE"],[309,151,0,157,7,"VAE"],[310,151,0,72,1,"VAE"],[311,151,0,160,7,"VAE"],[312,151,0,8,1,"VAE"],[313,151,0,313,1,"VAE"],[314,151,0,62,1,"VAE"],[315,151,0,154,7,"VAE"],[316,151,0,157,7,"VAE"],[317,151,0,72,1,"VAE"],[318,151,0,160,7,"VAE"],[319,151,0,8,1,"VAE"],[320,151,0,313,1,"VAE"],[321,151,0,62,1,"VAE"],[322,151,0,154,7,"VAE"],[327,151,0,157,7,"VAE"],[328,151,0,72,1,"VAE"],[329,151,0,8,1,"VAE"],[330,151,0,313,1,"VAE"],[331,151,0,62,1,"VAE"],[332,151,0,154,7,"VAE"],[333,151,0,160,7,"VAE"],[343,151,0,157,7,"VAE"],[344,151,0,72,1,"VAE"],[345,151,0,8,1,"VAE"],[346,151,0,313,1,"VAE"],[347,151,0,62,1,"VAE"],[348,151,0,160,7,"VAE"],[349,151,0,154,7,"VAE"],[351,151,0,157,7,"VAE"],[352,151,0,72,1,"VAE"],[353,151,0,8,1,"VAE"],[354,151,0,313,1,"VAE"],[355,151,0,62,1,"VAE"],[356,151,0,160,7,"VAE"],[357,151,0,154,7,"VAE"],[363,151,0,157,7,"VAE"],[364,151,0,72,1,"VAE"],[365,151,0,8,1,"VAE"],[366,151,0,160,7,"VAE"],[367,151,0,154,7,"VAE"],[368,151,0,62,1,"VAE"],[370,151,0,157,7,"VAE"],[371,151,0,72,1,"VAE"],[372,151,0,8,1,"VAE"],[373,151,0,160,7,"VAE"],[374,151,0,154,7,"VAE"],[375,151,0,62,1,"VAE"],[377,151,0,157,7,"VAE"],[378,151,0,72,1,"VAE"],[379,151,0,8,1,"VAE"],[380,151,0,160,7,"VAE"],[381,151,0,154,7,"VAE"],[382,151,0,62,1,"VAE"],[383,151,0,157,7,"VAE"],[384,151,0,72,1,"VAE"],[385,151,0,8,1,"VAE"],[386,151,0,160,7,"VAE"],[387,151,0,154,7,"VAE"],[388,151,0,62,1,"VAE"],[391,151,0,157,7,"VAE"],[392,151,0,72,1,"VAE"],[393,151,0,8,1,"VAE"],[394,151,0,160,7,"VAE"],[395,151,0,154,7,"VAE"],[396,151,0,62,1,"VAE"],[402,151,0,157,7,"VAE"],[403,151,0,72,1,"VAE"],[404,151,0,8,1,"VAE"],[405,151,0,160,7,"
VAE"],[406,151,0,154,7,"VAE"],[407,151,0,62,1,"VAE"],[408,151,0,157,7,"VAE"],[409,151,0,72,1,"VAE"],[410,151,0,8,1,"VAE"],[411,151,0,160,7,"VAE"],[412,151,0,154,7,"VAE"],[413,151,0,62,1,"VAE"],[421,151,0,157,7,"VAE"],[422,151,0,72,1,"VAE"],[423,151,0,8,1,"VAE"],[424,151,0,160,7,"VAE"],[425,151,0,154,7,"VAE"],[426,151,0,62,1,"VAE"],[427,151,0,157,7,"VAE"],[428,151,0,72,1,"VAE"],[429,151,0,8,1,"VAE"],[430,151,0,160,7,"VAE"],[431,151,0,154,7,"VAE"],[432,151,0,62,1,"VAE"],[1004,349,3,361,0,"CLIP_VISION"],[1006,361,0,362,2,"CLIP_VISION_OUTPUT"],[1007,349,1,364,0,"CLIP"],[1008,364,0,362,0,"CONDITIONING"],[1009,349,4,362,1,"STYLE_MODEL"],[1022,349,0,368,0,"MODEL"],[1024,355,0,370,0,"MODEL"],[1029,349,2,371,1,"VAE"],[1030,371,0,372,0,"IMAGE"],[1031,370,0,373,4,"LATENT"],[1038,349,2,375,4,"VAE"],[1044,378,0,373,6,"OPTIONS"],[1047,368,0,355,0,"MODEL"],[1051,381,0,380,6,"OPTIONS"],[1053,373,0,380,4,"LATENT"],[1055,364,0,382,1,"CONDITIONING"],[1056,359,0,382,2,"CONTROL_NET"],[1058,349,2,382,4,"VAE"],[1068,371,0,387,0,"IMAGE"],[1088,374,0,371,0,"LATENT"],[1097,380,0,374,4,"LATENT"],[1099,369,0,373,5,"GUIDES"],[1101,375,1,369,0,"LATENT"],[1102,375,0,370,3,"LATENT"],[1107,398,0,361,1,"IMAGE"],[1108,362,0,382,0,"CONDITIONING"],[1111,375,0,355,1,"LATENT"],[1112,398,0,382,3,"IMAGE"],[1113,398,0,375,0,"IMAGE"],[1115,398,0,387,1,"IMAGE"],[1117,362,0,370,1,"CONDITIONING"],[1118,382,0,373,1,"CONDITIONING"],[1122,401,0,398,0,"*"],[1123,398,0,404,0,"IMAGE"],[1124,404,0,375,1,"IMAGE"],[1134,362,0,374,1,"CONDITIONING"],[1135,349,3,408,0,"CLIP_VISION"],[1137,349,1,409,0,"CLIP"],[1138,428,1,410,0,"LATENT"],[1139,430,0,411,1,"CONDITIONING"],[1140,417,0,411,4,"LATENT"],[1141,420,0,412,0,"IMAGE"],[1142,414,0,413,0,"MODEL"],[1143,428,0,413,1,"LATENT"],[1144,349,0,414,0,"MODEL"],[1145,425,0,415,1,"CONDITIONING"],[1146,416,0,415,4,"LATENT"],[1147,410,0,415,5,"GUIDES"],[1148,426,0,415,6,"OPTIONS"],[1149,413,0,416,0,"MODEL"],[1150,430,0,416,1,"CONDITIONING"],[1151,428,0,416,3,"LATENT"],[1152,415,0,417
,4,"LATENT"],[1153,424,0,417,6,"OPTIONS"],[1154,411,0,420,0,"LATENT"],[1155,349,2,420,1,"VAE"],[1160,430,0,425,0,"CONDITIONING"],[1161,409,0,425,1,"CONDITIONING"],[1162,359,0,425,2,"CONTROL_NET"],[1164,349,2,425,4,"VAE"],[1165,421,0,427,0,"IMAGE"],[1166,421,0,428,0,"IMAGE"],[1167,427,0,428,1,"IMAGE"],[1168,349,2,428,4,"VAE"],[1169,420,0,429,0,"IMAGE"],[1170,421,0,429,1,"IMAGE"],[1171,409,0,430,0,"CONDITIONING"],[1172,349,4,430,1,"STYLE_MODEL"],[1173,408,0,430,2,"CLIP_VISION_OUTPUT"],[1174,371,0,421,0,"*"],[1175,398,0,425,3,"IMAGE"],[1176,398,0,408,1,"IMAGE"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.3109994191500252,"offset":[916.9662500305632,-478.4961303433991]},"ue_links":[{"downstream":157,"downstream_slot":7,"upstream":"151","upstream_slot":0,"controller":64,"type":"VAE"},{"downstream":154,"downstream_slot":7,"upstream":"151","upstream_slot":0,"controller":64,"type":"VAE"},{"downstream":72,"downstream_slot":1,"upstream":"151","upstream_slot":0,"controller":64,"type":"VAE"},{"downstream":62,"downstream_slot":1,"upstream":"151","upstream_slot":0,"controller":64,"type":"VAE"}],"VHS_latentpreview":false,"VHS_latentpreviewrate":0,"VHS_MetadataImage":true,"VHS_KeepIntermediate":true,"links_added_by_ue":[959,960,961,962],"frontendVersion":"1.18.6"},"version":0.4}
================================================
FILE: example_workflows/flux upscale thumbnail large.json
================================================
{"last_node_id":408,"last_link_id":1127,"nodes":[{"id":369,"type":"ClownGuide_Style_Beta","pos":[1138.06640625,1574.328857421875],"size":[231.30213928222656,286],"flags":{},"order":18,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":1101},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[1099],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuide_Style_Beta"},"widgets_values":["positive","WCT",1,1,"constant",0,-1,false]},{"id":374,"type":"ClownsharkChainsampler_Beta","pos":[2403.98583984375,1081.333740234375],"size":[274.9878234863281,528.6721801757812],"flags":{},"order":22,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":1109},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1097},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[1088],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"multistep/res_3m",-1,1,"resample",true]},{"id":372,"type":"SaveImage","pos":[2740,1080],"size":[442.38494873046875,530.0809936523438],"flags":{},"order":24,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":1030}],"outputs":[],"properties":{},"widgets_values":["ComfyUI"]},{"id":355,"type":"ModelSamplingAdvancedResolution","pos":[1134.0809326171875,1057.9874267578125],"size":[260.3999938964844,126],"flags":{},"order":17,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1047},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":1111}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1024],"slot_index":0}],"properties":{"Node name for S&R":"ModelSamplingAdvancedResolution"},"widgets_values":["exponential",1.35,0.85]},{"id":368,"type":"ReFluxPatcher","pos":[897.4150390625,1095.9840087890625],"size":[210,82],"flags":{},"order":9,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1022}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1047],"slot_index":0}],"properties":{"Node name for S&R":"ReFluxPatcher"},"widgets_values":["float32",true]},{"id":349,"type":"FluxLoader","pos":[554.6767578125,1099.277099609375],"size":[315,282],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1022],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[1007],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[1029,1038,1058],"slot_index":2},{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","links":[1004],"slot_index":3},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","links":[1009]}],"properties":{"Node name for 
S&R":"FluxLoader"},"widgets_values":["colossusProjectFlux_v42AIO.safetensors","default",".use_ckpt_clip",".none",".use_ckpt_vae","sigclip_vision_patch14_384.safetensors","flux1-redux-dev.safetensors"]},{"id":387,"type":"Image Comparer (rgthree)","pos":[3228.67529296875,1082.0006103515625],"size":[502.8477478027344,526.1139526367188],"flags":{},"order":25,"mode":0,"inputs":[{"name":"image_a","type":"IMAGE","dir":3,"link":1068},{"name":"image_b","type":"IMAGE","dir":3,"link":1115}],"outputs":[],"properties":{"comparer_mode":"Slide"},"widgets_values":[[{"name":"A","selected":true,"url":"/api/view?filename=rgthree.compare._temp_lvxiv_00003_.png&type=temp&subfolder=&rand=0.3715711256758052"},{"name":"B","selected":true,"url":"/api/view?filename=rgthree.compare._temp_lvxiv_00004_.png&type=temp&subfolder=&rand=0.9911994449338102"}]]},{"id":373,"type":"ClownsharkChainsampler_Beta","pos":[1740,1080],"size":[272.9876403808594,526.665771484375],"flags":{},"order":20,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":1118},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1031},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":1099},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":1044},{"name":"options 2","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[1053],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"multistep/res_3m",1,1,"resample",true]},{"id":370,"type":"ClownsharKSampler_Beta","pos":[1417.3414306640625,1078.0023193359375],"size":[277.65570068359375,627.99951171875],"flags":{},"order":19,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":1024},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":1117},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1102},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[1031],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"multistep/res_3m","beta57",30,14,1,1,0,"fixed","unsample",true]},{"id":382,"type":"ControlNetApplyAdvanced","pos":[1440,830],"size":[210,186],"flags":{},"order":16,"mode":0,"inputs":[{"name":"positive","localized_name":"positive","type":"CONDITIONING","link":1108},{"name":"negative","localized_name":"negative","type":"CONDITIONING","link":1055},{"name":"control_net","localized_name":"control_net","type":"CONTROL_NET","link":1056},{"name":"image","localized_name":"image","type":"IMAGE","link":1112},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":1058}],"outputs":[{"name":"positive","localized_name":"positive","type":"CONDITIONING","links":[1118],"slot_index":0},{"name":"negative","localized_name":"negative","type":"CONDITIONING","links":null}],"properties":{"Node name for S&R":"ControlNetApplyAdvanced"},"widgets_values":[1,0,1]},{"id":380,"type":"ClownsharkChainsampler_Beta","pos":[2078.66015625,1080.6669921875],"size":[263.6514892578125,527.99951171875],"flags":{},"order":21,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1053},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":1051},{"name":"options 
2","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[1097],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"multistep/res_3m",1,1,"resample",true]},{"id":401,"type":"LoadImage","pos":[660.8270874023438,1457.920166015625],"size":[315,314],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[1122],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["pasted/image (579).png","image"]},{"id":371,"type":"VAEDecode","pos":[2741.197265625,974.4011840820312],"size":[140,46],"flags":{},"order":23,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":1088},{"name":"vae","localized_name":"vae","type":"VAE","link":1029}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[1030,1068],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode","cnr_id":"comfy-core","ver":"0.3.26","widget_ue_connectable":{}},"widgets_values":[]},{"id":378,"type":"ClownOptions_Cycles_Beta","pos":[1768.675537109375,881.3336791992188],"size":[210,130],"flags":{},"order":2,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[1044]}],"properties":{"Node name for 
S&R":"ClownOptions_Cycles_Beta"},"widgets_values":[5,1,0.5,1]},{"id":398,"type":"Reroute","pos":[1034.667724609375,1458.654541015625],"size":[75,26],"flags":{},"order":11,"mode":0,"inputs":[{"name":"","type":"*","link":1122}],"outputs":[{"name":"","type":"IMAGE","links":[1107,1112,1113,1115,1123],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":404,"type":"Image Repeat Tile To Size","pos":[899.620361328125,1259.9044189453125],"size":[210,106],"flags":{},"order":13,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","link":1123}],"outputs":[{"name":"image","localized_name":"image","type":"IMAGE","links":[1124],"slot_index":0}],"properties":{"Node name for S&R":"Image Repeat Tile To Size"},"widgets_values":[1536,1536,false]},{"id":375,"type":"VAEEncodeAdvanced","pos":[1140,1240],"size":[228.90342712402344,278],"flags":{},"order":15,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":1113},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":1124},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":1038}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[1102,1111],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":[1101],"slot_index":1},{"name":"mask","localized_name":"mask","type":"MASK","links":null},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":null,"slot_index":3},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for 
S&R":"VAEEncodeAdvanced"},"widgets_values":["false",1536,1536,"red",false,"16_channels"]},{"id":381,"type":"ClownOptions_Cycles_Beta","pos":[2103.203857421875,881.467041015625],"size":[210,130],"flags":{},"order":3,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[1051]}],"properties":{"Node name for S&R":"ClownOptions_Cycles_Beta"},"widgets_values":[20,1,0.5,1]},{"id":403,"type":"Note","pos":[2098.053466796875,680.7237548828125],"size":[215.7804412841797,88],"flags":{},"order":5,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Raise cycles here if you see halos. It doesn't hurt to go as high as 20. Minimum of 5 recommended."],"color":"#432","bgcolor":"#653"},{"id":402,"type":"Note","pos":[1755.3779296875,678.1484985351562],"size":[241.524658203125,132.7487030029297],"flags":{},"order":6,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Lower cycles here if you see halos. 
Minimum of 1 or 2 recommended.\n\nThese step(s)/cycle(s) (that use the ClownGuide Style node) are needed to prevent blurring when upscaling tiny thumbnail images."],"color":"#432","bgcolor":"#653"},{"id":359,"type":"ControlNetLoader","pos":[597.9067993164062,977.3353881835938],"size":[270.0880432128906,58],"flags":{},"order":7,"mode":0,"inputs":[],"outputs":[{"name":"CONTROL_NET","localized_name":"CONTROL_NET","type":"CONTROL_NET","links":[1056],"slot_index":0}],"properties":{"Node name for S&R":"ControlNetLoader"},"widgets_values":["flux_tile.safetensors"]},{"id":362,"type":"StyleModelApply","pos":[1141.4669189453125,829.1477661132812],"size":[270.06890869140625,122],"flags":{},"order":14,"mode":0,"inputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","link":1008},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","link":1009},{"name":"clip_vision_output","localized_name":"clip_vision_output","type":"CLIP_VISION_OUTPUT","link":1006}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[1108,1109,1117],"slot_index":0}],"properties":{"Node name for S&R":"StyleModelApply"},"widgets_values":[1,"multiply"]},{"id":361,"type":"CLIPVisionEncode","pos":[862.2003784179688,825.134765625],"size":[253.60000610351562,78],"flags":{},"order":12,"mode":0,"inputs":[{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","link":1004},{"name":"image","localized_name":"image","type":"IMAGE","link":1107}],"outputs":[{"name":"CLIP_VISION_OUTPUT","localized_name":"CLIP_VISION_OUTPUT","type":"CLIP_VISION_OUTPUT","links":[1006],"slot_index":0}],"properties":{"Node name for 
S&R":"CLIPVisionEncode"},"widgets_values":["center"]},{"id":364,"type":"CLIPTextEncode","pos":[899.5093383789062,952.8309936523438],"size":[210,88],"flags":{},"order":10,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":1007}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[1008,1055],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":[""]},{"id":408,"type":"Note","pos":[583.3265380859375,830.6437377929688],"size":[248.87789916992188,88],"flags":{},"order":8,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Jasper's tile controlnet was used."],"color":"#432","bgcolor":"#653"},{"id":407,"type":"Note","pos":[424.7425537109375,1579.1385498046875],"size":[210,88],"flags":{},"order":4,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Input image was 384x384.\n\nAny size can be used."],"color":"#432","bgcolor":"#653"}],"links":[[141,151,0,8,1,"VAE"],[142,151,0,72,1,"VAE"],[143,151,0,35,1,"VAE"],[144,151,0,154,7,"VAE"],[159,151,0,72,1,"VAE"],[160,151,0,157,7,"VAE"],[161,151,0,8,1,"VAE"],[162,151,0,154,7,"VAE"],[163,151,0,72,1,"VAE"],[164,151,0,8,1,"VAE"],[165,151,0,154,7,"VAE"],[171,151,0,8,1,"VAE"],[172,151,0,72,1,"VAE"],[173,151,0,154,7,"VAE"],[174,151,0,157,7,"VAE"],[176,151,0,8,1,"VAE"],[177,151,0,72,1,"VAE"],[178,151,0,154,7,"VAE"],[179,151,0,157,7,"VAE"],[195,151,0,8,1,"VAE"],[196,151,0,72,1,"VAE"],[197,151,0,154,7,"VAE"],[198,151,0,157,7,"VAE"],[199,151,0,160,7,"VAE"],[200,151,0,8,1,"VAE"],[201,151,0,72,1,"VAE"],[202,151,0,154,7,"VAE"],[203,151,0,157,7,"VAE"],[204,151,0,160,7,"VAE"],[217,151,0,8,1,"VAE"],[218,151,0,72,1,"VAE"],[219,151,0,154,7,"VAE"],[220,151,0,157,7,"VAE"],[221,151,0,160,7,"VAE"],[222,151,0,8,1,"VAE"],[223,151,0,72,1,"VAE"],[224,151,0,157,7,"VAE"],[225,151,0,8,1,"VAE"],[226,151,0,72,1,"VAE"],[227,151,0,157,7,"VAE"],[250,151,0,62,1,"VAE"],[251,151,0,157,7,"VAE"],[252,151,0,8,1,"VAE"],[253,151,0,72,1,"
VAE"],[254,151,0,62,1,"VAE"],[255,151,0,157,7,"VAE"],[256,151,0,8,1,"VAE"],[257,151,0,72,1,"VAE"],[258,151,0,160,7,"VAE"],[271,151,0,62,1,"VAE"],[272,151,0,157,7,"VAE"],[273,151,0,8,1,"VAE"],[274,151,0,72,1,"VAE"],[275,151,0,160,7,"VAE"],[276,151,0,154,7,"VAE"],[277,151,0,62,1,"VAE"],[278,151,0,157,7,"VAE"],[279,151,0,8,1,"VAE"],[280,151,0,72,1,"VAE"],[281,151,0,160,7,"VAE"],[282,151,0,154,7,"VAE"],[294,151,0,157,7,"VAE"],[295,151,0,72,1,"VAE"],[296,151,0,160,7,"VAE"],[297,151,0,154,7,"VAE"],[298,151,0,8,1,"VAE"],[299,151,0,313,1,"VAE"],[300,151,0,62,1,"VAE"],[301,151,0,157,7,"VAE"],[302,151,0,72,1,"VAE"],[303,151,0,160,7,"VAE"],[304,151,0,8,1,"VAE"],[305,151,0,313,1,"VAE"],[306,151,0,62,1,"VAE"],[307,151,0,154,7,"VAE"],[309,151,0,157,7,"VAE"],[310,151,0,72,1,"VAE"],[311,151,0,160,7,"VAE"],[312,151,0,8,1,"VAE"],[313,151,0,313,1,"VAE"],[314,151,0,62,1,"VAE"],[315,151,0,154,7,"VAE"],[316,151,0,157,7,"VAE"],[317,151,0,72,1,"VAE"],[318,151,0,160,7,"VAE"],[319,151,0,8,1,"VAE"],[320,151,0,313,1,"VAE"],[321,151,0,62,1,"VAE"],[322,151,0,154,7,"VAE"],[327,151,0,157,7,"VAE"],[328,151,0,72,1,"VAE"],[329,151,0,8,1,"VAE"],[330,151,0,313,1,"VAE"],[331,151,0,62,1,"VAE"],[332,151,0,154,7,"VAE"],[333,151,0,160,7,"VAE"],[343,151,0,157,7,"VAE"],[344,151,0,72,1,"VAE"],[345,151,0,8,1,"VAE"],[346,151,0,313,1,"VAE"],[347,151,0,62,1,"VAE"],[348,151,0,160,7,"VAE"],[349,151,0,154,7,"VAE"],[351,151,0,157,7,"VAE"],[352,151,0,72,1,"VAE"],[353,151,0,8,1,"VAE"],[354,151,0,313,1,"VAE"],[355,151,0,62,1,"VAE"],[356,151,0,160,7,"VAE"],[357,151,0,154,7,"VAE"],[363,151,0,157,7,"VAE"],[364,151,0,72,1,"VAE"],[365,151,0,8,1,"VAE"],[366,151,0,160,7,"VAE"],[367,151,0,154,7,"VAE"],[368,151,0,62,1,"VAE"],[370,151,0,157,7,"VAE"],[371,151,0,72,1,"VAE"],[372,151,0,8,1,"VAE"],[373,151,0,160,7,"VAE"],[374,151,0,154,7,"VAE"],[375,151,0,62,1,"VAE"],[377,151,0,157,7,"VAE"],[378,151,0,72,1,"VAE"],[379,151,0,8,1,"VAE"],[380,151,0,160,7,"VAE"],[381,151,0,154,7,"VAE"],[382,151,0,62,1,"VAE"],[383,151,0,157,7,"VAE"],[384,1
51,0,72,1,"VAE"],[385,151,0,8,1,"VAE"],[386,151,0,160,7,"VAE"],[387,151,0,154,7,"VAE"],[388,151,0,62,1,"VAE"],[391,151,0,157,7,"VAE"],[392,151,0,72,1,"VAE"],[393,151,0,8,1,"VAE"],[394,151,0,160,7,"VAE"],[395,151,0,154,7,"VAE"],[396,151,0,62,1,"VAE"],[402,151,0,157,7,"VAE"],[403,151,0,72,1,"VAE"],[404,151,0,8,1,"VAE"],[405,151,0,160,7,"VAE"],[406,151,0,154,7,"VAE"],[407,151,0,62,1,"VAE"],[408,151,0,157,7,"VAE"],[409,151,0,72,1,"VAE"],[410,151,0,8,1,"VAE"],[411,151,0,160,7,"VAE"],[412,151,0,154,7,"VAE"],[413,151,0,62,1,"VAE"],[421,151,0,157,7,"VAE"],[422,151,0,72,1,"VAE"],[423,151,0,8,1,"VAE"],[424,151,0,160,7,"VAE"],[425,151,0,154,7,"VAE"],[426,151,0,62,1,"VAE"],[427,151,0,157,7,"VAE"],[428,151,0,72,1,"VAE"],[429,151,0,8,1,"VAE"],[430,151,0,160,7,"VAE"],[431,151,0,154,7,"VAE"],[432,151,0,62,1,"VAE"],[1004,349,3,361,0,"CLIP_VISION"],[1006,361,0,362,2,"CLIP_VISION_OUTPUT"],[1007,349,1,364,0,"CLIP"],[1008,364,0,362,0,"CONDITIONING"],[1009,349,4,362,1,"STYLE_MODEL"],[1022,349,0,368,0,"MODEL"],[1024,355,0,370,0,"MODEL"],[1029,349,2,371,1,"VAE"],[1030,371,0,372,0,"IMAGE"],[1031,370,0,373,4,"LATENT"],[1038,349,2,375,4,"VAE"],[1044,378,0,373,6,"OPTIONS"],[1047,368,0,355,0,"MODEL"],[1051,381,0,380,6,"OPTIONS"],[1053,373,0,380,4,"LATENT"],[1055,364,0,382,1,"CONDITIONING"],[1056,359,0,382,2,"CONTROL_NET"],[1058,349,2,382,4,"VAE"],[1068,371,0,387,0,"IMAGE"],[1088,374,0,371,0,"LATENT"],[1097,380,0,374,4,"LATENT"],[1099,369,0,373,5,"GUIDES"],[1101,375,1,369,0,"LATENT"],[1102,375,0,370,3,"LATENT"],[1107,398,0,361,1,"IMAGE"],[1108,362,0,382,0,"CONDITIONING"],[1109,362,0,374,1,"CONDITIONING"],[1111,375,0,355,1,"LATENT"],[1112,398,0,382,3,"IMAGE"],[1113,398,0,375,0,"IMAGE"],[1115,398,0,387,1,"IMAGE"],[1117,362,0,370,1,"CONDITIONING"],[1118,382,0,373,1,"CONDITIONING"],[1122,401,0,398,0,"*"],[1123,398,0,404,0,"IMAGE"],[1124,404,0,375,1,"IMAGE"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.3109994191500252,"offset":[1512.0539235106066,-356.0468640337415]},"ue_links":[{"downstream":1
57,"downstream_slot":7,"upstream":"151","upstream_slot":0,"controller":64,"type":"VAE"},{"downstream":154,"downstream_slot":7,"upstream":"151","upstream_slot":0,"controller":64,"type":"VAE"},{"downstream":72,"downstream_slot":1,"upstream":"151","upstream_slot":0,"controller":64,"type":"VAE"},{"downstream":62,"downstream_slot":1,"upstream":"151","upstream_slot":0,"controller":64,"type":"VAE"}],"VHS_latentpreview":false,"VHS_latentpreviewrate":0,"VHS_MetadataImage":true,"VHS_KeepIntermediate":true,"links_added_by_ue":[959,960,961,962],"frontendVersion":"1.18.6"},"version":0.4}
================================================
FILE: example_workflows/flux upscale thumbnail widescreen.json
================================================
{"last_node_id":411,"last_link_id":1130,"nodes":[{"id":369,"type":"ClownGuide_Style_Beta","pos":[1138.06640625,1574.328857421875],"size":[231.30213928222656,286],"flags":{},"order":18,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":1101},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[1099],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuide_Style_Beta"},"widgets_values":["positive","WCT",1,1,"constant",0,-1,false]},{"id":374,"type":"ClownsharkChainsampler_Beta","pos":[2403.98583984375,1081.333740234375],"size":[274.9878234863281,528.6721801757812],"flags":{},"order":22,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":1109},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1097},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[1088],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"multistep/res_3m",-1,1,"resample",true]},{"id":372,"type":"SaveImage","pos":[2740,1080],"size":[442.38494873046875,530.0809936523438],"flags":{},"order":24,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":1030}],"outputs":[],"properties":{},"widgets_values":["ComfyUI"]},{"id":355,"type":"ModelSamplingAdvancedResolution","pos":[1134.0809326171875,1057.9874267578125],"size":[260.3999938964844,126],"flags":{},"order":17,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1047},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":1111}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1024],"slot_index":0}],"properties":{"Node name for S&R":"ModelSamplingAdvancedResolution"},"widgets_values":["exponential",1.35,0.85]},{"id":368,"type":"ReFluxPatcher","pos":[897.4150390625,1095.9840087890625],"size":[210,82],"flags":{},"order":9,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1022}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1047],"slot_index":0}],"properties":{"Node name for S&R":"ReFluxPatcher"},"widgets_values":["float32",true]},{"id":349,"type":"FluxLoader","pos":[554.6767578125,1099.277099609375],"size":[315,282],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1022],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[1007],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[1029,1038,1058],"slot_index":2},{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","links":[1004],"slot_index":3},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","links":[1009]}],"properties":{"Node name for 
S&R":"FluxLoader"},"widgets_values":["colossusProjectFlux_v42AIO.safetensors","default",".use_ckpt_clip",".none",".use_ckpt_vae","sigclip_vision_patch14_384.safetensors","flux1-redux-dev.safetensors"]},{"id":387,"type":"Image Comparer (rgthree)","pos":[3228.67529296875,1082.0006103515625],"size":[502.8477478027344,526.1139526367188],"flags":{},"order":25,"mode":0,"inputs":[{"name":"image_a","type":"IMAGE","dir":3,"link":1068},{"name":"image_b","type":"IMAGE","dir":3,"link":1115}],"outputs":[],"properties":{"comparer_mode":"Slide"},"widgets_values":[[{"name":"A","selected":true,"url":"/api/view?filename=rgthree.compare._temp_klodp_00033_.png&type=temp&subfolder=&rand=0.5892199958912905"},{"name":"B","selected":true,"url":"/api/view?filename=rgthree.compare._temp_klodp_00034_.png&type=temp&subfolder=&rand=0.10900460801823297"}]]},{"id":373,"type":"ClownsharkChainsampler_Beta","pos":[1740,1080],"size":[272.9876403808594,526.665771484375],"flags":{},"order":20,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":1118},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1031},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":1099},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":1044},{"name":"options 2","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[1053],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"multistep/res_3m",1,1,"resample",true]},{"id":382,"type":"ControlNetApplyAdvanced","pos":[1440,830],"size":[210,186],"flags":{},"order":16,"mode":0,"inputs":[{"name":"positive","localized_name":"positive","type":"CONDITIONING","link":1108},{"name":"negative","localized_name":"negative","type":"CONDITIONING","link":1055},{"name":"control_net","localized_name":"control_net","type":"CONTROL_NET","link":1056},{"name":"image","localized_name":"image","type":"IMAGE","link":1112},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":1058}],"outputs":[{"name":"positive","localized_name":"positive","type":"CONDITIONING","links":[1118],"slot_index":0},{"name":"negative","localized_name":"negative","type":"CONDITIONING","links":null}],"properties":{"Node name for S&R":"ControlNetApplyAdvanced"},"widgets_values":[1,0,1]},{"id":380,"type":"ClownsharkChainsampler_Beta","pos":[2078.66015625,1080.6669921875],"size":[263.6514892578125,527.99951171875],"flags":{},"order":21,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1053},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":1051},{"name":"options 2","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[1097],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name 
for S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"multistep/res_3m",1,1,"resample",true]},{"id":371,"type":"VAEDecode","pos":[2741.197265625,974.4011840820312],"size":[140,46],"flags":{},"order":23,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":1088},{"name":"vae","localized_name":"vae","type":"VAE","link":1029}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[1030,1068],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode","cnr_id":"comfy-core","ver":"0.3.26","widget_ue_connectable":{}},"widgets_values":[]},{"id":378,"type":"ClownOptions_Cycles_Beta","pos":[1768.675537109375,881.3336791992188],"size":[210,130],"flags":{},"order":1,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[1044]}],"properties":{"Node name for S&R":"ClownOptions_Cycles_Beta"},"widgets_values":[5,1,0.5,1]},{"id":381,"type":"ClownOptions_Cycles_Beta","pos":[2103.203857421875,881.467041015625],"size":[210,130],"flags":{},"order":2,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[1051]}],"properties":{"Node name for S&R":"ClownOptions_Cycles_Beta"},"widgets_values":[20,1,0.5,1]},{"id":403,"type":"Note","pos":[2098.053466796875,680.7237548828125],"size":[215.7804412841797,88],"flags":{},"order":3,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Raise cycles here if you see halos. It doesn't hurt to go as high as 20. Minimum of 5 recommended."],"color":"#432","bgcolor":"#653"},{"id":402,"type":"Note","pos":[1755.3779296875,678.1484985351562],"size":[241.524658203125,132.7487030029297],"flags":{},"order":4,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Lower cycles here if you see halos. 
Minimum of 1 or 2 recommended.\n\nThese step(s)/cycle(s) (that use the ClownGuide Style node) are needed to prevent blurring when upscaling tiny thumbnail images."],"color":"#432","bgcolor":"#653"},{"id":359,"type":"ControlNetLoader","pos":[597.9067993164062,977.3353881835938],"size":[270.0880432128906,58],"flags":{},"order":5,"mode":0,"inputs":[],"outputs":[{"name":"CONTROL_NET","localized_name":"CONTROL_NET","type":"CONTROL_NET","links":[1056],"slot_index":0}],"properties":{"Node name for S&R":"ControlNetLoader"},"widgets_values":["flux_tile.safetensors"]},{"id":362,"type":"StyleModelApply","pos":[1141.4669189453125,829.1477661132812],"size":[270.06890869140625,122],"flags":{},"order":14,"mode":0,"inputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","link":1008},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","link":1009},{"name":"clip_vision_output","localized_name":"clip_vision_output","type":"CLIP_VISION_OUTPUT","link":1006}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[1108,1109,1117],"slot_index":0}],"properties":{"Node name for S&R":"StyleModelApply"},"widgets_values":[1,"multiply"]},{"id":361,"type":"CLIPVisionEncode","pos":[862.2003784179688,825.134765625],"size":[253.60000610351562,78],"flags":{},"order":12,"mode":0,"inputs":[{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","link":1004},{"name":"image","localized_name":"image","type":"IMAGE","link":1107}],"outputs":[{"name":"CLIP_VISION_OUTPUT","localized_name":"CLIP_VISION_OUTPUT","type":"CLIP_VISION_OUTPUT","links":[1006],"slot_index":0}],"properties":{"Node name for 
S&R":"CLIPVisionEncode"},"widgets_values":["center"]},{"id":364,"type":"CLIPTextEncode","pos":[899.5093383789062,952.8309936523438],"size":[210,88],"flags":{},"order":10,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":1007}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[1008,1055],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":[""]},{"id":408,"type":"Note","pos":[549.5983276367188,826.2056884765625],"size":[294.1452331542969,99.538818359375],"flags":{},"order":6,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Jasper's tile controlnet was used.\n\nhttps://huggingface.co/jasperai/Flux.1-dev-Controlnet-Upscaler/blob/main/diffusion_pytorch_model.safetensors"],"color":"#432","bgcolor":"#653"},{"id":370,"type":"ClownsharKSampler_Beta","pos":[1417.3414306640625,1078.0023193359375],"size":[277.65570068359375,627.99951171875],"flags":{},"order":19,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":1024},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":1117},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1102},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[1031],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"multistep/res_3m","beta57",30,14,1,1,0,"fixed","unsample",true]},{"id":404,"type":"Image Repeat Tile To Size","pos":[899.620361328125,1259.9044189453125],"size":[210,106],"flags":{},"order":13,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","link":1123}],"outputs":[{"name":"image","localized_name":"image","type":"IMAGE","links":[1124],"slot_index":0}],"properties":{"Node name for S&R":"Image Repeat Tile To Size"},"widgets_values":[1792,1024,true]},{"id":375,"type":"VAEEncodeAdvanced","pos":[1140,1240],"size":[228.90342712402344,278],"flags":{},"order":15,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":1113},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":1124},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":1038}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[1102,1111],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":[1101],"slot_index":1},{"name":"mask","localized_name":"mask","type":"MASK","links":null},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":null,"slot_index":3},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for 
S&R":"VAEEncodeAdvanced"},"widgets_values":["image_2",1,1,"red",false,"16_channels"]},{"id":398,"type":"Reroute","pos":[1034.0006103515625,1404.638671875],"size":[75,26],"flags":{},"order":11,"mode":0,"inputs":[{"name":"","type":"*","link":1130}],"outputs":[{"name":"","type":"IMAGE","links":[1107,1112,1113,1115,1123],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":411,"type":"LoadImage","pos":[791.842041015625,1491.6041259765625],"size":[315,314],"flags":{},"order":7,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[1130],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["pasted/image (595).png","image"]},{"id":407,"type":"Note","pos":[552.9491577148438,1493.21923828125],"size":[210.6668243408203,166.69004821777344],"flags":{},"order":8,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Input image was 672x384.\n\nAny size can be used. 
Just be sure to keep the aspect ratio the same, per usual.\n\nBest results will be with minimum size = 384 (height and/or width), due to that being what SigCLIP was trained on (which is what Redux uses)."],"color":"#432","bgcolor":"#653"}],"links":[[141,151,0,8,1,"VAE"],[142,151,0,72,1,"VAE"],[143,151,0,35,1,"VAE"],[144,151,0,154,7,"VAE"],[159,151,0,72,1,"VAE"],[160,151,0,157,7,"VAE"],[161,151,0,8,1,"VAE"],[162,151,0,154,7,"VAE"],[163,151,0,72,1,"VAE"],[164,151,0,8,1,"VAE"],[165,151,0,154,7,"VAE"],[171,151,0,8,1,"VAE"],[172,151,0,72,1,"VAE"],[173,151,0,154,7,"VAE"],[174,151,0,157,7,"VAE"],[176,151,0,8,1,"VAE"],[177,151,0,72,1,"VAE"],[178,151,0,154,7,"VAE"],[179,151,0,157,7,"VAE"],[195,151,0,8,1,"VAE"],[196,151,0,72,1,"VAE"],[197,151,0,154,7,"VAE"],[198,151,0,157,7,"VAE"],[199,151,0,160,7,"VAE"],[200,151,0,8,1,"VAE"],[201,151,0,72,1,"VAE"],[202,151,0,154,7,"VAE"],[203,151,0,157,7,"VAE"],[204,151,0,160,7,"VAE"],[217,151,0,8,1,"VAE"],[218,151,0,72,1,"VAE"],[219,151,0,154,7,"VAE"],[220,151,0,157,7,"VAE"],[221,151,0,160,7,"VAE"],[222,151,0,8,1,"VAE"],[223,151,0,72,1,"VAE"],[224,151,0,157,7,"VAE"],[225,151,0,8,1,"VAE"],[226,151,0,72,1,"VAE"],[227,151,0,157,7,"VAE"],[250,151,0,62,1,"VAE"],[251,151,0,157,7,"VAE"],[252,151,0,8,1,"VAE"],[253,151,0,72,1,"VAE"],[254,151,0,62,1,"VAE"],[255,151,0,157,7,"VAE"],[256,151,0,8,1,"VAE"],[257,151,0,72,1,"VAE"],[258,151,0,160,7,"VAE"],[271,151,0,62,1,"VAE"],[272,151,0,157,7,"VAE"],[273,151,0,8,1,"VAE"],[274,151,0,72,1,"VAE"],[275,151,0,160,7,"VAE"],[276,151,0,154,7,"VAE"],[277,151,0,62,1,"VAE"],[278,151,0,157,7,"VAE"],[279,151,0,8,1,"VAE"],[280,151,0,72,1,"VAE"],[281,151,0,160,7,"VAE"],[282,151,0,154,7,"VAE"],[294,151,0,157,7,"VAE"],[295,151,0,72,1,"VAE"],[296,151,0,160,7,"VAE"],[297,151,0,154,7,"VAE"],[298,151,0,8,1,"VAE"],[299,151,0,313,1,"VAE"],[300,151,0,62,1,"VAE"],[301,151,0,157,7,"VAE"],[302,151,0,72,1,"VAE"],[303,151,0,160,7,"VAE"],[304,151,0,8,1,"VAE"],[305,151,0,313,1,"VAE"],[306,151,0,62,1,"VAE"],[307,151,0,154,7,"VAE"],[309,1
51,0,157,7,"VAE"],[310,151,0,72,1,"VAE"],[311,151,0,160,7,"VAE"],[312,151,0,8,1,"VAE"],[313,151,0,313,1,"VAE"],[314,151,0,62,1,"VAE"],[315,151,0,154,7,"VAE"],[316,151,0,157,7,"VAE"],[317,151,0,72,1,"VAE"],[318,151,0,160,7,"VAE"],[319,151,0,8,1,"VAE"],[320,151,0,313,1,"VAE"],[321,151,0,62,1,"VAE"],[322,151,0,154,7,"VAE"],[327,151,0,157,7,"VAE"],[328,151,0,72,1,"VAE"],[329,151,0,8,1,"VAE"],[330,151,0,313,1,"VAE"],[331,151,0,62,1,"VAE"],[332,151,0,154,7,"VAE"],[333,151,0,160,7,"VAE"],[343,151,0,157,7,"VAE"],[344,151,0,72,1,"VAE"],[345,151,0,8,1,"VAE"],[346,151,0,313,1,"VAE"],[347,151,0,62,1,"VAE"],[348,151,0,160,7,"VAE"],[349,151,0,154,7,"VAE"],[351,151,0,157,7,"VAE"],[352,151,0,72,1,"VAE"],[353,151,0,8,1,"VAE"],[354,151,0,313,1,"VAE"],[355,151,0,62,1,"VAE"],[356,151,0,160,7,"VAE"],[357,151,0,154,7,"VAE"],[363,151,0,157,7,"VAE"],[364,151,0,72,1,"VAE"],[365,151,0,8,1,"VAE"],[366,151,0,160,7,"VAE"],[367,151,0,154,7,"VAE"],[368,151,0,62,1,"VAE"],[370,151,0,157,7,"VAE"],[371,151,0,72,1,"VAE"],[372,151,0,8,1,"VAE"],[373,151,0,160,7,"VAE"],[374,151,0,154,7,"VAE"],[375,151,0,62,1,"VAE"],[377,151,0,157,7,"VAE"],[378,151,0,72,1,"VAE"],[379,151,0,8,1,"VAE"],[380,151,0,160,7,"VAE"],[381,151,0,154,7,"VAE"],[382,151,0,62,1,"VAE"],[383,151,0,157,7,"VAE"],[384,151,0,72,1,"VAE"],[385,151,0,8,1,"VAE"],[386,151,0,160,7,"VAE"],[387,151,0,154,7,"VAE"],[388,151,0,62,1,"VAE"],[391,151,0,157,7,"VAE"],[392,151,0,72,1,"VAE"],[393,151,0,8,1,"VAE"],[394,151,0,160,7,"VAE"],[395,151,0,154,7,"VAE"],[396,151,0,62,1,"VAE"],[402,151,0,157,7,"VAE"],[403,151,0,72,1,"VAE"],[404,151,0,8,1,"VAE"],[405,151,0,160,7,"VAE"],[406,151,0,154,7,"VAE"],[407,151,0,62,1,"VAE"],[408,151,0,157,7,"VAE"],[409,151,0,72,1,"VAE"],[410,151,0,8,1,"VAE"],[411,151,0,160,7,"VAE"],[412,151,0,154,7,"VAE"],[413,151,0,62,1,"VAE"],[421,151,0,157,7,"VAE"],[422,151,0,72,1,"VAE"],[423,151,0,8,1,"VAE"],[424,151,0,160,7,"VAE"],[425,151,0,154,7,"VAE"],[426,151,0,62,1,"VAE"],[427,151,0,157,7,"VAE"],[428,151,0,72,1,"VAE"],[429,151,0,8,1,"VAE
"],[430,151,0,160,7,"VAE"],[431,151,0,154,7,"VAE"],[432,151,0,62,1,"VAE"],[1004,349,3,361,0,"CLIP_VISION"],[1006,361,0,362,2,"CLIP_VISION_OUTPUT"],[1007,349,1,364,0,"CLIP"],[1008,364,0,362,0,"CONDITIONING"],[1009,349,4,362,1,"STYLE_MODEL"],[1022,349,0,368,0,"MODEL"],[1024,355,0,370,0,"MODEL"],[1029,349,2,371,1,"VAE"],[1030,371,0,372,0,"IMAGE"],[1031,370,0,373,4,"LATENT"],[1038,349,2,375,4,"VAE"],[1044,378,0,373,6,"OPTIONS"],[1047,368,0,355,0,"MODEL"],[1051,381,0,380,6,"OPTIONS"],[1053,373,0,380,4,"LATENT"],[1055,364,0,382,1,"CONDITIONING"],[1056,359,0,382,2,"CONTROL_NET"],[1058,349,2,382,4,"VAE"],[1068,371,0,387,0,"IMAGE"],[1088,374,0,371,0,"LATENT"],[1097,380,0,374,4,"LATENT"],[1099,369,0,373,5,"GUIDES"],[1101,375,1,369,0,"LATENT"],[1102,375,0,370,3,"LATENT"],[1107,398,0,361,1,"IMAGE"],[1108,362,0,382,0,"CONDITIONING"],[1109,362,0,374,1,"CONDITIONING"],[1111,375,0,355,1,"LATENT"],[1112,398,0,382,3,"IMAGE"],[1113,398,0,375,0,"IMAGE"],[1115,398,0,387,1,"IMAGE"],[1117,362,0,370,1,"CONDITIONING"],[1118,382,0,373,1,"CONDITIONING"],[1123,398,0,404,0,"IMAGE"],[1124,404,0,375,1,"IMAGE"],[1130,411,0,398,0,"*"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.7449402268886842,"offset":[634.5784677482833,-682.7929436822943]},"ue_links":[{"downstream":157,"downstream_slot":7,"upstream":"151","upstream_slot":0,"controller":64,"type":"VAE"},{"downstream":154,"downstream_slot":7,"upstream":"151","upstream_slot":0,"controller":64,"type":"VAE"},{"downstream":72,"downstream_slot":1,"upstream":"151","upstream_slot":0,"controller":64,"type":"VAE"},{"downstream":62,"downstream_slot":1,"upstream":"151","upstream_slot":0,"controller":64,"type":"VAE"}],"VHS_latentpreview":false,"VHS_latentpreviewrate":0,"VHS_MetadataImage":true,"VHS_KeepIntermediate":true,"links_added_by_ue":[959,960,961,962],"frontendVersion":"1.18.6"},"version":0.4}
================================================
FILE: example_workflows/hidream guide data projection.json
================================================
{"last_node_id":641,"last_link_id":2035,"nodes":[{"id":628,"type":"LoadImage","pos":[599.166015625,156.38429260253906],"size":[315,314],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2017]},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["ComfyUI_14254_.png","image"]},{"id":632,"type":"ModelSamplingAdvancedResolution","pos":[962.5586547851562,-316.3705139160156],"size":[277.62237548828125,126],"flags":{},"order":6,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":2025},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":2015}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2016],"slot_index":0}],"properties":{"Node name for S&R":"ModelSamplingAdvancedResolution"},"widgets_values":["exponential",1.35,0.85]},{"id":636,"type":"ClownModelLoader","pos":[599.3463745117188,-176.31788635253906],"size":[315,266],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2025],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[2024,2028],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[2026,2027],"slot_index":2}],"properties":{"Node name for 
S&R":"ClownModelLoader"},"widgets_values":["hidream_i1_full_fp8.safetensors","fp8_e4m3fn","clip_l_hidream.safetensors","clip_g_hidream.safetensors","t5xxl_fp8_e4m3fn_scaled.safetensors","llama_3.1_8b_instruct_fp8_scaled.safetensors","hidream","ae.sft"]},{"id":591,"type":"VAEDecode","pos":[1610,-230],"size":[210,46],"flags":{"collapsed":false},"order":8,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","label":"samples","type":"LATENT","link":2030},{"name":"vae","localized_name":"vae","label":"vae","type":"VAE","link":2027}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","label":"IMAGE","type":"IMAGE","shape":3,"links":[2019],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode"},"widgets_values":[]},{"id":633,"type":"SaveImage","pos":[1610,-120],"size":[436.4179382324219,508.5302429199219],"flags":{},"order":9,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2019}],"outputs":[],"properties":{},"widgets_values":["ComfyUI"]},{"id":629,"type":"VAEEncodeAdvanced","pos":[961.6965942382812,242.70477294921875],"size":[278.0284423828125,280.5834045410156],"flags":{},"order":4,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":2017},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":2026}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[2013,2020],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":null},{"name":"mask","localized_name":"mask","type":"MASK","links":null},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[2015]},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localiz
ed_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"VAEEncodeAdvanced"},"widgets_values":["false",1024,1024,"red",false,"16_channels"]},{"id":630,"type":"ClownsharKSampler_Beta","pos":[1271.7001953125,-124.3408432006836],"size":[291.7499084472656,650],"flags":{},"order":7,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":2016},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":2018},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":2029},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":2013},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":2021},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2030],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"multistep/res_3m","beta57",30,-1,1,4,0,"fixed","standard",true]},{"id":637,"type":"CLIPTextEncode","pos":[962.297607421875,99.93917846679688],"size":[278.4529113769531,88],"flags":{"collapsed":false},"order":3,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","label":"clip","type":"CLIP","link":2028}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","label":"CONDITIONING","type":"CONDITIONING","shape":3,"links":[2029],"slot_index":0}],"title":"Positive Prompt","properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["low quality, low detail, blurry, shallow depth of field, mutated, symmetrical, 
generic"]},{"id":107,"type":"CLIPTextEncode","pos":[959.4713745117188,-123.3353500366211],"size":[282.33453369140625,173.58438110351562],"flags":{"collapsed":false},"order":2,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","label":"clip","type":"CLIP","link":2024}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","label":"CONDITIONING","type":"CONDITIONING","shape":3,"links":[2018],"slot_index":0}],"title":"Positive Prompt","properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["the mournful lamentations of of a female rock singer on stage with chaos behind her, her face screaming her sorrowful refrains the despairing cries of anguished screams howling agonized moans, her pained whispers mournful sighs distant echoes across the smoky stage, fading memories of lost loves, forgotten dreams, shattered hopes, crushed spirits, broken hearts"]},{"id":634,"type":"ClownGuide_Beta","pos":[1276.0064697265625,-480.84442138671875],"size":[284.860595703125,290.8609924316406],"flags":{},"order":5,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":2020},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[2021],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownGuide_Beta"},"widgets_values":["data",false,true,1,1,"beta57",0,2,false]}],"links":[[2013,629,0,630,3,"LATENT"],[2015,629,3,632,1,"LATENT"],[2016,632,0,630,0,"MODEL"],[2017,628,0,629,0,"IMAGE"],[2018,107,0,630,1,"CONDITIONING"],[2019,591,0,633,0,"IMAGE"],[2020,629,0,634,0,"LATENT"],[2021,634,0,630,5,"GUIDES"],[2024,636,1,107,0,"CLIP"],[2025,636,0,632,0,"MODEL"],[2026,636,2,629,4,"VAE"],[2027,636,2,591,1,"VAE"],[2028,636,1,637,0,"CLIP"],[2029,637,0,630,2,"CONDITIONING"],[2030,630,0,591,0,"LATENT"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.7985878990923265,"offset":[1686.8845871920696,637.6012821508443]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0},"version":0.4}
================================================
FILE: example_workflows/hidream guide epsilon projection.json
================================================
{"last_node_id":641,"last_link_id":2035,"nodes":[{"id":628,"type":"LoadImage","pos":[599.166015625,156.38429260253906],"size":[315,314],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2017]},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["ComfyUI_14254_.png","image"]},{"id":632,"type":"ModelSamplingAdvancedResolution","pos":[962.5586547851562,-316.3705139160156],"size":[277.62237548828125,126],"flags":{},"order":6,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":2025},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":2015}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2016],"slot_index":0}],"properties":{"Node name for S&R":"ModelSamplingAdvancedResolution"},"widgets_values":["exponential",1.35,0.85]},{"id":636,"type":"ClownModelLoader","pos":[599.3463745117188,-176.31788635253906],"size":[315,266],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2025],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[2024,2028],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[2026,2027],"slot_index":2}],"properties":{"Node name for 
S&R":"ClownModelLoader"},"widgets_values":["hidream_i1_full_fp8.safetensors","fp8_e4m3fn","clip_l_hidream.safetensors","clip_g_hidream.safetensors","t5xxl_fp8_e4m3fn_scaled.safetensors","llama_3.1_8b_instruct_fp8_scaled.safetensors","hidream","ae.sft"]},{"id":591,"type":"VAEDecode","pos":[1610,-230],"size":[210,46],"flags":{"collapsed":false},"order":8,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","label":"samples","type":"LATENT","link":2030},{"name":"vae","localized_name":"vae","label":"vae","type":"VAE","link":2027}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","label":"IMAGE","type":"IMAGE","shape":3,"links":[2019],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode"},"widgets_values":[]},{"id":633,"type":"SaveImage","pos":[1610,-120],"size":[436.4179382324219,508.5302429199219],"flags":{},"order":9,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2019}],"outputs":[],"properties":{},"widgets_values":["ComfyUI"]},{"id":629,"type":"VAEEncodeAdvanced","pos":[961.6965942382812,242.70477294921875],"size":[278.0284423828125,280.5834045410156],"flags":{},"order":4,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":2017},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":2026}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[2013,2020],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":null},{"name":"mask","localized_name":"mask","type":"MASK","links":null},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[2015]},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localiz
ed_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"VAEEncodeAdvanced"},"widgets_values":["false",1024,1024,"red",false,"16_channels"]},{"id":630,"type":"ClownsharKSampler_Beta","pos":[1271.7001953125,-124.3408432006836],"size":[291.7499084472656,650],"flags":{},"order":7,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":2016},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":2018},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":2029},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":2013},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":2021},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2030],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"multistep/res_3m","beta57",30,-1,1,4,0,"fixed","standard",true]},{"id":637,"type":"CLIPTextEncode","pos":[962.297607421875,99.93917846679688],"size":[278.4529113769531,88],"flags":{"collapsed":false},"order":3,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","label":"clip","type":"CLIP","link":2028}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","label":"CONDITIONING","type":"CONDITIONING","shape":3,"links":[2029],"slot_index":0}],"title":"Positive Prompt","properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["low quality, low detail, blurry, shallow depth of field, mutated, symmetrical, 
generic"]},{"id":107,"type":"CLIPTextEncode","pos":[959.4713745117188,-123.3353500366211],"size":[282.33453369140625,173.58438110351562],"flags":{"collapsed":false},"order":2,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","label":"clip","type":"CLIP","link":2024}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","label":"CONDITIONING","type":"CONDITIONING","shape":3,"links":[2018],"slot_index":0}],"title":"Positive Prompt","properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["the mournful lamentations of of a female rock singer on stage with chaos behind her, her face screaming her sorrowful refrains the despairing cries of anguished screams howling agonized moans, her pained whispers mournful sighs distant echoes across the smoky stage, fading memories of lost loves, forgotten dreams, shattered hopes, crushed spirits, broken hearts"]},{"id":634,"type":"ClownGuide_Beta","pos":[1276.0064697265625,-480.84442138671875],"size":[284.860595703125,290.8609924316406],"flags":{},"order":5,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":2020},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[2021],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownGuide_Beta"},"widgets_values":["epsilon",false,true,1,1,"beta57",0,6,false]}],"links":[[2013,629,0,630,3,"LATENT"],[2015,629,3,632,1,"LATENT"],[2016,632,0,630,0,"MODEL"],[2017,628,0,629,0,"IMAGE"],[2018,107,0,630,1,"CONDITIONING"],[2019,591,0,633,0,"IMAGE"],[2020,629,0,634,0,"LATENT"],[2021,634,0,630,5,"GUIDES"],[2024,636,1,107,0,"CLIP"],[2025,636,0,632,0,"MODEL"],[2026,636,2,629,4,"VAE"],[2027,636,2,591,1,"VAE"],[2028,636,1,637,0,"CLIP"],[2029,637,0,630,2,"CONDITIONING"],[2030,630,0,591,0,"LATENT"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.7985878990923265,"offset":[1138.2513303928165,621.4269926638877]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0},"version":0.4}
================================================
FILE: example_workflows/hidream guide flow.json
================================================
{"last_node_id":640,"last_link_id":2035,"nodes":[{"id":628,"type":"LoadImage","pos":[599.166015625,156.38429260253906],"size":[315,314],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2017]},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["ComfyUI_14254_.png","image"]},{"id":632,"type":"ModelSamplingAdvancedResolution","pos":[962.5586547851562,-316.3705139160156],"size":[277.62237548828125,126],"flags":{},"order":9,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":2025},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":2015}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2016],"slot_index":0}],"properties":{"Node name for S&R":"ModelSamplingAdvancedResolution"},"widgets_values":["exponential",1.35,0.85]},{"id":636,"type":"ClownModelLoader","pos":[599.3463745117188,-176.31788635253906],"size":[315,266],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2025],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[2024,2028,2034],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[2026,2027],"slot_index":2}],"properties":{"Node name for 
S&R":"ClownModelLoader"},"widgets_values":["hidream_i1_full_fp8.safetensors","fp8_e4m3fn","clip_l_hidream.safetensors","clip_g_hidream.safetensors","t5xxl_fp8_e4m3fn_scaled.safetensors","llama_3.1_8b_instruct_fp8_scaled.safetensors","hidream","ae.sft"]},{"id":591,"type":"VAEDecode","pos":[1610,-230],"size":[210,46],"flags":{"collapsed":false},"order":11,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","label":"samples","type":"LATENT","link":2030},{"name":"vae","localized_name":"vae","label":"vae","type":"VAE","link":2027}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","label":"IMAGE","type":"IMAGE","shape":3,"links":[2019],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode"},"widgets_values":[]},{"id":633,"type":"SaveImage","pos":[1610,-120],"size":[436.4179382324219,508.5302429199219],"flags":{},"order":12,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2019}],"outputs":[],"properties":{},"widgets_values":["ComfyUI"]},{"id":629,"type":"VAEEncodeAdvanced","pos":[961.6965942382812,242.70477294921875],"size":[278.0284423828125,280.5834045410156],"flags":{},"order":6,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":2017},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":2026}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[2013,2020],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":null},{"name":"mask","localized_name":"mask","type":"MASK","links":null},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[2015]},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","local
ized_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"VAEEncodeAdvanced"},"widgets_values":["false",1024,1024,"red",false,"16_channels"]},{"id":634,"type":"ClownGuide_Beta","pos":[1276.0064697265625,-480.84442138671875],"size":[284.860595703125,290.8609924316406],"flags":{},"order":8,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":2020},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[2021],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuide_Beta"},"widgets_values":["flow",false,false,1,1,"beta57",0,10,false]},{"id":630,"type":"ClownsharKSampler_Beta","pos":[1271.7001953125,-124.3408432006836],"size":[291.7499084472656,650],"flags":{},"order":10,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":2016},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":2018},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":2029},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":2013},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":2021},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":2032},{"name":"options 2","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2030],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"multistep/res_3m","beta57",30,-1,1,4,0,"fixed","standard",true]},{"id":638,"type":"SharkOptions_GuideCond_Beta","pos":[955.9966430664062,585.7319946289062],"size":[284.5923156738281,98],"flags":{},"order":7,"mode":0,"inputs":[{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":2035},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":2033},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[2032],"slot_index":0}],"properties":{"Node name for S&R":"SharkOptions_GuideCond_Beta"},"widgets_values":[4]},{"id":637,"type":"CLIPTextEncode","pos":[962.297607421875,99.93917846679688],"size":[278.4529113769531,88],"flags":{"collapsed":false},"order":4,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","label":"clip","type":"CLIP","link":2028}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","label":"CONDITIONING","type":"CONDITIONING","shape":3,"links":[2029,2033],"slot_index":0}],"title":"Positive Prompt","properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["low quality, low detail, blurry, shallow depth of field, mutated, symmetrical, generic"]},{"id":107,"type":"CLIPTextEncode","pos":[959.4713745117188,-123.3353500366211],"size":[282.33453369140625,173.58438110351562],"flags":{"collapsed":false},"order":3,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","label":"clip","type":"CLIP","link":2024}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","label":"CONDITIONING","type":"CONDITIONING","shape":3,"links":[2018],"slot_index":0}],"title":"Positive Prompt","properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["the mournful lamentations of of a female rock singer on stage with chaos behind her, her face screaming her sorrowful refrains the despairing cries of 
anguished screams howling agonized moans, her pained whispers mournful sighs distant echoes across the smoky stage, fading memories of lost loves, forgotten dreams, shattered hopes, crushed spirits, broken hearts"]},{"id":639,"type":"CLIPTextEncode","pos":[599.5145263671875,565.6756591796875],"size":[315.33026123046875,117.94475555419922],"flags":{"collapsed":false},"order":5,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","label":"clip","type":"CLIP","link":2034}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","label":"CONDITIONING","type":"CONDITIONING","shape":3,"links":[2035],"slot_index":0}],"title":"Positive Prompt","properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["illustration of a singing clock with huge teeth in a surreal forest with torquiose mountains and a red and yellow sky, ragged trees and a pool of black oil on the ground, dripping paint oozing off the clock"]},{"id":640,"type":"Note","pos":[246.91494750976562,519.0934448242188],"size":[323.0928649902344,167.39759826660156],"flags":{},"order":2,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["With the \"flow\" mode it is usually beneficial to use the supplemental GuideCond node, which allows you to set conditionings for the guide itself. With \"flow\", the guide changes during the sampling process. 
Without GuideCond in use, it will default to reusing your main prompt, which may result in some loss of adherence to the guide image.\n\n\"Lure\" is the only other mode that will use GuideCond."],"color":"#432","bgcolor":"#653"}],"links":[[2013,629,0,630,3,"LATENT"],[2015,629,3,632,1,"LATENT"],[2016,632,0,630,0,"MODEL"],[2017,628,0,629,0,"IMAGE"],[2018,107,0,630,1,"CONDITIONING"],[2019,591,0,633,0,"IMAGE"],[2020,629,0,634,0,"LATENT"],[2021,634,0,630,5,"GUIDES"],[2024,636,1,107,0,"CLIP"],[2025,636,0,632,0,"MODEL"],[2026,636,2,629,4,"VAE"],[2027,636,2,591,1,"VAE"],[2028,636,1,637,0,"CLIP"],[2029,637,0,630,2,"CONDITIONING"],[2030,630,0,591,0,"LATENT"],[2032,638,0,630,6,"OPTIONS"],[2033,637,0,638,1,"CONDITIONING"],[2034,636,1,639,0,"CLIP"],[2035,639,0,638,0,"CONDITIONING"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.7985878990923265,"offset":[1119.4904101845082,499.1497204604395]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0},"version":0.4}
================================================
FILE: example_workflows/hidream guide fully_pseudoimplicit.json
================================================
{"last_node_id":643,"last_link_id":2036,"nodes":[{"id":628,"type":"LoadImage","pos":[599.166015625,156.38429260253906],"size":[315,314],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2017]},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["ComfyUI_14254_.png","image"]},{"id":632,"type":"ModelSamplingAdvancedResolution","pos":[962.5586547851562,-316.3705139160156],"size":[277.62237548828125,126],"flags":{},"order":7,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":2025},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":2015}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2016],"slot_index":0}],"properties":{"Node name for S&R":"ModelSamplingAdvancedResolution"},"widgets_values":["exponential",1.35,0.85]},{"id":636,"type":"ClownModelLoader","pos":[599.3463745117188,-176.31788635253906],"size":[315,266],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2025],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[2024,2028],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[2026,2027],"slot_index":2}],"properties":{"Node name for 
S&R":"ClownModelLoader"},"widgets_values":["hidream_i1_full_fp8.safetensors","fp8_e4m3fn","clip_l_hidream.safetensors","clip_g_hidream.safetensors","t5xxl_fp8_e4m3fn_scaled.safetensors","llama_3.1_8b_instruct_fp8_scaled.safetensors","hidream","ae.sft"]},{"id":591,"type":"VAEDecode","pos":[1610,-230],"size":[210,46],"flags":{"collapsed":false},"order":9,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","label":"samples","type":"LATENT","link":2030},{"name":"vae","localized_name":"vae","label":"vae","type":"VAE","link":2027}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","label":"IMAGE","type":"IMAGE","shape":3,"links":[2019],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode"},"widgets_values":[]},{"id":633,"type":"SaveImage","pos":[1610,-120],"size":[436.4179382324219,508.5302429199219],"flags":{},"order":10,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2019}],"outputs":[],"properties":{},"widgets_values":["ComfyUI"]},{"id":629,"type":"VAEEncodeAdvanced","pos":[961.6965942382812,242.70477294921875],"size":[278.0284423828125,280.5834045410156],"flags":{},"order":5,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":2017},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":2026}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[2013,2020],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":null},{"name":"mask","localized_name":"mask","type":"MASK","links":null},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[2015]},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","locali
zed_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"VAEEncodeAdvanced"},"widgets_values":["false",1024,1024,"red",false,"16_channels"]},{"id":637,"type":"CLIPTextEncode","pos":[962.297607421875,99.93917846679688],"size":[278.4529113769531,88],"flags":{"collapsed":false},"order":4,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","label":"clip","type":"CLIP","link":2028}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","label":"CONDITIONING","type":"CONDITIONING","shape":3,"links":[2029],"slot_index":0}],"title":"Positive Prompt","properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["low quality, low detail, blurry, shallow depth of field, mutated, symmetrical, generic"]},{"id":107,"type":"CLIPTextEncode","pos":[959.4713745117188,-123.3353500366211],"size":[282.33453369140625,173.58438110351562],"flags":{"collapsed":false},"order":3,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","label":"clip","type":"CLIP","link":2024}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","label":"CONDITIONING","type":"CONDITIONING","shape":3,"links":[2018],"slot_index":0}],"title":"Positive Prompt","properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["the mournful lamentations of of a female rock singer on stage with chaos behind her, her face screaming her sorrowful refrains the despairing cries of anguished screams howling agonized moans, her pained whispers mournful sighs distant echoes across the smoky stage, fading memories of lost loves, forgotten dreams, shattered hopes, crushed spirits, broken 
hearts"]},{"id":630,"type":"ClownsharKSampler_Beta","pos":[1271.7001953125,-124.3408432006836],"size":[291.7499084472656,650],"flags":{},"order":8,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":2016},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":2018},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":2029},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":2013},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":2021},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2030],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"fully_implicit/gauss-legendre_2s","beta57",30,-1,1,4,0,"fixed","standard",true]},{"id":634,"type":"ClownGuide_Beta","pos":[1276.0064697265625,-480.84442138671875],"size":[284.860595703125,290.8609924316406],"flags":{},"order":6,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":2020},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[2021],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownGuide_Beta"},"widgets_values":["fully_pseudoimplicit",false,false,0.75,1,"linear_quadratic",0,10,false]},{"id":643,"type":"Note","pos":[1599.7352294921875,-422.8976135253906],"size":[258.39599609375,111.11077880859375],"flags":{},"order":2,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["fully_pseudoimplicit only works with \"fully_implicit\" sampler types. With all others, it will revert automatically to pseudoimplicit.\n\npseudoimplicit may, however, be used with \"fully_implicit\" samplers."],"color":"#432","bgcolor":"#653"}],"links":[[2013,629,0,630,3,"LATENT"],[2015,629,3,632,1,"LATENT"],[2016,632,0,630,0,"MODEL"],[2017,628,0,629,0,"IMAGE"],[2018,107,0,630,1,"CONDITIONING"],[2019,591,0,633,0,"IMAGE"],[2020,629,0,634,0,"LATENT"],[2021,634,0,630,5,"GUIDES"],[2024,636,1,107,0,"CLIP"],[2025,636,0,632,0,"MODEL"],[2026,636,2,629,4,"VAE"],[2027,636,2,591,1,"VAE"],[2028,636,1,637,0,"CLIP"],[2029,637,0,630,2,"CONDITIONING"],[2030,630,0,591,0,"LATENT"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.7985878990923265,"offset":[840.6440644823947,678.3605934631012]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0},"version":0.4}
================================================
FILE: example_workflows/hidream guide lure.json
================================================
{"last_node_id":640,"last_link_id":2035,"nodes":[{"id":628,"type":"LoadImage","pos":[599.166015625,156.38429260253906],"size":[315,314],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2017]},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["ComfyUI_14254_.png","image"]},{"id":632,"type":"ModelSamplingAdvancedResolution","pos":[962.5586547851562,-316.3705139160156],"size":[277.62237548828125,126],"flags":{},"order":9,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":2025},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":2015}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2016],"slot_index":0}],"properties":{"Node name for S&R":"ModelSamplingAdvancedResolution"},"widgets_values":["exponential",1.35,0.85]},{"id":636,"type":"ClownModelLoader","pos":[599.3463745117188,-176.31788635253906],"size":[315,266],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2025],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[2024,2028,2034],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[2026,2027],"slot_index":2}],"properties":{"Node name for 
S&R":"ClownModelLoader"},"widgets_values":["hidream_i1_full_fp8.safetensors","fp8_e4m3fn","clip_l_hidream.safetensors","clip_g_hidream.safetensors","t5xxl_fp8_e4m3fn_scaled.safetensors","llama_3.1_8b_instruct_fp8_scaled.safetensors","hidream","ae.sft"]},{"id":591,"type":"VAEDecode","pos":[1610,-230],"size":[210,46],"flags":{"collapsed":false},"order":11,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","label":"samples","type":"LATENT","link":2030},{"name":"vae","localized_name":"vae","label":"vae","type":"VAE","link":2027}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","label":"IMAGE","type":"IMAGE","shape":3,"links":[2019],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode"},"widgets_values":[]},{"id":633,"type":"SaveImage","pos":[1610,-120],"size":[436.4179382324219,508.5302429199219],"flags":{},"order":12,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2019}],"outputs":[],"properties":{},"widgets_values":["ComfyUI"]},{"id":629,"type":"VAEEncodeAdvanced","pos":[961.6965942382812,242.70477294921875],"size":[278.0284423828125,280.5834045410156],"flags":{},"order":6,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":2017},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":2026}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[2013,2020],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":null},{"name":"mask","localized_name":"mask","type":"MASK","links":null},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[2015]},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","local
ized_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"VAEEncodeAdvanced"},"widgets_values":["false",1024,1024,"red",false,"16_channels"]},{"id":630,"type":"ClownsharKSampler_Beta","pos":[1271.7001953125,-124.3408432006836],"size":[291.7499084472656,650],"flags":{},"order":10,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":2016},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":2018},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":2029},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":2013},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":2021},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":2032},{"name":"options 2","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2030],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"multistep/res_3m","beta57",30,-1,1,4,0,"fixed","standard",true]},{"id":638,"type":"SharkOptions_GuideCond_Beta","pos":[955.9966430664062,585.7319946289062],"size":[284.5923156738281,98],"flags":{},"order":7,"mode":0,"inputs":[{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":2035},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":2033},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[2032],"slot_index":0}],"properties":{"Node name for 
S&R":"SharkOptions_GuideCond_Beta"},"widgets_values":[4]},{"id":637,"type":"CLIPTextEncode","pos":[962.297607421875,99.93917846679688],"size":[278.4529113769531,88],"flags":{"collapsed":false},"order":4,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","label":"clip","type":"CLIP","link":2028}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","label":"CONDITIONING","type":"CONDITIONING","shape":3,"links":[2029,2033],"slot_index":0}],"title":"Positive Prompt","properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["low quality, low detail, blurry, shallow depth of field, mutated, symmetrical, generic"]},{"id":107,"type":"CLIPTextEncode","pos":[959.4713745117188,-123.3353500366211],"size":[282.33453369140625,173.58438110351562],"flags":{"collapsed":false},"order":3,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","label":"clip","type":"CLIP","link":2024}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","label":"CONDITIONING","type":"CONDITIONING","shape":3,"links":[2018],"slot_index":0}],"title":"Positive Prompt","properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["the mournful lamentations of of a female rock singer on stage with chaos behind her, her face screaming her sorrowful refrains the despairing cries of anguished screams howling agonized moans, her pained whispers mournful sighs distant echoes across the smoky stage, fading memories of lost loves, forgotten dreams, shattered hopes, crushed spirits, broken hearts"]},{"id":639,"type":"CLIPTextEncode","pos":[599.5145263671875,565.6756591796875],"size":[315.33026123046875,117.94475555419922],"flags":{"collapsed":false},"order":5,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","label":"clip","type":"CLIP","link":2034}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","label":"CONDITIONING","type":"CONDITIONING","shape":3,"links":[2035],"slot_index":0}],"title":"Positive Prompt","properties":{"Node name 
for S&R":"CLIPTextEncode"},"widgets_values":["illustration of a singing clock with huge teeth in a surreal forest with torquiose mountains and a red and yellow sky, ragged trees and a pool of black oil on the ground, dripping paint oozing off the clock"]},{"id":640,"type":"Note","pos":[245.6206512451172,517.1527709960938],"size":[323.0928649902344,167.39759826660156],"flags":{},"order":2,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["With the \"flow\" mode it is usually beneficial to use the supplemental GuideCond node, which allows you to set conditionings for the guide itself. With \"flow\", the guide changes during the sampling process. Without GuideCond in use, it will default to reusing your main prompt, which may result in some loss of adherence to the guide image.\n\n\"Lure\" is the only other mode that will use GuideCond."],"color":"#432","bgcolor":"#653"},{"id":634,"type":"ClownGuide_Beta","pos":[1276.0064697265625,-480.84442138671875],"size":[284.860595703125,290.8609924316406],"flags":{},"order":8,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":2020},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[2021],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownGuide_Beta"},"widgets_values":["lure",false,false,1,1,"linear_quadratic",0,13,false]}],"links":[[2013,629,0,630,3,"LATENT"],[2015,629,3,632,1,"LATENT"],[2016,632,0,630,0,"MODEL"],[2017,628,0,629,0,"IMAGE"],[2018,107,0,630,1,"CONDITIONING"],[2019,591,0,633,0,"IMAGE"],[2020,629,0,634,0,"LATENT"],[2021,634,0,630,5,"GUIDES"],[2024,636,1,107,0,"CLIP"],[2025,636,0,632,0,"MODEL"],[2026,636,2,629,4,"VAE"],[2027,636,2,591,1,"VAE"],[2028,636,1,637,0,"CLIP"],[2029,637,0,630,2,"CONDITIONING"],[2030,630,0,591,0,"LATENT"],[2032,638,0,630,6,"OPTIONS"],[2033,637,0,638,1,"CONDITIONING"],[2034,636,1,639,0,"CLIP"],[2035,639,0,638,0,"CONDITIONING"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.7985878990923265,"offset":[1342.694620988285,531.4979770514516]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0},"version":0.4}
================================================
FILE: example_workflows/hidream guide pseudoimplicit.json
================================================
{"last_node_id":641,"last_link_id":2035,"nodes":[{"id":628,"type":"LoadImage","pos":[599.166015625,156.38429260253906],"size":[315,314],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2017]},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["ComfyUI_14254_.png","image"]},{"id":632,"type":"ModelSamplingAdvancedResolution","pos":[962.5586547851562,-316.3705139160156],"size":[277.62237548828125,126],"flags":{},"order":6,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":2025},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":2015}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2016],"slot_index":0}],"properties":{"Node name for S&R":"ModelSamplingAdvancedResolution"},"widgets_values":["exponential",1.35,0.85]},{"id":636,"type":"ClownModelLoader","pos":[599.3463745117188,-176.31788635253906],"size":[315,266],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2025],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[2024,2028],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[2026,2027],"slot_index":2}],"properties":{"Node name for 
S&R":"ClownModelLoader"},"widgets_values":["hidream_i1_full_fp8.safetensors","fp8_e4m3fn","clip_l_hidream.safetensors","clip_g_hidream.safetensors","t5xxl_fp8_e4m3fn_scaled.safetensors","llama_3.1_8b_instruct_fp8_scaled.safetensors","hidream","ae.sft"]},{"id":591,"type":"VAEDecode","pos":[1610,-230],"size":[210,46],"flags":{"collapsed":false},"order":8,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","label":"samples","type":"LATENT","link":2030},{"name":"vae","localized_name":"vae","label":"vae","type":"VAE","link":2027}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","label":"IMAGE","type":"IMAGE","shape":3,"links":[2019],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode"},"widgets_values":[]},{"id":633,"type":"SaveImage","pos":[1610,-120],"size":[436.4179382324219,508.5302429199219],"flags":{},"order":9,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2019}],"outputs":[],"properties":{},"widgets_values":["ComfyUI"]},{"id":629,"type":"VAEEncodeAdvanced","pos":[961.6965942382812,242.70477294921875],"size":[278.0284423828125,280.5834045410156],"flags":{},"order":4,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":2017},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":2026}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[2013,2020],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":null},{"name":"mask","localized_name":"mask","type":"MASK","links":null},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[2015]},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localiz
ed_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"VAEEncodeAdvanced"},"widgets_values":["false",1024,1024,"red",false,"16_channels"]},{"id":630,"type":"ClownsharKSampler_Beta","pos":[1271.7001953125,-124.3408432006836],"size":[291.7499084472656,650],"flags":{},"order":7,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":2016},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":2018},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":2029},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":2013},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":2021},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2030],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"multistep/res_3m","beta57",30,-1,1,4,0,"fixed","standard",true]},{"id":637,"type":"CLIPTextEncode","pos":[962.297607421875,99.93917846679688],"size":[278.4529113769531,88],"flags":{"collapsed":false},"order":3,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","label":"clip","type":"CLIP","link":2028}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","label":"CONDITIONING","type":"CONDITIONING","shape":3,"links":[2029],"slot_index":0}],"title":"Positive Prompt","properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["low quality, low detail, blurry, shallow depth of field, mutated, symmetrical, 
generic"]},{"id":107,"type":"CLIPTextEncode","pos":[959.4713745117188,-123.3353500366211],"size":[282.33453369140625,173.58438110351562],"flags":{"collapsed":false},"order":2,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","label":"clip","type":"CLIP","link":2024}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","label":"CONDITIONING","type":"CONDITIONING","shape":3,"links":[2018],"slot_index":0}],"title":"Positive Prompt","properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["the mournful lamentations of of a female rock singer on stage with chaos behind her, her face screaming her sorrowful refrains the despairing cries of anguished screams howling agonized moans, her pained whispers mournful sighs distant echoes across the smoky stage, fading memories of lost loves, forgotten dreams, shattered hopes, crushed spirits, broken hearts"]},{"id":634,"type":"ClownGuide_Beta","pos":[1276.0064697265625,-480.84442138671875],"size":[284.860595703125,290.8609924316406],"flags":{},"order":5,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":2020},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[2021],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownGuide_Beta"},"widgets_values":["pseudoimplicit",false,false,0.1,1,"beta57",0,5,false]}],"links":[[2013,629,0,630,3,"LATENT"],[2015,629,3,632,1,"LATENT"],[2016,632,0,630,0,"MODEL"],[2017,628,0,629,0,"IMAGE"],[2018,107,0,630,1,"CONDITIONING"],[2019,591,0,633,0,"IMAGE"],[2020,629,0,634,0,"LATENT"],[2021,634,0,630,5,"GUIDES"],[2024,636,1,107,0,"CLIP"],[2025,636,0,632,0,"MODEL"],[2026,636,2,629,4,"VAE"],[2027,636,2,591,1,"VAE"],[2028,636,1,637,0,"CLIP"],[2029,637,0,630,2,"CONDITIONING"],[2030,630,0,591,0,"LATENT"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.7985878990923265,"offset":[1182.8926069221118,636.9542766363238]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0},"version":0.4}
================================================
FILE: example_workflows/hidream hires fix.json
================================================
{"last_node_id":1358,"last_link_id":3624,"nodes":[{"id":490,"type":"Reroute","pos":[13130,-70],"size":[75,26],"flags":{},"order":13,"mode":0,"inputs":[{"name":"","type":"*","link":3534}],"outputs":[{"name":"","type":"CLIP","links":[2881,3323],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1317,"type":"ClownModelLoader","pos":[12770,-90],"size":[315,266],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[3539],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[3534],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[3535],"slot_index":2}],"properties":{"Node name for S&R":"ClownModelLoader"},"widgets_values":["hidream_i1_full_fp8.safetensors","fp8_e4m3fn_fast","clip_l_hidream.safetensors","clip_g_hidream.safetensors","t5xxl_fp16.safetensors","llama_3.1_8b_instruct_fp8_scaled.safetensors","hidream","ae.sft"]},{"id":7,"type":"VAEEncodeAdvanced","pos":[13253.044921875,283.4559020996094],"size":[261.2217712402344,279.3136901855469],"flags":{},"order":18,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":null},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":18}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":[],"slot_index":1},{"name":"mask","localized_name":"mask","type":"MASK","links":[],"slot_index":2},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[3540],"slot_index":3},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_
name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"VAEEncodeAdvanced","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":["false",1536,768,"red",false,"16_channels"]},{"id":13,"type":"Reroute","pos":[13130,-110],"size":[75,26],"flags":{},"order":12,"mode":0,"inputs":[{"name":"","type":"*","link":3539}],"outputs":[{"name":"","type":"MODEL","links":[3548,3597],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1224,"type":"CLIPTextEncode","pos":[13250,-90],"size":[269.0397644042969,155.65545654296875],"flags":{"collapsed":false},"order":17,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":3323}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[3480,3599],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["a cold war era photograph from 1983 of a group of four friends holding up their hands inside an antique living room in a victorian era mansion"]},{"id":970,"type":"CLIPTextEncode","pos":[13253.0546875,116.28263854980469],"size":[261.8798522949219,111.21334838867188],"flags":{},"order":16,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":2881}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2882,3600],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["blurry, out of focus, shallow depth of field, low quality, bad quality, low detail, mutated, jpeg artifacts, compression 
artifacts,"]},{"id":14,"type":"Reroute","pos":[13130,-30],"size":[75,26],"flags":{},"order":14,"mode":0,"inputs":[{"name":"","type":"*","link":3535}],"outputs":[{"name":"","type":"VAE","links":[18,2696],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1322,"type":"ClownsharkChainsampler_Beta","pos":[14503.9365234375,-99.09358978271484],"size":[281.6568603515625,542.124755859375],"flags":{},"order":24,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":3612},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":3610},{"name":"options 2","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[3550],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"multistep/res_3m",-1,4,"resample",true]},{"id":1350,"type":"ClownOptions_Tile_Beta","pos":[14700,540],"size":[210,82],"flags":{},"order":19,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":3614}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[3615],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownOptions_Tile_Beta"},"widgets_values":[1216,832]},{"id":1351,"type":"ClownOptions_Tile_Beta","pos":[14940,540],"size":[210,82],"flags":{},"order":21,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":3615}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[],"slot_index":0}],"properties":{"Node name for S&R":"ClownOptions_Tile_Beta"},"widgets_values":[1152,896]},{"id":1349,"type":"ClownOptions_Tile_Beta","pos":[14470,540],"size":[210,82],"flags":{},"order":15,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":3616}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[3614],"slot_index":0}],"properties":{"Node name for S&R":"ClownOptions_Tile_Beta"},"widgets_values":[1536,768]},{"id":1352,"type":"ClownOptions_Tile_Beta","pos":[14233.716796875,538.3314819335938],"size":[210,82],"flags":{},"order":1,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[3616],"slot_index":0}],"properties":{"Node name for S&R":"ClownOptions_Tile_Beta"},"widgets_values":[2048,1024]},{"id":1353,"type":"ClownOptions_Tile_Beta","pos":[14232.0498046875,680.947998046875],"size":[210,82],"flags":{},"order":2,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[],"slot_index":0}],"properties":{"Node name for S&R":"ClownOptions_Tile_Beta"},"widgets_values":[-1,-1]},{"id":1354,"type":"Note","pos":[14476.6044921875,675.5231323242188],"size":[258.67279052734375,88],"flags":{},"order":3,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["As with the rest of RES4LYF nodes, \"-1\" means \"go to the end\" or \"max value\". 
In this case, that means \"use full image sizes\". So, the node to the left will be equivalent to the one above."],"color":"#432","bgcolor":"#653"},{"id":907,"type":"ClownsharKSampler_Beta","pos":[13550.5615234375,-92.92960357666016],"size":[301.752197265625,657.727294921875],"flags":{},"order":20,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":3548},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":3480},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":2882},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":3540},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[3618],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":[],"slot_index":1},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":[0.5,"multistep/res_3m","bong_tangent",30,15,1,4,4,"fixed","standard",true]},{"id":1355,"type":"LatentUpscale","pos":[13877.537109375,-92.35859680175781],"size":[286.32501220703125,130],"flags":{},"order":22,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":3618}],"outputs":[{"name":"LATENT","localized_name":"LATENT","type":"LATENT","links":[3619],"slot_index":0}],"properties":{"Node name for 
S&R":"LatentUpscale"},"widgets_values":["nearest-exact",2048,1024,"disabled"]},{"id":1345,"type":"ClownOptions_Tile_Beta","pos":[13953.123046875,285.76708984375],"size":[210,82],"flags":{},"order":4,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[3609,3610],"slot_index":0}],"properties":{"Node name for S&R":"ClownOptions_Tile_Beta"},"widgets_values":[1536,768]},{"id":909,"type":"SaveImage","pos":[14811.001953125,-99.0184555053711],"size":[457.3382263183594,422.2065124511719],"flags":{},"order":26,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2697}],"outputs":[],"properties":{"Node name for S&R":"SaveImage","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["ComfyUI"]},{"id":908,"type":"VAEDecode","pos":[14808.998046875,-201.5235595703125],"size":[140,46],"flags":{},"order":25,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":3550},{"name":"vae","localized_name":"vae","type":"VAE","link":2696}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2697],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[]},{"id":1356,"type":"Note","pos":[12793.412109375,-250.5360870361328],"size":[276.617431640625,88],"flags":{},"order":5,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Want to use this workflow with another model? Just hook up a different model! 
You may need to set CFG = 1.0 if you're going to use a distilled model, such as HiDream Dev (or Fast) or Flux Dev."],"color":"#432","bgcolor":"#653"},{"id":1321,"type":"Note","pos":[12769.740234375,239.9431915283203],"size":[345.97113037109375,161.35496520996094],"flags":{},"order":6,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["There are many samplers to try, but res_2m, res_3m, res_2s, and res_3s are very reliable. If you want to push quality a bit higher in exchange for time, you could even try res_5s.\n\nres_2m and res_3m begin with higher order steps (one res_2s step, and two res_3s steps, respectively) to initialize the sampling process. Ultimately, the result is faster convergence in terms of wall time, as fewer steps end up being necessary."],"color":"#432","bgcolor":"#653"},{"id":1347,"type":"Note","pos":[13505.927734375,-326.1947937011719],"size":[348.3962097167969,172.26731872558594],"flags":{},"order":7,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Connect \"Upscale Latent\" directly to the last chainsampler to skip the iterative refinement steps (which is what implicit steps are: they use the output of a step as the input, then re-run it to refine). They help minimize mutations with a \"hires fix\" workflow like this.\n\n\"rebound\" is the highest quality implicit_type, but is also slightly slower.\n\nYou may also use ClownOptions Cycles instead of ClownOptions Implicit Steps."],"color":"#432","bgcolor":"#653"},{"id":1346,"type":"Note","pos":[13898.4658203125,421.6622314453125],"size":[261.7038269042969,363.83868408203125],"flags":{},"order":8,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["If you use tiled sampling, be sure to choose tile sizes that will need to overlap each other, or you might see seams. 
For example, for 2048x1024, it would be unwise to choose 1024x1024 or 512x512 as your only tile size, as 2048 / 1024 = 1.0, 2048 / 512 = 4.0, etc.\n\nThis workflow will upscale to 2048x1024. 2048 is not divisible by 1536, and 1024 is not divisible by 768, therefore they will have overlapping areas.\n\nIt's best to pick tile sizes that you know the model is trained at, with which you can generate txt2img without hallucination, doubling, mutations, \"grid\" artifacts, etc.\n\nTiled sampling will be slower, but can prevent drifts in luminosity, hue, artifacts around the edge of the image, and mutations, while reducing VRAM use. However, it can also cause parts of the image to look \"out of sync\". You can alternate tile sizes like shown to the right, which can sometimes help."],"color":"#432","bgcolor":"#653"},{"id":1324,"type":"ClownsharkChainsampler_Beta","pos":[14189.3935546875,-89.69397735595703],"size":[285.5440673828125,552.053955078125],"flags":{},"order":23,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":3597},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":3599},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":3600},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":3619},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":3624},{"name":"options 2","type":"OPTIONS","link":3609},{"name":"options 3","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[3612],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"multistep/res_3m",1,4,"resample",true]},{"id":1325,"type":"ClownOptions_ImplicitSteps_Beta","pos":[13884.9677734375,94.86456298828125],"size":[278.0316467285156,130],"flags":{},"order":9,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[],"slot_index":0}],"properties":{"Node name for S&R":"ClownOptions_ImplicitSteps_Beta"},"widgets_values":["rebound","bongmath",10,0]},{"id":1357,"type":"Note","pos":[14184.4599609375,-302.5225830078125],"size":[305.0502014160156,150.26080322265625],"flags":{},"order":10,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["The sampling will appear to freeze for a minute at this node, but it is not actually frozen. Reducing implicit_steps or cycles will speed things up.\n\nIf you are willing to use a slower sampler to improve quality, the biggest bang for your buck will be with this first chainsampler. 
Try changing the sampler_name to res_3s, or gauss-legendre_2s.\n"],"color":"#432","bgcolor":"#653"},{"id":1358,"type":"ClownOptions_Cycles_Beta","pos":[13880.7060546875,-310.925537109375],"size":[280.4444274902344,154],"flags":{},"order":11,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[3624],"slot_index":0}],"properties":{"Node name for S&R":"ClownOptions_Cycles_Beta"},"widgets_values":[6,1,0.5,"none",4]}],"links":[[18,14,0,7,4,"VAE"],[2696,14,0,908,1,"VAE"],[2697,908,0,909,0,"IMAGE"],[2881,490,0,970,0,"CLIP"],[2882,970,0,907,2,"CONDITIONING"],[3323,490,0,1224,0,"CLIP"],[3480,1224,0,907,1,"CONDITIONING"],[3534,1317,1,490,0,"*"],[3535,1317,2,14,0,"*"],[3539,1317,0,13,0,"*"],[3540,7,3,907,3,"LATENT"],[3548,13,0,907,0,"MODEL"],[3550,1322,0,908,0,"LATENT"],[3597,13,0,1324,0,"MODEL"],[3599,1224,0,1324,1,"CONDITIONING"],[3600,970,0,1324,2,"CONDITIONING"],[3609,1345,0,1324,7,"OPTIONS"],[3610,1345,0,1322,6,"OPTIONS"],[3612,1324,0,1322,4,"LATENT"],[3614,1349,0,1350,0,"OPTIONS"],[3615,1350,0,1351,0,"OPTIONS"],[3616,1352,0,1349,0,"OPTIONS"],[3618,907,0,1355,0,"LATENT"],[3619,1355,0,1324,4,"LATENT"],[3624,1358,0,1324,6,"OPTIONS"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.9194342495775452,"offset":[-11744.076730306608,403.1731222243355]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0,"ue_links":[],"VHS_MetadataImage":true,"VHS_KeepIntermediate":true},"version":0.4}
================================================
FILE: example_workflows/hidream regional 3 zones.json
================================================
{"last_node_id":612,"last_link_id":1834,"nodes":[{"id":13,"type":"Reroute","pos":[580,-180],"size":[75,26],"flags":{},"order":18,"mode":0,"inputs":[{"name":"","type":"*","link":1611}],"outputs":[{"name":"","type":"MODEL","links":[1395],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":404,"type":"VAELoader","pos":[328.6705627441406,5.664919376373291],"size":[210,58],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"VAE","localized_name":"VAE","type":"VAE","links":[1344],"slot_index":0}],"properties":{"Node name for S&R":"VAELoader"},"widgets_values":["ae.sft"]},{"id":402,"type":"QuadrupleCLIPLoader","pos":[130,-170],"size":[407.7720031738281,130],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[{"name":"CLIP","localized_name":"CLIP","type":"CLIP","links":[1552],"slot_index":0}],"properties":{"Node name for S&R":"QuadrupleCLIPLoader"},"widgets_values":["clip_l_hidream.safetensors","clip_g_hidream.safetensors","t5xxl_fp8_e4m3fn_scaled.safetensors","llama_3.1_8b_instruct_fp8_scaled.safetensors"]},{"id":403,"type":"UNETLoader","pos":[216.5030059814453,-297.7170715332031],"size":[320.7802429199219,82],"flags":{},"order":2,"mode":0,"inputs":[],"outputs":[{"name":"MODEL","localized_name":"MODEL","type":"MODEL","links":[1610],"slot_index":0}],"properties":{"Node name for 
S&R":"UNETLoader"},"widgets_values":["hidream_i1_full_fp8.safetensors","fp8_e4m3fn"]},{"id":14,"type":"Reroute","pos":[580,-100],"size":[75,26],"flags":{},"order":7,"mode":0,"inputs":[{"name":"","type":"*","link":1344}],"outputs":[{"name":"","type":"VAE","links":[18,1328],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":431,"type":"ModelSamplingAdvancedResolution","pos":[695.769287109375,-369.69635009765625],"size":[260.3999938964844,126],"flags":{},"order":20,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1395},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":1398}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1680],"slot_index":0}],"properties":{"Node name for S&R":"ModelSamplingAdvancedResolution"},"widgets_values":["exponential",1.35,0.85]},{"id":490,"type":"Reroute","pos":[580.390380859375,-139.51483154296875],"size":[75,26],"flags":{},"order":8,"mode":0,"inputs":[{"name":"","type":"*","link":1552}],"outputs":[{"name":"","type":"CLIP","links":[1559,1691,1693,1707],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":394,"type":"CLIPTextEncode","pos":[694.6102905273438,168.60507202148438],"size":[264.9925842285156,127.11075592041016],"flags":{},"order":14,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":1559}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[1355],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["bright light, well-lit, daylight, monotone, desaturated, professional photography, blurry, out of focus, shallow depth of field, low quality, bad quality, low detail, mutated, jpeg artifacts, compression 
artifacts,"]},{"id":398,"type":"SaveImage","pos":[1387.6151123046875,-268.26824951171875],"size":[603.7825927734375,598.39404296875],"flags":{},"order":23,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":1329}],"outputs":[],"properties":{},"widgets_values":["ComfyUI"]},{"id":608,"type":"ImageToMask","pos":[478.4993896484375,-645.0528564453125],"size":[210,58],"flags":{},"order":12,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","link":1809}],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[1807],"slot_index":0}],"properties":{"Node name for S&R":"ImageToMask"},"widgets_values":["red"]},{"id":397,"type":"VAEDecode","pos":[1388.41064453125,-374.6264953613281],"size":[210,46],"flags":{},"order":22,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":1815},{"name":"vae","localized_name":"vae","type":"VAE","link":1328}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[1329],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode"},"widgets_values":[]},{"id":605,"type":"LoadImage","pos":[-140,-900],"size":[210,314],"flags":{},"order":3,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[1810],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["pasted/image (446).png","image"]},{"id":603,"type":"LoadImage","pos":[-130,-1280],"size":[210,314],"flags":{},"order":4,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[1811],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["pasted/image 
(444).png","image"]},{"id":7,"type":"VAEEncodeAdvanced","pos":[696.7778930664062,-164.97328186035156],"size":[261.2217712402344,279.3136901855469],"flags":{},"order":13,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":null},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":18}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":[],"slot_index":1},{"name":"mask","localized_name":"mask","type":"MASK","links":[],"slot_index":2},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[1398,1399],"slot_index":3},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"VAEEncodeAdvanced"},"widgets_values":["false",1024,2048,"red",false,"16_channels"]},{"id":540,"type":"CLIPTextEncode","pos":[743.9880981445312,-978.6345825195312],"size":[275.3782653808594,125.7564697265625],"flags":{},"order":17,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":1707}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[1814],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["a charcoal drawing of the top of a 
skyscraper"]},{"id":520,"type":"CLIPTextEncode","pos":[740,-790],"size":[275.3782653808594,125.7564697265625],"flags":{},"order":16,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":1693}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[1813],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["a children's messy crayon drawing of the middle floors of a skyscraper"]},{"id":455,"type":"CLIPTextEncode","pos":[740,-600],"size":[285.3899230957031,125.00720977783203],"flags":{"collapsed":false},"order":15,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":1691}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[1812],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["a close up high quality cinematic color photograph of the base of an office building in a city park in wisconsin"]},{"id":606,"type":"ImageToMask","pos":[484.2362976074219,-962.7913818359375],"size":[210,58],"flags":{},"order":11,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","link":1811}],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[1806],"slot_index":0}],"properties":{"Node name for S&R":"ImageToMask"},"widgets_values":["red"]},{"id":607,"type":"ImageToMask","pos":[478.7450256347656,-798.6764526367188],"size":[210,58],"flags":{},"order":10,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","link":1810}],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[1808],"slot_index":0}],"properties":{"Node name for 
S&R":"ImageToMask"},"widgets_values":["red"]},{"id":604,"type":"LoadImage","pos":[-150,-510],"size":[210,314],"flags":{},"order":5,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[1809],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["pasted/image (445).png","image"]},{"id":401,"type":"ClownsharKSampler_Beta","pos":[1010,-370],"size":[340.55120849609375,666.8208618164062],"flags":{},"order":21,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":1680},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":1834},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":1355},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1399},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[1815],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"multistep/res_3m","beta57",20,-1,1,4,86,"fixed","standard",true]},{"id":533,"type":"ClownRegionalConditioning_ABC","pos":[1087.326904296875,-873.5692138671875],"size":[243.60000610351562,390],"flags":{},"order":19,"mode":0,"inputs":[{"name":"conditioning_A","localized_name":"conditioning_A","type":"CONDITIONING","shape":7,"link":1814},{"name":"conditioning_B","localized_name":"conditioning_B","type":"CONDITIONING","shape":7,"link":1813},{"name":"conditioning_C","localized_name":"conditioning_C","type":"CONDITIONING","shape":7,"link":1812},{"name":"mask_A","localized_name":"mask_A","type":"MASK","shape":7,"link":1806},{"name":"mask_B","localized_name":"mask_B","type":"MASK","shape":7,"link":1808},{"name":"mask_C","localized_name":"mask_C","type":"MASK","shape":7,"link":1807},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"region_bleeds","localized_name":"region_bleeds","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","links":[1834],"slot_index":0}],"properties":{"Node name for S&R":"ClownRegionalConditioning_ABC"},"widgets_values":[-0.9,-0.25,0,"constant",0,-1,"boolean",256,false]},{"id":612,"type":"Note","pos":[159.41253662109375,-707.9190063476562],"size":[210,99.94182586669922],"flags":{},"order":6,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["It is critical that each part of the image is covered by one of these masks."],"color":"#432","bgcolor":"#653"},{"id":512,"type":"ReHiDreamPatcher","pos":[212.8125762939453,-444.52001953125],"size":[320.9115295410156,82],"flags":{},"order":9,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1610}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1611],"slot_index":0}],"properties":{"Node name for 
S&R":"ReHiDreamPatcher"},"widgets_values":["float32",true]}],"links":[[18,14,0,7,4,"VAE"],[1328,14,0,397,1,"VAE"],[1329,397,0,398,0,"IMAGE"],[1344,404,0,14,0,"*"],[1355,394,0,401,2,"CONDITIONING"],[1395,13,0,431,0,"MODEL"],[1398,7,3,431,1,"LATENT"],[1399,7,3,401,3,"LATENT"],[1552,402,0,490,0,"*"],[1559,490,0,394,0,"CLIP"],[1610,403,0,512,0,"MODEL"],[1611,512,0,13,0,"*"],[1680,431,0,401,0,"MODEL"],[1691,490,0,455,0,"CLIP"],[1693,490,0,520,0,"CLIP"],[1707,490,0,540,0,"CLIP"],[1806,606,0,533,3,"MASK"],[1807,608,0,533,5,"MASK"],[1808,607,0,533,4,"MASK"],[1809,604,0,608,0,"IMAGE"],[1810,605,0,607,0,"IMAGE"],[1811,603,0,606,0,"IMAGE"],[1812,455,0,533,2,"CONDITIONING"],[1813,520,0,533,1,"CONDITIONING"],[1814,540,0,533,0,"CONDITIONING"],[1815,401,0,397,0,"LATENT"],[1834,533,0,401,1,"CONDITIONING"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.3109994191500227,"offset":[2330.291089462677,1329.1104989082662]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0},"version":0.4}
================================================
FILE: example_workflows/hidream regional antiblur.json
================================================
{"last_node_id":727,"last_link_id":2103,"nodes":[{"id":13,"type":"Reroute","pos":[1280,-650],"size":[75,26],"flags":{},"order":12,"mode":0,"inputs":[{"name":"","type":"*","link":2098}],"outputs":[{"name":"","type":"MODEL","links":[1967],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":14,"type":"Reroute","pos":[1280,-570],"size":[75,26],"flags":{},"order":10,"mode":0,"inputs":[{"name":"","type":"*","link":2100}],"outputs":[{"name":"","type":"VAE","links":[18,1328],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":398,"type":"SaveImage","pos":[1379.9996337890625,-267.2835998535156],"size":[341.7508850097656,561.0067749023438],"flags":{},"order":21,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":1329}],"outputs":[],"properties":{"Node name for S&R":"SaveImage","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["ComfyUI"]},{"id":701,"type":"Note","pos":[80,-520],"size":[342.05950927734375,88],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["I usually just lazily draw masks in Load Image nodes (with some random image loaded), but for the sake of reproducibility, here's another approach."],"color":"#432","bgcolor":"#653"},{"id":712,"type":"Note","pos":[-210,-520],"size":[245.76409912109375,91.6677017211914],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["So long as these masks are all the same size, the regional conditioning nodes will handle resizing to the image size for you."],"color":"#432","bgcolor":"#653"},{"id":676,"type":"InvertMask","pos":[20,-370],"size":[142.42074584960938,26],"flags":{},"order":7,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":2073}],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[2083],"slot_index":0}],"properties":{"Node name for 
S&R":"InvertMask"},"widgets_values":[]},{"id":662,"type":"CLIPTextEncode","pos":[460,-370],"size":[210,88],"flags":{"collapsed":false},"order":13,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":1939}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2094],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["a woman wearing a red flannel shirt and a cute shark plush blue hat"]},{"id":7,"type":"VAEEncodeAdvanced","pos":[719.6110229492188,16.752899169921875],"size":[261.2217712402344,279.3136901855469],"flags":{},"order":16,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":null},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":18}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":[],"slot_index":1},{"name":"mask","localized_name":"mask","type":"MASK","links":[],"slot_index":2},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[1399],"slot_index":3},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"VAEEncodeAdvanced","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":["false",1024,1024,"red",false,"16_channels"]},{"id":710,"type":"MaskPreview","pos":[180,-190],"size":[210,246],"flags":{},"order":17,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":2054}],"outputs":[],"properties":{"Node 
name for S&R":"MaskPreview"},"widgets_values":[]},{"id":397,"type":"VAEDecode","pos":[1382.3662109375,-374.17059326171875],"size":[210,46],"flags":{},"order":20,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":2096},{"name":"vae","localized_name":"vae","type":"VAE","link":1328}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[1329],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[]},{"id":715,"type":"SolidMask","pos":[-220,-370],"size":[210,106],"flags":{},"order":2,"mode":0,"inputs":[],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[2073],"slot_index":0}],"properties":{"Node name for S&R":"SolidMask"},"widgets_values":[1,1024,1024]},{"id":716,"type":"SolidMask","pos":[-220,-220],"size":[210,106],"flags":{},"order":3,"mode":0,"inputs":[],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[2065],"slot_index":0}],"properties":{"Node name for S&R":"SolidMask"},"widgets_values":[1,384,864]},{"id":709,"type":"MaskComposite","pos":[190,-370],"size":[210,126],"flags":{},"order":11,"mode":0,"inputs":[{"name":"destination","localized_name":"destination","type":"MASK","link":2083},{"name":"source","localized_name":"source","type":"MASK","link":2065}],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[2054,2091],"slot_index":0}],"properties":{"Node name for S&R":"MaskComposite"},"widgets_values":[256,160,"add"]},{"id":704,"type":"Note","pos":[101.74818420410156,112.67951965332031],"size":[290.7107238769531,155.35317993164062],"flags":{},"order":4,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["ClownRegionalConditionings:\n\nTry raising or lowering weight, and changing the weight scheduler from beta57 to Karras (weakens more quickly), or to linear quadratic (stronger late).\n\nTry changing region_bleed_start_step (earlier will make the image blend together 
more), and end_step."],"color":"#432","bgcolor":"#653"},{"id":703,"type":"Note","pos":[423.10699462890625,-96.14085388183594],"size":[241.9689483642578,386.7543640136719],"flags":{},"order":5,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["edge_width also creates some overlap around the edges of the mask.\n\nboolean_masked means that the masked area can \"see\" the rest of the image, but the unmasked area cannot. \"boolean\" would mean neither area could see the rest of the image.\n\nTry setting to boolean_unmasked and see what happens!\n\nIf you still have blur, try reducing edge_width (and if you have seams, try increasing it, or setting end_step to something like 20). \n\nAlso verify that you can generate the background prompt alone without blur (if you can't, this won't work). And don't get stuck on one seed.\n\nVaguely human-shaped masks also tend to work better than the blocky one used here."],"color":"#432","bgcolor":"#653"},{"id":725,"type":"ReHiDreamPatcher","pos":[1009.8884887695312,-694.5361328125],"size":[210,82],"flags":{},"order":8,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":2097}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2098],"slot_index":0}],"properties":{"Node name for S&R":"ReHiDreamPatcher"},"widgets_values":["float64",true]},{"id":724,"type":"ClownModelLoader","pos":[660.0880126953125,-695.142333984375],"size":[315,266],"flags":{},"order":6,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2097],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[2099],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[2100],"slot_index":2}],"properties":{"Node name for 
S&R":"ClownModelLoader"},"widgets_values":["hidream_i1_full_fp8.safetensors","fp8_e4m3fn_fast","clip_l_hidream.safetensors","clip_g_hidream.safetensors","t5xxl_fp8_e4m3fn_scaled.safetensors","llama_3.1_8b_instruct_fp8_scaled.safetensors","hidream","ae.sft"]},{"id":722,"type":"ClownRegionalConditioning2","pos":[690,-370],"size":[287.75750732421875,330],"flags":{},"order":18,"mode":0,"inputs":[{"name":"conditioning_masked","localized_name":"conditioning_masked","type":"CONDITIONING","shape":7,"link":2094},{"name":"conditioning_unmasked","localized_name":"conditioning_unmasked","type":"CONDITIONING","shape":7,"link":2093},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":2091},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"region_bleeds","localized_name":"region_bleeds","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","links":[2095],"slot_index":0}],"properties":{"Node name for S&R":"ClownRegionalConditioning2"},"widgets_values":[0.9,0.1,0,"constant",0,-1,"boolean_masked",32,false]},{"id":723,"type":"CLIPTextEncode","pos":[460,-240],"size":[210,88],"flags":{"collapsed":false},"order":14,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":2092}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2093],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["a college 
campus"]},{"id":490,"type":"Reroute","pos":[1280,-610],"size":[75,26],"flags":{},"order":9,"mode":0,"inputs":[{"name":"","type":"*","link":2099}],"outputs":[{"name":"","type":"CLIP","links":[1939,2092,2102],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":727,"type":"CLIPTextEncode","pos":[721.318359375,349.4079895019531],"size":[261.8798522949219,111.21334838867188],"flags":{},"order":15,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":2102}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2103],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["blurry, out of focus, shallow depth of field, low quality, bad quality, low detail, mutated, jpeg artifacts, compression artifacts,"]},{"id":401,"type":"ClownsharKSampler_Beta","pos":[1010,-370],"size":[340.55120849609375,666.8208618164062],"flags":{},"order":19,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":1967},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":2095},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":2103},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1399},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2096],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharKSampler_Beta","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":[0.5,"multistep/res_2m","bong_tangent",30,-1,1,4,0,"fixed","standard",true]}],"links":[[18,14,0,7,4,"VAE"],[1328,14,0,397,1,"VAE"],[1329,397,0,398,0,"IMAGE"],[1399,7,3,401,3,"LATENT"],[1939,490,0,662,0,"CLIP"],[1967,13,0,401,0,"MODEL"],[2054,709,0,710,0,"MASK"],[2065,716,0,709,1,"MASK"],[2073,715,0,676,0,"MASK"],[2083,676,0,709,0,"MASK"],[2091,709,0,722,2,"MASK"],[2092,490,0,723,0,"CLIP"],[2093,723,0,722,1,"CONDITIONING"],[2094,662,0,722,0,"CONDITIONING"],[2095,722,0,401,1,"CONDITIONING"],[2096,401,0,397,0,"LATENT"],[2097,724,0,725,0,"MODEL"],[2098,725,0,13,0,"*"],[2099,724,1,490,0,"*"],[2100,724,2,14,0,"*"],[2102,490,0,727,0,"CLIP"],[2103,727,0,401,2,"CONDITIONING"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.91943424957756,"offset":[1345.3511333682184,704.1505917671295]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0,"ue_links":[],"VHS_MetadataImage":true,"VHS_KeepIntermediate":true},"version":0.4}
================================================
FILE: example_workflows/hidream style antiblur.json
================================================
{"last_node_id":742,"last_link_id":2119,"nodes":[{"id":13,"type":"Reroute","pos":[1280,-650],"size":[75,26],"flags":{},"order":7,"mode":0,"inputs":[{"name":"","type":"*","link":2115}],"outputs":[{"name":"","type":"MODEL","links":[1967],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":490,"type":"Reroute","pos":[1280,-610],"size":[75,26],"flags":{},"order":5,"mode":0,"inputs":[{"name":"","type":"*","link":2116}],"outputs":[{"name":"","type":"CLIP","links":[1939,2119],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":14,"type":"Reroute","pos":[1280,-570],"size":[75,26],"flags":{},"order":6,"mode":0,"inputs":[{"name":"","type":"*","link":2117}],"outputs":[{"name":"","type":"VAE","links":[18,1328],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":398,"type":"SaveImage","pos":[1379.9996337890625,-267.2835998535156],"size":[341.7508850097656,561.0067749023438],"flags":{},"order":14,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":1329}],"outputs":[],"properties":{"Node name for S&R":"SaveImage","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["ComfyUI"]},{"id":397,"type":"VAEDecode","pos":[1382.3662109375,-374.17059326171875],"size":[210,46],"flags":{},"order":13,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":2096},{"name":"vae","localized_name":"vae","type":"VAE","link":1328}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[1329],"slot_index":0}],"properties":{"Node name for 
S&R":"VAEDecode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[]},{"id":7,"type":"VAEEncodeAdvanced","pos":[412.2475280761719,-199.0681915283203],"size":[261.2217712402344,279.3136901855469],"flags":{},"order":10,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":2113},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":18}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[2100],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":[],"slot_index":1},{"name":"mask","localized_name":"mask","type":"MASK","links":[],"slot_index":2},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[1399],"slot_index":3},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"VAEEncodeAdvanced","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":["false",1024,1024,"red",false,"16_channels"]},{"id":662,"type":"CLIPTextEncode","pos":[761.3005981445312,-357.2689208984375],"size":[210,102.54972839355469],"flags":{"collapsed":false},"order":8,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":1939}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2098],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["a woman wearing a red flannel shirt and a cute shark plush blue hat, a college campus, brick 
buildings"]},{"id":727,"type":"Note","pos":[412.8926086425781,-351.8606872558594],"size":[272.4425048828125,88],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["This approach can be combined with the regional conditioning anti-blur approach for an even more powerful effect."],"color":"#432","bgcolor":"#653"},{"id":724,"type":"ClownGuide_Style_Beta","pos":[703.7374267578125,-198.63233947753906],"size":[262.8634033203125,286],"flags":{},"order":11,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":2100},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[2099],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuide_Style_Beta"},"widgets_values":["positive","WCT",1,1,"constant",0,10,false]},{"id":739,"type":"LoadImage","pos":[70.82455444335938,-201.66342163085938],"size":[315,314],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2113],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["pasted/image (655).png","image"]},{"id":741,"type":"ReHiDreamPatcher","pos":[1000,-680],"size":[210,82],"flags":{},"order":4,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":2114}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2115],"slot_index":0}],"properties":{"Node name for 
S&R":"ReHiDreamPatcher"},"widgets_values":["float64",true]},{"id":740,"type":"ClownModelLoader","pos":[650,-680],"size":[315,266],"flags":{},"order":2,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2114],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[2116],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[2117],"slot_index":2}],"properties":{"Node name for S&R":"ClownModelLoader"},"widgets_values":["hidream_i1_full_fp8.safetensors","fp8_e4m3fn_fast","clip_l_hidream.safetensors","clip_g_hidream.safetensors","t5xxl_fp8_e4m3fn_scaled.safetensors","llama_3.1_8b_instruct_fp8_scaled.safetensors","hidream","ae.sft"]},{"id":401,"type":"ClownsharKSampler_Beta","pos":[1010,-370],"size":[340.55120849609375,666.8208618164062],"flags":{},"order":12,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":1967},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":2098},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":2118},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1399},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":2099},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2096],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharKSampler_Beta","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":[0.5,"multistep/res_2m","bong_tangent",30,-1,1,4,7,"fixed","standard",true]},{"id":742,"type":"CLIPTextEncode","pos":[703.5707397460938,144.26979064941406],"size":[261.8798522949219,111.21334838867188],"flags":{},"order":9,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":2119}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2118],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["blurry, out of focus, shallow depth of field, low quality, bad quality, low detail, mutated, jpeg artifacts, compression artifacts,"]},{"id":726,"type":"Note","pos":[305.74163818359375,169.59754943847656],"size":[364.5906677246094,164.38613891601562],"flags":{},"order":3,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["The best style guide images will share the lighting and color composition of your desired scene. Some are just inexplicably ineffective at killing blur. Just gather up a bunch of images to try, you'll find some good ones that can be reused for many things. I'm including the one used here in the example_workflows directory, be sure to check for it.\n\nAnd don't forget to change seeds. Don't optimize for one seed only. Don't get stuck on one seed! 
Sometimes one is just not going to work out for whatever you're doing."],"color":"#432","bgcolor":"#653"}],"links":[[18,14,0,7,4,"VAE"],[1328,14,0,397,1,"VAE"],[1329,397,0,398,0,"IMAGE"],[1399,7,3,401,3,"LATENT"],[1939,490,0,662,0,"CLIP"],[1967,13,0,401,0,"MODEL"],[2096,401,0,397,0,"LATENT"],[2098,662,0,401,1,"CONDITIONING"],[2099,724,0,401,5,"GUIDES"],[2100,7,0,724,0,"LATENT"],[2113,739,0,7,0,"IMAGE"],[2114,740,0,741,0,"MODEL"],[2115,741,0,13,0,"*"],[2116,740,1,490,0,"*"],[2117,740,2,14,0,"*"],[2118,742,0,401,2,"CONDITIONING"],[2119,490,0,742,0,"CLIP"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.7449402268886909,"offset":[1731.8135682982838,807.2501654184575]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0,"ue_links":[],"VHS_MetadataImage":true,"VHS_KeepIntermediate":true},"version":0.4}
================================================
FILE: example_workflows/hidream style transfer txt2img.json
================================================
{"last_node_id":1385,"last_link_id":3733,"nodes":[{"id":13,"type":"Reroute","pos":[13508.9013671875,-109.2831802368164],"size":[75,26],"flags":{},"order":17,"mode":0,"inputs":[{"name":"","type":"*","link":3686}],"outputs":[{"name":"","type":"MODEL","links":[1395],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":14,"type":"Reroute","pos":[13508.9013671875,-29.283178329467773],"size":[75,26],"flags":{},"order":14,"mode":0,"inputs":[{"name":"","type":"*","link":3671}],"outputs":[{"name":"","type":"VAE","links":[18,2696],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":490,"type":"Reroute","pos":[13508.9013671875,-69.28317260742188],"size":[75,26],"flags":{},"order":13,"mode":0,"inputs":[{"name":"","type":"*","link":3670}],"outputs":[{"name":"","type":"CLIP","links":[2881,3581],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1363,"type":"ReHiDreamPatcher","pos":[13268.9013671875,-109.2831802368164],"size":[210,82],"flags":{},"order":12,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":3685}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[3686],"slot_index":0}],"properties":{"Node name for 
S&R":"ReHiDreamPatcher"},"widgets_values":["float64",true]},{"id":981,"type":"ClownsharkChainsampler_Beta","pos":[14758.255859375,-64.39308166503906],"size":[340.20001220703125,510],"flags":{},"order":29,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":3698},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[3469],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"exponential/res_2s",-1,4,"resample",true]},{"id":1318,"type":"ClownGuide_Beta","pos":[13828.255859375,675.60693359375],"size":[263.102783203125,290],"flags":{},"order":24,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":3710},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[3699,3708],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownGuide_Beta"},"widgets_values":["inversion",false,false,0.7,1,"constant",0,-1,false]},{"id":1333,"type":"CLIPTextEncode","pos":[13688.255859375,-44.393089294433594],"size":[280.6252746582031,164.06936645507812],"flags":{"collapsed":false},"order":19,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":3581}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[3602,3626],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["messy blackboard chalk drawing of the inside of a car driving down a creepy road. colorful chalk with shading that shows the chalk textures from drawing with the side of the chalk\n"]},{"id":1358,"type":"ClownModelLoader","pos":[12828.9013671875,-299.2831726074219],"size":[341.7054443359375,266],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[3685],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[3670],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[3671],"slot_index":2}],"properties":{"Node name for S&R":"ClownModelLoader"},"widgets_values":["hidream_i1_full_fp8.safetensors","fp8_e4m3fn_fast","t5xxl_fp8_e4m3fn_scaled.safetensors","llama_3.1_8b_instruct_fp8_scaled.safetensors","clip_g_hidream.safetensors","clip_l_hidream.safetensors","hidream","ae.sft"]},{"id":431,"type":"ModelSamplingAdvancedResolution","pos":[13218.9013671875,-309.28314208984375],"size":[260.3999938964844,126],"flags":{},"order":25,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1395},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":1398}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2692],"slot_index":0}],"properties":{"Node name for 
S&R":"ModelSamplingAdvancedResolution","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":["exponential",1.35,0.85]},{"id":970,"type":"CLIPTextEncode","pos":[13688.255859375,165.60690307617188],"size":[281.9206848144531,109.87118530273438],"flags":{},"order":18,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":2881}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2882,3627],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["blurry, out of focus, shallow depth of field, jpeg artifacts, low quality, bad quality, unsharp"]},{"id":907,"type":"ClownsharKSampler_Beta","pos":[14008.255859375,-64.39308166503906],"size":[340.55120849609375,666.8208618164062],"flags":{},"order":27,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":2692},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":3602},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":2882},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":2983},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":3708},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[3578],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharKSampler_Beta","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":[0.5,"multistep/res_2m","beta57",20,11,1,1,201,"fixed","unsample",true]},{"id":980,"type":"ClownsharkChainsampler_Beta","pos":[14378.255859375,-64.39308166503906],"size":[340.20001220703125,570],"flags":{},"order":28,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":3626},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":3627},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":3578},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":3604},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":3533},{"name":"options 2","type":"OPTIONS","link":3707},{"name":"options 3","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[3698],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"exponential/res_3s_non-monotonic",1,4,"resample",true]},{"id":1317,"type":"ClownOptions_Cycles_Beta","pos":[14408.255859375,-294.3930969238281],"size":[265.2884826660156,178],"flags":{},"order":1,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[3533],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownOptions_Cycles_Beta"},"widgets_values":[10,1,0.5,"none",-1,4]},{"id":1373,"type":"LoadImage","pos":[12848.2666015625,531.6068115234375],"size":[315,314],"flags":{},"order":2,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3721],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"title":"Load Image (Composition)","properties":{"Node name for S&R":"LoadImage"},"widgets_values":["pasted/image (476).png","image"]},{"id":1374,"type":"LoadImage","pos":[12838.2666015625,171.6068115234375],"size":[315,314],"flags":{},"order":3,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3725],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"title":"Load Image (Style Guide)","properties":{"Node name for S&R":"LoadImage"},"widgets_values":["ComfyUI_14627_.png","image"]},{"id":1378,"type":"Reroute","pos":[13184.07421875,533.128662109375],"size":[75,26],"flags":{},"order":15,"mode":0,"inputs":[{"name":"","type":"*","link":3721}],"outputs":[{"name":"","type":"IMAGE","links":[3724,3729],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1379,"type":"Reroute","pos":[13185.853515625,168.15780639648438],"size":[75,26],"flags":{},"order":16,"mode":0,"inputs":[{"name":"","type":"*","link":3725}],"outputs":[{"name":"","type":"IMAGE","links":[3726],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":909,"type":"SaveImage","pos":[15220,-259.5838928222656],"size":[457.3382263183594,422.2065124511719],"flags":{},"order":31,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2697}],"outputs":[],"properties":{"Node name for 
S&R":"SaveImage","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["ComfyUI"]},{"id":1362,"type":"PreviewImage","pos":[13317.849609375,617.1558837890625],"size":[210,246],"flags":{},"order":22,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":3682}],"outputs":[],"properties":{"Node name for S&R":"PreviewImage"},"widgets_values":[]},{"id":1350,"type":"ColorMatch","pos":[13709.701171875,316.05731201171875],"size":[210,102],"flags":{"collapsed":false},"order":21,"mode":0,"inputs":[{"name":"image_ref","localized_name":"image_ref","type":"IMAGE","link":3728},{"name":"image_target","localized_name":"image_target","type":"IMAGE","link":3724}],"outputs":[{"name":"image","localized_name":"image","type":"IMAGE","links":[3682,3688],"slot_index":0}],"properties":{"Node name for S&R":"ColorMatch"},"widgets_values":["mkl",1]},{"id":7,"type":"VAEEncodeAdvanced","pos":[13343.19140625,556.8784790039062],"size":[261.2217712402344,298],"flags":{"collapsed":true},"order":23,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":3688},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":3727},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":18},{"name":"width","type":"INT","pos":[10,160.00003051757812],"widget":{"name":"width"},"link":3732},{"name":"height","type":"INT","pos":[10,184.00003051757812],"widget":{"name":"height"},"link":3733}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[2983,3710],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":[3709],"slot_index":1},{"name":"mask","localized_name":"mask","type":"MASK","links":[],"slot_index":2},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[1398],"slot_i
ndex":3},{"name":"width","localized_name":"width","type":"INT","links":[],"slot_index":4},{"name":"height","localized_name":"height","type":"INT","links":[],"slot_index":5}],"properties":{"Node name for S&R":"VAEEncodeAdvanced","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":["false",1344,768,"red",false,"16_channels"]},{"id":1371,"type":"Image Repeat Tile To Size","pos":[13329.5947265625,497.8262939453125],"size":[210,146],"flags":{"collapsed":true},"order":20,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","link":3726},{"name":"width","type":"INT","pos":[10,36],"widget":{"name":"width"},"link":3730},{"name":"height","type":"INT","pos":[10,60],"widget":{"name":"height"},"link":3731}],"outputs":[{"name":"image","localized_name":"image","type":"IMAGE","links":[3727,3728],"slot_index":0}],"properties":{"Node name for S&R":"Image Repeat Tile To Size"},"widgets_values":[1024,1024,true]},{"id":1380,"type":"SetImageSize","pos":[13324.7197265625,323.0480041503906],"size":[210,102],"flags":{},"order":4,"mode":0,"inputs":[],"outputs":[{"name":"width","localized_name":"width","type":"INT","links":[3730,3732],"slot_index":0},{"name":"height","localized_name":"height","type":"INT","links":[3731,3733],"slot_index":1}],"properties":{"Node name for S&R":"SetImageSize"},"widgets_values":[1344,768]},{"id":1377,"type":"Image Comparer 
(rgthree)","pos":[15742.4619140625,-253.3526153564453],"size":[461.9190368652344,413.5953369140625],"flags":{},"order":32,"mode":0,"inputs":[{"name":"image_a","type":"IMAGE","dir":3,"link":3720},{"name":"image_b","type":"IMAGE","dir":3,"link":3729}],"outputs":[],"properties":{"comparer_mode":"Slide"},"widgets_values":[[{"name":"A","selected":true,"url":"/api/view?filename=rgthree.compare._temp_pzczy_00003_.png&type=temp&subfolder=&rand=0.543351218901418"},{"name":"B","selected":true,"url":"/api/view?filename=rgthree.compare._temp_pzczy_00004_.png&type=temp&subfolder=&rand=0.38178761627111313"}]]},{"id":908,"type":"VAEDecode","pos":[15217.7802734375,-312.1965637207031],"size":[210,46],"flags":{"collapsed":true},"order":30,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":3469},{"name":"vae","localized_name":"vae","type":"VAE","link":2696}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2697,3720],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[]},{"id":1376,"type":"Note","pos":[13703.0439453125,536.6895751953125],"size":[261.9539489746094,88],"flags":{},"order":5,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Increase or decrease weight in ClownGuide to alter adherence to the input image."],"color":"#432","bgcolor":"#653"},{"id":1383,"type":"Note","pos":[14428.40234375,580.1749877929688],"size":[261.9539489746094,88],"flags":{},"order":6,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Samplers like res_2s in this cycling node will also work and are faster. 
res_2m and res_3m are even faster, but sometimes the effect takes longer in wall time to fully kick in."],"color":"#432","bgcolor":"#653"},{"id":1384,"type":"Note","pos":[14793.0322265625,518.4120483398438],"size":[261.9539489746094,88],"flags":{},"order":7,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["res_2m or res_3m can be used here instead and are faster, but are less likely to fully clean up lingering artifacts."],"color":"#432","bgcolor":"#653"},{"id":1328,"type":"ClownOptions_SDE_Beta","pos":[14186.4755859375,-132.6126251220703],"size":[315,266],"flags":{"collapsed":true},"order":8,"mode":0,"inputs":[{"name":"etas","localized_name":"etas","type":"SIGMAS","shape":7,"link":null},{"name":"etas_substep","localized_name":"etas_substep","type":"SIGMAS","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[3707],"slot_index":0}],"properties":{"Node name for S&R":"ClownOptions_SDE_Beta"},"widgets_values":["gaussian","gaussian","hard","hard",0.5,0.75,-1,"fixed"]},{"id":1381,"type":"Note","pos":[13881.6279296875,-217.62835693359375],"size":[261.9539489746094,88],"flags":{},"order":9,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Increase or decrease \"steps_to_run\" in ClownsharKSampler to change the effective denoise level."],"color":"#432","bgcolor":"#653"},{"id":1382,"type":"Note","pos":[14718.0498046875,-295.4144592285156],"size":[268.1851806640625,124.49711608886719],"flags":{},"order":10,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Increasing cycles will increase the amount of change, but take longer.\n\nCycles will rerun the same step over and over, forwards and backwards, iteratively refining an image at a controlled noise 
level."],"color":"#432","bgcolor":"#653"},{"id":1308,"type":"ClownGuide_Style_Beta","pos":[14108.255859375,675.60693359375],"size":[246.31312561035156,286],"flags":{},"order":26,"mode":4,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":3709},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":3699}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[3604],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuide_Style_Beta"},"widgets_values":["positive","WCT",1,1,"constant",0,-1,false]},{"id":1385,"type":"Note","pos":[14396.5634765625,742.3948364257812],"size":[261.9539489746094,88],"flags":{},"order":11,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["method = AdaIN is faster and uses less memory, but is less accurate. 
Some prefer the effect."],"color":"#432","bgcolor":"#653"}],"links":[[18,14,0,7,4,"VAE"],[1395,13,0,431,0,"MODEL"],[1398,7,3,431,1,"LATENT"],[2692,431,0,907,0,"MODEL"],[2696,14,0,908,1,"VAE"],[2697,908,0,909,0,"IMAGE"],[2881,490,0,970,0,"CLIP"],[2882,970,0,907,2,"CONDITIONING"],[2983,7,0,907,3,"LATENT"],[3469,981,0,908,0,"LATENT"],[3533,1317,0,980,6,"OPTIONS"],[3578,907,0,980,4,"LATENT"],[3581,490,0,1333,0,"CLIP"],[3602,1333,0,907,1,"CONDITIONING"],[3604,1308,0,980,5,"GUIDES"],[3626,1333,0,980,1,"CONDITIONING"],[3627,970,0,980,2,"CONDITIONING"],[3670,1358,1,490,0,"*"],[3671,1358,2,14,0,"*"],[3682,1350,0,1362,0,"IMAGE"],[3685,1358,0,1363,0,"MODEL"],[3686,1363,0,13,0,"*"],[3688,1350,0,7,0,"IMAGE"],[3698,980,0,981,4,"LATENT"],[3699,1318,0,1308,3,"GUIDES"],[3707,1328,0,980,7,"OPTIONS"],[3708,1318,0,907,5,"GUIDES"],[3709,7,1,1308,0,"LATENT"],[3710,7,0,1318,0,"LATENT"],[3720,908,0,1377,0,"IMAGE"],[3721,1373,0,1378,0,"*"],[3724,1378,0,1350,1,"IMAGE"],[3725,1374,0,1379,0,"*"],[3726,1379,0,1371,0,"IMAGE"],[3727,1371,0,7,1,"IMAGE"],[3728,1371,0,1350,0,"IMAGE"],[3729,1378,0,1377,1,"IMAGE"],[3730,1380,0,1371,1,"INT"],[3731,1380,1,1371,2,"INT"],[3732,1380,0,7,5,"INT"],[3733,1380,1,7,6,"INT"]],"groups":[{"id":1,"title":"Model Loaders","bounding":[12796.72265625,-401.9004211425781,822.762451171875,436.0693359375],"color":"#3f789e","font_size":24,"flags":{}},{"id":2,"title":"Sampling","bounding":[13652.6533203125,-402.70721435546875,1470.8076171875,1409.0289306640625],"color":"#3f789e","font_size":24,"flags":{}},{"id":3,"title":"Input Prep","bounding":[12797.1396484375,77.69412231445312,817.4218139648438,820.6239624023438],"color":"#3f789e","font_size":24,"flags":{}},{"id":4,"title":"Save and 
Compare","bounding":[15180.705078125,-399.09112548828125,1050.6468505859375,615.8845825195312],"color":"#3f789e","font_size":24,"flags":{}}],"config":{},"extra":{"ds":{"scale":1.3072020475058237,"offset":[-11012.049075449982,623.0809311059861]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0,"ue_links":[],"VHS_MetadataImage":true,"VHS_KeepIntermediate":true},"version":0.4}
================================================
FILE: example_workflows/hidream style transfer v2.json
================================================
{"last_node_id":1385,"last_link_id":3733,"nodes":[{"id":13,"type":"Reroute","pos":[13508.9013671875,-109.2831802368164],"size":[75,26],"flags":{},"order":17,"mode":0,"inputs":[{"name":"","type":"*","link":3686}],"outputs":[{"name":"","type":"MODEL","links":[1395],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":14,"type":"Reroute","pos":[13508.9013671875,-29.283178329467773],"size":[75,26],"flags":{},"order":14,"mode":0,"inputs":[{"name":"","type":"*","link":3671}],"outputs":[{"name":"","type":"VAE","links":[18,2696],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":490,"type":"Reroute","pos":[13508.9013671875,-69.28317260742188],"size":[75,26],"flags":{},"order":13,"mode":0,"inputs":[{"name":"","type":"*","link":3670}],"outputs":[{"name":"","type":"CLIP","links":[2881,3581],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1363,"type":"ReHiDreamPatcher","pos":[13268.9013671875,-109.2831802368164],"size":[210,82],"flags":{},"order":12,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":3685}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[3686],"slot_index":0}],"properties":{"Node name for 
S&R":"ReHiDreamPatcher"},"widgets_values":["float64",true]},{"id":981,"type":"ClownsharkChainsampler_Beta","pos":[14758.255859375,-64.39308166503906],"size":[340.20001220703125,510],"flags":{},"order":29,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":3698},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[3469],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"exponential/res_2s",-1,4,"resample",true]},{"id":1308,"type":"ClownGuide_Style_Beta","pos":[14108.255859375,675.60693359375],"size":[246.31312561035156,286],"flags":{},"order":26,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":3709},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":3699}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[3604],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownGuide_Style_Beta"},"widgets_values":["positive","WCT",1,1,"constant",0,-1,false]},{"id":1318,"type":"ClownGuide_Beta","pos":[13828.255859375,675.60693359375],"size":[263.102783203125,290],"flags":{},"order":24,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":3710},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[3699,3708],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuide_Beta"},"widgets_values":["inversion",false,false,0.7,1,"constant",0,-1,false]},{"id":1333,"type":"CLIPTextEncode","pos":[13688.255859375,-44.393089294433594],"size":[280.6252746582031,164.06936645507812],"flags":{"collapsed":false},"order":19,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":3581}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[3602,3626],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["messy blackboard chalk drawing of the inside of a car driving down a creepy road. 
colorful chalk with shading that shows the chalk textures from drawing with the side of the chalk\n"]},{"id":1358,"type":"ClownModelLoader","pos":[12828.9013671875,-299.2831726074219],"size":[341.7054443359375,266],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[3685],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[3670],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[3671],"slot_index":2}],"properties":{"Node name for S&R":"ClownModelLoader"},"widgets_values":["hidream_i1_full_fp8.safetensors","fp8_e4m3fn_fast","t5xxl_fp8_e4m3fn_scaled.safetensors","llama_3.1_8b_instruct_fp8_scaled.safetensors","clip_g_hidream.safetensors","clip_l_hidream.safetensors","hidream","ae.sft"]},{"id":431,"type":"ModelSamplingAdvancedResolution","pos":[13218.9013671875,-309.28314208984375],"size":[260.3999938964844,126],"flags":{},"order":25,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1395},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":1398}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2692],"slot_index":0}],"properties":{"Node name for S&R":"ModelSamplingAdvancedResolution","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":["exponential",1.35,0.85]},{"id":970,"type":"CLIPTextEncode","pos":[13688.255859375,165.60690307617188],"size":[281.9206848144531,109.87118530273438],"flags":{},"order":18,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":2881}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2882,3627],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["blurry, out of focus, shallow depth of field, jpeg artifacts, low quality, bad quality, 
unsharp"]},{"id":907,"type":"ClownsharKSampler_Beta","pos":[14008.255859375,-64.39308166503906],"size":[340.55120849609375,666.8208618164062],"flags":{},"order":27,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":2692},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":3602},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":2882},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":2983},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":3708},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[3578],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharKSampler_Beta","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":[0.5,"multistep/res_2m","beta57",20,11,1,1,201,"fixed","unsample",true]},{"id":980,"type":"ClownsharkChainsampler_Beta","pos":[14378.255859375,-64.39308166503906],"size":[340.20001220703125,570],"flags":{},"order":28,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":3626},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":3627},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":3578},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":3604},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":3533},{"name":"options 2","type":"OPTIONS","link":3707},{"name":"options 3","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[3698],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"exponential/res_3s_non-monotonic",1,4,"resample",true]},{"id":1317,"type":"ClownOptions_Cycles_Beta","pos":[14408.255859375,-294.3930969238281],"size":[265.2884826660156,178],"flags":{},"order":2,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[3533],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownOptions_Cycles_Beta"},"widgets_values":[10,1,0.5,"none",-1,4]},{"id":1373,"type":"LoadImage","pos":[12848.2666015625,531.6068115234375],"size":[315,314],"flags":{},"order":3,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3721],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"title":"Load Image (Composition)","properties":{"Node name for S&R":"LoadImage"},"widgets_values":["pasted/image (476).png","image"]},{"id":1374,"type":"LoadImage","pos":[12838.2666015625,171.6068115234375],"size":[315,314],"flags":{},"order":4,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3725],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"title":"Load Image (Style Guide)","properties":{"Node name for S&R":"LoadImage"},"widgets_values":["ComfyUI_14627_.png","image"]},{"id":1378,"type":"Reroute","pos":[13184.07421875,533.128662109375],"size":[75,26],"flags":{},"order":15,"mode":0,"inputs":[{"name":"","type":"*","link":3721}],"outputs":[{"name":"","type":"IMAGE","links":[3724,3729],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1379,"type":"Reroute","pos":[13185.853515625,168.15780639648438],"size":[75,26],"flags":{},"order":16,"mode":0,"inputs":[{"name":"","type":"*","link":3725}],"outputs":[{"name":"","type":"IMAGE","links":[3726],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":909,"type":"SaveImage","pos":[15220,-259.5838928222656],"size":[457.3382263183594,422.2065124511719],"flags":{},"order":31,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2697}],"outputs":[],"properties":{"Node name for 
S&R":"SaveImage","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["ComfyUI"]},{"id":1362,"type":"PreviewImage","pos":[13317.849609375,617.1558837890625],"size":[210,246],"flags":{},"order":22,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":3682}],"outputs":[],"properties":{"Node name for S&R":"PreviewImage"},"widgets_values":[]},{"id":1350,"type":"ColorMatch","pos":[13709.701171875,316.05731201171875],"size":[210,102],"flags":{"collapsed":false},"order":21,"mode":0,"inputs":[{"name":"image_ref","localized_name":"image_ref","type":"IMAGE","link":3728},{"name":"image_target","localized_name":"image_target","type":"IMAGE","link":3724}],"outputs":[{"name":"image","localized_name":"image","type":"IMAGE","links":[3682,3688],"slot_index":0}],"properties":{"Node name for S&R":"ColorMatch"},"widgets_values":["mkl",1]},{"id":7,"type":"VAEEncodeAdvanced","pos":[13343.19140625,556.8784790039062],"size":[261.2217712402344,298],"flags":{"collapsed":true},"order":23,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":3688},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":3727},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":18},{"name":"width","type":"INT","pos":[10,160.00003051757812],"widget":{"name":"width"},"link":3732},{"name":"height","type":"INT","pos":[10,184.00003051757812],"widget":{"name":"height"},"link":3733}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[2983,3710],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":[3709],"slot_index":1},{"name":"mask","localized_name":"mask","type":"MASK","links":[],"slot_index":2},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[1398],"slot_i
ndex":3},{"name":"width","localized_name":"width","type":"INT","links":[],"slot_index":4},{"name":"height","localized_name":"height","type":"INT","links":[],"slot_index":5}],"properties":{"Node name for S&R":"VAEEncodeAdvanced","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":["false",1344,768,"red",false,"16_channels"]},{"id":1371,"type":"Image Repeat Tile To Size","pos":[13329.5947265625,497.8262939453125],"size":[210,146],"flags":{"collapsed":true},"order":20,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","link":3726},{"name":"width","type":"INT","pos":[10,36],"widget":{"name":"width"},"link":3730},{"name":"height","type":"INT","pos":[10,60],"widget":{"name":"height"},"link":3731}],"outputs":[{"name":"image","localized_name":"image","type":"IMAGE","links":[3727,3728],"slot_index":0}],"properties":{"Node name for S&R":"Image Repeat Tile To Size"},"widgets_values":[1024,1024,true]},{"id":1380,"type":"SetImageSize","pos":[13324.7197265625,323.0480041503906],"size":[210,102],"flags":{},"order":5,"mode":0,"inputs":[],"outputs":[{"name":"width","localized_name":"width","type":"INT","links":[3730,3732],"slot_index":0},{"name":"height","localized_name":"height","type":"INT","links":[3731,3733],"slot_index":1}],"properties":{"Node name for S&R":"SetImageSize"},"widgets_values":[1344,768]},{"id":1377,"type":"Image Comparer 
(rgthree)","pos":[15742.4619140625,-253.3526153564453],"size":[461.9190368652344,413.5953369140625],"flags":{},"order":32,"mode":0,"inputs":[{"name":"image_a","type":"IMAGE","dir":3,"link":3720},{"name":"image_b","type":"IMAGE","dir":3,"link":3729}],"outputs":[],"properties":{"comparer_mode":"Slide"},"widgets_values":[[{"name":"A","selected":true,"url":"/api/view?filename=rgthree.compare._temp_pzczy_00001_.png&type=temp&subfolder=&rand=0.2568823425587843"},{"name":"B","selected":true,"url":"/api/view?filename=rgthree.compare._temp_pzczy_00002_.png&type=temp&subfolder=&rand=0.9444625525852213"}]]},{"id":908,"type":"VAEDecode","pos":[15217.7802734375,-312.1965637207031],"size":[210,46],"flags":{"collapsed":true},"order":30,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":3469},{"name":"vae","localized_name":"vae","type":"VAE","link":2696}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2697,3720],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[]},{"id":1376,"type":"Note","pos":[13703.0439453125,536.6895751953125],"size":[261.9539489746094,88],"flags":{},"order":6,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Increase or decrease weight in ClownGuide to alter adherence to the input image."],"color":"#432","bgcolor":"#653"},{"id":1383,"type":"Note","pos":[14428.40234375,580.1749877929688],"size":[261.9539489746094,88],"flags":{},"order":9,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Samplers like res_2s in this cycling node will also work and are faster. 
res_2m and res_3m are even faster, but sometimes the effect takes longer in wall time to fully kick in."],"color":"#432","bgcolor":"#653"},{"id":1384,"type":"Note","pos":[14793.0322265625,518.4120483398438],"size":[261.9539489746094,88],"flags":{},"order":10,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["res_2m or res_3m can be used here instead and are faster, but are less likely to fully clean up lingering artifacts."],"color":"#432","bgcolor":"#653"},{"id":1385,"type":"Note","pos":[14398.345703125,768.2096557617188],"size":[261.9539489746094,88],"flags":{},"order":11,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["method = AdaIN is faster and uses less memory, but is less accurate. Some prefer the effect."],"color":"#432","bgcolor":"#653"},{"id":1328,"type":"ClownOptions_SDE_Beta","pos":[14186.4755859375,-132.6126251220703],"size":[315,266],"flags":{"collapsed":true},"order":1,"mode":0,"inputs":[{"name":"etas","localized_name":"etas","type":"SIGMAS","shape":7,"link":null},{"name":"etas_substep","localized_name":"etas_substep","type":"SIGMAS","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[3707],"slot_index":0}],"properties":{"Node name for S&R":"ClownOptions_SDE_Beta"},"widgets_values":["gaussian","gaussian","hard","hard",0.5,0.75,-1,"fixed"]},{"id":1381,"type":"Note","pos":[13881.6279296875,-217.62835693359375],"size":[261.9539489746094,88],"flags":{},"order":7,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Increase or decrease \"steps_to_run\" in ClownsharKSampler to change the effective denoise level."],"color":"#432","bgcolor":"#653"},{"id":1382,"type":"Note","pos":[14718.0498046875,-295.4144592285156],"size":[268.1851806640625,124.49711608886719],"flags":{},"order":8,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Increasing cycles 
will increase the amount of change, but take longer.\n\nCycles will rerun the same step over and over, forwards and backwards, iteratively refining an image at a controlled noise level."],"color":"#432","bgcolor":"#653"}],"links":[[18,14,0,7,4,"VAE"],[1395,13,0,431,0,"MODEL"],[1398,7,3,431,1,"LATENT"],[2692,431,0,907,0,"MODEL"],[2696,14,0,908,1,"VAE"],[2697,908,0,909,0,"IMAGE"],[2881,490,0,970,0,"CLIP"],[2882,970,0,907,2,"CONDITIONING"],[2983,7,0,907,3,"LATENT"],[3469,981,0,908,0,"LATENT"],[3533,1317,0,980,6,"OPTIONS"],[3578,907,0,980,4,"LATENT"],[3581,490,0,1333,0,"CLIP"],[3602,1333,0,907,1,"CONDITIONING"],[3604,1308,0,980,5,"GUIDES"],[3626,1333,0,980,1,"CONDITIONING"],[3627,970,0,980,2,"CONDITIONING"],[3670,1358,1,490,0,"*"],[3671,1358,2,14,0,"*"],[3682,1350,0,1362,0,"IMAGE"],[3685,1358,0,1363,0,"MODEL"],[3686,1363,0,13,0,"*"],[3688,1350,0,7,0,"IMAGE"],[3698,980,0,981,4,"LATENT"],[3699,1318,0,1308,3,"GUIDES"],[3707,1328,0,980,7,"OPTIONS"],[3708,1318,0,907,5,"GUIDES"],[3709,7,1,1308,0,"LATENT"],[3710,7,0,1318,0,"LATENT"],[3720,908,0,1377,0,"IMAGE"],[3721,1373,0,1378,0,"*"],[3724,1378,0,1350,1,"IMAGE"],[3725,1374,0,1379,0,"*"],[3726,1379,0,1371,0,"IMAGE"],[3727,1371,0,7,1,"IMAGE"],[3728,1371,0,1350,0,"IMAGE"],[3729,1378,0,1377,1,"IMAGE"],[3730,1380,0,1371,1,"INT"],[3731,1380,1,1371,2,"INT"],[3732,1380,0,7,5,"INT"],[3733,1380,1,7,6,"INT"]],"groups":[{"id":1,"title":"Model Loaders","bounding":[12796.72265625,-401.9004211425781,822.762451171875,436.0693359375],"color":"#3f789e","font_size":24,"flags":{}},{"id":2,"title":"Sampling","bounding":[13652.6533203125,-402.70721435546875,1470.8076171875,1409.0289306640625],"color":"#3f789e","font_size":24,"flags":{}},{"id":3,"title":"Input Prep","bounding":[12797.1396484375,77.69412231445312,817.4218139648438,820.6239624023438],"color":"#3f789e","font_size":24,"flags":{}},{"id":4,"title":"Save and 
Compare","bounding":[15180.705078125,-399.09112548828125,1050.6468505859375,615.8845825195312],"color":"#3f789e","font_size":24,"flags":{}}],"config":{},"extra":{"ds":{"scale":1.3072020475058237,"offset":[-10982.673431174471,526.9422127403179]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0,"ue_links":[],"VHS_MetadataImage":true,"VHS_KeepIntermediate":true},"version":0.4}
================================================
FILE: example_workflows/hidream style transfer.json
================================================
{"last_node_id":1317,"last_link_id":3533,"nodes":[{"id":13,"type":"Reroute","pos":[13140,110],"size":[75,26],"flags":{},"order":11,"mode":0,"inputs":[{"name":"","type":"*","link":3509}],"outputs":[{"name":"","type":"MODEL","links":[1395],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":402,"type":"QuadrupleCLIPLoader","pos":[12690,150],"size":[407.7720031738281,130],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"CLIP","localized_name":"CLIP","type":"CLIP","links":[1552],"slot_index":0}],"properties":{"Node name for S&R":"QuadrupleCLIPLoader","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["clip_l_hidream.safetensors","clip_g_hidream.safetensors","t5xxl_fp8_e4m3fn_scaled.safetensors","llama_3.1_8b_instruct_fp8_scaled.safetensors"]},{"id":490,"type":"Reroute","pos":[13140,150],"size":[75,26],"flags":{},"order":6,"mode":0,"inputs":[{"name":"","type":"*","link":1552}],"outputs":[{"name":"","type":"CLIP","links":[2881,3323],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":981,"type":"ClownsharkChainsampler_Beta","pos":[14277.9453125,-92.8893051147461],"size":[340.20001220703125,510],"flags":{},"order":17,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":3250},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[3469],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links
":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"exponential/res_2s",-1,4,"resample",true]},{"id":908,"type":"VAEDecode","pos":[14640.490234375,-94.68604278564453],"size":[210,46],"flags":{},"order":18,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":3469},{"name":"vae","localized_name":"vae","type":"VAE","link":2696}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2697],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[]},{"id":909,"type":"SaveImage","pos":[14635.966796875,4.407815933227539],"size":[457.3382263183594,422.2065124511719],"flags":{},"order":19,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2697}],"outputs":[],"properties":{"Node name for S&R":"SaveImage","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["ComfyUI"]},{"id":431,"type":"ModelSamplingAdvancedResolution","pos":[13253.2275390625,-90.14451599121094],"size":[260.3999938964844,126],"flags":{},"order":14,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1395},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":1398}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2692],"slot_index":0}],"properties":{"Node name for 
S&R":"ModelSamplingAdvancedResolution","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":["exponential",1.35,0.85]},{"id":14,"type":"Reroute","pos":[13140,190],"size":[75,26],"flags":{},"order":8,"mode":0,"inputs":[{"name":"","type":"*","link":1344}],"outputs":[{"name":"","type":"VAE","links":[18,2696],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":403,"type":"UNETLoader","pos":[12780,20],"size":[320.7802429199219,82],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[{"name":"MODEL","localized_name":"MODEL","type":"MODEL","links":[3508],"slot_index":0}],"properties":{"Node name for S&R":"UNETLoader","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["hidream_i1_full_fp8.safetensors","fp8_e4m3fn"]},{"id":404,"type":"VAELoader","pos":[12887.7998046875,328.069091796875],"size":[210,58],"flags":{},"order":2,"mode":0,"inputs":[],"outputs":[{"name":"VAE","localized_name":"VAE","type":"VAE","links":[1344],"slot_index":0}],"properties":{"Node name for S&R":"VAELoader","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["ae.sft"]},{"id":1308,"type":"ClownGuide_Style_Beta","pos":[13637.08984375,660.7327270507812],"size":[246.31312561035156,286],"flags":{},"order":13,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":3531},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[3530],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownGuide_Style_Beta"},"widgets_values":["positive","WCT",1,1,"constant",0,-1,false]},{"id":980,"type":"ClownsharkChainsampler_Beta","pos":[13918.0234375,-98.65141296386719],"size":[340.20001220703125,570],"flags":{},"order":16,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":2971},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":3530},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":3533},{"name":"options 2","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[3250],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"exponential/res_2s",1,4,"resample",true]},{"id":7,"type":"VAEEncodeAdvanced","pos":[13250.6240234375,672.3837890625],"size":[261.2217712402344,279.3136901855469],"flags":{},"order":12,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":3515},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":3532},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":18}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[2983],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":[3531],"slot_index":1},{"name":"mask","localized_name":"mask","type":"MASK","links":[],"slot_index":2},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[1398],"slot_index":3},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"VAEEncodeAdvanced","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":["false",896,1152,"red",false,"16_channels"]},{"id":1285,"type":"LoadImage","pos":[12887.7626953125,444.2932434082031],"size":[315,314],"flags":{},"order":3,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3515],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["pasted/image 
(544).png","image"]},{"id":907,"type":"ClownsharKSampler_Beta","pos":[13550.5615234375,-92.92960357666016],"size":[340.55120849609375,666.8208618164062],"flags":{},"order":15,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":2692},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":3480},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":2882},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":2983},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2971],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":[0.5,"exponential/res_2s","beta57",20,14,1,4,201,"fixed","unsample",true]},{"id":1309,"type":"LoadImage","pos":[12889.3486328125,815.3554077148438],"size":[315,314],"flags":{},"order":4,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3532],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["ChatGPT Image Apr 29, 2025, 09_18_46 
PM.png","image"]},{"id":1297,"type":"ReHiDreamPatcher","pos":[12779.865234375,-110.67424774169922],"size":[321.6453552246094,82],"flags":{},"order":7,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":3508}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[3509],"slot_index":0}],"properties":{"Node name for S&R":"ReHiDreamPatcher","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":["float32",true]},{"id":1224,"type":"CLIPTextEncode","pos":[13247.2734375,95.37741088867188],"size":[269.0397644042969,155.65545654296875],"flags":{"collapsed":false},"order":10,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":3323}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[3480],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["a gritty illustration of a japanese woman with traditional hair in traditional clothes"]},{"id":970,"type":"CLIPTextEncode","pos":[13257.970703125,316.4944152832031],"size":[261.8798522949219,111.21334838867188],"flags":{},"order":9,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":2881}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2882],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["blurry, out of focus, shallow depth of field, low quality, bad quality, low detail, mutated, jpeg artifacts, compression 
artifacts,"]},{"id":1317,"type":"ClownOptions_Cycles_Beta","pos":[13959.880859375,541.2625122070312],"size":[265.2884826660156,178],"flags":{},"order":5,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[3533],"slot_index":0}],"properties":{"Node name for S&R":"ClownOptions_Cycles_Beta"},"widgets_values":[20,1,0.5,"none",-1,4]}],"links":[[18,14,0,7,4,"VAE"],[1344,404,0,14,0,"*"],[1395,13,0,431,0,"MODEL"],[1398,7,3,431,1,"LATENT"],[1552,402,0,490,0,"*"],[2692,431,0,907,0,"MODEL"],[2696,14,0,908,1,"VAE"],[2697,908,0,909,0,"IMAGE"],[2881,490,0,970,0,"CLIP"],[2882,970,0,907,2,"CONDITIONING"],[2971,907,0,980,4,"LATENT"],[2983,7,0,907,3,"LATENT"],[3250,980,0,981,4,"LATENT"],[3323,490,0,1224,0,"CLIP"],[3469,981,0,908,0,"LATENT"],[3480,1224,0,907,1,"CONDITIONING"],[3508,403,0,1297,0,"MODEL"],[3509,1297,0,13,0,"*"],[3515,1285,0,7,0,"IMAGE"],[3530,1308,0,980,5,"GUIDES"],[3531,7,1,1308,0,"LATENT"],[3532,1309,0,7,1,"IMAGE"],[3533,1317,0,980,6,"OPTIONS"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.7398859252302459,"offset":[-10583.206320408986,234.77974623579652]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0,"ue_links":[],"VHS_MetadataImage":true,"VHS_KeepIntermediate":true},"version":0.4}
================================================
FILE: example_workflows/hidream txt2img.json
================================================
{"last_node_id":1321,"last_link_id":3548,"nodes":[{"id":490,"type":"Reroute","pos":[13130,-70],"size":[75,26],"flags":{},"order":3,"mode":0,"inputs":[{"name":"","type":"*","link":3534}],"outputs":[{"name":"","type":"CLIP","links":[2881,3323],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1317,"type":"ClownModelLoader","pos":[12770,-90],"size":[315,266],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[3539],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[3534],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[3535],"slot_index":2}],"properties":{"Node name for S&R":"ClownModelLoader"},"widgets_values":["hidream_i1_full_fp8.safetensors","fp8_e4m3fn_fast","clip_l_hidream.safetensors","clip_g_hidream.safetensors","t5xxl_fp16.safetensors","llama_3.1_8b_instruct_fp8_scaled.safetensors","hidream","ae.sft"]},{"id":14,"type":"Reroute","pos":[13130,-30],"size":[75,26],"flags":{},"order":4,"mode":0,"inputs":[{"name":"","type":"*","link":3535}],"outputs":[{"name":"","type":"VAE","links":[18,2696],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":970,"type":"CLIPTextEncode","pos":[13253.0546875,116.28263854980469],"size":[261.8798522949219,111.21334838867188],"flags":{},"order":5,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":2881}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2882],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["blurry, out of focus, shallow depth of field, low quality, bad quality, low detail, mutated, jpeg artifacts, compression 
artifacts,"]},{"id":7,"type":"VAEEncodeAdvanced","pos":[13253.044921875,283.4559020996094],"size":[261.2217712402344,279.3136901855469],"flags":{},"order":7,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":null},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":18}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":[],"slot_index":1},{"name":"mask","localized_name":"mask","type":"MASK","links":[],"slot_index":2},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[3540],"slot_index":3},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"VAEEncodeAdvanced","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":["false",1344,768,"red",false,"16_channels"]},{"id":1224,"type":"CLIPTextEncode","pos":[13250,-90],"size":[269.0397644042969,155.65545654296875],"flags":{"collapsed":false},"order":6,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":3323}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[3480],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["a cold war era photograph from 1983 of a group of four friends holding up their hands inside an antique living room in a victorian era 
mansion"]},{"id":13,"type":"Reroute","pos":[13130,-110],"size":[75,26],"flags":{},"order":2,"mode":0,"inputs":[{"name":"","type":"*","link":3539}],"outputs":[{"name":"","type":"MODEL","links":[3548],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":909,"type":"SaveImage","pos":[13936.2919921875,12.050485610961914],"size":[457.3382263183594,422.2065124511719],"flags":{},"order":10,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2697}],"outputs":[],"properties":{"Node name for S&R":"SaveImage","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["ComfyUI"]},{"id":908,"type":"VAEDecode","pos":[13934.587890625,-92.61396026611328],"size":[210,46],"flags":{},"order":9,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":3537},{"name":"vae","localized_name":"vae","type":"VAE","link":2696}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2697],"slot_index":0}],"properties":{"Node name for 
S&R":"VAEDecode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[]},{"id":907,"type":"ClownsharKSampler_Beta","pos":[13550.5615234375,-92.92960357666016],"size":[340.55120849609375,666.8208618164062],"flags":{},"order":8,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":3548},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":3480},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":2882},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":3540},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[3537],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":[0.5,"multistep/res_3m","bong_tangent",20,-1,1,4,0,"fixed","standard",true]},{"id":1321,"type":"Note","pos":[12769.740234375,239.9431915283203],"size":[345.97113037109375,161.35496520996094],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["There are many samplers to try, but res_2m, res_3m, res_2s, and res_3s are very reliable. If you want to push quality a bit higher in exchange for time, you could even try res_5s.\n\nres_2m and res_3m begin with higher order steps (one res_2s step, and two res_3s steps, respectively) to initialize the sampling process. 
Ultimately, the result is faster convergence in terms of wall time, as fewer steps end up being necessary."],"color":"#432","bgcolor":"#653"}],"links":[[18,14,0,7,4,"VAE"],[2696,14,0,908,1,"VAE"],[2697,908,0,909,0,"IMAGE"],[2881,490,0,970,0,"CLIP"],[2882,970,0,907,2,"CONDITIONING"],[3323,490,0,1224,0,"CLIP"],[3480,1224,0,907,1,"CONDITIONING"],[3534,1317,1,490,0,"*"],[3535,1317,2,14,0,"*"],[3537,907,0,908,0,"LATENT"],[3539,1317,0,13,0,"*"],[3540,7,3,907,3,"LATENT"],[3548,13,0,907,0,"MODEL"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.9194342495775452,"offset":[-11336.810477400342,443.2870544682993]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0,"ue_links":[],"VHS_MetadataImage":true,"VHS_KeepIntermediate":true},"version":0.4}
================================================
FILE: example_workflows/hidream unsampling data WF.json
================================================
{"last_node_id":637,"last_link_id":2029,"nodes":[{"id":628,"type":"LoadImage","pos":[599.166015625,156.38429260253906],"size":[315,314],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2017]},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["ComfyUI_14254_.png","image"]},{"id":629,"type":"VAEEncodeAdvanced","pos":[961.6968994140625,123.66181182861328],"size":[278.0284423828125,280.5834045410156],"flags":{},"order":4,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":2017},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":2026}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[2013,2020,2022],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":null},{"name":"mask","localized_name":"mask","type":"MASK","links":null},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[2015]},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for 
S&R":"VAEEncodeAdvanced"},"widgets_values":["false",1024,1024,"red",false,"16_channels"]},{"id":632,"type":"ModelSamplingAdvancedResolution","pos":[962.5586547851562,-316.3705139160156],"size":[277.62237548828125,126],"flags":{},"order":7,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":2025},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":2015}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2016],"slot_index":0}],"properties":{"Node name for S&R":"ModelSamplingAdvancedResolution"},"widgets_values":["exponential",1.35,0.85]},{"id":633,"type":"SaveImage","pos":[1921.8458251953125,-123.4797134399414],"size":[436.4179382324219,508.5302429199219],"flags":{},"order":11,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2019}],"outputs":[],"properties":{},"widgets_values":["ComfyUI"]},{"id":631,"type":"ClownsharkChainsampler_Beta","pos":[1605.8143310546875,-124.34080505371094],"size":[280.55523681640625,510],"flags":{},"order":9,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":2005},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":2023},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2008],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"multistep/res_3m",-1,5.5,"resample",true]},{"id":634,"type":"ClownGuide_Beta","pos":[1276.0064697265625,-480.84442138671875],"size":[284.860595703125,290.8609924316406],"flags":{},"order":5,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":2020},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[2021],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuide_Beta"},"widgets_values":["data",false,false,0.5,1,"constant",0,-1,false]},{"id":636,"type":"ClownModelLoader","pos":[599.3463745117188,-176.31788635253906],"size":[315,266],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2025],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[2024,2028],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[2026,2027],"slot_index":2}],"properties":{"Node name for S&R":"ClownModelLoader"},"widgets_values":["hidream_i1_full_fp8.safetensors","fp8_e4m3fn","clip_l_hidream.safetensors","clip_g_hidream.safetensors","t5xxl_fp8_e4m3fn_scaled.safetensors","llama_3.1_8b_instruct_fp8_scaled.safetensors","hidream","ae.sft"]},{"id":591,"type":"VAEDecode","pos":[1924.08251953125,-233.2501983642578],"size":[210,46],"flags":{"collapsed":false},"order":10,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","label":"samples","type":"LATENT","link":2008},{"name":"vae","localized_name":"vae","label":"vae","type":"VAE","link":2027}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","label":"IMAGE","type":"IMAGE","shape":3,"links":[2019],"slot_index":0}],"properties":{"Node name for 
S&R":"VAEDecode"},"widgets_values":[]},{"id":630,"type":"ClownsharKSampler_Beta","pos":[1271.7001953125,-124.3408432006836],"size":[291.7499084472656,630],"flags":{},"order":8,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":2016},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":2018},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":2029},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":2013},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":2021},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2005]},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"multistep/res_3m","beta57",60,-1,1,1,0,"fixed","unsample",true]},{"id":107,"type":"CLIPTextEncode","pos":[959.4713745117188,-123.3353500366211],"size":[282.33453369140625,173.58438110351562],"flags":{"collapsed":false},"order":2,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","label":"clip","type":"CLIP","link":2024}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","label":"CONDITIONING","type":"CONDITIONING","shape":3,"links":[2018],"slot_index":0}],"title":"Positive Prompt","properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["the mournful lamentations of of a female rock singer on stage with chaos behind her, her face screaming her sorrowful refrains the despairing cries of anguished screams howling agonized moans, her pained whispers mournful sighs distant echoes across the smoky stage, fading memories of lost 
loves, forgotten dreams, shattered hopes, crushed spirits, broken hearts"]},{"id":637,"type":"CLIPTextEncode","pos":[963.5917358398438,453.83306884765625],"size":[278.4529113769531,88],"flags":{"collapsed":false},"order":3,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","label":"clip","type":"CLIP","link":2028}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","label":"CONDITIONING","type":"CONDITIONING","shape":3,"links":[2029],"slot_index":0}],"title":"Positive Prompt","properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":[""]},{"id":635,"type":"ClownGuide_Beta","pos":[1604.09326171875,-479.9832763671875],"size":[284.860595703125,290.8609924316406],"flags":{},"order":6,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":2022},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[2023],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuide_Beta"},"widgets_values":["data",false,true,0.5,1,"beta57",0,10,false]}],"links":[[2005,630,0,631,4,"LATENT"],[2008,631,0,591,0,"LATENT"],[2013,629,0,630,3,"LATENT"],[2015,629,3,632,1,"LATENT"],[2016,632,0,630,0,"MODEL"],[2017,628,0,629,0,"IMAGE"],[2018,107,0,630,1,"CONDITIONING"],[2019,591,0,633,0,"IMAGE"],[2020,629,0,634,0,"LATENT"],[2021,634,0,630,5,"GUIDES"],[2022,629,0,635,0,"LATENT"],[2023,635,0,631,5,"GUIDES"],[2024,636,1,107,0,"CLIP"],[2025,636,0,632,0,"MODEL"],[2026,636,2,629,4,"VAE"],[2027,636,2,591,1,"VAE"],[2028,636,1,637,0,"CLIP"],[2029,637,0,630,2,"CONDITIONING"]],"groups":[],"config":{},"extra":{"ds":{"scale":2.1762913579017154,"offset":[427.0670817937978,488.9238245904811]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0},"version":0.4}
================================================
FILE: example_workflows/hidream unsampling data.json
================================================
{"last_node_id":637,"last_link_id":2029,"nodes":[{"id":628,"type":"LoadImage","pos":[599.166015625,156.38429260253906],"size":[315,314],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2017]},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["ComfyUI_14254_.png","image"]},{"id":629,"type":"VAEEncodeAdvanced","pos":[961.6968994140625,123.66181182861328],"size":[278.0284423828125,280.5834045410156],"flags":{},"order":4,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":2017},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":2026}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[2013,2020,2022],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":null},{"name":"mask","localized_name":"mask","type":"MASK","links":null},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[2015]},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for 
S&R":"VAEEncodeAdvanced"},"widgets_values":["false",1024,1024,"red",false,"16_channels"]},{"id":632,"type":"ModelSamplingAdvancedResolution","pos":[962.5586547851562,-316.3705139160156],"size":[277.62237548828125,126],"flags":{},"order":7,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":2025},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":2015}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2016],"slot_index":0}],"properties":{"Node name for S&R":"ModelSamplingAdvancedResolution"},"widgets_values":["exponential",1.35,0.85]},{"id":633,"type":"SaveImage","pos":[1921.8458251953125,-123.4797134399414],"size":[436.4179382324219,508.5302429199219],"flags":{},"order":11,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2019}],"outputs":[],"properties":{},"widgets_values":["ComfyUI"]},{"id":631,"type":"ClownsharkChainsampler_Beta","pos":[1605.8143310546875,-124.34080505371094],"size":[280.55523681640625,510],"flags":{},"order":9,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":2005},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":2023},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2008],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"multistep/res_3m",-1,5.5,"resample",true]},{"id":634,"type":"ClownGuide_Beta","pos":[1276.0064697265625,-480.84442138671875],"size":[284.860595703125,290.8609924316406],"flags":{},"order":5,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":2020},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[2021],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuide_Beta"},"widgets_values":["data",false,false,0.5,1,"constant",0,-1,false]},{"id":636,"type":"ClownModelLoader","pos":[599.3463745117188,-176.31788635253906],"size":[315,266],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2025],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[2024,2028],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[2026,2027],"slot_index":2}],"properties":{"Node name for S&R":"ClownModelLoader"},"widgets_values":["hidream_i1_full_fp8.safetensors","fp8_e4m3fn","clip_l_hidream.safetensors","clip_g_hidream.safetensors","t5xxl_fp8_e4m3fn_scaled.safetensors","llama_3.1_8b_instruct_fp8_scaled.safetensors","hidream","ae.sft"]},{"id":591,"type":"VAEDecode","pos":[1924.08251953125,-233.2501983642578],"size":[210,46],"flags":{"collapsed":false},"order":10,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","label":"samples","type":"LATENT","link":2008},{"name":"vae","localized_name":"vae","label":"vae","type":"VAE","link":2027}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","label":"IMAGE","type":"IMAGE","shape":3,"links":[2019],"slot_index":0}],"properties":{"Node name for 
S&R":"VAEDecode"},"widgets_values":[]},{"id":630,"type":"ClownsharKSampler_Beta","pos":[1271.7001953125,-124.3408432006836],"size":[291.7499084472656,630],"flags":{},"order":8,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":2016},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":2018},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":2029},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":2013},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":2021},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2005]},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"multistep/res_3m","beta57",60,-1,1,1,0,"fixed","unsample",true]},{"id":107,"type":"CLIPTextEncode","pos":[959.4713745117188,-123.3353500366211],"size":[282.33453369140625,173.58438110351562],"flags":{"collapsed":false},"order":2,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","label":"clip","type":"CLIP","link":2024}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","label":"CONDITIONING","type":"CONDITIONING","shape":3,"links":[2018],"slot_index":0}],"title":"Positive Prompt","properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["the mournful lamentations of of a female rock singer on stage with chaos behind her, her face screaming her sorrowful refrains the despairing cries of anguished screams howling agonized moans, her pained whispers mournful sighs distant echoes across the smoky stage, fading memories of lost 
loves, forgotten dreams, shattered hopes, crushed spirits, broken hearts"]},{"id":637,"type":"CLIPTextEncode","pos":[963.5917358398438,453.83306884765625],"size":[278.4529113769531,88],"flags":{"collapsed":false},"order":3,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","label":"clip","type":"CLIP","link":2028}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","label":"CONDITIONING","type":"CONDITIONING","shape":3,"links":[2029],"slot_index":0}],"title":"Positive Prompt","properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":[""]},{"id":635,"type":"ClownGuide_Beta","pos":[1604.09326171875,-479.9832763671875],"size":[284.860595703125,290.8609924316406],"flags":{},"order":6,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":2022},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[2023],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuide_Beta"},"widgets_values":["data",false,true,0.5,1,"beta57",0,10,false]}],"links":[[2005,630,0,631,4,"LATENT"],[2008,631,0,591,0,"LATENT"],[2013,629,0,630,3,"LATENT"],[2015,629,3,632,1,"LATENT"],[2016,632,0,630,0,"MODEL"],[2017,628,0,629,0,"IMAGE"],[2018,107,0,630,1,"CONDITIONING"],[2019,591,0,633,0,"IMAGE"],[2020,629,0,634,0,"LATENT"],[2021,634,0,630,5,"GUIDES"],[2022,629,0,635,0,"LATENT"],[2023,635,0,631,5,"GUIDES"],[2024,636,1,107,0,"CLIP"],[2025,636,0,632,0,"MODEL"],[2026,636,2,629,4,"VAE"],[2027,636,2,591,1,"VAE"],[2028,636,1,637,0,"CLIP"],[2029,637,0,630,2,"CONDITIONING"]],"groups":[],"config":{},"extra":{"ds":{"scale":2.1762913579017154,"offset":[427.0670817937978,488.9238245904811]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0},"version":0.4}
================================================
FILE: example_workflows/hidream unsampling pseudoimplicit.json
================================================
{"last_node_id":637,"last_link_id":2029,"nodes":[{"id":628,"type":"LoadImage","pos":[599.166015625,156.38429260253906],"size":[315,314],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2017]},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["ComfyUI_14254_.png","image"]},{"id":629,"type":"VAEEncodeAdvanced","pos":[961.6968994140625,123.66181182861328],"size":[278.0284423828125,280.5834045410156],"flags":{},"order":4,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":2017},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":2026}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[2013,2020,2022],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":null},{"name":"mask","localized_name":"mask","type":"MASK","links":null},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[2015]},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for 
S&R":"VAEEncodeAdvanced"},"widgets_values":["false",1024,1024,"red",false,"16_channels"]},{"id":632,"type":"ModelSamplingAdvancedResolution","pos":[962.5586547851562,-316.3705139160156],"size":[277.62237548828125,126],"flags":{},"order":7,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":2025},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":2015}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2016],"slot_index":0}],"properties":{"Node name for S&R":"ModelSamplingAdvancedResolution"},"widgets_values":["exponential",1.35,0.85]},{"id":633,"type":"SaveImage","pos":[1921.8458251953125,-123.4797134399414],"size":[436.4179382324219,508.5302429199219],"flags":{},"order":11,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2019}],"outputs":[],"properties":{},"widgets_values":["ComfyUI"]},{"id":591,"type":"VAEDecode","pos":[1924.08251953125,-233.2501983642578],"size":[210,46],"flags":{"collapsed":false},"order":10,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","label":"samples","type":"LATENT","link":2008},{"name":"vae","localized_name":"vae","label":"vae","type":"VAE","link":2027}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","label":"IMAGE","type":"IMAGE","shape":3,"links":[2019],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode"},"widgets_values":[]},{"id":107,"type":"CLIPTextEncode","pos":[959.4713745117188,-123.3353500366211],"size":[282.33453369140625,173.58438110351562],"flags":{"collapsed":false},"order":2,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","label":"clip","type":"CLIP","link":2024}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","label":"CONDITIONING","type":"CONDITIONING","shape":3,"links":[2018],"slot_index":0}],"title":"Positive Prompt","properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["the mournful lamentations of of a female rock singer on 
stage with chaos behind her, her face screaming her sorrowful refrains the despairing cries of anguished screams howling agonized moans, her pained whispers mournful sighs distant echoes across the smoky stage, fading memories of lost loves, forgotten dreams, shattered hopes, crushed spirits, broken hearts"]},{"id":637,"type":"CLIPTextEncode","pos":[963.5917358398438,453.83306884765625],"size":[278.4529113769531,88],"flags":{"collapsed":false},"order":3,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","label":"clip","type":"CLIP","link":2028}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","label":"CONDITIONING","type":"CONDITIONING","shape":3,"links":[2029],"slot_index":0}],"title":"Positive Prompt","properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":[""]},{"id":636,"type":"ClownModelLoader","pos":[599.3463745117188,-176.31788635253906],"size":[315,266],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2025],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[2024,2028],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[2026,2027],"slot_index":2}],"properties":{"Node name for 
S&R":"ClownModelLoader"},"widgets_values":["hidream_i1_full_fp8.safetensors","fp8_e4m3fn_fast","clip_l_hidream.safetensors","clip_g_hidream.safetensors","t5xxl_fp8_e4m3fn_scaled.safetensors","llama_3.1_8b_instruct_fp8_scaled.safetensors","hidream","ae.sft"]},{"id":630,"type":"ClownsharKSampler_Beta","pos":[1271.7001953125,-124.3408432006836],"size":[291.7499084472656,630],"flags":{},"order":8,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":2016},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":2018},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":2029},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":2013},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":2021},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2005]},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"multistep/res_3m","beta57",30,-1,1,1,0,"fixed","unsample",true]},{"id":631,"type":"ClownsharkChainsampler_Beta","pos":[1605.8143310546875,-124.34080505371094],"size":[280.55523681640625,510],"flags":{},"order":9,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":2005},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":2023},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2008],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"multistep/res_3m",-1,4,"resample",true]},{"id":634,"type":"ClownGuide_Beta","pos":[1276.0064697265625,-480.84442138671875],"size":[284.860595703125,290.8609924316406],"flags":{},"order":5,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":2020},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[2021],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownGuide_Beta"},"widgets_values":["pseudoimplicit",false,false,0.5,1,"beta57",0,30,false]},{"id":635,"type":"ClownGuide_Beta","pos":[1604.09326171875,-479.9832763671875],"size":[284.860595703125,290.8609924316406],"flags":{},"order":6,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":2022},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[2023],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuide_Beta"},"widgets_values":["pseudoimplicit",false,false,0.5,1,"beta57",0,4,false]}],"links":[[2005,630,0,631,4,"LATENT"],[2008,631,0,591,0,"LATENT"],[2013,629,0,630,3,"LATENT"],[2015,629,3,632,1,"LATENT"],[2016,632,0,630,0,"MODEL"],[2017,628,0,629,0,"IMAGE"],[2018,107,0,630,1,"CONDITIONING"],[2019,591,0,633,0,"IMAGE"],[2020,629,0,634,0,"LATENT"],[2021,634,0,630,5,"GUIDES"],[2022,629,0,635,0,"LATENT"],[2023,635,0,631,5,"GUIDES"],[2024,636,1,107,0,"CLIP"],[2025,636,0,632,0,"MODEL"],[2026,636,2,629,4,"VAE"],[2027,636,2,591,1,"VAE"],[2028,636,1,637,0,"CLIP"],[2029,637,0,630,2,"CONDITIONING"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.7449402268886909,"offset":[544.7968662691544,737.2296697550046]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0},"version":0.4}
================================================
FILE: example_workflows/hidream unsampling.json
================================================
{"last_node_id":637,"last_link_id":2029,"nodes":[{"id":628,"type":"LoadImage","pos":[599.166015625,156.38429260253906],"size":[315,314],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2017]},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["ComfyUI_14254_.png","image"]},{"id":629,"type":"VAEEncodeAdvanced","pos":[961.6968994140625,123.66181182861328],"size":[278.0284423828125,280.5834045410156],"flags":{},"order":4,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":2017},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":2026}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[2013,2020,2022],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":null},{"name":"mask","localized_name":"mask","type":"MASK","links":null},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[2015]},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for 
S&R":"VAEEncodeAdvanced"},"widgets_values":["false",1024,1024,"red",false,"16_channels"]},{"id":632,"type":"ModelSamplingAdvancedResolution","pos":[962.5586547851562,-316.3705139160156],"size":[277.62237548828125,126],"flags":{},"order":7,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":2025},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":2015}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2016],"slot_index":0}],"properties":{"Node name for S&R":"ModelSamplingAdvancedResolution"},"widgets_values":["exponential",1.35,0.85]},{"id":633,"type":"SaveImage","pos":[1921.8458251953125,-123.4797134399414],"size":[436.4179382324219,508.5302429199219],"flags":{},"order":11,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2019}],"outputs":[],"properties":{},"widgets_values":["ComfyUI"]},{"id":631,"type":"ClownsharkChainsampler_Beta","pos":[1605.8143310546875,-124.34080505371094],"size":[280.55523681640625,510],"flags":{},"order":9,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":2005},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":2023},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2008],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"multistep/res_3m",-1,5.5,"resample",true]},{"id":591,"type":"VAEDecode","pos":[1924.08251953125,-233.2501983642578],"size":[210,46],"flags":{"collapsed":false},"order":10,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","label":"samples","type":"LATENT","link":2008},{"name":"vae","localized_name":"vae","label":"vae","type":"VAE","link":2027}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","label":"IMAGE","type":"IMAGE","shape":3,"links":[2019],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode"},"widgets_values":[]},{"id":630,"type":"ClownsharKSampler_Beta","pos":[1271.7001953125,-124.3408432006836],"size":[291.7499084472656,630],"flags":{},"order":8,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":2016},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":2018},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":2029},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":2013},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":2021},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2005]},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"multistep/res_3m","beta57",60,-1,1,1,0,"fixed","unsample",true]},{"id":107,"type":"CLIPTextEncode","pos":[959.4713745117188,-123.3353500366211],"size":[282.33453369140625,173.58438110351562],"flags":{"collapsed":false},"order":2,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","label":"clip","type":"CLIP","link":2024}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","label":"CONDITIONING","type":"CONDITIONING","shape":3,"links":[2018],"slot_index":0}],"title":"Positive Prompt","properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["the mournful lamentations of of a female rock singer on stage with chaos behind her, her face screaming her sorrowful refrains the despairing cries of anguished screams howling agonized moans, her pained whispers mournful sighs distant echoes across the smoky stage, fading memories of lost loves, forgotten dreams, shattered hopes, crushed spirits, broken hearts"]},{"id":637,"type":"CLIPTextEncode","pos":[963.5917358398438,453.83306884765625],"size":[278.4529113769531,88],"flags":{"collapsed":false},"order":3,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","label":"clip","type":"CLIP","link":2028}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","label":"CONDITIONING","type":"CONDITIONING","shape":3,"links":[2029],"slot_index":0}],"title":"Positive Prompt","properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":[""]},{"id":636,"type":"ClownModelLoader","pos":[599.3463745117188,-176.31788635253906],"size":[315,266],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2025],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[2024,2028],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[2026,2027],"slot_index":2}],"properties":{"Node name for 
S&R":"ClownModelLoader"},"widgets_values":["hidream_i1_full_fp8.safetensors","fp8_e4m3fn_fast","clip_l_hidream.safetensors","clip_g_hidream.safetensors","t5xxl_fp8_e4m3fn_scaled.safetensors","llama_3.1_8b_instruct_fp8_scaled.safetensors","hidream","ae.sft"]},{"id":634,"type":"ClownGuide_Beta","pos":[1276.0064697265625,-480.84442138671875],"size":[284.860595703125,290.8609924316406],"flags":{},"order":5,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":2020},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[2021],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuide_Beta"},"widgets_values":["inversion",false,false,0.5,1,"constant",0,-1,false]},{"id":635,"type":"ClownGuide_Beta","pos":[1604.09326171875,-479.9832763671875],"size":[284.860595703125,290.8609924316406],"flags":{},"order":6,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":2022},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[2023],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownGuide_Beta"},"widgets_values":["inversion",false,false,1,1,"beta57",0,15,false]}],"links":[[2005,630,0,631,4,"LATENT"],[2008,631,0,591,0,"LATENT"],[2013,629,0,630,3,"LATENT"],[2015,629,3,632,1,"LATENT"],[2016,632,0,630,0,"MODEL"],[2017,628,0,629,0,"IMAGE"],[2018,107,0,630,1,"CONDITIONING"],[2019,591,0,633,0,"IMAGE"],[2020,629,0,634,0,"LATENT"],[2021,634,0,630,5,"GUIDES"],[2022,629,0,635,0,"LATENT"],[2023,635,0,631,5,"GUIDES"],[2024,636,1,107,0,"CLIP"],[2025,636,0,632,0,"MODEL"],[2026,636,2,629,4,"VAE"],[2027,636,2,591,1,"VAE"],[2028,636,1,637,0,"CLIP"],[2029,637,0,630,2,"CONDITIONING"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.7449402268886909,"offset":[802.8733998149229,690.5491177830577]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0},"version":0.4}
================================================
FILE: example_workflows/intro to clownsampling.json
================================================
{"last_node_id":876,"last_link_id":2046,"nodes":[{"id":453,"type":"VAEDecode","pos":[-303.0476379394531,3073.681640625],"size":[210,46],"flags":{"collapsed":false},"order":228,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":1923},{"name":"vae","localized_name":"vae","type":"VAE","link":1940,"slot_index":1}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","shape":3,"links":[1365],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode"},"widgets_values":[]},{"id":606,"type":"LoraLoader","pos":[-2194.87353515625,3180.94482421875],"size":[359.7619323730469,126],"flags":{},"order":177,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1890},{"name":"clip","localized_name":"clip","type":"CLIP","link":1939}],"outputs":[{"name":"MODEL","localized_name":"MODEL","type":"MODEL","links":[1904],"slot_index":0},{"name":"CLIP","localized_name":"CLIP","type":"CLIP","links":[1937,1938],"slot_index":1}],"properties":{"Node name for S&R":"LoraLoader"},"widgets_values":["csbw_cascade_dark_ema.safetensors",1,1]},{"id":454,"type":"SaveImage","pos":[-303.3555603027344,3184.454345703125],"size":[753.4503784179688,734.7869262695312],"flags":{},"order":229,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":1365}],"outputs":[],"properties":{},"widgets_values":["ComfyUI"]},{"id":625,"type":"SharkOptions_UltraCascade_Latent_Beta","pos":[-648.0692138671875,3944.1982421875],"size":[310.79998779296875,82],"flags":{},"order":0,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[1948],"slot_index":0}],"properties":{"Node name for 
S&R":"SharkOptions_UltraCascade_Latent_Beta"},"widgets_values":[1536,1536]},{"id":626,"type":"SharkOptions_UltraCascade_Latent_Beta","pos":[-1372.7569580078125,3947.591064453125],"size":[310.79998779296875,82],"flags":{},"order":1,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[1951],"slot_index":0}],"properties":{"Node name for S&R":"SharkOptions_UltraCascade_Latent_Beta"},"widgets_values":[24,24]},{"id":624,"type":"SharkOptions_UltraCascade_Latent_Beta","pos":[-1013.2625732421875,3947.5908203125],"size":[310.79998779296875,82],"flags":{},"order":2,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[1947],"slot_index":0}],"properties":{"Node name for S&R":"SharkOptions_UltraCascade_Latent_Beta"},"widgets_values":[36,36]},{"id":609,"type":"UNETLoader","pos":[-1020.5138549804688,3045.097412109375],"size":[356.544677734375,82],"flags":{},"order":3,"mode":0,"inputs":[],"outputs":[{"name":"MODEL","localized_name":"MODEL","type":"MODEL","links":[1926],"slot_index":0}],"properties":{"Node name for S&R":"UNETLoader"},"widgets_values":["stage_b_lite_CSBW_v1.1.safetensors","default"]},{"id":621,"type":"VAELoader","pos":[-637.3134765625,3068.5341796875],"size":[294.6280212402344,58],"flags":{},"order":4,"mode":0,"inputs":[],"outputs":[{"name":"VAE","localized_name":"VAE","type":"VAE","links":[1940],"slot_index":0}],"properties":{"Node name for S&R":"VAELoader"},"widgets_values":["stage_a_ft_hq.safetensors"]},{"id":620,"type":"CLIPLoader","pos":[-2564.87353515625,3272.8349609375],"size":[344.635498046875,98],"flags":{},"order":5,"mode":0,"inputs":[],"outputs":[{"name":"CLIP","localized_name":"CLIP","type":"CLIP","links":[1939],"slot_index":0}],"properties":{"Node name for 
S&R":"CLIPLoader"},"widgets_values":["cascade_text_encoder.safetensors","stable_cascade","default"]},{"id":627,"type":"Note","pos":[-1381.849365234375,4086.07421875],"size":[331.63720703125,415.29815673828125],"flags":{},"order":6,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Stage C: the original Stable Cascade version. \n\nStable Cascade latents are actually quite small: typically, a 1024x1024 image will be generated from a stage C latent that is only 24x24 (for comparison, with SDXL or SD1.5, the dimensions are 128x128). \n\n\"Compression\" is just a shorthand method of determining these dimensions, such as 24x24 (1024 / 42 = 24.38, which means a \"compression\" of 42).\n\nThis poses a problem though: Cascade was only trained on a handful of resolutions. The difference between 24x24 and 25x25 is a significant drop in quality and coherence. Therefore, it is best to just set these dimensions directly.\n\nThe best trained resolutions are:\n\n24x24 > 32x32\n30x16 > 40x24 \n\n48x24 also works, but seems to result in more doubling problems than the others.\n\n\n"],"color":"#432","bgcolor":"#653"},{"id":628,"type":"Note","pos":[-1012.45947265625,4084.7783203125],"size":[331.63720703125,415.29815673828125],"flags":{},"order":7,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Stage UP: a patched version of Stable Cascade stage C (\"UltraPixel\"). \n\nThe key with these dimensions is to keep the aspect ratio the same as the stage C latent. Typically, best results are with a 1.5x upscale. 2.0x works, but will result in somewhat more issues with doubling, and can be a lot slower. However, the detail level will also be very high.\n\nSome viable resolutions are listed below. 
Asterisks signify ones that have been verified to work particularly well.\n\n32x32\n36x36 **\n40x40\n42x42\n48x48 *\n\n40x24\n50x30\n60x36 **\n70x42\n80x48 *\n\n72x36 \n80x40 *\n96x48 (very slow!)\n\n\n\n"],"color":"#432","bgcolor":"#653"},{"id":632,"type":"CheckpointLoaderSimple","pos":[-1073.474609375,2726.673583984375],"size":[452.7829895019531,102.89583587646484],"flags":{},"order":8,"mode":0,"inputs":[],"outputs":[{"name":"MODEL","localized_name":"MODEL","type":"MODEL","links":null},{"name":"CLIP","localized_name":"CLIP","type":"CLIP","links":null},{"name":"VAE","localized_name":"VAE","type":"VAE","links":null}],"properties":{"Node name for S&R":"CheckpointLoaderSimple"},"widgets_values":["cascade_B-lite_refined_CSBW_v1.1.safetensors"]},{"id":633,"type":"Note","pos":[-1075.468994140625,2892.701416015625],"size":[457.5304870605469,94.27093505859375],"flags":{},"order":9,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["This is the stage B lite CSBW finetune (model only).\n\nhttps://huggingface.co/ClownsharkBatwing/Cascade_Stage_B_CSBW_Refined/blob/main/stage_b_lite_CSBW_v1.1.safetensors"],"color":"#432","bgcolor":"#653"},{"id":634,"type":"Note","pos":[-575.989501953125,2895.603271484375],"size":[547.0546875,91.47331237792969],"flags":{},"order":10,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["This is a finetune of stage A. You will get a sharper image, but in images with large white areas, small circular grey halos are sometimes visible.\n\nhttps://huggingface.co/madebyollin/stage-a-ft-hq/blob/main/stage_a_ft_hq.safetensors"],"color":"#432","bgcolor":"#653"},{"id":630,"type":"Note","pos":[-3309.3076171875,3048.958984375],"size":[717.709228515625,165.61032104492188],"flags":{},"order":11,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["I recommend the BF16 version of stage C. There is no visible difference vs. 
the full precision weights, and it halves the disk space requirements.\n\nhttps://huggingface.co/stabilityai/stable-cascade/blob/main/stage_c_bf16.safetensors\n\nIMPORTANT: The original UltraPixel \"safetensors\" is not a safetensors at all - it is a PICKLE, where they lazily (at best) changed the file extension to \".safetensors\"!\n\nI converted it to a real safetensors file, and it's available below:\n\nhttps://huggingface.co/ClownsharkBatwing/ultrapixel_convert/blob/main/ultrapixel_t2i.safetensors"],"color":"#432","bgcolor":"#653"},{"id":584,"type":"UltraCascade_Loader","pos":[-2564.4580078125,3133.043212890625],"size":[345.5117492675781,82.95540618896484],"flags":{},"order":12,"mode":0,"inputs":[],"outputs":[{"name":"MODEL","localized_name":"MODEL","type":"MODEL","shape":3,"links":[1890],"slot_index":0}],"properties":{"Node name for S&R":"UltraCascade_Loader"},"widgets_values":["stage_c_bf16.safetensors","ultrapixel_t2i.safetensors"]},{"id":635,"type":"Note","pos":[-3307.105712890625,3272.173095703125],"size":[715.61083984375,89.37511444091797],"flags":{},"order":13,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Any clip G will do. The Cascade version is available at:\n\nhttps://huggingface.co/stabilityai/stable-cascade/blob/main/text_encoder/model.bf16.safetensors\n\n"],"color":"#432","bgcolor":"#653"},{"id":636,"type":"Note","pos":[-3306.760009765625,3418.6708984375],"size":[715.61083984375,113.57872772216797],"flags":{},"order":14,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["The LORA was trained with OneTrainer (https://github.com/Nerogar/OneTrainer) on some of my own SDXL generations. It has deep colors and is strong with wacky paint, illustration, and vector art styles. 
\n\nCascade learns extremely quickly and is very adept with artistic styles (it knows many artist names).\n\nhttps://huggingface.co/ClownsharkBatwing/CSBW_Style/blob/main/csbw_cascade_dark_ema.safetensors\n"],"color":"#432","bgcolor":"#653"},{"id":629,"type":"Note","pos":[-647.965087890625,4084.8818359375],"size":[331.63720703125,415.29815673828125],"flags":{},"order":15,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Stage B: the Stable Cascade superresolution model.\n\nAs with stage UP, the key with these dimensions is to keep the aspect ratio the same as the prior latents. Theoretically, any resolution may be used, though some odd distortions can occur when the ideal upscale ratio is not used. It's not entirely clear what those ratios are, so some experimentation may be necessary. \n\nSome resolutions that work particularly well are:\n\n1536x1536 *\n2048x2048 *\n\n1600x960\n2560x1536 **\n2880x1792 *\n3200x1920\n\nIf you use stage B lite, you can hit 4k resolutions without even using more than 12GB of VRAM.\n\nIt's highly recommended to use the CSBW finetune of stage B, as it fixes many of the severe artifact problems the original release had.\n\nNote: CFG is not needed for this stage!"],"color":"#432","bgcolor":"#653"},{"id":637,"type":"Note","pos":[-1838.5732421875,2922.63671875],"size":[457.5304870605469,94.27093505859375],"flags":{},"order":16,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Perturbed attention guidance (PAG) makes an enormous difference with Stable Cascade stages C and UP. 
Like CFG, it will double the runtime."],"color":"#432","bgcolor":"#653"},{"id":598,"type":"CLIPTextEncode","pos":[-1811.0350341796875,3205.474853515625],"size":[351.592529296875,173.00360107421875],"flags":{},"order":201,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":1937}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[1907,1911,1914],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["impasto oil painting by Yayoi Kusama and Lisa Frank, thick paint textures, tunning contrasts at night with stylish roughly drawn thick black lines, a nuclear explosion destroying a city, its towering wide glowing nuclear mushroom cloud enveloping the entire skyline, the nuclear fireball lighting up the dark sky"]},{"id":601,"type":"UltraCascade_PerturbedAttentionGuidance","pos":[-1808.5911865234375,3084.306884765625],"size":[344.3999938964844,58],"flags":{},"order":200,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1904}],"outputs":[{"name":"MODEL","localized_name":"MODEL","type":"MODEL","links":[1909,1910],"slot_index":0}],"properties":{"Node name for S&R":"UltraCascade_PerturbedAttentionGuidance"},"widgets_values":[3]},{"id":599,"type":"CLIPTextEncode","pos":[-1814.4205322265625,3435.57763671875],"size":[356.2470703125,110.6326904296875],"flags":{},"order":202,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":1938}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[1908,1912,1915],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["low quality, bad quality, low detail, blurry, unsharp"]},{"id":631,"type":"Note","pos":[-1557.671142578125,2725.4599609375],"size":[457.5304870605469,94.27093505859375],"flags":{},"order":17,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["This is a checkpoint that, for 
convenience, includes the stage B lite CSBW finetune, clip G, and stage A (the FT_HQ finetune).\n\nhttps://huggingface.co/ClownsharkBatwing/CSBW_Style/blob/main/cascade_B-lite_refined_CSBW_v1.1.safetensors"],"color":"#432","bgcolor":"#653"},{"id":649,"type":"Note","pos":[2011.257080078125,3860],"size":[282.2704772949219,88],"flags":{},"order":18,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Since \"steps_to_run\" is set to -1,\nthis will run all remaining steps."],"color":"#432","bgcolor":"#653"},{"id":648,"type":"Note","pos":[1661.257080078125,3860],"size":[283.8087463378906,88],"flags":{},"order":19,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Runs the next 10 steps (out of 30)."],"color":"#432","bgcolor":"#653"},{"id":657,"type":"ClownsharKSampler_Beta","pos":[1710,3140],"size":[296.93646240234375,418],"flags":{},"order":20,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"exponential/res_3s","beta57",30,-1,1,5.5,0,"fixed","standard",true]},{"id":680,"type":"ClownSampler_Beta","pos":[1050,3140],"size":[283.6876220703125,174],"flags":{},"order":21,"mode":0,"inputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"sampler","localized_name":"sampler","type":"SAMPLER","links":[1973]}],"properties":{"Node name for S&R":"ClownSampler_Beta"},"widgets_values":[0.5,"exponential/res_3s",-1,"fixed",true]},{"id":685,"type":"Note","pos":[3440,5450],"size":[280.6243896484375,109.73818969726562],"flags":{},"order":22,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["NOTE: \"epsilon_scales\" is currently unused, but exists as a placeholder. \n\n\"frame_weights\" is for video models such as Hunyuan. This is for use with guides."],"color":"#432","bgcolor":"#653"},{"id":713,"type":"Note","pos":[4574.66552734375,4613.29833984375],"size":[280.0735168457031,88],"flags":{},"order":23,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["INPAINTING TIP: Try using the settings to the right with a feathered mask."],"color":"#432","bgcolor":"#653"},{"id":670,"type":"SigmasSchedulePreview","pos":[3850,5410],"size":[315,270],"flags":{},"order":24,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null}],"outputs":[],"properties":{"Node name for S&R":"SigmasSchedulePreview"},"widgets_values":["hard",0.25,1,1,1,"beta57",30,2.1,0]},{"id":654,"type":"BetaSamplingScheduler","pos":[1420,2780],"size":[210,106],"flags":{},"order":25,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":null}],"outputs":[{"name":"SIGMAS","localized_name":"SIGMAS","type":"SIGMAS","links":null}],"properties":{"Node name for 
S&R":"BetaSamplingScheduler"},"widgets_values":[20,0.5,0.7]},{"id":653,"type":"Note","pos":[1390,2940],"size":[252.12789916992188,117.73304748535156],"flags":{},"order":26,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["\"beta57\" is equivalent to the BetaSamplingScheduler node above. I have found the results to be generally superior to the default \"beta\" (where the values are both set to 0.60).\n\n\n"],"color":"#432","bgcolor":"#653"},{"id":643,"type":"Note","pos":[751.2572021484375,4100],"size":[507.688720703125,165.58355712890625],"flags":{},"order":27,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["\"steps_to_run\": When set to -1, it will run all steps per usual. \n\nIf set to a positive value, it will run that number of steps, and then stop and pass the latent off to the next sampler node.\n\nIf the next sampler node's \"sampler_mode\" is set to \"resample\", it will then continue where the first one left off. \n\nThis even works with multistep samplers, as it carries its \"momentum\" from node to the next. 
This is not the case for \"KSampler (Advanced)\", or any other sampler nodes that I'm aware of."],"color":"#432","bgcolor":"#653"},{"id":724,"type":"CLIPTextEncode","pos":[990,4960],"size":[210,88],"flags":{},"order":28,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":null}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[1982,1983],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":[""]},{"id":722,"type":"ClownGuide_Beta","pos":[1250,5430],"size":[315,290],"flags":{},"order":29,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[1984],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuide_Beta"},"widgets_values":["epsilon",false,false,0.5,1,"constant",0,1000,false]},{"id":720,"type":"ClownsharKSampler_Beta","pos":[1260,4940],"size":[315,418],"flags":{},"order":178,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":1982},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":1983},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":1984},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":null},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized
_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta"},"widgets_values":[0,"multistep/res_2m","beta57",30,-1,1,1,0,"fixed","unsample",true]},{"id":721,"type":"ClownsharKSampler_Beta","pos":[1620,4940],"size":[315,418],"flags":{},"order":183,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":1985},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":null},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta"},"widgets_values":[0,"multistep/res_2m","beta57",30,-1,1,5.5,-1,"fixed","resample",true]},{"id":727,"type":"Note","pos":[890,5170],"size":[333.3896179199219,108.9758071899414],"flags":{},"order":30,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["UNSAMPLER SETTINGS: \n\nEta should usually be 0.0. \nCFG should be 1.0, and used with an empty prompt.\n\nDenoise < 1.0 can help with adherence to the unsampled image."],"color":"#432","bgcolor":"#653"},{"id":731,"type":"Note","pos":[1980,5160],"size":[364.70263671875,103.89823150634766],"flags":{},"order":31,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Ensure resampler denoise matches the unsampler denoise.\n\nLow eta values can be used here (try 0.1 to 0.25). 
Sometimes they can actually improve adherence to the unsampled image."],"color":"#432","bgcolor":"#653"},{"id":733,"type":"Note","pos":[880,5530],"size":[339.3138122558594,133.51815795898438],"flags":{},"order":32,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Typical guide settings for unsampling/resampling with a rectified flow model (AuraFlow, SD3.5, Flux) are to the right.\n\nThis will generally NOT work well with UNSAMPLING SD1.5, SDXL, Cascade, etc.! (These guide nodes however work great as regular guides with these models!)"],"color":"#432","bgcolor":"#653"},{"id":659,"type":"Note","pos":[1427.1387939453125,3646.506591796875],"size":[352.2813415527344,88],"flags":{},"order":33,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["All of the configurations above will have the same output (and runtime) as the chained samplers below."],"color":"#432","bgcolor":"#653"},{"id":610,"type":"ClownsharKSampler_Beta","pos":[-1373.449462890625,3188.063232421875],"size":[311.41375732421875,693.9824829101562],"flags":{},"order":213,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":1909},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":1907},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":1908},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":1951},{"name":"options 
2","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[1949],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"exponential/res_3s","beta57",30,-1,1,5.5,1,"fixed","standard",true]},{"id":612,"type":"ClownsharKSampler_Beta","pos":[-1014.779296875,3187.209228515625],"size":[314.421142578125,693.9824829101562],"flags":{},"order":221,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":1910},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":1911},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":1912},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1949},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":1947},{"name":"options 2","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[1950],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"exponential/res_3s","beta57",30,-1,1,5.5,-1,"fixed","standard",true]},{"id":613,"type":"ClownsharKSampler_Beta","pos":[-648.0813598632812,3185.39013671875],"size":[309.2452087402344,691.814208984375],"flags":{},"order":226,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":1926},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":1914},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":1915},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1950},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":1948},{"name":"options 2","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[1923],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"exponential/res_3s","beta57",30,-1,1,1,-1,"fixed","standard",true]},{"id":716,"type":"Note","pos":[4574.66552734375,4963.29833984375],"size":[270.65277099609375,108.61186218261719],"flags":{},"order":34,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["INPAINTING TIP: Try using the settings to the right with a feathered mask, and \"end_step\" set to the number of sampling steps (or less). 
This will allow the entire image to change slightly to help heal any seams that may appear."],"color":"#432","bgcolor":"#653"},{"id":714,"type":"ClownGuide_Beta","pos":[4874.66552734375,4503.29833984375],"size":[257.2991638183594,290],"flags":{},"order":35,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":null}],"properties":{"Node name for S&R":"ClownGuide_Beta"},"widgets_values":["epsilon",false,false,1,1,"constant",0,1000,false]},{"id":715,"type":"ClownGuide_Beta","pos":[4874.66552734375,4863.29833984375],"size":[254.67617797851562,290],"flags":{},"order":36,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":null}],"properties":{"Node name for S&R":"ClownGuide_Beta"},"widgets_values":["epsilon",false,true,1,1,"beta57",0,40,false]},{"id":709,"type":"Note","pos":[5570,4480],"size":[280.0735168457031,88],"flags":{},"order":37,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Note: The \"guide_masked\" latent image will control the region that is \"masked out\"! 
And vice versa with \"guide_unmasked\"."],"color":"#432","bgcolor":"#653"},{"id":688,"type":"ClownGuides_Beta","pos":[5542.884765625,3927.678955078125],"size":[315,450],"flags":{},"order":38,"mode":0,"inputs":[{"name":"guide_masked","localized_name":"guide_masked","type":"LATENT","shape":7,"link":null},{"name":"guide_unmasked","localized_name":"guide_unmasked","type":"LATENT","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights_masked","localized_name":"weights_masked","type":"SIGMAS","shape":7,"link":null},{"name":"weights_unmasked","localized_name":"weights_unmasked","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[1977],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuides_Beta"},"widgets_values":["epsilon",false,true,0.75,0.75,1,1,"beta57","constant",0,0,15,15,false]},{"id":707,"type":"ClownGuide_Beta","pos":[5206.15283203125,3929.6015625],"size":[315,290],"flags":{},"order":39,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":null}],"properties":{"Node name for S&R":"ClownGuide_Beta"},"widgets_values":["pseudoimplicit",false,false,0.15,1,"beta57",5,15,false]},{"id":706,"type":"Note","pos":[5228.6552734375,4270.94677734375],"size":[280.0735168457031,88],"flags":{},"order":40,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Tip: Try a delayed start (start_step > 0), like shown above with pseudoimplicit, for wacky 
results!"],"color":"#432","bgcolor":"#653"},{"id":711,"type":"Note","pos":[5570,4610],"size":[280.0735168457031,88],"flags":{},"order":41,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Tip: I recommend drawing your masks on random load image nodes, for convenience."],"color":"#432","bgcolor":"#653"},{"id":712,"type":"Note","pos":[5546.97216796875,3772.719970703125],"size":[308.80828857421875,88],"flags":{},"order":42,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["INPAINTING TIP: Use ClownRegionalConditioning ClownGuides together with the same mask!"],"color":"#432","bgcolor":"#653"},{"id":744,"type":"Note","pos":[7538.74755859375,3817.72216796875],"size":[337.9170227050781,389.18304443359375],"flags":{},"order":43,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["It can be confusing at first, trying to understand which area is affected by which conditioning or mask. I suggest starting with prompts like \"blue ice\" and \"red fire\" with region_bleed = 0.0 to clear things up.\n\nTO THE LEFT:\n\nThe two nodes to the left will automatically create an unmasked area, based on what areas are not masked by mask, mask_A, or mask_B.\n\nAs an example:\n\nPositive_A will affect the area masked by \"mask_A\".\n\nPositive_B will affect the area masked by \"mask_B\".\n\nPositive_unmasked will affect the area that is not masked by \"mask_A\" or \"mask_B\".\n\nTO THE RIGHT:\n\nThese two nodes give you manual control over the area for each prompt. This is especially useful for temporal attention with video modes like WAN. 
The risk is if you fail to ensure every part of the image (or frame) is masked by one of the masks, you'll end up with an unconditioned area that will look like pure noise."],"color":"#432","bgcolor":"#653"},{"id":756,"type":"TemporalCrossAttnMask","pos":[7017.90087890625,4790.6005859375],"size":[210,82],"flags":{},"order":44,"mode":0,"inputs":[],"outputs":[{"name":"temporal_mask","localized_name":"temporal_mask","type":"MASK","links":[1988]}],"properties":{"Node name for S&R":"TemporalCrossAttnMask"},"widgets_values":[1,65]},{"id":757,"type":"TemporalCrossAttnMask","pos":[7017.12060546875,4954.5556640625],"size":[210,82],"flags":{},"order":45,"mode":0,"inputs":[],"outputs":[{"name":"temporal_mask","localized_name":"temporal_mask","type":"MASK","links":[1989],"slot_index":0}],"properties":{"Node name for S&R":"TemporalCrossAttnMask"},"widgets_values":[65,133]},{"id":758,"type":"Note","pos":[7644.2734375,4659.04248046875],"size":[275.73828125,88],"flags":{},"order":46,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Sometimes it is beneficial to allow self-attention masks to overlap slightly. 
This is similar to the \"edge_width\" parameter above, except it overlaps frames, not spatial components (areas of the image)."],"color":"#432","bgcolor":"#653"},{"id":751,"type":"TemporalSplitAttnMask","pos":[7668.654296875,4808.83642578125],"size":[210,130],"flags":{},"order":47,"mode":0,"inputs":[],"outputs":[{"name":"temporal_mask","localized_name":"temporal_mask","type":"MASK","links":[1986],"slot_index":0}],"properties":{"Node name for S&R":"TemporalSplitAttnMask"},"widgets_values":[1,69,1,65]},{"id":753,"type":"TemporalSplitAttnMask","pos":[7668.654296875,4998.83642578125],"size":[210,130],"flags":{},"order":48,"mode":0,"inputs":[],"outputs":[{"name":"temporal_mask","localized_name":"temporal_mask","type":"MASK","links":[1987],"slot_index":0}],"properties":{"Node name for S&R":"TemporalSplitAttnMask"},"widgets_values":[61,133,65,133]},{"id":749,"type":"Note","pos":[6907.34326171875,4623.6328125],"size":[280.0735168457031,88],"flags":{},"order":49,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["The advanced version of the WAN patcher can set a sliding self-attention window. The \"size\" is the number of latent frames (which is 1/4th the number of frames in the final output)."],"color":"#432","bgcolor":"#653"},{"id":689,"type":"Note","pos":[4562.91796875,3811.044189453125],"size":[494.1324462890625,535.6380004882812],"flags":{},"order":50,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Guides are a way of controlling the image generation process without denoising an image directly, but by steering the denoising process itself. This can mimic many of the benefits of unsampling, without the need to spend extra time unsampling the image.\n\nThere are two main guide modes:\n\n\"Epsilon\" can be used in conjunction with unsampling/resampling workflows to dramatically improve results with rectified flow models (AuraFlow, SD3.5, Flux). It can also be used directly. 
It works by modifying the noise prediction made by the model to align with the guide image.\n\n\"Pseudoimplicit\" works by lying to the model about the state of the denoising process, so that it generates a noise prediction that strongly aligns with the guide image. \"Fully_pseudoimplicit\" is only supported with \"fully_implicit\" and \"diag_implicit\" samplers (all others will default back to pseudoimplicit).\n\nChannelwise and projection modes can have a dramatic effect. I especially recommend trying epsilon with these modes, though they are quite interesting with pseudoimplicit as well. \"projection_mode\" can result in some issues with image details if used for the entire sampling process.\n\nCUTOFF:\n\nFlux has extremely strong self-attention, and has issues with getting \"stuck\" if the guide strength is too high (or used for too many steps), which results in an output that looks nearly identical to the guide. \"cutoff\" does a crude check for how similar the image is to the guide - if it exceeds that value, it will turn off the guide for that step. Try setting to 0.5 or 0.6 when using Flux.\n\nWEIGHT SCHEDULERS:\n\nThese control the weight at each step. For example, with the settings shown:\n\n * the \"unmasked\" region will have a weight of 0.75 for the first 15 steps, then 0.0 for every step after that\n\n * the \"masked\" region will start with a weight of 0.75 for the first step, gradually declining until reaching 0.0 after 15 steps (and remaining at 0.0)\n"],"color":"#432","bgcolor":"#653"},{"id":726,"type":"Note","pos":[5630,5410],"size":[257.97479248046875,159.16941833496094],"flags":{},"order":51,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["VAEEncodeAdvanced is a quality of life node for convenience when using multiple guides.\n\nNote: the mask input is for a black and white image. 
It is there for convenience with converting any masks you may have saved as black and white images into masks you can use in a workflow."],"color":"#432","bgcolor":"#653"},{"id":691,"type":"ClownsharKSampler_Beta","pos":[5894.66552734375,3923.29833984375],"size":[274.2724609375,418],"flags":{},"order":179,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":1977},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":null},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"multistep/res_2m","beta57",30,-1,1,5.5,0,"fixed","standard",true]},{"id":695,"type":"ModelSamplingAdvancedResolution","pos":[8110,3210],"size":[260.3999938964844,126],"flags":{},"order":215,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1980},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":null}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":null}],"properties":{"Node name for S&R":"ModelSamplingAdvancedResolution"},"widgets_values":["exponential",1.35,0.85]},{"id":701,"type":"Note","pos":[7080,3060],"size":[327.4920959472656,88],"flags":{},"order":52,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Loader nodes are 
provided for convenience with Flux and SD3.5. The Flux loader can also load the Redux (and ClipVision) models for you."],"color":"#432","bgcolor":"#653"},{"id":694,"type":"FluxGuidanceDisable","pos":[7790,3210],"size":[210,82],"flags":{},"order":208,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1979}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1980],"slot_index":0}],"properties":{"Node name for S&R":"FluxGuidanceDisable"},"widgets_values":[true,true]},{"id":699,"type":"Note","pos":[7760,3030],"size":[253.01846313476562,112.91952514648438],"flags":{},"order":53,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["This disables \"Flux Guidance\" (which is actually NOT disabled by setting to 1.0 or 0.0). It can be helpful in many cases where you wish to banish the \"Flux look\" to the bottom of a creepy old well in Transylvania."],"color":"#432","bgcolor":"#653"},{"id":660,"type":"ClownsharKSampler_Beta","pos":[3907.121826171875,3512.491943359375],"size":[293.78173828125,618],"flags":{},"order":205,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":1962},{"name":"options 2","type":"OPTIONS","link":1963},{"name":"options 3","type":"OPTIONS","link":1991},{"name":"options 4","type":"OPTIONS","link":1968},{"name":"options 5","type":"OPTIONS","link":1971},{"name":"options 6","type":"OPTIONS","link":1972},{"name":"options 
7","type":"OPTIONS","link":1974},{"name":"options 8","type":"OPTIONS","link":1990},{"name":"options 9","type":"OPTIONS","link":2003},{"name":"options 10","type":"OPTIONS","link":2007},{"name":"options 11","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"exponential/res_3s","beta57",30,-1,1,5.5,-1,"fixed","standard",true]},{"id":647,"type":"Note","pos":[1321.257080078125,3860],"size":[288.0400390625,88],"flags":{},"order":54,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Runs the first 7 steps (out of 30)."],"color":"#432","bgcolor":"#653"},{"id":640,"type":"ClownsharKSampler_Beta","pos":[1321.257080078125,4010],"size":[296.93646240234375,418],"flags":{},"order":55,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[1952],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"exponential/res_3s","beta57",30,7,1,5.5,0,"fixed","standard",true]},{"id":641,"type":"ClownsharKSampler_Beta","pos":[1671.257080078125,4010],"size":[288.4732666015625,418],"flags":{},"order":182,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1952},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[1953],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"exponential/res_3s","beta57",30,10,1,5.5,-1,"fixed","resample",true]},{"id":642,"type":"ClownsharKSampler_Beta","pos":[2021.257080078125,4010],"size":[291.5506286621094,422.6160888671875],"flags":{},"order":203,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1953},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"exponential/res_3s","beta57",30,-1,1,5.5,-1,"fixed","resample",true]},{"id":729,"type":"Note","pos":[1666.1065673828125,4786.38134765625],"size":[210,88],"flags":{},"order":56,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":[" RESAMPLER NODE"],"color":"#432","bgcolor":"#653"},{"id":650,"type":"Note","pos":[1771.257080078125,4480],"size":[453.94183349609375,144.25192260742188],"flags":{},"order":57,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["IMPORTANT: sampler_mode is set to \"resample\"!\n\nALSO: seed is set to -1!\n\nThis means \"continue where the last sampler left off\", as in, use the next available unused seed.\n\nIf you set it to another value, the noise sampler that is used at every step might reuse a seed, which 
can cause the image to burn.\n\n"],"color":"#432","bgcolor":"#653"},{"id":738,"type":"ReHiDreamPatcher","pos":[6490,4280],"size":[210,82],"flags":{},"order":58,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":null}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":null}],"properties":{"Node name for S&R":"ReHiDreamPatcher"},"widgets_values":["float32",true]},{"id":739,"type":"ReSD35Patcher","pos":[6490,4420],"size":[210,82],"flags":{},"order":59,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":null}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":null}],"properties":{"Node name for S&R":"ReSD35Patcher"},"widgets_values":["float32",true]},{"id":740,"type":"ReAuraPatcher","pos":[6490,4560],"size":[210,82],"flags":{},"order":60,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":null}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":null,"slot_index":0}],"properties":{"Node name for S&R":"ReAuraPatcher"},"widgets_values":[true,true]},{"id":741,"type":"ReWanPatcher","pos":[6490,4680],"size":[210,58],"flags":{},"order":61,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":null}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":null}],"properties":{"Node name for 
S&R":"ReWanPatcher"},"widgets_values":[true]},{"id":658,"type":"ClownsharKSampler_Beta","pos":[2070,3140],"size":[296.93646240234375,418],"flags":{},"order":62,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"exponential/res_3s","beta57",30,10000,1,5.5,0,"fixed","standard",true]},{"id":734,"type":"Note","pos":[1830,2950],"size":[433.063232421875,101.85264587402344],"flags":{},"order":63,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["NOTE: steps_to_run = -1 means to run all steps (the usual default approach for any sampler).\n\nOn the right, steps_to_run > steps, so it will run all the way till the end, just like on the left. 
This is the approach traditionally used in KSampler (Advanced)."],"color":"#432","bgcolor":"#653"},{"id":765,"type":"Note","pos":[-3299.93603515625,2699.88427734375],"size":[389.86285400390625,98.29244232177734],"flags":{},"order":64,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["THESE NODES ARE NOT REQUIRED TO USE RES4LYF!!!\n\nThese descriptions are included only out of a desire to consolidate all CSBW node documentation into one location."],"color":"#432","bgcolor":"#653"},{"id":728,"type":"Note","pos":[1311.5924072265625,4784.7666015625],"size":[213.4912109375,88],"flags":{},"order":65,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":[" UNSAMPLER NODE"],"color":"#432","bgcolor":"#653"},{"id":723,"type":"ClownGuide_Beta","pos":[1620,5430],"size":[315,290],"flags":{},"order":66,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[1985],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuide_Beta"},"widgets_values":["epsilon",false,true,0.75,1,"beta57",0,10,false]},{"id":766,"type":"Note","pos":[1980.62939453125,5502.46337890625],"size":[366.45068359375,97.77838134765625],"flags":{},"order":67,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Tip: \"projection\" and \"channelwise\" modes can increase the intensity of the effect with epsilon and data guide modes. Sometimes, this effect is very desirable. 
It's worth experimenting with."],"color":"#432","bgcolor":"#653"},{"id":732,"type":"Note","pos":[1981.25732421875,5003.00537109375],"size":[361.62445068359375,90.62290954589844],"flags":{},"order":68,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Tip: multistep samplers usually adhere to unsampled images more effectively than others."],"color":"#432","bgcolor":"#653"},{"id":768,"type":"ClownGuides_Beta","pos":[4877.3896484375,5235.57373046875],"size":[333.3587951660156,450],"flags":{},"order":69,"mode":0,"inputs":[{"name":"guide_masked","localized_name":"guide_masked","type":"LATENT","shape":7,"link":null},{"name":"guide_unmasked","localized_name":"guide_unmasked","type":"LATENT","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights_masked","localized_name":"weights_masked","type":"SIGMAS","shape":7,"link":null},{"name":"weights_unmasked","localized_name":"weights_unmasked","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":null}],"properties":{"Node name for S&R":"ClownGuides_Beta"},"widgets_values":["lure",false,false,1,1,1,1,"linear_quadratic","constant",0,0,8,-1,false]},{"id":769,"type":"Note","pos":[4576.1689453125,5281.28271484375],"size":[266.2802734375,135.71385192871094],"flags":{},"order":70,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["INPAINTING TIP: Try using the settings to the right with your input image connected to both the guide_masked and guide_unmasked inputs. 
Adjust \"end_step_masked\" to change the strength of the inpainting effect (or weight_masked, or eight_scheduler_masked)."],"color":"#432","bgcolor":"#653"},{"id":725,"type":"VAEEncodeAdvanced","pos":[5930,5410],"size":[255.3756103515625,278],"flags":{},"order":71,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":null},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":null}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":null},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":null},{"name":"mask","localized_name":"mask","type":"MASK","links":null},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":null},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"VAEEncodeAdvanced"},"widgets_values":["false",1024,1024,"red",false,"16_channels"]},{"id":735,"type":"ClownGuide_Beta","pos":[5147.18896484375,4861.30419921875],"size":[254.67617797851562,290],"flags":{},"order":72,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[1992],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownGuide_Beta"},"widgets_values":["flow",false,false,1,1,"constant",0,40,false]},{"id":770,"type":"ClownsharKSampler_Beta","pos":[5451.8759765625,4763.7705078125],"size":[315,438],"flags":{},"order":184,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":1992},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":1994},{"name":"options 2","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":null},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"multistep/res_2m","beta57",30,-1,1,5.5,0,"randomize","standard",true]},{"id":772,"type":"SharkOptions_GuideCond_Beta","pos":[5230.13525390625,5239.04736328125],"size":[210,98],"flags":{},"order":73,"mode":0,"inputs":[{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[1994]}],"properties":{"Node name for 
S&R":"SharkOptions_GuideCond_Beta"},"widgets_values":[5.5]},{"id":773,"type":"Note","pos":[5229.7958984375,5385.55810546875],"size":[272.242919921875,112.58575439453125],"flags":{},"order":74,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["When using \"flow\" mode a second set of conditionings can be added that will be used to evolve the guide itself to sync up better with your image during sampling. Try describing the guide with some creative liberties to bend things in the desired stylistic direction."],"color":"#432","bgcolor":"#653"},{"id":774,"type":"Note","pos":[4576.4013671875,5501.66796875],"size":[266.2802734375,135.71385192871094],"flags":{},"order":75,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["TIP: ClownGuides allows you to use multiple input images, each with their own separate schedule and strength settings. There's a lot of creative possibilities here, especially when combined with regional conditioning sharing the same mask!"],"color":"#432","bgcolor":"#653"},{"id":746,"type":"ClownRegionalConditioning_AB","pos":[7918.16943359375,3816.63427734375],"size":[248.7556610107422,350],"flags":{},"order":76,"mode":0,"inputs":[{"name":"conditioning_A","localized_name":"conditioning_A","type":"CONDITIONING","shape":7,"link":null},{"name":"conditioning_B","localized_name":"conditioning_B","type":"CONDITIONING","shape":7,"link":null},{"name":"mask_A","localized_name":"mask_A","type":"MASK","shape":7,"link":null},{"name":"mask_B","localized_name":"mask_B","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"region_bleeds","localized_name":"region_bleeds","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","links":null}],"properties":{"Node name for 
S&R":"ClownRegionalConditioning_AB"},"widgets_values":[1,0,0,"constant",0,-1,"boolean",128,false]},{"id":747,"type":"ClownRegionalConditioning_ABC","pos":[7916.001953125,4221.97314453125],"size":[250.51895141601562,390],"flags":{},"order":77,"mode":0,"inputs":[{"name":"conditioning_A","localized_name":"conditioning_A","type":"CONDITIONING","shape":7,"link":null},{"name":"conditioning_B","localized_name":"conditioning_B","type":"CONDITIONING","shape":7,"link":null},{"name":"conditioning_C","localized_name":"conditioning_C","type":"CONDITIONING","shape":7,"link":null},{"name":"mask_A","localized_name":"mask_A","type":"MASK","shape":7,"link":null},{"name":"mask_B","localized_name":"mask_B","type":"MASK","shape":7,"link":null},{"name":"mask_C","localized_name":"mask_C","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"region_bleeds","localized_name":"region_bleeds","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","links":null}],"properties":{"Node name for 
S&R":"ClownRegionalConditioning_ABC"},"widgets_values":[1,0,0,"constant",0,100,"boolean",128,false]},{"id":743,"type":"ClownRegionalConditioning3","pos":[7224.5439453125,4216.19189453125],"size":[287.20001220703125,370],"flags":{},"order":78,"mode":0,"inputs":[{"name":"conditioning_A","localized_name":"conditioning_A","type":"CONDITIONING","shape":7,"link":null},{"name":"conditioning_B","localized_name":"conditioning_B","type":"CONDITIONING","shape":7,"link":null},{"name":"conditioning_unmasked","localized_name":"conditioning_unmasked","type":"CONDITIONING","shape":7,"link":null},{"name":"mask_A","localized_name":"mask_A","type":"MASK","shape":7,"link":null},{"name":"mask_B","localized_name":"mask_B","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"region_bleeds","localized_name":"region_bleeds","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","links":null}],"properties":{"Node name for S&R":"ClownRegionalConditioning3"},"widgets_values":[1,0,0,"constant",0,100,"boolean",128,false]},{"id":754,"type":"ClownRegionalConditioning_AB","pos":[7261.39306640625,4816.611328125],"size":[248.7556610107422,350],"flags":{},"order":180,"mode":0,"inputs":[{"name":"conditioning_A","localized_name":"conditioning_A","type":"CONDITIONING","shape":7,"link":null},{"name":"conditioning_B","localized_name":"conditioning_B","type":"CONDITIONING","shape":7,"link":null},{"name":"mask_A","localized_name":"mask_A","type":"MASK","shape":7,"link":1988},{"name":"mask_B","localized_name":"mask_B","type":"MASK","shape":7,"link":1989},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"region_bleeds","localized_name":"region_bleeds","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","links":null}],"properties":{"Node name for 
S&R":"ClownRegionalConditioning_AB"},"widgets_values":[1,0,0,"constant",0,-1,"boolean",128,false]},{"id":752,"type":"ClownRegionalConditioning_AB","pos":[7918.654296875,4798.83642578125],"size":[248.7556610107422,350],"flags":{},"order":181,"mode":0,"inputs":[{"name":"conditioning_A","localized_name":"conditioning_A","type":"CONDITIONING","shape":7,"link":null},{"name":"conditioning_B","localized_name":"conditioning_B","type":"CONDITIONING","shape":7,"link":null},{"name":"mask_A","localized_name":"mask_A","type":"MASK","shape":7,"link":1986},{"name":"mask_B","localized_name":"mask_B","type":"MASK","shape":7,"link":1987},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"region_bleeds","localized_name":"region_bleeds","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","links":null}],"properties":{"Node name for S&R":"ClownRegionalConditioning_AB"},"widgets_values":[1,0,0,"constant",0,-1,"boolean",128,false]},{"id":777,"type":"ClownRegionalConditioning2","pos":[7226.02978515625,3817.949462890625],"size":[287.20001220703125,330],"flags":{},"order":79,"mode":0,"inputs":[{"name":"conditioning_masked","localized_name":"conditioning_masked","type":"CONDITIONING","shape":7,"link":null},{"name":"conditioning_unmasked","localized_name":"conditioning_unmasked","type":"CONDITIONING","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"region_bleeds","localized_name":"region_bleeds","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","links":null}],"properties":{"Node name for 
S&R":"ClownRegionalConditioning2"},"widgets_values":[1,0,0,"constant",0,-1,"boolean",128,false]},{"id":705,"type":"Note","pos":[6811.22021484375,3808.38671875],"size":[379.8222351074219,549.5839233398438],"flags":{},"order":80,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["\"weight\" affects how strongly the attention mask is applied, which controls how well the masked and unmasked regions are separated. \n\n\"region_bleed\" affects how much the regions can \"talk\" with each other (via self-attention). region_bleed=0.0 will ensure the strongest possible separation, but higher values can help prevent visible seams from forming along the edges of the masked areas.\n\n\nWEIGHT SCHEDULER:\n\nThis controls the weight (strength of separation of the regions) at each step. For example, with the settings shown, the weight will begin at 1.70, and gradually decline before reaching 0.0 after 10 steps (and remaining at 0.0).\n\n\"mask_type\" currently only has the \"gradient\" option, but others may be added later. \n\nYes, this does mean you can use masks with gradients (so you can feather and blur them if you wish)!\n\nMASK_TYPE:\n\nThere are options here that are a bit like causal attention in LLMs. For example, \"boolean_masked\" means the masked area can \"see\" the entire image (via self-attention), while the unmasked area cannot \"see\" the masked area. This is very useful with Flux if you wish to generate a character close to the camera but have an unblurred background. Place the character in the masked area, describe only the background in the unmasked area, select \"boolean_masked\" and set region_bleed = 0.0. \n\nEDGE_WIDTH:\n\nThis creates overlapping self-attention at the boundaries between masked and unmasked areas. This helps to conceal seams. 
Try values like 50 or 150 to start, and watch the preview.\n\n"],"color":"#432","bgcolor":"#653"},{"id":776,"type":"ClownRegionalConditionings","pos":[9220.0224609375,3819.96826171875],"size":[238.2400665283203,266],"flags":{},"order":222,"mode":0,"inputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","shape":7,"link":2000},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"region_bleeds","localized_name":"region_bleeds","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","links":null}],"properties":{"Node name for S&R":"ClownRegionalConditionings"},"widgets_values":[0.9,0.25,0,"constant",0,-1,"boolean",false]},{"id":782,"type":"Note","pos":[8261.9765625,4016.659423828125],"size":[272.1261291503906,131.35166931152344],"flags":{},"order":81,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["\"Spineless\" mode means that the region \"has no spine\" and is susceptible to influence by other regions (via self-attention). This is comparable to the \"boolean_masked\" etc. modes in the nodes to the left. For example, \"boolean_masked\" sets the masked area to \"spineless\"."],"color":"#432","bgcolor":"#653"},{"id":785,"type":"Note","pos":[8574.7734375,4022.382080078125],"size":[210,97.39286804199219],"flags":{},"order":82,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Unlimited regions may be set using these nodes. Up to 12 regions have been successfully tested in a single workflow."],"color":"#432","bgcolor":"#653"},{"id":786,"type":"Note","pos":[8979.33203125,4020.87939453125],"size":[212.89056396484375,176.87088012695312],"flags":{},"order":83,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["The risk of manual control over all masks is that you miss an area (some part ends up not being covered by any mask) which means it then has no conditioning. 
\n\nThis is easily avoided by simply not hooking up a mask to the final node. It will use any remaining unmasked area as the final mask."],"color":"#432","bgcolor":"#653"},{"id":755,"type":"Note","pos":[7241.16015625,4666.3251953125],"size":[275.73828125,88],"flags":{},"order":84,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["If sliding self-attention is used, only cross-attention needs to be masked."],"color":"#432","bgcolor":"#653"},{"id":748,"type":"ReWanPatcherAdvanced","pos":[6702.76953125,4816.7041015625],"size":[279.3623352050781,214],"flags":{},"order":85,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":null}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":null}],"properties":{"Node name for S&R":"ReWanPatcherAdvanced"},"widgets_values":["all","all",true,"standard",60]},{"id":750,"type":"Note","pos":[6444.103515625,4815.1484375],"size":[225.1619873046875,212.99703979492188],"flags":{},"order":86,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Sliding self-attention is useful for generating sequences where the conditioning changes from one frame to another, or for reducing VRAM requirements and reducing inference time when generating long sequences. At least 601 frames can be generated in one shot on a RTX 4090 with the above settings.\n\nThere are two modes: standard and circular. 
Circular allows the first frame to \"see\" the last frame, whereas standard does not."],"color":"#432","bgcolor":"#653"},{"id":742,"type":"Note","pos":[6420.9990234375,3806.16064453125],"size":[345.86224365234375,263.46356201171875],"flags":{},"order":87,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Regional conditioning requires for you to patch the model with a \"Re\" (for Regional) patcher (shown below) and to use the beta versions of either ClownSampler + SharkSampler, or ClownSharkSampler.\n\nFully compatible with Flux Redux, CFG, etc.\n\nHiDream notes:\nRegional negative conditioning is currently supported with HiDream and is useful for controlling styles (i.e., \"photo\" in a region that should be a painting, and vice versa). \n\nWith HiDream, weight and region_bleed may also be set to negative values. The effect in terms of strength is the same for -0.9 vs 0.9, but it will change whether it operates on initial or final blocks in the model. The effect can be quite different.\n"],"color":"#432","bgcolor":"#653"},{"id":737,"type":"ReFluxPatcher","pos":[6490,4140],"size":[210,82],"flags":{},"order":88,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":null}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":null}],"properties":{"Node name for S&R":"ReFluxPatcher"},"widgets_values":["float32",true]},{"id":778,"type":"ClownRegionalConditioning","pos":[8500,3820],"size":[211.60000610351562,122],"flags":{},"order":185,"mode":0,"inputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","shape":7,"link":1995},{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null}],"outputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","links":[1996],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownRegionalConditioning"},"widgets_values":[false,128]},{"id":775,"type":"ClownRegionalConditioning","pos":[8260,3820],"size":[211.60000610351562,122],"flags":{},"order":89,"mode":0,"inputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","shape":7,"link":null},{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null}],"outputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","links":[1995],"slot_index":0}],"properties":{"Node name for S&R":"ClownRegionalConditioning"},"widgets_values":[true,128]},{"id":779,"type":"ClownRegionalConditioning","pos":[8740.115234375,3820.114990234375],"size":[211.60000610351562,122],"flags":{},"order":204,"mode":0,"inputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","shape":7,"link":1996},{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null}],"outputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","links":[1999],"slot_index":0}],"properties":{"Node name for S&R":"ClownRegionalConditioning"},"widgets_values":[false,128]},{"id":783,"type":"ClownRegionalConditioning","pos":[8975.990234375,3820.1171875],"size":[211.60000610351562,122],"flags":{},"order":214,"mode":0,"inputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","shape":7,"link":1999},{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null}],"outputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","links":[2000],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownRegionalConditioning"},"widgets_values":[false,128]},{"id":651,"type":"Note","pos":[1041.0577392578125,2843.751953125],"size":[304.6747741699219,235.28672790527344],"flags":{},"order":90,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["SDE NOISE:\n\n\"eta\" represents how much noise the sampler adds after each step. If set to 0.0, the samplers will be \"ODEs\". If set to > 0.0, they will be \"SDEs\" and/or \"ancestral\". \n\nThe math has been carefully designed for both variance preserving and exploding models: results are particularly good with SD1.5, SDXL, Stable Cascade, Auraflow, SD3.5 Medium, and Flux. \n\nIn most cases, using eta will result in gains in quality and coherence when using at least 20 sampling steps. Best results are with 30 or more. \n"],"color":"#432","bgcolor":"#653"},{"id":638,"type":"Note","pos":[690,2842.6943359375],"size":[321.5638427734375,270.1020202636719],"flags":{},"order":91,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["SPEED:\n\nAll multistep samplers, like Euler, use one model call per step. Therefore, they run at the same speed.\n\nAll others have the number of model calls per step listed at the end of the name in terms of \"stages\" (abbreviated \"s\").\n\nTherefore, \"res_2s\" has 2 stages, and uses 2 model calls per step. Each step will take 2x as long as a Euler step. \"ralston_3s\" will take 3x as long.\n\nImplicit samplers benefit enormously from an extra model call to initialize each step. Therefore, \"gauss-legendre_2s\" will actually use 3 model calls per step.\n\n"],"color":"#432","bgcolor":"#653"},{"id":681,"type":"Note","pos":[691.0578002929688,3171.1572265625],"size":[320.96875,168.8627166748047],"flags":{},"order":92,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["IMPORTANT: the seed here is set to -1! \n\nThis means \"use the next available seed\" (which will be the most recently used seed + 1).\n\nThis setting is irrelevant if eta = 0.0. 
It is only used for SDE sampling (where noise is added after each step, the amount of which is controlled by \"eta\").\n\n"],"color":"#432","bgcolor":"#653"},{"id":801,"type":"Note","pos":[631.0161743164062,2693.894775390625],"size":[602.4559326171875,93.21308135986328],"flags":{},"order":93,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["MANY COMPLEX WORKFLOWS BECOME MUCH SIMPLER WHEN USING RES4LYF NODES.\n\nA great emphasis has been placed, during the design of these nodes, on usability - ensuring they are not just more powerful than the default KSampler nodes, and don't just provide superior results, but are also ultimately easier to use, encouraging experimentation. "],"color":"#432","bgcolor":"#653"},{"id":655,"type":"Note","pos":[2114.199951171875,2702.9755859375],"size":[321.8917236328125,108.77723693847656],"flags":{},"order":94,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["BONGMath is an algorithm unique to RES4LYF that vastly improves sampling quality and coherence in most cases, with little to no effect on sampling speed.\n\nIt has no effect when eta is set to 0. 
\n"],"color":"#432","bgcolor":"#653"},{"id":667,"type":"Note","pos":[2630,2720],"size":[242.25900268554688,198.10833740234375],"flags":{},"order":96,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["OPTIONS NODES:\n\nThese can be connected directly to ClownSampler, ClownSharkSampler, and SharkSampler, to control a variety of advanced parameters.\n\nSHARKoptions may be connected to SHARKsampler or clownSHARKsampler.\n\nCLOWNoptions may be connected to CLOWNsampler or CLOWNsharksampler."],"color":"#432","bgcolor":"#653"},{"id":669,"type":"Note","pos":[2894.58544921875,3200.53369140625],"size":[478.7455139160156,399.50189208984375],"flags":{},"order":97,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["ClownOptions_SDE controls the noise that is added after each step (or substep).\n\nNOISE TYPES:\n\nBrownian can give very good results, and is the \"correct\" noise type to use from a mathematical perspective. It does, however, result in a burned image with BONGMath when using many of the higher order samplers (the issue is with \"non-monotonic\" ones, which are typically those \n whose names end with \"5s\" or greater).\n\nNOISE MODES:\n\nThe \"noise mode\" controls how much noise is actually used after each step. The list is roughly sorted in order of strength (hard at the top being the strongest, hard_var at the bottom being the weakest - and the only one that uses \"mathematically correct\" scaling). \n\n\"Sinusoidal\" begins very weak, then becomes strong in the middle of the sampling process before losing strength again.\n\nThe \"soft\" noise types begin very strong, and drop off extremely rapidly.\n\nSUBSTEPS:\n\nAny sampler that is not euler or ddim uses information from multiple model calls (\"stages\") to predict the step. Multistep samplers reuse previous steps as \"stages\", whereas the rest make new model calls. \n\nThe settings for \"substep\" control these intermediate \"substeps\". 
If eta_substep is set to 0, BONGMath will have no effect."],"color":"#432","bgcolor":"#653"},{"id":661,"type":"ClownOptions_SDE_Beta","pos":[3414.8017578125,3262.863037109375],"size":[315,266],"flags":{},"order":98,"mode":0,"inputs":[{"name":"etas","localized_name":"etas","type":"SIGMAS","shape":7,"link":null},{"name":"etas_substep","localized_name":"etas_substep","type":"SIGMAS","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[1963],"slot_index":0}],"properties":{"Node name for S&R":"ClownOptions_SDE_Beta"},"widgets_values":["gaussian","gaussian","hard","hard",0.5,0.5,-1,"randomize"]},{"id":677,"type":"Note","pos":[2900,3650],"size":[471.3785095214844,160.20542907714844],"flags":{},"order":99,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Overshoot > 0 causes the sampler to \"overshoot\" the step size, and then scale backwards to where it was really supposed to end. This is what all other SDE and ancestral sampler implementations do, though I have found it to adversely affect accuracy, especially with high eta values (> 0.7), resulting in softened, simplified images with little detail.\n\nHowever, careful use can soften images and deepen colors with pleasant results.\n\nTo mimic the behavior of the typical SDE and ancestral sampler implementations, set these settings to match those in ClownOptions_SDE."],"color":"#432","bgcolor":"#653"},{"id":683,"type":"Note","pos":[2900,5230],"size":[481.8527526855469,325.1487731933594],"flags":{},"order":100,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["RES4LYF heavily emphasizes giving you control over the sampling process!\n\nYou will often see these green \"sigmas\" inputs that aren't really for sigmas. These are used to control parameters on a step-by-step basis. 
\n\nIMPORTANT: The values used in the input here are multiplied by the value in ClownSampler/SharkSampler/ClownsharKSampler!\n\nFor example, the KarrasSchedule connected below creates a list of numbers:\n\n1.0, 1.0, 1.0, 1.0, 1.0\n\n(The rest is automatically filled in with 0.0.)\n\nThese are then multiplied by the value for \"eta\" (0.5) in the connected ClownsharKSampler node:\n\n0.5, 0.5, 0.5, 0.5, 0.5\n\nThe result is the sampler sets \"eta\" to 0.5 for the first 5 steps, and then 0.0 for every step after that. \n\nTry connecting something like the BetaScheduler while using \"beta57\" as your sampling scheduler!"],"color":"#432","bgcolor":"#653"},{"id":676,"type":"ClownOptions_StepSize_Beta","pos":[3420,3660],"size":[316.0789794921875,130],"flags":{},"order":101,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[1968],"slot_index":0}],"properties":{"Node name for S&R":"ClownOptions_StepSize_Beta"},"widgets_values":["hard","hard",0,0]},{"id":668,"type":"Note","pos":[2900,2720],"size":[476.7748718261719,425.3497314453125],"flags":{},"order":102,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["SharkOptions controls the initial noise generated before starting the sampling process. \n\nNOISE TYPES:\n\nPerlin is great with Stable Cascade, and with Flux will frequently result in a less blurred image (and a somewhat less saturated look, which can be helpful for realism).\n\nThe \"color\" noise modes have more low frequency information (structure), brown being greater than pink. White is neutral, while blue and especially violet have extra high frequency information (details).\n\nhires-pyramid-bicubic can generate exceptionally sharp images in many cases. The other pyramid noise types, and studentt, are often great for painterly styles.\n\nOTHER OPTIONS:\n\n\"noise_stdev\" increases the \"size\" of the noise. 
Values around 1.05 to 1.1 can have a wonderful effect with some painterly styles, with a boost in saturation.\n\n\"denoise_alt\" overrides the denoise setting. It has a very different effect that can often be easier to control when doing img2img generations with rectified flow models. (It scales the sigmas schedule, instead of slicing it).\n\n\"channelwise_cfg\" changes the cfg method used to one that can be very good with models that use a 16 channel VAE (SD3.5, Flux). Setting a negative value in the \"cfg\" box in any ClownsharKSampler or SharkSampler node is equivalent to using this toggle (for example, cfg = -2.0 is the same as setting cfg = 2.0, and channelwise_cfg = true)."],"color":"#432","bgcolor":"#653"},{"id":666,"type":"SharkOptions_Beta","pos":[3413.490478515625,2880],"size":[257.98193359375,130],"flags":{},"order":103,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[1962]}],"properties":{"Node name for S&R":"SharkOptions_Beta"},"widgets_values":["gaussian",1,1,false]},{"id":684,"type":"KarrasScheduler","pos":[3190,5610],"size":[210,130],"flags":{},"order":104,"mode":0,"inputs":[],"outputs":[{"name":"SIGMAS","localized_name":"SIGMAS","type":"SIGMAS","links":[1975,1976],"slot_index":0}],"properties":{"Node name for S&R":"KarrasScheduler"},"widgets_values":[5,1,1,1]},{"id":687,"type":"Note","pos":[2910,5610],"size":[257.2243957519531,88],"flags":{},"order":105,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Tip: use SigmasPreview very heavily so that you can *see* what's going on!"],"color":"#432","bgcolor":"#653"},{"id":671,"type":"Note","pos":[3820,5080],"size":[363.5062255859375,260.6607971191406],"flags":{},"order":106,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["This node can be used to visualize the effect of different noise parameters on how much noise is actually 
added (or removed) during the sampling process.\n\nDelta (Δ) signifies change. So for example, Δt = step size.\n\nThe most important thing to look at is the original sigma (σ) schedule vs σup. \n\nThe difference between σ (white line) and σup (red line above) is how much noise is added by the sampler after each step. If the two overlap, you aren't adding noise, and it's in ODE mode (eta = 0.0).\n\nThe most important thing to try here is higher or lower eta values, and different noise_modes. Try comparing \"hard\" vs \"soft\" vs \"hard_var\" with eta = 0.5."],"color":"#432","bgcolor":"#653"},{"id":686,"type":"SigmasPreview","pos":[3430,5610],"size":[289.7076110839844,128.47837829589844],"flags":{},"order":187,"mode":0,"inputs":[{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","link":1976}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":null}],"properties":{"Node name for S&R":"SigmasPreview"},"widgets_values":[false]},{"id":682,"type":"ClownOptions_Automation_Beta","pos":[3430,5250],"size":[284.9833984375,146],"flags":{},"order":186,"mode":0,"inputs":[{"name":"etas","localized_name":"etas","type":"SIGMAS","shape":7,"link":1975},{"name":"etas_substep","localized_name":"etas_substep","type":"SIGMAS","shape":7,"link":null},{"name":"s_noises","localized_name":"s_noises","type":"SIGMAS","shape":7,"link":null},{"name":"s_noises_substep","localized_name":"s_noises_substep","type":"SIGMAS","shape":7,"link":null},{"name":"epsilon_scales","localized_name":"epsilon_scales","type":"SIGMAS","shape":7,"link":null},{"name":"frame_weights","localized_name":"frame_weights","type":"SIGMAS","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[1974]}],"properties":{"Node name for 
S&R":"ClownOptions_Automation_Beta"},"widgets_values":[]},{"id":673,"type":"Note","pos":[2900.961181640625,4977.04736328125],"size":[480.85333251953125,190.63368225097656],"flags":{},"order":107,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["This option node can be very useful for SAVING TIME! \n\n\"swap_below_error\" is a tolerance threshold where, if the total error at each step falls below the value in the box, it will switch to the sampler specified here.\n\n\"log_err_to_console\" will print these values at each step to the terminal/console/cmd.exe window where ComfyUI is running. This is essential if you wish to choose a reasonable value for \"swap_below_err\".\n\n\"swap_at_step\" will switch after the step specified, no matter what. This is equivalent to chaining two samplers together as shown to the left - it's just more convenient and compact.\n"],"color":"#432","bgcolor":"#653"},{"id":665,"type":"ClownOptions_SwapSampler_Beta","pos":[3430.416015625,5008.54541015625],"size":[287.92266845703125,130],"flags":{},"order":108,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[1972],"slot_index":0}],"properties":{"Node name for S&R":"ClownOptions_SwapSampler_Beta"},"widgets_values":["multistep/res_3m",0,30,false]},{"id":798,"type":"ClownOptions_Momentum_Beta","pos":[3433.12158203125,4837.14990234375],"size":[286.6007995605469,58],"flags":{},"order":109,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[2003],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownOptions_Momentum_Beta"},"widgets_values":[0.5]},{"id":803,"type":"Note","pos":[2904.318359375,4827.84912109375],"size":[481.74639892578125,88],"flags":{},"order":110,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Momentum can be used to accelerate convergence in some cases. Use carefully.\n\nMay be best used with chained workflows, with momentum applied only to some portion of early steps."],"color":"#432","bgcolor":"#653"},{"id":672,"type":"Note","pos":[2904.900634765625,4490.91796875],"size":[483.2145690917969,270.3226623535156],"flags":{},"order":111,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Implicit steps refine the sampling process by feeding the output of each step back into its input and rerunning it. This means setting either to \"1\" will increase the runtime 2x, as you're doubling the number of steps. \n\nThey can drastically increase quality. In some cases, results can actually be improved by cutting the step count in half, and running with implicit_steps=1 or implicit_substeps=1 (which results in an equivalent runtime).\n\nWith the other samplers, rebound will add one extra model call per step. \n\nBongmath and predictor-corrector can have significantly different effects. Rebound can as well (but also adds 1 model call per implicit step or substep).\n\nTRUE IMPLICIT SAMPLERS:\n\nIt is recommended to use \"implicit_steps\" with the \"fully_implicit\" samplers, and \"implicit_substeps\" with the \"diag_implicit\" samplers. Both of these sampler types will ignore the \"implicit_type\" settings.\n"],"color":"#432","bgcolor":"#653"},{"id":675,"type":"Note","pos":[2906.080322265625,4199.57763671875],"size":[474.05108642578125,230.29006958007812],"flags":{},"order":112,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["ClownOptions_DetailBoost aims to BREAK the noise scaling math that was so carefully prepared by ClownOptions_SDE. 
Most users see this as a replacement for the \"Detail Daemon\" node.\n\nTry experimenting with different methods: ones that end with \"normal\" will attempt to preserve luminosity in the image after the adjustments to the noise are made.\n\nIt is worth trying \"sinusoidal\" mode as well, as this is designed to be strongest at middle steps.\n\nEta will increase the intensity of the effect. \n\nIt seems to be best to not have this start on the first step (step 0), and to have it end no more than halfway (end_step = 1/2 of total steps or less). With method = \"model\", this seems to add a lot of detail without losing saturation, increasing luminosity, or triggering mutations."],"color":"#432","bgcolor":"#653"},{"id":763,"type":"ClownOptions_DetailBoost_Beta","pos":[3436.449462890625,4207.4345703125],"size":[282.9725646972656,218],"flags":{},"order":113,"mode":0,"inputs":[{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"etas","localized_name":"etas","type":"SIGMAS","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[1991],"slot_index":0}],"properties":{"Node name for S&R":"ClownOptions_DetailBoost_Beta"},"widgets_values":[1,"model","hard",0.5,3,10]},{"id":761,"type":"Note","pos":[2905.32470703125,3869.700439453125],"size":[471.6525573730469,266.9491882324219],"flags":{},"order":114,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["ClownOptions_SigmaScaling aims to BREAK the noise scaling math that was so carefully prepared by ClownOptions_SDE.\n\n\"noise_anchor_sde\" can make the image look much dirtier with lower values. Try 0.5 for starters, especially with any non-multistep sampler.\n\n\"s_noise\" increases the \"size\" of the noise added with each step. Values around 1.05-1.1 can considerably boost saturation in painterly images. 
BONGMath is particularly good when this is set to values > 1.0.\n\n\"s_noise_substep\" is not compatible with BONGMath. You will get a terrible image if you use them together.\n\n\"lying\" is equivalent to the popular \"lying sigmas\" approach. Like \"noise_anchor\", values < 1.0 will increase the \"dirty\" look. Try starting with 0.95.\n\n\"lying_inv\" will do the opposite of \"lying\". If you find your images look \"dried out\" or desaturated when using lying, try setting this to a similar value, and have it start at a later step, as shown below."],"color":"#432","bgcolor":"#653"},{"id":760,"type":"ClownOptions_SigmaScaling_Beta","pos":[3436.542724609375,3886.986328125],"size":[272.21429443359375,454],"flags":{},"order":115,"mode":0,"inputs":[{"name":"s_noises","localized_name":"s_noises","type":"SIGMAS","shape":7,"link":null},{"name":"s_noises_substep","localized_name":"s_noises_substep","type":"SIGMAS","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[1990],"slot_index":0}],"properties":{"Node name for S&R":"ClownOptions_SigmaScaling_Beta"},"widgets_values":[1,1,1,0.9500000000000001,0.9500000000000001,2,8]},{"id":678,"type":"Note","pos":[3783.1923828125,4263.46484375],"size":[363.2837219238281,88],"flags":{},"order":116,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Tip: if your results are too noisy,\ntry setting \"overshoot\" in \"ClownOptions StepSize\" to the same value as \"eta\" used in \"ClownOptions SDE\"! 
\n\n(Default eta is 0.50 if \"ClownOptions SDE\" is not used)."],"color":"#432","bgcolor":"#653"},{"id":806,"type":"ClownsharkChainsampler_Beta","pos":[5163.4833984375,2735.41357421875],"size":[262.0870056152344,298],"flags":{},"order":206,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":2005},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":null},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"multistep/res_2m",-1,5.5,"resample",true]},{"id":804,"type":"ClownsharKSampler_Beta","pos":[4552.6552734375,2734.24609375],"size":[268.7583312988281,418],"flags":{},"order":117,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2004],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"multistep/res_2m","beta57",20,14,1,5.5,0,"fixed","unsample",true]},{"id":805,"type":"ClownsharkChainsampler_Beta","pos":[4861.5244140625,2737.638671875],"size":[262.0870056152344,318],"flags":{},"order":188,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":2004},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":2006},{"name":"options 2","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2005],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"multistep/res_2m",1,5.5,"resample",true]},{"id":808,"type":"Note","pos":[4806.57275390625,3320.429443359375],"size":[384.6367492675781,194.1151580810547],"flags":{},"order":118,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["ClownOptions Cycles causes the sampler node to rerun after completion, while reversing the sampling mode (resample becomes unsample, unsample becomes resample). \n\n1.0 cycles implies it returns to where it began. \n\n1.5 cycles implies it returns to where it began, then reverses direction and reruns one last time - so it would end at the end of the step.\n\nThis often has VERY good results with unsampling workflows, various img2img workflows, style transfer, etc. 
With 1 steps_to_run, it's a lot like \"ClownOptions Implicit\", though results are often better."],"color":"#432","bgcolor":"#653"},{"id":717,"type":"Note","pos":[2635.989501953125,4579.111328125],"size":[237.44444274902344,91.61251831054688],"flags":{},"order":119,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["INPAINTING TIP: Implicit steps can really help, especially with seams! They also can have a significant impact on unsampling, and guides in general."],"color":"#432","bgcolor":"#653"},{"id":662,"type":"ClownOptions_ImplicitSteps_Beta","pos":[3440.08251953125,4551.392578125],"size":[286.5861511230469,130],"flags":{},"order":120,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[1971],"slot_index":0}],"properties":{"Node name for S&R":"ClownOptions_ImplicitSteps_Beta"},"widgets_values":["bongmath","bongmath",0,0]},{"id":811,"type":"ClownOptions_Cycles_Beta","pos":[3860.137451171875,4546.62158203125],"size":[261.53253173828125,202],"flags":{},"order":121,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[2007],"slot_index":0}],"properties":{"Node name for S&R":"ClownOptions_Cycles_Beta"},"widgets_values":[5,1,0.5,1,-1,1,false]},{"id":812,"type":"Note","pos":[3846.717041015625,4748.94482421875],"size":[308.37188720703125,88],"flags":{},"order":122,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["This node is closely related to ImplicitSteps! 
It is explained in more detail in the \"Cyclosampling\" group above and to the right (northeast)."],"color":"#432","bgcolor":"#653"},{"id":807,"type":"ClownOptions_Cycles_Beta","pos":[4863.31494140625,3128.078125],"size":[261.53253173828125,202],"flags":{},"order":123,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[2006]}],"properties":{"Node name for S&R":"ClownOptions_Cycles_Beta"},"widgets_values":[5,1,0.5,5.5,-1,1,false]},{"id":818,"type":"Note","pos":[5294.46533203125,3122.492431640625],"size":[309.0342712402344,192.40728759765625],"flags":{},"order":124,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["eta_decay_scale multiplies the eta by that value after each cycle. This can help the cyclosampling process converge on an output. \n\nFor example, if you started with an eta of 0.5, and eta_decay_scale is set to 0.9, the following etas will be used for successive cycles:\n\n0.5\n0.45 (0.5 * 0.9)\n0.405 (0.5 * 0.9 * 0.9)\n0.3645 (0.5 * 0.9 * 0.9 * 0.9)"],"color":"#432","bgcolor":"#653"},{"id":809,"type":"ClownsharkChainsampler_Beta","pos":[5704.94384765625,2739.097900390625],"size":[262.0870056152344,318],"flags":{},"order":190,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":2009},{"name":"options 
2","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"multistep/res_2m",5,5.5,"resample",true],"color":"#2a363b","bgcolor":"#3f5159"},{"id":814,"type":"ClownsharkChainsampler_Beta","pos":[6042.30615234375,2736.216064453125],"size":[262.0870056152344,318],"flags":{},"order":125,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null},{"name":"options 2","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2010],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"multistep/res_2m",5,0.5,"resample",true],"color":"#2a363b","bgcolor":"#3f5159"},{"id":815,"type":"ClownsharkChainsampler_Beta","pos":[6327.12060546875,2732.67724609375],"size":[262.0870056152344,318],"flags":{},"order":189,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":2010},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null},{"name":"options 2","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2011],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.25,"multistep/res_2m",5,4,"unsample",true],"color":"#233","bgcolor":"#355"},{"id":817,"type":"ClownsharkChainsampler_Beta","pos":[6619.41552734375,2730.05029296875],"size":[262.0870056152344,318],"flags":{},"order":207,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":2011},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null},{"name":"options 2","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"multistep/res_2m",5,0.5,"resample",true],"color":"#2a363b","bgcolor":"#3f5159"},{"id":810,"type":"Note","pos":[5840.63916015625,3313.710693359375],"size":[333.3376770019531,103.0768051147461],"flags":{},"order":126,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["The two setups above are equivalent. \n\nI suggest running \"ClownOptions Cycles\" with 10 steps_to_run and just watching the progress bar. 
It's easier to understand visually."],"color":"#2a363b","bgcolor":"#3f5159"},{"id":813,"type":"ClownOptions_Cycles_Beta","pos":[5706.35546875,3124.623291015625],"size":[261.53253173828125,202],"flags":{},"order":127,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[2009],"slot_index":0}],"properties":{"Node name for S&R":"ClownOptions_Cycles_Beta"},"widgets_values":[1,1,0.25,4,-1,1,false],"color":"#233","bgcolor":"#355"},{"id":816,"type":"Note","pos":[6323.08056640625,3110.669189453125],"size":[274.8790588378906,88],"flags":{},"order":128,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Compare these settings to the \"ClownOptions Cycles\" node to the left (eta 0.25, cfg 4.0).\n"],"color":"#233","bgcolor":"#355"},{"id":692,"type":"ReFluxPatcher","pos":[7490,3210],"size":[210,82],"flags":{},"order":191,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1978}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1979],"slot_index":0}],"properties":{"Node name for S&R":"ReFluxPatcher"},"widgets_values":["float32",true]},{"id":820,"type":"Note","pos":[7770,3350],"size":[251.27003479003906,88],"flags":{},"order":129,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["THIS NODE IS NOT 
REQUIRED!\n\nEXPERIMENTAL!"],"color":"#432","bgcolor":"#653"},{"id":693,"type":"FluxLoader","pos":[7090,3210],"size":[315,282],"flags":{},"order":130,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1978],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":null},{"name":"vae","localized_name":"vae","type":"VAE","links":null},{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","links":null},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","links":null}],"properties":{"Node name for S&R":"FluxLoader"},"widgets_values":["consolidated_s6700.safetensors","default",".use_ckpt_clip",".none",".use_ckpt_vae","sigclip_vision_patch14_384.safetensors","flux1-redux-dev.safetensors"]},{"id":819,"type":"ClownModelLoader","pos":[7090,2740],"size":[315,266],"flags":{},"order":131,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":null},{"name":"clip","localized_name":"clip","type":"CLIP","links":null},{"name":"vae","localized_name":"vae","type":"VAE","links":null}],"properties":{"Node name for S&R":"ClownModelLoader"},"widgets_values":["hidream_i1_full_fp8.safetensors","fp8_e4m3fn","clip_g_hidream.safetensors","clip_l_hidream.safetensors","t5xxl_fp8_e4m3fn_scaled.safetensors","llama_3.1_8b_instruct_fp8_scaled.safetensors","hidream","ae.sft"]},{"id":698,"type":"Note","pos":[8098.36279296875,3386.5087890625],"size":[282.65814208984375,92.654541015625],"flags":{},"order":132,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["This node is similar ModelSamplingAdvanced, except it uses the dimensions of the latent image to set the shift value."],"color":"#432","bgcolor":"#653"},{"id":795,"type":"Image Sharpen 
FS","pos":[8812.0927734375,2744.748046875],"size":[210,106],"flags":{},"order":133,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":null}],"outputs":[{"name":"image","localized_name":"image","type":"IMAGE","links":null}],"properties":{"Node name for S&R":"Image Sharpen FS"},"widgets_values":["hard","median",6]},{"id":821,"type":"Note","pos":[8531.4560546875,2746.428466796875],"size":[211.12799072265625,95.03887939453125],"flags":{},"order":134,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Unique method for sharpening images. Can add a lot of \"pop\" to SDXL and AuraFlow outputs that otherwise look soft due to the 4 channel VAE."],"color":"#432","bgcolor":"#653"},{"id":796,"type":"Image Grain Add","pos":[8819.529296875,2974.022216796875],"size":[210,58],"flags":{},"order":135,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","link":null}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":null}],"properties":{"Node name for S&R":"Image Grain Add"},"widgets_values":[0.5]},{"id":822,"type":"Note","pos":[8537.251953125,2954.097412109375],"size":[210.33291625976562,88],"flags":{},"order":136,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Very useful with ClipVision, IPadapter, etc. 
for avoiding blurry or blown out/oversaturated outputs."],"color":"#432","bgcolor":"#653"},{"id":797,"type":"Image Repeat Tile To Size","pos":[8819.0595703125,3152.188720703125],"size":[210,106],"flags":{},"order":137,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","link":null}],"outputs":[{"name":"image","localized_name":"image","type":"IMAGE","links":null}],"properties":{"Node name for S&R":"Image Repeat Tile To Size"},"widgets_values":[1024,1024,true]},{"id":823,"type":"Note","pos":[8541.458984375,3162.56103515625],"size":[229.4075927734375,156.3510284423828],"flags":{},"order":138,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Use in conjunction with \"ClownGuide Style\" when upscaling to prevent blurry outputs. \n\nWhen used wisely (not applied to all steps), this can improve results dramatically.\n\nConnect your original (unresized) input image to this node."],"color":"#432","bgcolor":"#653"},{"id":828,"type":"PreviewImage","pos":[9950,2830],"size":[210,26],"flags":{},"order":216,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2015}],"outputs":[],"properties":{"Node name for S&R":"PreviewImage"},"widgets_values":[]},{"id":829,"type":"PreviewImage","pos":[9950,3220],"size":[210,26],"flags":{},"order":218,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2018}],"outputs":[],"properties":{"Node name for S&R":"PreviewImage"},"widgets_values":[]},{"id":825,"type":"Frequency Separation Hard 
Light","pos":[9990,3070],"size":[260.3999938964844,66],"flags":{},"order":217,"mode":0,"inputs":[{"name":"high_pass","localized_name":"high_pass","type":"IMAGE","shape":7,"link":2016},{"name":"original","localized_name":"original","type":"IMAGE","shape":7,"link":null},{"name":"low_pass","localized_name":"low_pass","type":"IMAGE","shape":7,"link":2017}],"outputs":[{"name":"high_pass","localized_name":"high_pass","type":"IMAGE","links":null},{"name":"original","localized_name":"original","type":"IMAGE","links":[2019],"slot_index":1},{"name":"low_pass","localized_name":"low_pass","type":"IMAGE","links":null}],"properties":{"Node name for S&R":"Frequency Separation Hard Light"},"widgets_values":[]},{"id":830,"type":"PreviewImage","pos":[10300,3090],"size":[210,26],"flags":{},"order":223,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2019}],"outputs":[],"properties":{"Node name for S&R":"PreviewImage"},"widgets_values":[]},{"id":831,"type":"Note","pos":[9205.6611328125,2745.06982421875],"size":[293.7847900390625,149.87860107421875],"flags":{},"order":139,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Unique frequency separation method. Try combining the low pass layer from one image, and the high pass layer from another, such as with faces with carefully matched overlapping alignment, or other scenes. 
Better at transferring compositional information such as lighting and hue than the frequency separation method in Photoshop."],"color":"#432","bgcolor":"#653"},{"id":827,"type":"Image Median Blur","pos":[9443.4658203125,3153.701416015625],"size":[210,58],"flags":{},"order":192,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2013}],"outputs":[{"name":"image","localized_name":"image","type":"IMAGE","links":[2014],"slot_index":0}],"properties":{"Node name for S&R":"Image Median Blur"},"widgets_values":[40]},{"id":832,"type":"Image Gaussian Blur","pos":[9442.099609375,3277.42578125],"size":[210,58],"flags":{},"order":140,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":null}],"outputs":[{"name":"image","localized_name":"image","type":"IMAGE","links":null}],"properties":{"Node name for S&R":"Image Gaussian Blur"},"widgets_values":[40]},{"id":824,"type":"Frequency Separation Hard Light","pos":[9680,3070],"size":[260.3999938964844,66],"flags":{},"order":209,"mode":0,"inputs":[{"name":"high_pass","localized_name":"high_pass","type":"IMAGE","shape":7,"link":null},{"name":"original","localized_name":"original","type":"IMAGE","shape":7,"link":2012},{"name":"low_pass","localized_name":"low_pass","type":"IMAGE","shape":7,"link":2014}],"outputs":[{"name":"high_pass","localized_name":"high_pass","type":"IMAGE","links":[2015,2016],"slot_index":0},{"name":"original","localized_name":"original","type":"IMAGE","links":null},{"name":"low_pass","localized_name":"low_pass","type":"IMAGE","links":[2017,2018],"slot_index":2}],"properties":{"Node name for S&R":"Frequency Separation Hard Light"},"widgets_values":[]},{"id":833,"type":"Note","pos":[9539.69921875,2749.275146484375],"size":[255.63558959960938,88],"flags":{},"order":141,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Median blur is edge-aware, and usually gives better results than gaussian blur, if images are carefully aligned 
first."],"color":"#432","bgcolor":"#653"},{"id":840,"type":"Frequency Separation Hard Light","pos":[11160,3190],"size":[260.3999938964844,66],"flags":{},"order":211,"mode":0,"inputs":[{"name":"high_pass","localized_name":"high_pass","type":"IMAGE","shape":7,"link":null},{"name":"original","localized_name":"original","type":"IMAGE","shape":7,"link":2025},{"name":"low_pass","localized_name":"low_pass","type":"IMAGE","shape":7,"link":2026}],"outputs":[{"name":"high_pass","localized_name":"high_pass","type":"IMAGE","links":[],"slot_index":0},{"name":"original","localized_name":"original","type":"IMAGE","links":null},{"name":"low_pass","localized_name":"low_pass","type":"IMAGE","links":[2022],"slot_index":2}],"properties":{"Node name for S&R":"Frequency Separation Hard Light"},"widgets_values":[]},{"id":838,"type":"Image Median Blur","pos":[10920,3270],"size":[210,58],"flags":{},"order":194,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2024}],"outputs":[{"name":"image","localized_name":"image","type":"IMAGE","links":[2026],"slot_index":0}],"properties":{"Node name for S&R":"Image Median Blur"},"widgets_values":[40]},{"id":842,"type":"Image Median Blur","pos":[10930,3010],"size":[210,58],"flags":{},"order":193,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2031}],"outputs":[{"name":"image","localized_name":"image","type":"IMAGE","links":[2032],"slot_index":0}],"properties":{"Node name for S&R":"Image Median Blur"},"widgets_values":[40]},{"id":826,"type":"LoadImage","pos":[9212.66796875,3090.664306640625],"size":[210,314],"flags":{},"order":142,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2012,2013],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for 
S&R":"LoadImage"},"widgets_values":["00107-496528661.png","image"]},{"id":843,"type":"LoadImage","pos":[10690,2920],"size":[210,314],"flags":{},"order":143,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2030,2031],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["00109-3396456281.png","image"]},{"id":835,"type":"Frequency Separation Hard Light","pos":[11480,3030],"size":[260.3999938964844,66],"flags":{},"order":219,"mode":0,"inputs":[{"name":"high_pass","localized_name":"high_pass","type":"IMAGE","shape":7,"link":2033},{"name":"original","localized_name":"original","type":"IMAGE","shape":7,"link":null},{"name":"low_pass","localized_name":"low_pass","type":"IMAGE","shape":7,"link":2022}],"outputs":[{"name":"high_pass","localized_name":"high_pass","type":"IMAGE","links":null},{"name":"original","localized_name":"original","type":"IMAGE","links":[2023],"slot_index":1},{"name":"low_pass","localized_name":"low_pass","type":"IMAGE","links":null}],"properties":{"Node name for S&R":"Frequency Separation Hard Light"},"widgets_values":[]},{"id":837,"type":"PreviewImage","pos":[11800,3050],"size":[210,26],"flags":{},"order":224,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2023}],"outputs":[],"properties":{"Node name for S&R":"PreviewImage"},"widgets_values":[]},{"id":841,"type":"Frequency Separation Hard 
Light","pos":[11160,2910],"size":[260.3999938964844,66],"flags":{},"order":210,"mode":0,"inputs":[{"name":"high_pass","localized_name":"high_pass","type":"IMAGE","shape":7,"link":null},{"name":"original","localized_name":"original","type":"IMAGE","shape":7,"link":2030},{"name":"low_pass","localized_name":"low_pass","type":"IMAGE","shape":7,"link":2032}],"outputs":[{"name":"high_pass","localized_name":"high_pass","type":"IMAGE","links":[2033],"slot_index":0},{"name":"original","localized_name":"original","type":"IMAGE","links":null},{"name":"low_pass","localized_name":"low_pass","type":"IMAGE","links":[],"slot_index":2}],"properties":{"Node name for S&R":"Frequency Separation Hard Light"},"widgets_values":[]},{"id":836,"type":"LoadImage","pos":[10680,3210],"size":[213.1792755126953,314],"flags":{},"order":144,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2024,2025],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["00107-496528661.png","image"]},{"id":844,"type":"Note","pos":[10287.0224609375,3208.888916015625],"size":[255.63558959960938,88],"flags":{},"order":145,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["This will output the original image."],"color":"#432","bgcolor":"#653"},{"id":845,"type":"Note","pos":[11723.419921875,2862.596923828125],"size":[285.8372497558594,88.79490661621094],"flags":{},"order":146,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["This will combine high frequency (detail) information from the first image with the low frequency (color, hue, lighting) information from the second image."],"color":"#432","bgcolor":"#653"},{"id":794,"type":"Image Get Color 
Swatches","pos":[8580,5060],"size":[295.6000061035156,26],"flags":{},"order":147,"mode":0,"inputs":[{"name":"image_color_swatches","localized_name":"image_color_swatches","type":"IMAGE","link":null}],"outputs":[{"name":"color_swatches","localized_name":"color_swatches","type":"COLOR_SWATCHES","links":[2034],"slot_index":0}],"properties":{"Node name for S&R":"Image Get Color Swatches"},"widgets_values":[]},{"id":848,"type":"Note","pos":[8750,4900],"size":[328.192138671875,88],"flags":{},"order":148,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["This configuration is equivalent to \"Masks From Colors\"."],"color":"#432","bgcolor":"#653"},{"id":792,"type":"Masks From Color Swatches","pos":[8900,5040],"size":[315,46],"flags":{},"order":195,"mode":0,"inputs":[{"name":"image_color_mask","localized_name":"image_color_mask","type":"IMAGE","link":null},{"name":"color_swatches","localized_name":"color_swatches","type":"COLOR_SWATCHES","link":2034}],"outputs":[{"name":"masks","localized_name":"masks","type":"MASK","links":null}],"properties":{"Node name for S&R":"Masks From Color Swatches"},"widgets_values":[]},{"id":851,"type":"Masks Unpack 8","pos":[9280,4590],"size":[140,166],"flags":{},"order":149,"mode":0,"inputs":[{"name":"masks","localized_name":"masks","type":"MASK","link":null}],"outputs":[{"name":"masks","localized_name":"masks","type":"MASK","links":null},{"name":"masks","localized_name":"masks","type":"MASK","links":null},{"name":"masks","localized_name":"masks","type":"MASK","links":null},{"name":"masks","localized_name":"masks","type":"MASK","links":null},{"name":"masks","localized_name":"masks","type":"MASK","links":null},{"name":"masks","localized_name":"masks","type":"MASK","links":null},{"name":"masks","localized_name":"masks","type":"MASK","links":null},{"name":"masks","localized_name":"masks","type":"MASK","links":null}],"properties":{"Node name for S&R":"Masks Unpack 8"},"widgets_values":[]},{"id":852,"type":"Masks Unpack 
4","pos":[9280,4430],"size":[140,86],"flags":{},"order":150,"mode":0,"inputs":[{"name":"masks","localized_name":"masks","type":"MASK","link":null}],"outputs":[{"name":"masks","localized_name":"masks","type":"MASK","links":null},{"name":"masks","localized_name":"masks","type":"MASK","links":null},{"name":"masks","localized_name":"masks","type":"MASK","links":null},{"name":"masks","localized_name":"masks","type":"MASK","links":null}],"properties":{"Node name for S&R":"Masks Unpack 4"},"widgets_values":[]},{"id":849,"type":"Note","pos":[8590,4370],"size":[296.4569396972656,149.35540771484375],"flags":{},"order":151,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["image_color_swatches:\n\nThis is an image with colors drawn one at a time, top to bottom. It will set the order of the masks outputted in the connected \"unpack\" node to be the same as the order they appear in the \"swatches\" image.\n\nNote: white is the background and is ignored!"],"color":"#432","bgcolor":"#653"},{"id":793,"type":"Masks From Colors","pos":[8910,4430],"size":[330,46],"flags":{},"order":152,"mode":0,"inputs":[{"name":"image_color_swatches","localized_name":"image_color_swatches","type":"IMAGE","link":null},{"name":"image_color_mask","localized_name":"image_color_mask","type":"IMAGE","link":null}],"outputs":[{"name":"masks","localized_name":"masks","type":"MASK","links":[2036,2037],"slot_index":0}],"properties":{"Node name for S&R":"Masks From Colors"},"widgets_values":[]},{"id":855,"type":"MaskPreview+","pos":[8970,4590],"size":[210,26],"flags":{},"order":197,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":2037}],"outputs":[],"properties":{"Node name for S&R":"MaskPreview+"},"widgets_values":[]},{"id":854,"type":"Note","pos":[8600,4580],"size":[284.8203125,146.1818084716797],"flags":{},"order":153,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["image_color_mask:\n\nDraw a mask using the same colors used in the swatches. 
\n\nI *strongly* suggest using Mask Preview as shown to the right to get a feel for this."],"color":"#432","bgcolor":"#653"},{"id":847,"type":"MaskFromRGBCMYBW+","pos":[8300,4640],"size":[224.02476501464844,246],"flags":{},"order":154,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","link":null}],"outputs":[{"name":"red","localized_name":"red","type":"MASK","links":null},{"name":"green","localized_name":"green","type":"MASK","links":null},{"name":"blue","localized_name":"blue","type":"MASK","links":null},{"name":"cyan","localized_name":"cyan","type":"MASK","links":null},{"name":"magenta","localized_name":"magenta","type":"MASK","links":null},{"name":"yellow","localized_name":"yellow","type":"MASK","links":null},{"name":"black","localized_name":"black","type":"MASK","links":null},{"name":"white","localized_name":"white","type":"MASK","links":null}],"properties":{"Node name for S&R":"MaskFromRGBCMYBW+"},"widgets_values":[0.15,0.15,0.15]},{"id":846,"type":"Note","pos":[8270,4370],"size":[286.9356994628906,206.45907592773438],"flags":{},"order":155,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["These nodes can be useful in situations where you want to composite complex masks with many regions, without overlap, and without missing areas. They allow these to be made easily in an editor such as MSPaint.\n\nThey are somewhat similar in function to the node shown below from ComfyUI Essentials. They will all get the job done. The only advantage of the \"Masks From Colors\" nodes is that any color may be used, theoretically allowing dozens of zones to be drawn. 
For 8 or fewer zones (most cases) either may be used."],"color":"#432","bgcolor":"#653"},{"id":850,"type":"Masks Unpack 16","pos":[9280,4840],"size":[140,326],"flags":{},"order":196,"mode":0,"inputs":[{"name":"masks","localized_name":"masks","type":"MASK","link":2036}],"outputs":[{"name":"masks","localized_name":"masks","type":"MASK","links":null},{"name":"masks","localized_name":"masks","type":"MASK","links":null},{"name":"masks","localized_name":"masks","type":"MASK","links":null},{"name":"masks","localized_name":"masks","type":"MASK","links":null},{"name":"masks","localized_name":"masks","type":"MASK","links":null},{"name":"masks","localized_name":"masks","type":"MASK","links":null},{"name":"masks","localized_name":"masks","type":"MASK","links":null},{"name":"masks","localized_name":"masks","type":"MASK","links":null},{"name":"masks","localized_name":"masks","type":"MASK","links":null},{"name":"masks","localized_name":"masks","type":"MASK","links":null},{"name":"masks","localized_name":"masks","type":"MASK","links":null},{"name":"masks","localized_name":"masks","type":"MASK","links":null},{"name":"masks","localized_name":"masks","type":"MASK","links":null},{"name":"masks","localized_name":"masks","type":"MASK","links":null},{"name":"masks","localized_name":"masks","type":"MASK","links":null},{"name":"masks","localized_name":"masks","type":"MASK","links":null}],"properties":{"Node name for S&R":"Masks Unpack 16"},"widgets_values":[]},{"id":858,"type":"VAEEncode","pos":[12693.298828125,3026.815673828125],"size":[140,46],"flags":{},"order":212,"mode":0,"inputs":[{"name":"pixels","localized_name":"pixels","type":"IMAGE","link":2039},{"name":"vae","localized_name":"vae","type":"VAE","link":null}],"outputs":[{"name":"LATENT","localized_name":"LATENT","type":"LATENT","links":[2040],"slot_index":0}],"properties":{"Node name for 
S&R":"VAEEncode"},"widgets_values":[]},{"id":862,"type":"Note","pos":[12226.9638671875,2733.324951171875],"size":[307.74560546875,219.57456970214844],"flags":{},"order":156,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["ConditioningBatch4 (and 8) apply conditioning to each tile in the order received by the sampler. This is very useful for ensuring coherent results. This WF avoids the creation of seams, and is efficient, only requiring 4 tiles for upscales. \n\nConditioningBatch4 currently is not supported by the negative conditioning input. Use a standard negative prompt (or nothing).\n\nIf you separate the tiles individually, you should be able to use Flux Redux conditioning for each of the ConditioningBatch4 inputs."],"color":"#432","bgcolor":"#653"},{"id":860,"type":"VAEDecode","pos":[13233.298828125,2776.815673828125],"size":[140,46],"flags":{},"order":225,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":2041},{"name":"vae","localized_name":"vae","type":"VAE","link":null}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2042],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode"},"widgets_values":[]},{"id":853,"type":"ConditioningBatch4","pos":[12613.298828125,2746.815673828125],"size":[228.39999389648438,86],"flags":{},"order":157,"mode":0,"inputs":[{"name":"conditioning_0","localized_name":"conditioning_0","type":"CONDITIONING","link":null},{"name":"conditioning_1","localized_name":"conditioning_1","type":"CONDITIONING","shape":7,"link":null},{"name":"conditioning_2","localized_name":"conditioning_2","type":"CONDITIONING","shape":7,"link":null},{"name":"conditioning_3","localized_name":"conditioning_3","type":"CONDITIONING","shape":7,"link":null}],"outputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","links":[2038],"slot_index":0}],"properties":{"Node name for 
S&R":"ConditioningBatch4"},"widgets_values":[]},{"id":856,"type":"ClownsharKSampler_Beta","pos":[12883.298828125,2776.815673828125],"size":[315,418],"flags":{},"order":220,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":2038},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":2043},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":2040},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2041],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"multistep/res_2m","beta57",30,-1,1,5.5,0,"randomize","standard",true]},{"id":863,"type":"CLIPTextEncode","pos":[12610.970703125,2882.634033203125],"size":[229.78173828125,88],"flags":{},"order":158,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":null}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2043]}],"properties":{"Node name for 
S&R":"CLIPTextEncode"},"widgets_values":[""]},{"id":864,"type":"ImageResize+","pos":[12208.77734375,3038.4794921875],"size":[210,218],"flags":{},"order":159,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","link":null}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2044]},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"ImageResize+"},"widgets_values":[1792,1792,"nearest","stretch","always",0]},{"id":857,"type":"ImageTile+","pos":[12453.298828125,3036.815673828125],"size":[210,234],"flags":{},"order":198,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","link":2044}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2039],"slot_index":0},{"name":"tile_width","localized_name":"tile_width","type":"INT","links":null},{"name":"tile_height","localized_name":"tile_height","type":"INT","links":null},{"name":"overlap_x","localized_name":"overlap_x","type":"INT","links":null},{"name":"overlap_y","localized_name":"overlap_y","type":"INT","links":null}],"properties":{"Node name for S&R":"ImageTile+"},"widgets_values":[2,2,0,128,128]},{"id":859,"type":"ImageUntile+","pos":[13413.298828125,2776.815673828125],"size":[210,130],"flags":{},"order":227,"mode":0,"inputs":[{"name":"tiles","localized_name":"tiles","type":"IMAGE","link":2042}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":null}],"properties":{"Node name for S&R":"ImageUntile+"},"widgets_values":[128,128,2,2]},{"id":868,"type":"Note","pos":[9664.4189453125,3771.01416015625],"size":[284.8223571777344,176.87057495117188],"flags":{},"order":160,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Styles are supported for:\n\nHiDream (outstanding results)\n\nFlux (best results are with style loras, as the base model is severely lacking understanding of 
styles)\n\nWAN \n\nUse of the \"Re...\" patcher nodes is required, as custom model code is used."],"color":"#432","bgcolor":"#653"},{"id":869,"type":"TorchCompileModels","pos":[7795.904296875,2736.478515625],"size":[247.29759216308594,178],"flags":{},"order":161,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":null}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":null}],"properties":{"Node name for S&R":"TorchCompileModels"},"widgets_values":["inductor",false,"default",false,64,0]},{"id":703,"type":"SD35Loader","pos":[7438.462890625,2740.95849609375],"size":[315,218],"flags":{},"order":162,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":null},{"name":"clip","localized_name":"clip","type":"CLIP","links":null},{"name":"vae","localized_name":"vae","type":"VAE","links":null}],"properties":{"Node name for S&R":"SD35Loader"},"widgets_values":["sd3.5_medium.safetensors","default",".use_ckpt_clip",".none",".none",".use_ckpt_vae"]},{"id":700,"type":"Note","pos":[7480,3060],"size":[225.09121704101562,88],"flags":{},"order":163,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["This node must be used when using regional conditioning with Flux. 
"],"color":"#432","bgcolor":"#653"},{"id":870,"type":"Note","pos":[8087.9755859375,2736.00830078125],"size":[291.73583984375,88],"flags":{},"order":164,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Generic compile node for many models."],"color":"#432","bgcolor":"#653"},{"id":696,"type":"ModelSamplingAdvanced","pos":[8136.62109375,3062.419677734375],"size":[210,82],"flags":{},"order":165,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":null}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":null}],"properties":{"Node name for S&R":"ModelSamplingAdvanced"},"widgets_values":["exponential",3]},{"id":697,"type":"Note","pos":[8088.63037109375,2876.072998046875],"size":[299.7002868652344,122.62284851074219],"flags":{},"order":166,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["ModelSamplingAdvanced many different models, including AuraFlow, SD3.5, Flux, and more, including video models.\n\nWhen \"scaling\" is set to \"exponential\" it uses the method employed by Flux, which is actually quite good with SD3.5. 
\"linear\" is the default method used by SD3.5."],"color":"#432","bgcolor":"#653"},{"id":788,"type":"ClownGuide_Style_Beta","pos":[9993.591796875,4034.93017578125],"size":[243.85076904296875,286],"flags":{},"order":167,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":null}],"properties":{"Node name for S&R":"ClownGuide_Style_Beta"},"widgets_values":["positive","WCT",1,1,"constant",0,15,false]},{"id":865,"type":"Note","pos":[9667.3056640625,4012.013427734375],"size":[283.76544189453125,448.7384338378906],"flags":{},"order":168,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["ClownGuide style: the settings shown are the ones you will generally use. WCT is the more accurate of the two methods. If you have issues, you can fall back to AdaIN. \n\nIt is best to use this on the first 1/2 of steps or so. Be sure to provide some information about the style in the prompt for best results \"cel-shaded anime illustration of...\" \"\"gritty illustration of....\" \"analog photo of\".\n\nThe mask current has no effect, but is there as a placeholder as regional style methods are under development.\n\n\nIf you are using CFG = 1.0 (typical with distilled models such as Flux or HiDream Dev), synweight has no effect and can be ignored.\n\nSynweight simply applies the same style to the opposite conditioning (so if apply_to = positive, and synweight is at 0.5, it will use it at 0.5 strength on the negative). In the vast majority of cases, it's best to leave synweight at the default. Occasionally, setting it to 0.5 or 0.0 can be helpful, but it can result in burning the image due to issues with CFG. 
\n\nStandard guides may be inputted into this node, if you wish to use them together.\n"],"color":"#432","bgcolor":"#653"},{"id":871,"type":"Note","pos":[9669.4208984375,4527.666015625],"size":[277.0335998535156,102.53260040283203],"flags":{},"order":169,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["ClownGuide Mean: somewhat similar effect, but does not require the \"Re...\" patcher nodes and works with all models. Effect is typically considerably less precise."],"color":"#432","bgcolor":"#653"},{"id":866,"type":"Note","pos":[10292.3017578125,4024.706787109375],"size":[284.8223571777344,176.87057495117188],"flags":{},"order":170,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["BLUR KILLER TIP:\n\nWhen generating photography with Flux or HiDream (where blur can be frustratingly difficult to avoid), try using a style guide for the first 1/3rd of steps that is a sharp photograph with similar lighting/hues to what you are aiming for. You might need to try a handful of photos before landing on a \"hit\", but the right one will eliminate blur 100%, even with a close up portrait photograph.\n"],"color":"#432","bgcolor":"#653"},{"id":872,"type":"ClownGuide_AdaIN_MMDiT_Beta","pos":[10680,3810],"size":[246.13087463378906,430],"flags":{},"order":171,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":null}],"properties":{"Node name for 
S&R":"ClownGuide_AdaIN_MMDiT_Beta"},"widgets_values":[1,"constant","","","20","0.5",0,15,false]},{"id":874,"type":"ClownGuide_AttnInj_MMDiT_Beta","pos":[10990,3810],"size":[272.0969543457031,718],"flags":{},"order":172,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":null}],"properties":{"Node name for S&R":"ClownGuide_AttnInj_MMDiT_Beta"},"widgets_values":[1,"constant","0,1,3","1.0","20","0.5",0,0,1,0,0,0,0,0,0,0,0,0,0,15,false]},{"id":873,"type":"Note","pos":[10590,4300],"size":[348.2928771972656,313.42919921875],"flags":{},"order":173,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["ClownGuide AdaIN and AttnInj:\n\nAdvanced experimental nodes for HiDream. Very strong effect and can be used together with all other guide nodes.\n\nBest used like a monkey in a missile silo. Start pushing buttons and you'll win eventually!\n\nList the blocks you wish the effect to be applied to, and the weight of the effect on that block, in the same order. \"all\" will use all blocks of that type, and if only one weight is listed, it will use that for all blocks listed.\n\nThere are 16 double blocks, and 32 single blocks. Each is numbered beginning at 0. For example, the following block numberings are equivalent for double_blocks:\n\nall\n0-15\n0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15\n"],"color":"#432","bgcolor":"#653"},{"id":875,"type":"Note","pos":[10980,4590],"size":[301.1705017089844,233.60943603515625],"flags":{},"order":174,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Injects calculated attention from the guide into the main sampling process. 
This will carry over some compositional information, as well as lighting. It can be very interesting in combination with ClownGuide Style or ClownGuide AdaIN (MMDiT).\n\nimg_v will have the most color/style information with the least effect on composition.\n\nimg_k will increase the amount of compositional information.\n\nimg_q will increase the compositional information to the point where it can begin looking more like a traditional guide mode."],"color":"#432","bgcolor":"#653"},{"id":867,"type":"ClownGuide_Mean_Beta","pos":[9997.337890625,4526.90869140625],"size":[241.34442138671875,238],"flags":{},"order":175,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":null}],"properties":{"Node name for S&R":"ClownGuide_Mean_Beta"},"widgets_values":[1,1,"constant",0,15,false]},{"id":679,"type":"SharkSampler_Beta","pos":[1370,3140],"size":[285.713623046875,386],"flags":{},"order":199,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sampler","localized_name":"sampler","type":"SAMPLER","shape":7,"link":1973},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":2046},{"name":"options 
2","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":null},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"SharkSampler_Beta"},"widgets_values":["beta57",30,-1,1,5.5,0,"fixed","standard"]},{"id":876,"type":"SharkOptions_GuiderInput","pos":[1051.8299560546875,3379.638427734375],"size":[282.30291748046875,46],"flags":{},"order":176,"mode":0,"inputs":[{"name":"guider","localized_name":"guider","type":"GUIDER","link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[2046],"slot_index":0}],"properties":{"Node name for S&R":"SharkOptions_GuiderInput"}},{"id":802,"type":"Note","pos":[690.13916015625,3410.965576171875],"size":[321.8917236328125,108.77723693847656],"flags":{},"order":95,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Typically, SharkSampler slots into workflows where SamplerCustom would have been used.\n\nSharkOptions GuiderInput allows it to be used like SamplerCustomAdvanced, with any guider input of your choosing. 
It may also be used with ClownSharkSampler."],"color":"#432","bgcolor":"#653"}],"links":[[1365,453,0,454,0,"IMAGE"],[1890,584,0,606,0,"MODEL"],[1904,606,0,601,0,"MODEL"],[1907,598,0,610,1,"CONDITIONING"],[1908,599,0,610,2,"CONDITIONING"],[1909,601,0,610,0,"MODEL"],[1910,601,0,612,0,"MODEL"],[1911,598,0,612,1,"CONDITIONING"],[1912,599,0,612,2,"CONDITIONING"],[1914,598,0,613,1,"CONDITIONING"],[1915,599,0,613,2,"CONDITIONING"],[1923,613,0,453,0,"LATENT"],[1926,609,0,613,0,"MODEL"],[1937,606,1,598,0,"CLIP"],[1938,606,1,599,0,"CLIP"],[1939,620,0,606,1,"CLIP"],[1940,621,0,453,1,"VAE"],[1947,624,0,612,6,"OPTIONS"],[1948,625,0,613,6,"OPTIONS"],[1949,610,0,612,3,"LATENT"],[1950,612,0,613,3,"LATENT"],[1951,626,0,610,6,"OPTIONS"],[1952,640,0,641,3,"LATENT"],[1953,641,0,642,3,"LATENT"],[1962,666,0,660,6,"OPTIONS"],[1963,661,0,660,7,"OPTIONS"],[1968,676,0,660,9,"OPTIONS"],[1971,662,0,660,10,"OPTIONS"],[1972,665,0,660,11,"OPTIONS"],[1973,680,0,679,3,"SAMPLER"],[1974,682,0,660,12,"OPTIONS"],[1975,684,0,682,0,"SIGMAS"],[1976,684,0,686,0,"SIGMAS"],[1977,688,0,691,5,"GUIDES"],[1978,693,0,692,0,"MODEL"],[1979,692,0,694,0,"MODEL"],[1980,694,0,695,0,"MODEL"],[1982,724,0,720,1,"CONDITIONING"],[1983,724,0,720,2,"CONDITIONING"],[1984,722,0,720,5,"GUIDES"],[1985,723,0,721,5,"GUIDES"],[1986,751,0,752,2,"MASK"],[1987,753,0,752,3,"MASK"],[1988,756,0,754,2,"MASK"],[1989,757,0,754,3,"MASK"],[1990,760,0,660,13,"OPTIONS"],[1991,763,0,660,8,"OPTIONS"],[1992,735,0,770,5,"GUIDES"],[1994,772,0,770,6,"OPTIONS"],[1995,775,0,778,0,"COND_REGIONS"],[1996,778,0,779,0,"COND_REGIONS"],[1999,779,0,783,0,"COND_REGIONS"],[2000,783,0,776,0,"COND_REGIONS"],[2003,798,0,660,14,"OPTIONS"],[2004,804,0,805,4,"LATENT"],[2005,805,0,806,4,"LATENT"],[2006,807,0,805,6,"OPTIONS"],[2007,811,0,660,15,"OPTIONS"],[2009,813,0,809,6,"OPTIONS"],[2010,814,0,815,4,"LATENT"],[2011,815,0,817,4,"LATENT"],[2012,826,0,824,1,"IMAGE"],[2013,826,0,827,0,"IMAGE"],[2014,827,0,824,2,"IMAGE"],[2015,824,0,828,0,"IMAGE"],[2016,824,0,825,0,"IMAGE"]
,[2017,824,2,825,2,"IMAGE"],[2018,824,2,829,0,"IMAGE"],[2019,825,1,830,0,"IMAGE"],[2022,840,2,835,2,"IMAGE"],[2023,835,1,837,0,"IMAGE"],[2024,836,0,838,0,"IMAGE"],[2025,836,0,840,1,"IMAGE"],[2026,838,0,840,2,"IMAGE"],[2030,843,0,841,1,"IMAGE"],[2031,843,0,842,0,"IMAGE"],[2032,842,0,841,2,"IMAGE"],[2033,841,0,835,0,"IMAGE"],[2034,794,0,792,1,"COLOR_SWATCHES"],[2036,793,0,850,0,"MASK"],[2037,793,0,855,0,"MASK"],[2038,853,0,856,1,"CONDITIONING"],[2039,857,0,858,0,"IMAGE"],[2040,858,0,856,3,"LATENT"],[2041,856,0,860,0,"LATENT"],[2042,860,0,859,0,"IMAGE"],[2043,863,0,856,2,"CONDITIONING"],[2044,864,0,857,0,"IMAGE"],[2046,876,0,679,6,"OPTIONS"]],"groups":[{"id":1,"title":"UNSAMPLING SETUP","bounding":[727.6610717773438,4702.3486328125,1679.59423828125,1066.484619140625],"color":"#3f789e","font_size":24,"flags":{}},{"id":2,"title":"CHAINED SAMPLER SETUP","bounding":[726.7158203125,3763.15576171875,1680.4798583984375,894.4533081054688],"color":"#3f789e","font_size":24,"flags":{}},{"id":3,"title":"INTRODUCTION TO CLOWNSAMPLING","bounding":[603.1690063476562,2607.28857421875,1866.77099609375,983.0913696289062],"color":"#3f789e","font_size":24,"flags":{}},{"id":5,"title":"OPTIONS AND AUTOMATION","bounding":[2599.417236328125,2632.92578125,1724.455078125,3136.83154296875],"color":"#3f789e","font_size":24,"flags":{}},{"id":6,"title":"GUIDES","bounding":[4494.4521484375,3692.791259765625,1757.123291015625,2078.85498046875],"color":"#3f789e","font_size":24,"flags":{}},{"id":7,"title":"LOADERS AND PATCHERS","bounding":[7042.78515625,2636.466552734375,1379.5494384765625,916.3328247070312],"color":"#3f789e","font_size":24,"flags":{}},{"id":8,"title":"ULTRACASCADE","bounding":[-3341.39892578125,2604.916748046875,3831.125244140625,1936.0570068359375],"color":"#3f789e","font_size":24,"flags":{}},{"id":9,"title":"REGIONAL CONDITIONING","bounding":[6335.28125,3695.2412109375,3155.973388671875,1510.357421875],"color":"#3f789e","font_size":24,"flags":{}},{"id":10,"title":"Cyclosampling 
(looping a sampler node)","bounding":[4505.0283203125,2635.9287109375,2438.49853515625,916.0137939453125],"color":"#3f789e","font_size":24,"flags":{}},{"id":11,"title":"Miscellaneous Image Nodes","bounding":[8475.5224609375,2640.341552734375,610.4679565429688,912.4259033203125],"color":"#3f789e","font_size":24,"flags":{}},{"id":12,"title":"Frequency Separation","bounding":[9150.265625,2642.074462890625,2909.011474609375,911.339111328125],"color":"#3f789e","font_size":24,"flags":{}},{"id":13,"title":"Tiled Upscales with Tiled Conditioning","bounding":[12136.822265625,2643.802490234375,1541.95556640625,664.3409423828125],"color":"#3f789e","font_size":24,"flags":{}},{"id":14,"title":"Style Transfer","bounding":[9615.3837890625,3690.478515625,2453.906494140625,1514.1885986328125],"color":"#3f789e","font_size":24,"flags":{}}],"config":{},"extra":{"ds":{"scale":1.6105100000000008,"offset":[1291.1169756467461,-2415.779581669771]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0},"version":0.4}
================================================
FILE: example_workflows/sd35 medium unsampling data.json
================================================
{"last_node_id":635,"last_link_id":2023,"nodes":[{"id":627,"type":"SD35Loader","pos":[602.6103515625,-123.47957611083984],"size":[315,218],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2014],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[2010],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[2011,2012],"slot_index":2}],"properties":{"Node name for S&R":"SD35Loader"},"widgets_values":["sd3.5_medium.safetensors","default","clip_l_sd35.safetensors","clip_g_sd35.safetensors","t5xxl_fp16.safetensors","sd35_vae.safetensors"]},{"id":628,"type":"LoadImage","pos":[599.166015625,156.38429260253906],"size":[315,314],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2017]},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["ComfyUI_14254_.png","image"]},{"id":107,"type":"CLIPTextEncode","pos":[959.4713745117188,-123.3353500366211],"size":[282.33453369140625,173.58438110351562],"flags":{"collapsed":false},"order":2,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","label":"clip","type":"CLIP","link":2010}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","label":"CONDITIONING","type":"CONDITIONING","shape":3,"links":[2018],"slot_index":0}],"title":"Positive Prompt","properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["the mournful lamentations of of a female rock singer on stage with chaos behind her, her face screaming her sorrowful refrains the despairing cries of anguished screams howling agonized moans, her pained whispers mournful sighs distant echoes across the smoky stage, fading memories of lost loves, forgotten dreams, shattered hopes, crushed spirits, broken 
hearts"]},{"id":629,"type":"VAEEncodeAdvanced","pos":[961.6968994140625,123.66181182861328],"size":[278.0284423828125,280.5834045410156],"flags":{},"order":3,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":2017},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":2012}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[2013,2020,2022],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":null},{"name":"mask","localized_name":"mask","type":"MASK","links":null},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[2015]},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"VAEEncodeAdvanced"},"widgets_values":["false",1024,1024,"red",false,"16_channels"]},{"id":632,"type":"ModelSamplingAdvancedResolution","pos":[962.5586547851562,-316.3705139160156],"size":[277.62237548828125,126],"flags":{},"order":6,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":2014},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":2015}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2016],"slot_index":0}],"properties":{"Node name for 
S&R":"ModelSamplingAdvancedResolution"},"widgets_values":["exponential",1.35,0.85]},{"id":591,"type":"VAEDecode","pos":[1924.08251953125,-233.2501983642578],"size":[210,46],"flags":{"collapsed":false},"order":9,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","label":"samples","type":"LATENT","link":2008},{"name":"vae","localized_name":"vae","label":"vae","type":"VAE","link":2011}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","label":"IMAGE","type":"IMAGE","shape":3,"links":[2019],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode"},"widgets_values":[]},{"id":633,"type":"SaveImage","pos":[1921.8458251953125,-123.4797134399414],"size":[436.4179382324219,508.5302429199219],"flags":{},"order":10,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2019}],"outputs":[],"properties":{},"widgets_values":["ComfyUI"]},{"id":631,"type":"ClownsharkChainsampler_Beta","pos":[1605.8143310546875,-124.34080505371094],"size":[280.55523681640625,510],"flags":{},"order":8,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":2005},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":2023},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2008],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"multistep/res_3m",-1,5.5,"resample",true]},{"id":630,"type":"ClownsharKSampler_Beta","pos":[1271.7001953125,-124.3408432006836],"size":[291.7499084472656,630],"flags":{},"order":7,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":2016},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":2018},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":2013},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":2021},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2005]},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"multistep/res_3m","beta57",60,-1,1,1,0,"fixed","unsample",true]},{"id":634,"type":"ClownGuide_Beta","pos":[1276.0064697265625,-480.84442138671875],"size":[284.860595703125,290.8609924316406],"flags":{},"order":4,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":2020},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[2021],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownGuide_Beta"},"widgets_values":["data",false,false,0.5,1,"constant",0,-1,false]},{"id":635,"type":"ClownGuide_Beta","pos":[1604.09326171875,-479.9832763671875],"size":[284.860595703125,290.8609924316406],"flags":{},"order":5,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":2022},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[2023],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuide_Beta"},"widgets_values":["data",false,true,0.35,0.35,"beta57",0,12,false]}],"links":[[2005,630,0,631,4,"LATENT"],[2008,631,0,591,0,"LATENT"],[2010,627,1,107,0,"CLIP"],[2011,627,2,591,1,"VAE"],[2012,627,2,629,4,"VAE"],[2013,629,0,630,3,"LATENT"],[2014,627,0,632,0,"MODEL"],[2015,629,3,632,1,"LATENT"],[2016,632,0,630,0,"MODEL"],[2017,628,0,629,0,"IMAGE"],[2018,107,0,630,1,"CONDITIONING"],[2019,591,0,633,0,"IMAGE"],[2020,629,0,634,0,"LATENT"],[2021,634,0,630,5,"GUIDES"],[2022,629,0,635,0,"LATENT"],[2023,635,0,631,5,"GUIDES"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.7985878990923265,"offset":[672.6014509912476,552.1175843760627]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0},"version":0.4}
================================================
FILE: example_workflows/sd35 medium unsampling.json
================================================
{"last_node_id":635,"last_link_id":2023,"nodes":[{"id":627,"type":"SD35Loader","pos":[602.6103515625,-123.47957611083984],"size":[315,218],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2014],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[2010],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[2011,2012],"slot_index":2}],"properties":{"Node name for S&R":"SD35Loader"},"widgets_values":["sd3.5_medium.safetensors","default","clip_l_sd35.safetensors","clip_g_sd35.safetensors","t5xxl_fp16.safetensors","sd35_vae.safetensors"]},{"id":628,"type":"LoadImage","pos":[599.166015625,156.38429260253906],"size":[315,314],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2017]},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["ComfyUI_14254_.png","image"]},{"id":107,"type":"CLIPTextEncode","pos":[959.4713745117188,-123.3353500366211],"size":[282.33453369140625,173.58438110351562],"flags":{"collapsed":false},"order":2,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","label":"clip","type":"CLIP","link":2010}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","label":"CONDITIONING","type":"CONDITIONING","shape":3,"links":[2018],"slot_index":0}],"title":"Positive Prompt","properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["the mournful lamentations of of a female rock singer on stage with chaos behind her, her face screaming her sorrowful refrains the despairing cries of anguished screams howling agonized moans, her pained whispers mournful sighs distant echoes across the smoky stage, fading memories of lost loves, forgotten dreams, shattered hopes, crushed spirits, broken 
hearts"]},{"id":629,"type":"VAEEncodeAdvanced","pos":[961.6968994140625,123.66181182861328],"size":[278.0284423828125,280.5834045410156],"flags":{},"order":3,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":2017},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":2012}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[2013,2020,2022],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":null},{"name":"mask","localized_name":"mask","type":"MASK","links":null},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[2015]},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"VAEEncodeAdvanced"},"widgets_values":["false",1024,1024,"red",false,"16_channels"]},{"id":632,"type":"ModelSamplingAdvancedResolution","pos":[962.5586547851562,-316.3705139160156],"size":[277.62237548828125,126],"flags":{},"order":6,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":2014},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":2015}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2016],"slot_index":0}],"properties":{"Node name for 
S&R":"ModelSamplingAdvancedResolution"},"widgets_values":["exponential",1.35,0.85]},{"id":634,"type":"ClownGuide_Beta","pos":[1276.0064697265625,-480.84442138671875],"size":[284.860595703125,290.8609924316406],"flags":{},"order":4,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":2020},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[2021],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuide_Beta"},"widgets_values":["epsilon",false,false,0.5,1,"constant",0,-1,false]},{"id":633,"type":"SaveImage","pos":[1921.8458251953125,-123.4797134399414],"size":[436.4179382324219,508.5302429199219],"flags":{},"order":10,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2019}],"outputs":[],"properties":{},"widgets_values":["ComfyUI"]},{"id":631,"type":"ClownsharkChainsampler_Beta","pos":[1605.8143310546875,-124.34080505371094],"size":[280.55523681640625,510],"flags":{},"order":8,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":2005},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":2023},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2008],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"optio
ns","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"multistep/res_3m",-1,5.5,"resample",true]},{"id":630,"type":"ClownsharKSampler_Beta","pos":[1271.7001953125,-124.3408432006836],"size":[291.7499084472656,630],"flags":{},"order":7,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":2016},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":2018},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":2013},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":2021},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2005]},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"multistep/res_3m","beta57",60,-1,1,1,0,"fixed","unsample",true]},{"id":635,"type":"ClownGuide_Beta","pos":[1604.09326171875,-479.9832763671875],"size":[284.860595703125,290.8609924316406],"flags":{},"order":5,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":2022},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[2023],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownGuide_Beta"},"widgets_values":["epsilon",false,true,0.5,1,"beta57",0,25,false]},{"id":591,"type":"VAEDecode","pos":[1924.08251953125,-233.2501983642578],"size":[140,46],"flags":{"collapsed":false},"order":9,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","label":"samples","type":"LATENT","link":2008},{"name":"vae","localized_name":"vae","label":"vae","type":"VAE","link":2011}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","label":"IMAGE","type":"IMAGE","shape":3,"links":[2019],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode"},"widgets_values":[]}],"links":[[2005,630,0,631,4,"LATENT"],[2008,631,0,591,0,"LATENT"],[2010,627,1,107,0,"CLIP"],[2011,627,2,591,1,"VAE"],[2012,627,2,629,4,"VAE"],[2013,629,0,630,3,"LATENT"],[2014,627,0,632,0,"MODEL"],[2015,629,3,632,1,"LATENT"],[2016,632,0,630,0,"MODEL"],[2017,628,0,629,0,"IMAGE"],[2018,107,0,630,1,"CONDITIONING"],[2019,591,0,633,0,"IMAGE"],[2020,629,0,634,0,"LATENT"],[2021,634,0,630,5,"GUIDES"],[2022,629,0,635,0,"LATENT"],[2023,635,0,631,5,"GUIDES"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.635079908265751,"offset":[1291.723098320105,628.7383473687522]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0},"version":0.4}
================================================
FILE: example_workflows/sdxl regional antiblur.json
================================================
{"last_node_id":730,"last_link_id":2113,"nodes":[{"id":13,"type":"Reroute","pos":[1280,-650],"size":[75,26],"flags":{},"order":12,"mode":0,"inputs":[{"name":"","type":"*","link":2098}],"outputs":[{"name":"","type":"MODEL","links":[1967],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":490,"type":"Reroute","pos":[1280,-610],"size":[75,26],"flags":{},"order":9,"mode":0,"inputs":[{"name":"","type":"*","link":2099}],"outputs":[{"name":"","type":"CLIP","links":[1939,2092,2112],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":14,"type":"Reroute","pos":[1280,-570],"size":[75,26],"flags":{},"order":10,"mode":0,"inputs":[{"name":"","type":"*","link":2100}],"outputs":[{"name":"","type":"VAE","links":[18,1328],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":398,"type":"SaveImage","pos":[1379.9996337890625,-267.2835998535156],"size":[341.7508850097656,561.0067749023438],"flags":{},"order":21,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":1329}],"outputs":[],"properties":{"Node name for S&R":"SaveImage","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["ComfyUI"]},{"id":701,"type":"Note","pos":[80,-520],"size":[342.05950927734375,88],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["I usually just lazily draw masks in Load Image nodes (with some random image loaded), but for the sake of reproducibility, here's another approach."],"color":"#432","bgcolor":"#653"},{"id":712,"type":"Note","pos":[-210,-520],"size":[245.76409912109375,91.6677017211914],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["So long as these masks are all the same size, the regional conditioning nodes will handle resizing to the image size for 
you."],"color":"#432","bgcolor":"#653"},{"id":676,"type":"InvertMask","pos":[20,-370],"size":[142.42074584960938,26],"flags":{},"order":7,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":2073}],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[2083],"slot_index":0}],"properties":{"Node name for S&R":"InvertMask"},"widgets_values":[]},{"id":7,"type":"VAEEncodeAdvanced","pos":[719.6110229492188,16.752899169921875],"size":[261.2217712402344,279.3136901855469],"flags":{},"order":16,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":null},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":18}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":[],"slot_index":1},{"name":"mask","localized_name":"mask","type":"MASK","links":[],"slot_index":2},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[1399],"slot_index":3},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"VAEEncodeAdvanced","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":["false",1024,1024,"red",false,"16_channels"]},{"id":710,"type":"MaskPreview","pos":[180,-190],"size":[210,246],"flags":{},"order":17,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":2054}],"outputs":[],"properties":{"Node name for 
S&R":"MaskPreview"},"widgets_values":[]},{"id":397,"type":"VAEDecode","pos":[1382.3662109375,-374.17059326171875],"size":[210,46],"flags":{},"order":20,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":2096},{"name":"vae","localized_name":"vae","type":"VAE","link":1328}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[1329],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[]},{"id":715,"type":"SolidMask","pos":[-220,-370],"size":[210,106],"flags":{},"order":2,"mode":0,"inputs":[],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[2073],"slot_index":0}],"properties":{"Node name for S&R":"SolidMask"},"widgets_values":[1,1024,1024]},{"id":716,"type":"SolidMask","pos":[-220,-220],"size":[210,106],"flags":{},"order":3,"mode":0,"inputs":[],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[2065],"slot_index":0}],"properties":{"Node name for S&R":"SolidMask"},"widgets_values":[1,384,864]},{"id":709,"type":"MaskComposite","pos":[190,-370],"size":[210,126],"flags":{},"order":11,"mode":0,"inputs":[{"name":"destination","localized_name":"destination","type":"MASK","link":2083},{"name":"source","localized_name":"source","type":"MASK","link":2065}],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[2054,2091],"slot_index":0}],"properties":{"Node name for S&R":"MaskComposite"},"widgets_values":[256,160,"add"]},{"id":704,"type":"Note","pos":[101.74818420410156,112.67951965332031],"size":[290.7107238769531,155.35317993164062],"flags":{},"order":4,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["ClownRegionalConditionings:\n\nTry raising or lowering weight, and changing the weight scheduler from beta57 to Karras (weakens more quickly), or to linear quadratic (stronger late).\n\nTry changing region_bleed_start_step (earlier will make the image blend together more), 
and end_step."],"color":"#432","bgcolor":"#653"},{"id":722,"type":"ClownRegionalConditioning2","pos":[690,-370],"size":[287.75750732421875,330],"flags":{},"order":18,"mode":0,"inputs":[{"name":"conditioning_masked","localized_name":"conditioning_masked","type":"CONDITIONING","shape":7,"link":2094},{"name":"conditioning_unmasked","localized_name":"conditioning_unmasked","type":"CONDITIONING","shape":7,"link":2093},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":2091},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"region_bleeds","localized_name":"region_bleeds","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","links":[2095],"slot_index":0}],"properties":{"Node name for S&R":"ClownRegionalConditioning2"},"widgets_values":[1,0,0,"constant",0,-1,"boolean_masked",32,false]},{"id":703,"type":"Note","pos":[423.10699462890625,-96.14085388183594],"size":[241.9689483642578,386.7543640136719],"flags":{},"order":5,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["edge_width also creates some overlap around the edges of the mask.\n\nboolean_masked means that the masked area can \"see\" the rest of the image, but the unmasked area cannot. \"boolean\" would mean neither area could see the rest of the image.\n\nTry setting to boolean_unmasked and see what happens!\n\nIf you still have blur, try reducing edge_width (and if you have seams, try increasing it, or setting end_step to something like 20). \n\nAlso verify that you can generate the background prompt alone without blur (if you can't, this won't work). 
And don't get stuck on one seed.\n\nVaguely human-shaped masks also tend to work better than the blocky one used here."],"color":"#432","bgcolor":"#653"},{"id":725,"type":"ReSDPatcher","pos":[1012.9199829101562,-651.4929809570312],"size":[210,82],"flags":{},"order":8,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":2097}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2098],"slot_index":0}],"properties":{"Node name for S&R":"ReSDPatcher"},"widgets_values":["float64",true]},{"id":724,"type":"CheckpointLoaderSimple","pos":[549.1465454101562,-653.311767578125],"size":[416.2424011230469,98],"flags":{},"order":6,"mode":0,"inputs":[],"outputs":[{"name":"MODEL","localized_name":"MODEL","type":"MODEL","links":[2097],"slot_index":0},{"name":"CLIP","localized_name":"CLIP","type":"CLIP","links":[2099],"slot_index":1},{"name":"VAE","localized_name":"VAE","type":"VAE","links":[2100],"slot_index":2}],"properties":{"Node name for S&R":"CheckpointLoaderSimple"},"widgets_values":["_SDXL_/juggernautXL_v9Rundiffusionphoto2.safetensors"]},{"id":730,"type":"CLIPTextEncode","pos":[712.8302612304688,358.5015869140625],"size":[273.04931640625,94.66851806640625],"flags":{"collapsed":false},"order":15,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":2112}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2113],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["low quality, low detail, blurry, unsharp, low resolution, jpeg artifacts"]},{"id":662,"type":"CLIPTextEncode","pos":[460,-370],"size":[210,88],"flags":{"collapsed":false},"order":13,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":1939}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2094],"slot_index":0}],"properties":{"Node name for 
S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["a woman wearing a red flannel shirt and a cute shark plush blue hat"]},{"id":723,"type":"CLIPTextEncode","pos":[460,-240],"size":[210,88],"flags":{"collapsed":false},"order":14,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":2092}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2093],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["a photo from the ground of a college campus"]},{"id":401,"type":"ClownsharKSampler_Beta","pos":[1010,-370],"size":[340.55120849609375,666.8208618164062],"flags":{},"order":19,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":1967},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":2095},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":2113},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1399},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2096],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharKSampler_Beta","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":[0.5,"exponential/res_3s","karras",60,-1,1,7,2,"fixed","standard",true]}],"links":[[18,14,0,7,4,"VAE"],[1328,14,0,397,1,"VAE"],[1329,397,0,398,0,"IMAGE"],[1399,7,3,401,3,"LATENT"],[1939,490,0,662,0,"CLIP"],[1967,13,0,401,0,"MODEL"],[2054,709,0,710,0,"MASK"],[2065,716,0,709,1,"MASK"],[2073,715,0,676,0,"MASK"],[2083,676,0,709,0,"MASK"],[2091,709,0,722,2,"MASK"],[2092,490,0,723,0,"CLIP"],[2093,723,0,722,1,"CONDITIONING"],[2094,662,0,722,0,"CONDITIONING"],[2095,722,0,401,1,"CONDITIONING"],[2096,401,0,397,0,"LATENT"],[2097,724,0,725,0,"MODEL"],[2098,725,0,13,0,"*"],[2099,724,1,490,0,"*"],[2100,724,2,14,0,"*"],[2112,490,0,730,0,"CLIP"],[2113,730,0,401,2,"CONDITIONING"]],"groups":[],"config":{},"extra":{"ds":{"scale":2.322515441988848,"offset":[1367.132902556087,589.0262767308418]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0,"ue_links":[],"VHS_MetadataImage":true,"VHS_KeepIntermediate":true},"version":0.4}
================================================
FILE: example_workflows/sdxl style transfer.json
================================================
{"last_node_id":1394,"last_link_id":3744,"nodes":[{"id":13,"type":"Reroute","pos":[13508.9013671875,-109.2831802368164],"size":[75,26],"flags":{},"order":18,"mode":0,"inputs":[{"name":"","type":"*","link":3741}],"outputs":[{"name":"","type":"MODEL","links":[3740],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":14,"type":"Reroute","pos":[13508.9013671875,-29.283178329467773],"size":[75,26],"flags":{},"order":16,"mode":0,"inputs":[{"name":"","type":"*","link":3744}],"outputs":[{"name":"","type":"VAE","links":[18,2696],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":490,"type":"Reroute","pos":[13508.9013671875,-69.28317260742188],"size":[75,26],"flags":{},"order":15,"mode":0,"inputs":[{"name":"","type":"*","link":3743}],"outputs":[{"name":"","type":"CLIP","links":[2881,3581],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1308,"type":"ClownGuide_Style_Beta","pos":[14108.255859375,675.60693359375],"size":[246.31312561035156,286],"flags":{},"order":26,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":3709},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":3699}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[3604],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownGuide_Style_Beta"},"widgets_values":["positive","WCT",1,1,"constant",0,-1,false]},{"id":970,"type":"CLIPTextEncode","pos":[13688.255859375,165.60690307617188],"size":[281.9206848144531,109.87118530273438],"flags":{},"order":19,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":2881}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2882,3627],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["blurry, out of focus, shallow depth of field, jpeg artifacts, low quality, bad quality, unsharp"]},{"id":1378,"type":"Reroute","pos":[13184.07421875,533.128662109375],"size":[75,26],"flags":{},"order":13,"mode":0,"inputs":[{"name":"","type":"*","link":3721}],"outputs":[{"name":"","type":"IMAGE","links":[3724,3729],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1379,"type":"Reroute","pos":[13185.853515625,168.15780639648438],"size":[75,26],"flags":{},"order":17,"mode":0,"inputs":[{"name":"","type":"*","link":3725}],"outputs":[{"name":"","type":"IMAGE","links":[3726],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":909,"type":"SaveImage","pos":[15220,-259.5838928222656],"size":[457.3382263183594,422.2065124511719],"flags":{},"order":31,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2697}],"outputs":[],"properties":{"Node name for 
S&R":"SaveImage","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["ComfyUI"]},{"id":7,"type":"VAEEncodeAdvanced","pos":[13400,560],"size":[261.2217712402344,298],"flags":{"collapsed":true},"order":24,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":3688},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":3727},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":18},{"name":"width","type":"INT","pos":[10,160.00003051757812],"widget":{"name":"width"},"link":3732},{"name":"height","type":"INT","pos":[10,184.00003051757812],"widget":{"name":"height"},"link":3733}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[2983,3710],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":[3709],"slot_index":1},{"name":"mask","localized_name":"mask","type":"MASK","links":[],"slot_index":2},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[],"slot_index":3},{"name":"width","localized_name":"width","type":"INT","links":[],"slot_index":4},{"name":"height","localized_name":"height","type":"INT","links":[],"slot_index":5}],"properties":{"Node name for S&R":"VAEEncodeAdvanced","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":["false",1344,768,"red",false,"16_channels"]},{"id":1371,"type":"Image Repeat Tile To 
Size","pos":[13390,500],"size":[210,146],"flags":{"collapsed":true},"order":21,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","link":3726},{"name":"width","type":"INT","pos":[10,36],"widget":{"name":"width"},"link":3730},{"name":"height","type":"INT","pos":[10,60],"widget":{"name":"height"},"link":3731}],"outputs":[{"name":"image","localized_name":"image","type":"IMAGE","links":[3727,3728],"slot_index":0}],"properties":{"Node name for S&R":"Image Repeat Tile To Size"},"widgets_values":[1024,1024,true]},{"id":1380,"type":"SetImageSize","pos":[13380,320],"size":[210,102],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"width","localized_name":"width","type":"INT","links":[3730,3732],"slot_index":0},{"name":"height","localized_name":"height","type":"INT","links":[3731,3733],"slot_index":1}],"properties":{"Node name for S&R":"SetImageSize"},"widgets_values":[1344,768]},{"id":1377,"type":"Image Comparer (rgthree)","pos":[15742.4619140625,-253.3526153564453],"size":[461.9190368652344,413.5953369140625],"flags":{},"order":32,"mode":0,"inputs":[{"name":"image_a","type":"IMAGE","dir":3,"link":3720},{"name":"image_b","type":"IMAGE","dir":3,"link":3729}],"outputs":[],"properties":{"comparer_mode":"Slide"},"widgets_values":[[{"name":"A","selected":true,"url":"/api/view?filename=rgthree.compare._temp_ogxbu_00017_.png&type=temp&subfolder=&rand=0.8732033562598724"},{"name":"B","selected":true,"url":"/api/view?filename=rgthree.compare._temp_ogxbu_00018_.png&type=temp&subfolder=&rand=0.08327234118228466"}]]},{"id":908,"type":"VAEDecode","pos":[15217.7802734375,-312.1965637207031],"size":[210,46],"flags":{"collapsed":true},"order":30,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":3469},{"name":"vae","localized_name":"vae","type":"VAE","link":2696}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2697,3720],"slot_index":0}],"properties":{"Node name for 
S&R":"VAEDecode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[]},{"id":1383,"type":"Note","pos":[14428.40234375,580.1749877929688],"size":[261.9539489746094,88],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Samplers like res_2s in this cycling node will also work and are faster. res_2m and res_3m are even faster, but sometimes the effect takes longer in wall time to fully kick in."],"color":"#432","bgcolor":"#653"},{"id":1384,"type":"Note","pos":[14793.0322265625,518.4120483398438],"size":[261.9539489746094,88],"flags":{},"order":2,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["res_2m or res_3m can be used here instead and are faster, but are less likely to fully clean up lingering artifacts."],"color":"#432","bgcolor":"#653"},{"id":1385,"type":"Note","pos":[14398.345703125,768.2096557617188],"size":[261.9539489746094,88],"flags":{},"order":3,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["method = AdaIN is faster and uses less memory, but is less accurate. 
Some prefer the effect."],"color":"#432","bgcolor":"#653"},{"id":1328,"type":"ClownOptions_SDE_Beta","pos":[14186.4755859375,-132.6126251220703],"size":[315,266],"flags":{"collapsed":true},"order":4,"mode":0,"inputs":[{"name":"etas","localized_name":"etas","type":"SIGMAS","shape":7,"link":null},{"name":"etas_substep","localized_name":"etas_substep","type":"SIGMAS","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[3707],"slot_index":0}],"properties":{"Node name for S&R":"ClownOptions_SDE_Beta"},"widgets_values":["gaussian","gaussian","hard","hard",0.5,0.75,-1,"fixed"]},{"id":1381,"type":"Note","pos":[13881.6279296875,-217.62835693359375],"size":[261.9539489746094,88],"flags":{},"order":5,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Increase or decrease \"steps_to_run\" in ClownsharKSampler to change the effective denoise level."],"color":"#432","bgcolor":"#653"},{"id":1382,"type":"Note","pos":[14718.0498046875,-295.4144592285156],"size":[268.1851806640625,124.49711608886719],"flags":{},"order":6,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Increasing cycles will increase the amount of change, but take longer.\n\nCycles will rerun the same step over and over, forwards and backwards, iteratively refining an image at a controlled noise level."],"color":"#432","bgcolor":"#653"},{"id":1373,"type":"LoadImage","pos":[12810.2314453125,534.0346069335938],"size":[315,314],"flags":{},"order":7,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3721],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"title":"Load Image (Composition)","properties":{"Node name for S&R":"LoadImage"},"widgets_values":["pasted/image 
(476).png","image"]},{"id":1362,"type":"PreviewImage","pos":[13380,620],"size":[210,246],"flags":{},"order":23,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":3682}],"outputs":[],"properties":{"Node name for S&R":"PreviewImage"},"widgets_values":[]},{"id":1390,"type":"Note","pos":[13148.0439453125,257.643310546875],"size":[210,88],"flags":{},"order":8,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Color Match SOMETIMES helps accelerate style transfer.\n"],"color":"#432","bgcolor":"#653"},{"id":1376,"type":"Note","pos":[13710.3271484375,473.56817626953125],"size":[265.1909484863281,137.36415100097656],"flags":{},"order":9,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Increase or decrease weight in ClownGuide to alter adherence to the input image.\n\nFor now, set to low weights or bypass if using any model except HiDream. The HiDream code was adapted so that this composition guide doesn't fight the style guide. 
Others will be added soon."],"color":"#432","bgcolor":"#653"},{"id":1350,"type":"ColorMatch","pos":[13380,160],"size":[210,102],"flags":{"collapsed":false},"order":22,"mode":0,"inputs":[{"name":"image_ref","localized_name":"image_ref","type":"IMAGE","link":3728},{"name":"image_target","localized_name":"image_target","type":"IMAGE","link":3724}],"outputs":[{"name":"image","localized_name":"image","type":"IMAGE","links":[3682,3688],"slot_index":0}],"properties":{"Node name for S&R":"ColorMatch"},"widgets_values":["mkl",0]},{"id":981,"type":"ClownsharkChainsampler_Beta","pos":[14758.255859375,-64.39308166503906],"size":[340.20001220703125,510],"flags":{},"order":29,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":3698},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[3469],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"exponential/res_2s",-1,4,"resample",true]},{"id":1393,"type":"ReSDPatcher","pos":[13246.306640625,-162.28057861328125],"size":[210,82],"flags":{},"order":14,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":3742}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[3741],"slot_index":0}],"properties":{"Node name for S&R":"ReSDPatcher"},"widgets_values":["float64",true]},{"id":1394,"type":"CheckpointLoaderSimple","pos":[12837.810546875,-94.67196655273438],"size":[375.491943359375,98],"flags":{},"order":10,"mode":0,"inputs":[],"outputs":[{"name":"MODEL","localized_name":"MODEL","type":"MODEL","links":[3742],"slot_index":0},{"name":"CLIP","localized_name":"CLIP","type":"CLIP","links":[3743],"slot_index":1},{"name":"VAE","localized_name":"VAE","type":"VAE","links":[3744],"slot_index":2}],"properties":{"Node name for S&R":"CheckpointLoaderSimple"},"widgets_values":["_SDXL_/zavychromaxl_v70.safetensors"]},{"id":1374,"type":"LoadImage","pos":[12805.896484375,167.56053161621094],"size":[315,314],"flags":{},"order":11,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3725],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"title":"Load Image (Style Guide)","properties":{"Node name for S&R":"LoadImage"},"widgets_values":["ChatGPT Image May 13, 2025, 09_18_45 AM.png","image"]},{"id":1333,"type":"CLIPTextEncode","pos":[13688.255859375,-44.393089294433594],"size":[280.6252746582031,164.06936645507812],"flags":{"collapsed":false},"order":20,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":3581}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[3602,3626],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["the inside of a car driving down 
a creepy road"]},{"id":1318,"type":"ClownGuide_Beta","pos":[13828.255859375,675.60693359375],"size":[263.102783203125,290],"flags":{},"order":25,"mode":4,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":3710},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[3699,3708],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuide_Beta"},"widgets_values":["inversion",false,false,0.25,1,"constant",0,-1,false]},{"id":1317,"type":"ClownOptions_Cycles_Beta","pos":[14418.0478515625,-325.06365966796875],"size":[265.2884826660156,202],"flags":{},"order":12,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[3533],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownOptions_Cycles_Beta"},"widgets_values":[10,1,-1,"none",-1,4,false]},{"id":907,"type":"ClownsharKSampler_Beta","pos":[14008.255859375,-64.39308166503906],"size":[340.55120849609375,666.8208618164062],"flags":{},"order":27,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":3740},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":3602},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":2882},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":2983},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":3708},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[3578],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharKSampler_Beta","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":[0.5,"exponential/res_2s","beta57",20,14,1,4,201,"fixed","unsample",true]},{"id":980,"type":"ClownsharkChainsampler_Beta","pos":[14378.255859375,-64.39308166503906],"size":[340.20001220703125,570],"flags":{},"order":28,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":3626},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":3627},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":3578},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":3604},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":3533},{"name":"options 2","type":"OPTIONS","link":3707},{"name":"options 3","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[3698],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"exponential/res_2s",1,4,"resample",true]}],"links":[[18,14,0,7,4,"VAE"],[2696,14,0,908,1,"VAE"],[2697,908,0,909,0,"IMAGE"],[2881,490,0,970,0,"CLIP"],[2882,970,0,907,2,"CONDITIONING"],[2983,7,0,907,3,"LATENT"],[3469,981,0,908,0,"LATENT"],[3533,1317,0,980,6,"OPTIONS"],[3578,907,0,980,4,"LATENT"],[3581,490,0,1333,0,"CLIP"],[3602,1333,0,907,1,"CONDITIONING"],[3604,1308,0,980,5,"GUIDES"],[3626,1333,0,980,1,"CONDITIONING"],[3627,970,0,980,2,"CONDITIONING"],[3682,1350,0,1362,0,"IMAGE"],[3688,1350,0,7,0,"IMAGE"],[3698,980,0,981,4,"LATENT"],[3699,1318,0,1308,3,"GUIDES"],[3707,1328,0,980,7,"OPTIONS"],[3708,1318,0,907,5,"GUIDES"],[3709,7,1,1308,0,"LATENT"],[3710,7,0,1318,0,"LATENT"],[3720,908,0,1377,0,"IMAGE"],[3721,1373,0,1378,0,"*"],[3724,1378,0,1350,1,"IMAGE"],[3725,1374,0,1379,0,"*"],[3726,1379,0,1371,0,"IMAGE"],[3727,1371,0,7,1,"IMAGE"],[3728,1371,0,1350,0,"IMAGE"],[3729,1378,0,1377,1,"IMAGE"],[3730,1380,0,1371,1,"INT"],[3731,1380,1,1371,2,"INT"],[3732,1380,0,7,5,"INT"],[3733,1380,1,7,6,"INT"],[3740,13,0,907,0,"MODEL"],[3741,1393,0,13,0,"*"],[3742,1394,0,1393,0,"MODEL"],[3743,1394,1,490,0,"*"],[3744,1394,2,14,0,"*"]],"groups":[{"id":1,"title":"Model Loaders","bounding":[12796.72265625,-401.9004211425781,822.762451171875,436.0693359375],"color":"#3f789e","font_size":24,"flags":{}},{"id":2,"title":"Sampling","bounding":[13652.6533203125,-402.70721435546875,1470.8076171875,1409.0289306640625],"color":"#3f789e","font_size":24,"flags":{}},{"id":3,"title":"Input Prep","bounding":[12797.1396484375,77.69412231445312,817.4218139648438,820.6239624023438],"color":"#3f789e","font_size":24,"flags":{}},{"id":4,"title":"Save and 
Compare","bounding":[15180.705078125,-399.09112548828125,1050.6468505859375,615.8845825195312],"color":"#3f789e","font_size":24,"flags":{}}],"config":{},"extra":{"ds":{"scale":1.486436280241595,"offset":[-10958.961513232216,457.651089011118]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0,"ue_links":[],"VHS_MetadataImage":true,"VHS_KeepIntermediate":true},"version":0.4}
================================================
FILE: example_workflows/style transfer.json
================================================
{"last_node_id":1408,"last_link_id":3768,"nodes":[{"id":14,"type":"Reroute","pos":[13508.9013671875,-29.283178329467773],"size":[75,26],"flags":{},"order":22,"mode":0,"inputs":[{"name":"","type":"*","link":3737}],"outputs":[{"name":"","type":"VAE","links":[18,2696,3767],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":490,"type":"Reroute","pos":[13508.9013671875,-69.28317260742188],"size":[75,26],"flags":{},"order":21,"mode":0,"inputs":[{"name":"","type":"*","link":3736}],"outputs":[{"name":"","type":"CLIP","links":[2881,3581],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":970,"type":"CLIPTextEncode","pos":[13688.255859375,165.60690307617188],"size":[281.9206848144531,109.87118530273438],"flags":{},"order":25,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":2881}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2882,3627],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["blurry, out of focus, shallow depth of field, jpeg artifacts, low quality, bad quality, unsharp"]},{"id":1379,"type":"Reroute","pos":[13185.853515625,168.15780639648438],"size":[75,26],"flags":{},"order":23,"mode":0,"inputs":[{"name":"","type":"*","link":3747}],"outputs":[{"name":"","type":"IMAGE","links":[3726],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":909,"type":"SaveImage","pos":[15220,-259.5838928222656],"size":[457.3382263183594,422.2065124511719],"flags":{},"order":39,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2697}],"outputs":[],"properties":{"Node name for 
S&R":"SaveImage","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["ComfyUI"]},{"id":1380,"type":"SetImageSize","pos":[13324.7197265625,323.0480041503906],"size":[210,102],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"width","localized_name":"width","type":"INT","links":[3730,3732],"slot_index":0},{"name":"height","localized_name":"height","type":"INT","links":[3731,3733],"slot_index":1}],"properties":{"Node name for S&R":"SetImageSize"},"widgets_values":[1344,768]},{"id":1377,"type":"Image Comparer (rgthree)","pos":[15742.4619140625,-253.3526153564453],"size":[461.9190368652344,413.5953369140625],"flags":{},"order":40,"mode":0,"inputs":[{"name":"image_a","type":"IMAGE","dir":3,"link":3720},{"name":"image_b","type":"IMAGE","dir":3,"link":3768}],"outputs":[],"properties":{"comparer_mode":"Slide"},"widgets_values":[[{"name":"A","selected":true,"url":"/api/view?filename=rgthree.compare._temp_zdjno_00005_.png&type=temp&subfolder=&rand=0.40554525758657745"},{"name":"B","selected":true,"url":"/api/view?filename=rgthree.compare._temp_zdjno_00006_.png&type=temp&subfolder=&rand=0.28640062579003533"}]]},{"id":908,"type":"VAEDecode","pos":[15217.7802734375,-312.1965637207031],"size":[210,46],"flags":{"collapsed":true},"order":38,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":3469},{"name":"vae","localized_name":"vae","type":"VAE","link":2696}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2697,3720],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[]},{"id":1383,"type":"Note","pos":[14428.40234375,580.1749877929688],"size":[261.9539489746094,88],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Samplers like res_2s in this cycling node will also work and are faster. 
res_2m and res_3m are even faster, but sometimes the effect takes longer in wall time to fully kick in."],"color":"#432","bgcolor":"#653"},{"id":1384,"type":"Note","pos":[14793.0322265625,518.4120483398438],"size":[261.9539489746094,88],"flags":{},"order":2,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["res_2m or res_3m can be used here instead and are faster, but are less likely to fully clean up lingering artifacts."],"color":"#432","bgcolor":"#653"},{"id":1328,"type":"ClownOptions_SDE_Beta","pos":[14186.4755859375,-132.6126251220703],"size":[315,266],"flags":{"collapsed":true},"order":3,"mode":0,"inputs":[{"name":"etas","localized_name":"etas","type":"SIGMAS","shape":7,"link":null},{"name":"etas_substep","localized_name":"etas_substep","type":"SIGMAS","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[3707],"slot_index":0}],"properties":{"Node name for S&R":"ClownOptions_SDE_Beta"},"widgets_values":["gaussian","gaussian","hard","hard",0.5,0.75,-1,"fixed"]},{"id":1381,"type":"Note","pos":[13881.6279296875,-217.62835693359375],"size":[261.9539489746094,88],"flags":{},"order":4,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Increase or decrease \"steps_to_run\" in ClownsharKSampler to change the effective denoise level."],"color":"#432","bgcolor":"#653"},{"id":1385,"type":"Note","pos":[14429.50390625,729.0418701171875],"size":[261.9539489746094,88],"flags":{},"order":5,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["method = AdaIN is faster and uses less memory, but is less accurate. 
Some prefer the effect."],"color":"#432","bgcolor":"#653"},{"id":1386,"type":"ClownModelLoader","pos":[12855.7509765625,-269.1963806152344],"size":[335.2314453125,266],"flags":{},"order":6,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[3734],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[3736],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[3737],"slot_index":2}],"properties":{"Node name for S&R":"ClownModelLoader"},"widgets_values":["sd3.5_medium.safetensors","default","clip_g_sd35.safetensors","clip_l_sd35.safetensors","t5xxl_fp16.safetensors",".none","sd3","sd35_vae.safetensors"]},{"id":1378,"type":"Reroute","pos":[13184.07421875,533.128662109375],"size":[75,26],"flags":{},"order":24,"mode":0,"inputs":[{"name":"","type":"*","link":3751}],"outputs":[{"name":"","type":"IMAGE","links":[3742],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1333,"type":"CLIPTextEncode","pos":[13688.255859375,-44.393089294433594],"size":[280.6252746582031,164.06936645507812],"flags":{"collapsed":false},"order":26,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":3581}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[3602,3626],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["evil blacklight mountains by a frozen lake at night at night, wild dangerous looking illustration ,dark pop art style, glowing inverted blackness, 
nothing"]},{"id":980,"type":"ClownsharkChainsampler_Beta","pos":[14378.255859375,-64.39308166503906],"size":[340.20001220703125,570],"flags":{},"order":36,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":3626},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":3627},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":3578},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":3763},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":3533},{"name":"options 2","type":"OPTIONS","link":3707},{"name":"options 3","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[3698],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"exponential/res_5s",1,7,"resample",true]},{"id":907,"type":"ClownsharKSampler_Beta","pos":[14008.255859375,-64.39308166503906],"size":[340.55120849609375,666.8208618164062],"flags":{},"order":35,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":3765},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":3602},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":2882},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":2983},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":3708},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[3578],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharKSampler_Beta","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":[0.5,"multistep/res_2m","beta57",20,14,1,1,202,"fixed","unsample",true]},{"id":981,"type":"ClownsharkChainsampler_Beta","pos":[14758.255859375,-64.39308166503906],"size":[340.20001220703125,510],"flags":{},"order":37,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":3698},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[3469],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"exponential/res_5s",-1,7,"resample",true]},{"id":1373,"type":"LoadImage","pos":[12835.318359375,168.2541046142578],"size":[315,314],"flags":{},"order":7,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3747],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"title":"Load Image (Composition)","properties":{"Node name for 
S&R":"LoadImage"},"widgets_values":["ComfyUI_00492_.png","image"]},{"id":431,"type":"ModelSamplingAdvancedResolution","pos":[13212.6708984375,-154.3930206298828],"size":[260.3999938964844,126],"flags":{},"order":31,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":3735},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":1398}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[3764],"slot_index":0}],"properties":{"Node name for S&R":"ModelSamplingAdvancedResolution","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":["exponential",1.35,0.85]},{"id":13,"type":"Reroute","pos":[13508.9013671875,-109.2831802368164],"size":[75,26],"flags":{},"order":33,"mode":0,"inputs":[{"name":"","type":"*","link":3764}],"outputs":[{"name":"","type":"MODEL","links":[3765],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1387,"type":"ReSD35Patcher","pos":[13242.98046875,-303.1613464355469],"size":[210,82],"flags":{},"order":20,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":3734}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[3735],"slot_index":0}],"properties":{"Node name for S&R":"ReSD35Patcher"},"widgets_values":["float64",true]},{"id":1308,"type":"ClownGuide_Style_Beta","pos":[14122.4169921875,684.2660522460938],"size":[246.31312561035156,286],"flags":{},"order":32,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":3709},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":3740}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[3762],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownGuide_Style_Beta"},"widgets_values":["positive","scattersort",1,1,"constant",0,-1,false]},{"id":1389,"type":"ClownGuide_Style_TileSize","pos":[14761.21484375,704.8385009765625],"size":[223.3114471435547,106],"flags":{},"order":34,"mode":0,"inputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":3762}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[3763],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuide_Style_TileSize"},"widgets_values":[256,192,64]},{"id":1400,"type":"Note","pos":[14773.240234375,866.2615966796875],"size":[298.4509582519531,104.02301025390625],"flags":{},"order":8,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Your image dimensions need to be neatly divisible by these tile dimensions or you will get an error. This node currently will only have an effect with \"scattersort\". It will cause the image to follow your style reference's composition as well."],"color":"#432","bgcolor":"#653"},{"id":1376,"type":"Note","pos":[13703.93359375,509.9842529296875],"size":[261.9539489746094,88],"flags":{},"order":9,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Increase or decrease weight in ClownGuide to alter adherence to the input image."],"color":"#432","bgcolor":"#653"},{"id":1318,"type":"ClownGuide_Beta","pos":[13823.8046875,679.1676025390625],"size":[263.102783203125,290],"flags":{},"order":29,"mode":4,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":3710},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[3708,3740],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownGuide_Beta"},"widgets_values":["inversion",false,false,0.5,1,"constant",0,-1,false]},{"id":1401,"type":"Note","pos":[13818.6318359375,1056.417724609375],"size":[271.7456970214844,88],"flags":{},"order":10,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["This bypassed node can improve adherence to the composition, but the tradeoff is less movement with the style."],"color":"#432","bgcolor":"#653"},{"id":1402,"type":"Note","pos":[14120.05859375,1058.747314453125],"size":[271.7456970214844,88],"flags":{},"order":11,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["WCT is slower, but also an excellent style mode."],"color":"#432","bgcolor":"#653"},{"id":1390,"type":"LoadImage","pos":[12836.228515625,550.88427734375],"size":[315,314],"flags":{},"order":12,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3751],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"title":"Load Image (Style Guide)","properties":{"Node name for S&R":"LoadImage"},"widgets_values":["6a985aaa-8a95-4382-97a9-91cdf96f43d3-Moraine_Lake_Dennis_Frates_Alamy_Stock_Photo.jpg","image"]},{"id":1403,"type":"Note","pos":[12890.732421875,-557.807373046875],"size":[271.7456970214844,88],"flags":{},"order":13,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["If you wish to use another model, just load it in the ClownModelLoader (which is an efficiency node) or via your usual loader nodes. There is a Flux loader specifically for loading Redux as well. 
"],"color":"#432","bgcolor":"#653"},{"id":1405,"type":"Note","pos":[12480.912109375,-186.05596923828125],"size":[271.7456970214844,88],"flags":{},"order":14,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["If you load the wrong clip, you may get some very strange errors from ComfyUI about an \"attn_mask\" etc."],"color":"#432","bgcolor":"#653"},{"id":1404,"type":"Note","pos":[13214.4140625,-591.9750366210938],"size":[561.9423828125,149.42193603515625],"flags":{},"order":15,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["You will need to use the appropriate patcher node to use other models.\n\nSD1.5, SDXL: ReSDPatcher\nStable Cascade: natively supported by https://github.com/ClownsharkBatwing/UltraCascade\nSD3.5: ReSD3.5Patcher\nFlux: ReFluxPatcher\nHiDream: ReHiDreamPatcher\nAuraFlow: ReAuraPatcher\nWAN: ReWanPatcher\nLTXV: ReLTXVPatcher"],"color":"#432","bgcolor":"#653"},{"id":1406,"type":"Note","pos":[14420.2861328125,-528.9069213867188],"size":[261.9539489746094,88],"flags":{},"order":16,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["res_5s is a very high quality sampler that can really help SD3.5M become a much more coherent model. It is slow, however. 
Try res_2s or even res_2m if you want more speed."],"color":"#322","bgcolor":"#533"},{"id":1371,"type":"Image Repeat Tile To Size","pos":[13345.26171875,497.8262939453125],"size":[210,146],"flags":{"collapsed":true},"order":27,"mode":4,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","link":3726},{"name":"width","type":"INT","pos":[10,36],"widget":{"name":"width"},"link":3730},{"name":"height","type":"INT","pos":[10,60],"widget":{"name":"height"},"link":3731}],"outputs":[{"name":"image","localized_name":"image","type":"IMAGE","links":[3727],"slot_index":0}],"properties":{"Node name for S&R":"Image Repeat Tile To Size"},"widgets_values":[1024,1024,true]},{"id":1407,"type":"Note","pos":[13314.3076171875,171.45277404785156],"size":[271.7456970214844,88],"flags":{},"order":17,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Enable the bypassed ImageRepeatToTile node if you're using Flux and getting blurry outputs."],"color":"#432","bgcolor":"#653"},{"id":1382,"type":"Note","pos":[14718.0498046875,-295.4144592285156],"size":[288.7483215332031,156.81048583984375],"flags":{},"order":18,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Increasing cycles will increase the amount of change, but take longer.\n\nCycles will rerun the same step over and over, forwards and backwards, iteratively refining an image at a controlled noise level.\n\nTry reducing cycles if you want to stay very close to the original composition."],"color":"#432","bgcolor":"#653"},{"id":1317,"type":"ClownOptions_Cycles_Beta","pos":[14418.048828125,-327.3294982910156],"size":[265.2884826660156,202],"flags":{},"order":19,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[3533],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownOptions_Cycles_Beta"},"widgets_values":[5,1,0.5,"none",-1,7,true]},{"id":7,"type":"VAEEncodeAdvanced","pos":[13343.19140625,556.8784790039062],"size":[261.2217712402344,298],"flags":{"collapsed":false},"order":28,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":3742},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":3727},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":18},{"name":"width","type":"INT","pos":[10,160],"widget":{"name":"width"},"link":3732},{"name":"height","type":"INT","pos":[10,184],"widget":{"name":"height"},"link":3733}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[2983,3710,3766],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":[3709],"slot_index":1},{"name":"mask","localized_name":"mask","type":"MASK","links":[],"slot_index":2},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[1398],"slot_index":3},{"name":"width","localized_name":"width","type":"INT","links":[],"slot_index":4},{"name":"height","localized_name":"height","type":"INT","links":[],"slot_index":5}],"properties":{"Node name for S&R":"VAEEncodeAdvanced","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":["false",1344,768,"red",false,"16_channels"]},{"id":1408,"type":"VAEDecode","pos":[15377.6826171875,-315.0729064941406],"size":[210,46],"flags":{"collapsed":true},"order":30,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":3766},{"name":"vae","localized_name":"vae","type":"VAE","link":3767}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3768],"slot_index":0}],"properties":{"Node name for 
S&R":"VAEDecode"}}],"links":[[18,14,0,7,4,"VAE"],[1398,7,3,431,1,"LATENT"],[2696,14,0,908,1,"VAE"],[2697,908,0,909,0,"IMAGE"],[2881,490,0,970,0,"CLIP"],[2882,970,0,907,2,"CONDITIONING"],[2983,7,0,907,3,"LATENT"],[3469,981,0,908,0,"LATENT"],[3533,1317,0,980,6,"OPTIONS"],[3578,907,0,980,4,"LATENT"],[3581,490,0,1333,0,"CLIP"],[3602,1333,0,907,1,"CONDITIONING"],[3626,1333,0,980,1,"CONDITIONING"],[3627,970,0,980,2,"CONDITIONING"],[3698,980,0,981,4,"LATENT"],[3707,1328,0,980,7,"OPTIONS"],[3708,1318,0,907,5,"GUIDES"],[3709,7,1,1308,0,"LATENT"],[3710,7,0,1318,0,"LATENT"],[3720,908,0,1377,0,"IMAGE"],[3726,1379,0,1371,0,"IMAGE"],[3727,1371,0,7,1,"IMAGE"],[3730,1380,0,1371,1,"INT"],[3731,1380,1,1371,2,"INT"],[3732,1380,0,7,5,"INT"],[3733,1380,1,7,6,"INT"],[3734,1386,0,1387,0,"MODEL"],[3735,1387,0,431,0,"MODEL"],[3736,1386,1,490,0,"*"],[3737,1386,2,14,0,"*"],[3740,1318,0,1308,3,"GUIDES"],[3742,1378,0,7,0,"IMAGE"],[3747,1373,0,1379,0,"*"],[3751,1390,0,1378,0,"*"],[3762,1308,0,1389,0,"GUIDES"],[3763,1389,0,980,5,"GUIDES"],[3764,431,0,13,0,"*"],[3765,13,0,907,0,"MODEL"],[3766,7,0,1408,0,"LATENT"],[3767,14,0,1408,1,"VAE"],[3768,1408,0,1377,1,"IMAGE"]],"groups":[{"id":1,"title":"Model Loaders","bounding":[12796.72265625,-401.9004211425781,822.762451171875,436.0693359375],"color":"#3f789e","font_size":24,"flags":{}},{"id":2,"title":"Sampling","bounding":[13652.6533203125,-402.70721435546875,1470.8076171875,1409.0289306640625],"color":"#3f789e","font_size":24,"flags":{}},{"id":3,"title":"Input Prep","bounding":[12797.1396484375,77.69412231445312,817.4218139648438,820.6239624023438],"color":"#3f789e","font_size":24,"flags":{}},{"id":4,"title":"Save and 
Compare","bounding":[15180.705078125,-399.09112548828125,1050.6468505859375,615.8845825195312],"color":"#3f789e","font_size":24,"flags":{}}],"config":{},"extra":{"ds":{"scale":1.188365497732567,"offset":[-11346.93636409885,735.4056846100609]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0,"ue_links":[],"VHS_MetadataImage":true,"VHS_KeepIntermediate":true},"version":0.4}
================================================
FILE: example_workflows/ultracascade txt2img style transfer.json
================================================
{"last_node_id":43,"last_link_id":52,"nodes":[{"id":1,"type":"VAEDecode","pos":[2240,3610],"size":[210,46],"flags":{"collapsed":false},"order":37,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":1},{"name":"vae","localized_name":"vae","type":"VAE","link":2,"slot_index":1}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","shape":3,"links":[5],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode"},"widgets_values":[]},{"id":2,"type":"LoraLoader","pos":[-24.50164031982422,3718.225341796875],"size":[359.7619323730469,126],"flags":{},"order":18,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":3},{"name":"clip","localized_name":"clip","type":"CLIP","link":4}],"outputs":[{"name":"MODEL","localized_name":"MODEL","type":"MODEL","links":[7],"slot_index":0},{"name":"CLIP","localized_name":"CLIP","type":"CLIP","links":[6,8],"slot_index":1}],"properties":{"Node name for S&R":"LoraLoader"},"widgets_values":["csbw_cascade_dark_ema.safetensors",1,1]},{"id":4,"type":"SharkOptions_UltraCascade_Latent_Beta","pos":[1890,4480],"size":[310.79998779296875,82],"flags":{},"order":0,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[22],"slot_index":0}],"properties":{"Node name for S&R":"SharkOptions_UltraCascade_Latent_Beta"},"widgets_values":[1536,1536]},{"id":5,"type":"SharkOptions_UltraCascade_Latent_Beta","pos":[797.6149291992188,4484.87158203125],"size":[310.79998779296875,82],"flags":{},"order":1,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[12],"slot_index":0}],"properties":{"Node name for 
S&R":"SharkOptions_UltraCascade_Latent_Beta"},"widgets_values":[24,24]},{"id":6,"type":"SharkOptions_UltraCascade_Latent_Beta","pos":[1157.109375,4484.87158203125],"size":[310.79998779296875,82],"flags":{},"order":2,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[17],"slot_index":0}],"properties":{"Node name for S&R":"SharkOptions_UltraCascade_Latent_Beta"},"widgets_values":[36,36]},{"id":8,"type":"VAELoader","pos":[1900,3600],"size":[294.6280212402344,58],"flags":{},"order":3,"mode":0,"inputs":[],"outputs":[{"name":"VAE","localized_name":"VAE","type":"VAE","links":[2,51],"slot_index":0}],"properties":{"Node name for S&R":"VAELoader"},"widgets_values":["stage_a_ft_hq.safetensors"]},{"id":10,"type":"UltraCascade_Loader","pos":[-394.08612060546875,3670.32373046875],"size":[345.5117492675781,82.95540618896484],"flags":{},"order":4,"mode":0,"inputs":[],"outputs":[{"name":"MODEL","localized_name":"MODEL","type":"MODEL","shape":3,"links":[3],"slot_index":0}],"properties":{"Node name for S&R":"UltraCascade_Loader"},"widgets_values":["stage_c_bf16.safetensors","ultrapixel_t2i.safetensors"]},{"id":13,"type":"CLIPTextEncode","pos":[355.95135498046875,3972.858154296875],"size":[356.2470703125,110.6326904296875],"flags":{},"order":25,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":8}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[11],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["low quality, bad quality, low detail, blurry, unsharp"]},{"id":9,"type":"CLIPLoader","pos":[-394.50164794921875,3810.115478515625],"size":[344.635498046875,98],"flags":{},"order":5,"mode":0,"inputs":[],"outputs":[{"name":"CLIP","localized_name":"CLIP","type":"CLIP","links":[4],"slot_index":0}],"properties":{"Node name for 
S&R":"CLIPLoader"},"widgets_values":["cascade_text_encoder.safetensors","stable_cascade","default"]},{"id":20,"type":"VAELoader","pos":[-376.8145751953125,3973.57080078125],"size":[315,58],"flags":{},"order":6,"mode":0,"inputs":[],"outputs":[{"name":"VAE","localized_name":"VAE","type":"VAE","links":[24,25]}],"properties":{"Node name for S&R":"VAELoader"},"widgets_values":["effnet_encoder.safetensors"]},{"id":22,"type":"UltraCascade_StageC_VAEEncode_Exact","pos":[-140,4520],"size":[302.3999938964844,102],"flags":{},"order":21,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","link":34},{"name":"vae","localized_name":"vae","type":"VAE","link":25}],"outputs":[{"name":"stage_c","localized_name":"stage_c","type":"LATENT","links":[31,32],"slot_index":0}],"properties":{"Node name for S&R":"UltraCascade_StageC_VAEEncode_Exact"},"widgets_values":[36,36]},{"id":19,"type":"UltraCascade_StageC_VAEEncode_Exact","pos":[-140,4160],"size":[302.3999938964844,102],"flags":{},"order":20,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","link":33},{"name":"vae","localized_name":"vae","type":"VAE","link":24}],"outputs":[{"name":"stage_c","localized_name":"stage_c","type":"LATENT","links":[27,28],"slot_index":0}],"properties":{"Node name for S&R":"UltraCascade_StageC_VAEEncode_Exact"},"widgets_values":[24,24]},{"id":17,"type":"ClownGuide_Style_Beta","pos":[190,4160],"size":[244.26441955566406,286],"flags":{},"order":26,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":27},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[23],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownGuide_Style_Beta"},"widgets_values":["positive","WCT",1,1,"constant",0,-1,false]},{"id":18,"type":"ClownGuide_Style_Beta","pos":[470,4160],"size":[244.26441955566406,286],"flags":{},"order":29,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":28},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":23}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[29],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuide_Style_Beta"},"widgets_values":["negative","WCT",1,1,"constant",0,-1,false]},{"id":12,"type":"UltraCascade_PerturbedAttentionGuidance","pos":[361.78070068359375,3621.58740234375],"size":[344.3999938964844,58],"flags":{},"order":23,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":7}],"outputs":[{"name":"MODEL","localized_name":"MODEL","type":"MODEL","links":[9],"slot_index":0}],"properties":{"Node name for S&R":"UltraCascade_PerturbedAttentionGuidance"},"widgets_values":[3]},{"id":3,"type":"SaveImage","pos":[2240,3720],"size":[753.4503784179688,734.7869262695312],"flags":{},"order":38,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":5}],"outputs":[],"properties":{},"widgets_values":["ComfyUI"]},{"id":27,"type":"ClownOptions_Cycles_Beta","pos":[1158.6995849609375,3539.621337890625],"size":[315,130],"flags":{},"order":7,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[]}],"properties":{"Node name for 
S&R":"ClownOptions_Cycles_Beta"},"widgets_values":[10,1,0.5,5.5]},{"id":21,"type":"ClownGuide_Style_Beta","pos":[190,4520],"size":[244.26441955566406,286],"flags":{},"order":27,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":32},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[26],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuide_Style_Beta"},"widgets_values":["positive","WCT",1,1,"constant",0,20,false]},{"id":11,"type":"CLIPTextEncode","pos":[359.33685302734375,3742.75537109375],"size":[351.592529296875,173.00360107421875],"flags":{},"order":24,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":6}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[10],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["impasto oil painting by Yayoi Kusama and Lisa Frank, thick paint textures, tunning contrasts at night with stylish roughly drawn thick black lines, a nuclear explosion destroying a city, its towering wide glowing nuclear mushroom cloud enveloping the entire skyline, the nuclear fireball lighting up the dark sky"]},{"id":7,"type":"UNETLoader","pos":[1520,3580],"size":[356.544677734375,82],"flags":{},"order":8,"mode":0,"inputs":[],"outputs":[{"name":"MODEL","localized_name":"MODEL","type":"MODEL","links":[40],"slot_index":0}],"properties":{"Node name for 
S&R":"UNETLoader"},"widgets_values":["stage_b_lite_CSBW_v1.1.safetensors","default"]},{"id":31,"type":"UltraCascade_StageB_Patcher","pos":[1901.8192138671875,3508.625244140625],"size":[235.1999969482422,26],"flags":{},"order":19,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":40}],"outputs":[{"name":"MODEL","localized_name":"MODEL","type":"MODEL","links":[41],"slot_index":0}],"properties":{"Node name for S&R":"UltraCascade_StageB_Patcher"},"widgets_values":[]},{"id":15,"type":"ClownsharKSampler_Beta","pos":[1155.5926513671875,3724.48974609375],"size":[314.421142578125,693.9824829101562],"flags":{},"order":34,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":16},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":30},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":17},{"name":"options 2","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[35],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"exponential/res_3s","beta57",30,1,1,5.5,100,"fixed","standard",true]},{"id":26,"type":"ClownsharkChainsampler_Beta","pos":[1520.32470703125,3723.215087890625],"size":[315,510],"flags":{},"order":36,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":35},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":37},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"exponential/res_3s",-1,5.5,"resample",true]},{"id":14,"type":"ClownsharKSampler_Beta","pos":[796.9224243164062,3725.34375],"size":[311.41375732421875,693.9824829101562],"flags":{},"order":32,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":9},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":10},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":11},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":29},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":12},{"name":"options 2","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[16,43],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"exponential/res_3s","beta57",30,-1,1,5.5,1,"fixed","standard",true]},{"id":23,"type":"ClownGuide_Style_Beta","pos":[470,4520],"size":[244.26441955566406,286],"flags":{},"order":30,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":31},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":26}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[30,37],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownGuide_Style_Beta"},"widgets_values":["negative","WCT",1,1,"constant",0,20,false]},{"id":24,"type":"LoadImage","pos":[-497.6204833984375,4160.34375],"size":[315,314],"flags":{},"order":9,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[33,34,49],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["ChatGPT Image May 13, 2025, 09_38_14 AM.png","image"]},{"id":16,"type":"ClownsharKSampler_Beta","pos":[1890,3720],"size":[309.2452087402344,691.814208984375],"flags":{},"order":35,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":41},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":43},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":52},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":22},{"name":"options 2","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[1],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"exponential/res_3s","beta57",30,-1,1,1,-1,"fixed","standard",true]},{"id":38,"type":"Note","pos":[-398.6913757324219,3401.711669921875],"size":[336.9422302246094,88],"flags":{},"order":10,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Check out the \"ultracascade txt2img\" workflow for non-style related explanations of this 
workflow."],"color":"#432","bgcolor":"#653"},{"id":39,"type":"Note","pos":[-515.8250732421875,4543.421875],"size":[342.7132263183594,118.7740249633789],"flags":{},"order":11,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["This image serves as a style/color palette reference.\n\nInclude something about the style in the prompt (painting, illustration, pen drawing, etc.) or use ClipVision (which is very good with Cascade) if you wish to ensure that more than just the color palette is transferred."],"color":"#432","bgcolor":"#653"},{"id":32,"type":"ClownGuide_Style_Beta","pos":[2040,4710],"size":[236.5709686279297,286],"flags":{},"order":33,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":47},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":45}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[52],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuide_Style_Beta"},"widgets_values":["negative","WCT",1,1,"constant",0,-1,false]},{"id":33,"type":"ClownGuide_Style_Beta","pos":[1775.3868408203125,4709.03857421875],"size":[238.49423217773438,286],"flags":{},"order":31,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":48},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[45],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownGuide_Style_Beta"},"widgets_values":["positive","WCT",1,1,"constant",0,-1,false]},{"id":34,"type":"VAEEncode","pos":[1598.26904296875,4709.12841796875],"size":[140,46],"flags":{},"order":28,"mode":0,"inputs":[{"name":"pixels","localized_name":"pixels","type":"IMAGE","link":50},{"name":"vae","localized_name":"vae","type":"VAE","link":51}],"outputs":[{"name":"LATENT","localized_name":"LATENT","type":"LATENT","links":[47,48],"slot_index":0}],"properties":{"Node name for S&R":"VAEEncode"},"widgets_values":[]},{"id":35,"type":"ImageResize+","pos":[1359.3343505859375,4709.12890625],"size":[210,218],"flags":{},"order":22,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","link":49}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[50],"slot_index":0},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"ImageResize+"},"widgets_values":[1536,1536,"lanczos","stretch","always",0]},{"id":40,"type":"Note","pos":[778.7919921875,4684.98095703125],"size":[342.7132263183594,118.7740249633789],"flags":{},"order":12,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Set end_step to -1 (which means \"infinity\", \"run until the end\") or 10000, etc. if you wish to use the style guide for all steps. Sometimes this can cause a bit of a CFG burned look, so mileage may vary. "],"color":"#432","bgcolor":"#653"},{"id":36,"type":"Note","pos":[1889.730712890625,3353.24462890625],"size":[314.823486328125,88],"flags":{},"order":13,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["This patcher is only needed if you wish to use the style guide with stage B. 
It'll improve adherence to the colors in the style guide."],"color":"#432","bgcolor":"#653"},{"id":37,"type":"Note","pos":[1153.2738037109375,3351.5126953125],"size":[410.0306701660156,88],"flags":{},"order":14,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Connect ClownOptions Cycles to the node below to increase the effect even more. It will cause it to rerun the single step this node is set to run (steps_to_run == 1), by unsampling, sampling, unsampling, sampling, etc. in a loop."],"color":"#432","bgcolor":"#653"},{"id":42,"type":"SharkOptions_Beta","pos":[478.9419860839844,3353.24462890625],"size":[230.37158203125,130],"flags":{},"order":15,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"SharkOptions_Beta"},"widgets_values":["perlin",1,1,false]},{"id":43,"type":"Note","pos":[97.72860717773438,3353.8193359375],"size":[336.9422302246094,88],"flags":{},"order":17,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["TIP: Try connecting the options nodes to the right to some of the samplers. 
It'll replace the default noise types with perlin, which can be quite good with Cascade."],"color":"#432","bgcolor":"#653"},{"id":41,"type":"ClownOptions_SDE_Beta","pos":[801.105712890625,3352.283203125],"size":[301.5363464355469,266],"flags":{},"order":16,"mode":0,"inputs":[{"name":"etas","localized_name":"etas","type":"SIGMAS","shape":7,"link":null},{"name":"etas_substep","localized_name":"etas_substep","type":"SIGMAS","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownOptions_SDE_Beta"},"widgets_values":["perlin","perlin","hard","hard",0.5,0.5,-1,"fixed"]}],"links":[[1,16,0,1,0,"LATENT"],[2,8,0,1,1,"VAE"],[3,10,0,2,0,"MODEL"],[4,9,0,2,1,"CLIP"],[5,1,0,3,0,"IMAGE"],[6,2,1,11,0,"CLIP"],[7,2,0,12,0,"MODEL"],[8,2,1,13,0,"CLIP"],[9,12,0,14,0,"MODEL"],[10,11,0,14,1,"CONDITIONING"],[11,13,0,14,2,"CONDITIONING"],[12,5,0,14,6,"OPTIONS"],[16,14,0,15,3,"LATENT"],[17,6,0,15,6,"OPTIONS"],[22,4,0,16,6,"OPTIONS"],[23,17,0,18,3,"GUIDES"],[24,20,0,19,1,"VAE"],[25,20,0,22,1,"VAE"],[26,21,0,23,3,"GUIDES"],[27,19,0,17,0,"LATENT"],[28,19,0,18,0,"LATENT"],[29,18,0,14,5,"GUIDES"],[30,23,0,15,5,"GUIDES"],[31,22,0,23,0,"LATENT"],[32,22,0,21,0,"LATENT"],[33,24,0,19,0,"IMAGE"],[34,24,0,22,0,"IMAGE"],[35,15,0,26,4,"LATENT"],[37,23,0,26,5,"GUIDES"],[40,7,0,31,0,"MODEL"],[41,31,0,16,0,"MODEL"],[43,14,0,16,3,"LATENT"],[45,33,0,32,3,"GUIDES"],[47,34,0,32,0,"LATENT"],[48,34,0,33,0,"LATENT"],[49,24,0,35,0,"IMAGE"],[50,35,0,34,0,"IMAGE"],[51,8,0,34,1,"VAE"],[52,32,0,16,5,"GUIDES"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.2100000000000006,"offset":[2416.6858398230765,-3132.1930084977703]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0},"version":0.4}
================================================
FILE: example_workflows/ultracascade txt2img.json
================================================
{"last_node_id":33,"last_link_id":23,"nodes":[{"id":1,"type":"VAEDecode","pos":[1867.32421875,3610.962158203125],"size":[210,46],"flags":{"collapsed":false},"order":29,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":1},{"name":"vae","localized_name":"vae","type":"VAE","link":2,"slot_index":1}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","shape":3,"links":[5],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode"},"widgets_values":[]},{"id":2,"type":"LoraLoader","pos":[-24.50164031982422,3718.225341796875],"size":[359.7619323730469,126],"flags":{},"order":22,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":3},{"name":"clip","localized_name":"clip","type":"CLIP","link":4}],"outputs":[{"name":"MODEL","localized_name":"MODEL","type":"MODEL","links":[7],"slot_index":0},{"name":"CLIP","localized_name":"CLIP","type":"CLIP","links":[6,8],"slot_index":1}],"properties":{"Node name for S&R":"LoraLoader"},"widgets_values":["csbw_cascade_dark_ema.safetensors",1,1]},{"id":4,"type":"SharkOptions_UltraCascade_Latent_Beta","pos":[1522.302734375,4481.47900390625],"size":[310.79998779296875,82],"flags":{},"order":0,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[22],"slot_index":0}],"properties":{"Node name for S&R":"SharkOptions_UltraCascade_Latent_Beta"},"widgets_values":[1536,1536]},{"id":5,"type":"SharkOptions_UltraCascade_Latent_Beta","pos":[797.6149291992188,4484.87158203125],"size":[310.79998779296875,82],"flags":{},"order":1,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[12],"slot_index":0}],"properties":{"Node name for 
S&R":"SharkOptions_UltraCascade_Latent_Beta"},"widgets_values":[24,24]},{"id":6,"type":"SharkOptions_UltraCascade_Latent_Beta","pos":[1157.109375,4484.87158203125],"size":[310.79998779296875,82],"flags":{},"order":2,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[17],"slot_index":0}],"properties":{"Node name for S&R":"SharkOptions_UltraCascade_Latent_Beta"},"widgets_values":[36,36]},{"id":7,"type":"UNETLoader","pos":[1149.8580322265625,3582.3779296875],"size":[356.544677734375,82],"flags":{},"order":3,"mode":0,"inputs":[],"outputs":[{"name":"MODEL","localized_name":"MODEL","type":"MODEL","links":[18],"slot_index":0}],"properties":{"Node name for S&R":"UNETLoader"},"widgets_values":["stage_b_lite_CSBW_v1.1.safetensors","default"]},{"id":8,"type":"VAELoader","pos":[1533.0584716796875,3605.814697265625],"size":[294.6280212402344,58],"flags":{},"order":4,"mode":0,"inputs":[],"outputs":[{"name":"VAE","localized_name":"VAE","type":"VAE","links":[2],"slot_index":0}],"properties":{"Node name for S&R":"VAELoader"},"widgets_values":["stage_a_ft_hq.safetensors"]},{"id":10,"type":"UltraCascade_Loader","pos":[-394.08612060546875,3670.32373046875],"size":[345.5117492675781,82.95540618896484],"flags":{},"order":5,"mode":0,"inputs":[],"outputs":[{"name":"MODEL","localized_name":"MODEL","type":"MODEL","shape":3,"links":[3],"slot_index":0}],"properties":{"Node name for S&R":"UltraCascade_Loader"},"widgets_values":["stage_c_bf16.safetensors","ultrapixel_t2i.safetensors"]},{"id":11,"type":"CLIPTextEncode","pos":[359.33685302734375,3742.75537109375],"size":[351.592529296875,173.00360107421875],"flags":{},"order":24,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":6}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[10,14,19],"slot_index":0}],"properties":{"Node name for 
S&R":"CLIPTextEncode"},"widgets_values":["impasto oil painting by Yayoi Kusama and Lisa Frank, thick paint textures, tunning contrasts at night with stylish roughly drawn thick black lines, a nuclear explosion destroying a city, its towering wide glowing nuclear mushroom cloud enveloping the entire skyline, the nuclear fireball lighting up the dark sky"]},{"id":12,"type":"UltraCascade_PerturbedAttentionGuidance","pos":[361.78070068359375,3621.58740234375],"size":[344.3999938964844,58],"flags":{},"order":23,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":7}],"outputs":[{"name":"MODEL","localized_name":"MODEL","type":"MODEL","links":[9,13],"slot_index":0}],"properties":{"Node name for S&R":"UltraCascade_PerturbedAttentionGuidance"},"widgets_values":[3]},{"id":13,"type":"CLIPTextEncode","pos":[355.95135498046875,3972.858154296875],"size":[356.2470703125,110.6326904296875],"flags":{},"order":25,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":8}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[11,15,20],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["low quality, bad quality, low detail, blurry, 
unsharp"]},{"id":14,"type":"ClownsharKSampler_Beta","pos":[796.9224243164062,3725.34375],"size":[311.41375732421875,693.9824829101562],"flags":{},"order":26,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":9},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":10},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":11},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":12},{"name":"options 2","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[16],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"exponential/res_3s","beta57",30,-1,1,5.5,1,"fixed","standard",true]},{"id":16,"type":"ClownsharKSampler_Beta","pos":[1522.29052734375,3722.670654296875],"size":[309.2452087402344,691.814208984375],"flags":{},"order":28,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":18},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":19},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":20},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":21},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":22},{"name":"options 2","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[1],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"exponential/res_3s","beta57",30,-1,1,1,-1,"fixed","standard",true]},{"id":9,"type":"CLIPLoader","pos":[-394.50164794921875,3810.115478515625],"size":[344.635498046875,98],"flags":{},"order":6,"mode":0,"inputs":[],"outputs":[{"name":"CLIP","localized_name":"CLIP","type":"CLIP","links":[4],"slot_index":0}],"properties":{"Node name for 
S&R":"CLIPLoader"},"widgets_values":["cascade_text_encoder.safetensors","stable_cascade","default"]},{"id":15,"type":"ClownsharKSampler_Beta","pos":[1155.5926513671875,3724.48974609375],"size":[314.421142578125,693.9824829101562],"flags":{},"order":27,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":13},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":14},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":15},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":16},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":17},{"name":"options 2","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[21],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"exponential/res_3s","beta57",30,-1,1,5.5,100,"fixed","standard",true]},{"id":20,"type":"Note","pos":[1150,4640],"size":[331.63720703125,415.29815673828125],"flags":{},"order":7,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Stage UP: a patched version of Stable Cascade stage C (\"UltraPixel\"). \n\nThe key with these dimensions is to keep the aspect ratio the same as the stage C latent. Typically, best results are with a 1.5x upscale. 2.0x works, but will result in somewhat more issues with doubling, and can be a lot slower. However, the detail level will also be very high.\n\nSome viable resolutions are listed below. 
Asterisks signify ones that have been verified to work particularly well.\n\n32x32\n36x36 **\n40x40\n42x42\n48x48 *\n\n40x24\n50x30\n60x36 **\n70x42\n80x48 *\n\n72x36 \n80x40 *\n96x48 (very slow!)\n\n\n\n"],"color":"#432","bgcolor":"#653"},{"id":21,"type":"Note","pos":[1520,4640],"size":[331.63720703125,415.29815673828125],"flags":{},"order":8,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Stage B: the Stable Cascade superresolution model.\n\nAs with stage UP, the key with these dimensions is to keep the aspect ratio the same as the prior latents. Theoretically, any resolution may be used, though some odd distortions can occur when the ideal upscale ratio is not used. It's not entirely clear what those ratios are, so some experimentation may be necessary. \n\nSome resolutions that work particularly well are:\n\n1536x1536 *\n2048x2048 *\n\n1600x960\n2560x1536 **\n2880x1792 *\n3200x1920\n\nIf you use stage B lite, you can hit 4k resolutions without even using more than 12GB of VRAM.\n\nIt's highly recommended to use the CSBW finetune of stage B, as it fixes many of the severe artifact problems the original release had.\n\nNote: CFG is not needed for this stage!"],"color":"#432","bgcolor":"#653"},{"id":19,"type":"Note","pos":[780,4640],"size":[331.63720703125,415.29815673828125],"flags":{},"order":9,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Stage C: the original Stable Cascade version. \n\nStable Cascade latents are actually quite small: typically, a 1024x1024 image will be generated from a stage C latent that is only 24x24 (for comparison, with SDXL or SD1.5, the dimensions are 128x128). \n\n\"Compression\" is just a shorthand method of determining these dimensions, such as 24x24 (1024 / 42 = 24.38, which means a \"compression\" of 42).\n\nThis poses a problem though: Cascade was only trained on a handful of resolutions. The difference between 24x24 and 25x25 is a significant drop in quality and coherence. 
Therefore, it is best to just set these dimensions directly.\n\nThe best trained resolutions are:\n\n24x24 > 32x32\n30x16 > 40x24 \n\n48x24 also works, but seems to result in more doubling problems than the others.\n\n\n"],"color":"#432","bgcolor":"#653"},{"id":23,"type":"Note","pos":[-1140,3810],"size":[715.61083984375,89.37511444091797],"flags":{},"order":10,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Any clip G will do. The Cascade version is available at:\n\nhttps://huggingface.co/stabilityai/stable-cascade/blob/main/text_encoder/model.bf16.safetensors\n\n"],"color":"#432","bgcolor":"#653"},{"id":22,"type":"Note","pos":[-1140,3590],"size":[717.709228515625,165.61032104492188],"flags":{},"order":11,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["I recommend the BF16 version of stage C. There is no visible difference vs. the full precision weights, and it halves the disk space requirements.\n\nhttps://huggingface.co/stabilityai/stable-cascade/blob/main/stage_c_bf16.safetensors\n\nIMPORTANT: The original UltraPixel \"safetensors\" is not a safetensors at all - it is a PICKLE, where they lazily (at best) changed the file extension to \".safetensors\"!\n\nI converted it to a real safetensors file, and it's available below:\n\nhttps://huggingface.co/ClownsharkBatwing/ultrapixel_convert/blob/main/ultrapixel_t2i.safetensors"],"color":"#432","bgcolor":"#653"},{"id":26,"type":"Note","pos":[570,3250],"size":[457.5304870605469,94.27093505859375],"flags":{},"order":12,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["This is a checkpoint that, for convenience, includes the stage B lite CSBW finetune, clip G, and stage A (the FT_HQ 
finetune).\n\nhttps://huggingface.co/ClownsharkBatwing/CSBW_Style/blob/main/cascade_B-lite_refined_CSBW_v1.1.safetensors"],"color":"#432","bgcolor":"#653"},{"id":27,"type":"Note","pos":[1050,3420],"size":[457.5304870605469,94.27093505859375],"flags":{},"order":13,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["This is the stage B lite CSBW finetune (model only).\n\nhttps://huggingface.co/ClownsharkBatwing/Cascade_Stage_B_CSBW_Refined/blob/main/stage_b_lite_CSBW_v1.1.safetensors"],"color":"#432","bgcolor":"#653"},{"id":25,"type":"Note","pos":[305.43292236328125,3455.5634765625],"size":[457.5304870605469,94.27093505859375],"flags":{},"order":14,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Perturbed attention guidance (PAG) makes an enormous difference with Stable Cascade stages C and UP. Like CFG, it will double the runtime."],"color":"#432","bgcolor":"#653"},{"id":29,"type":"Note","pos":[1534.365478515625,3422.38427734375],"size":[547.0546875,91.47331237792969],"flags":{},"order":15,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["This is a finetune of stage A. 
You will get a sharper image, but in images with large white areas, small circular grey halos are sometimes visible.\n\nhttps://huggingface.co/madebyollin/stage-a-ft-hq/blob/main/stage_a_ft_hq.safetensors"],"color":"#432","bgcolor":"#653"},{"id":28,"type":"CheckpointLoaderSimple","pos":[1054.370849609375,3250],"size":[452.7829895019531,102.89583587646484],"flags":{},"order":16,"mode":0,"inputs":[],"outputs":[{"name":"MODEL","localized_name":"MODEL","type":"MODEL","links":null},{"name":"CLIP","localized_name":"CLIP","type":"CLIP","links":null},{"name":"VAE","localized_name":"VAE","type":"VAE","links":null}],"properties":{"Node name for S&R":"CheckpointLoaderSimple"},"widgets_values":["cascade_B-lite_refined_CSBW_v1.1.safetensors"]},{"id":24,"type":"Note","pos":[-1140,3960],"size":[715.61083984375,113.57872772216797],"flags":{},"order":17,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["The LORA was trained with OneTrainer (https://github.com/Nerogar/OneTrainer) on some of my own SDXL generations. It has deep colors and is strong with wacky paint, illustration, and vector art styles. \n\nCascade learns extremely quickly and is very adept with artistic styles (it knows many artist names).\n\nhttps://huggingface.co/ClownsharkBatwing/CSBW_Style/blob/main/csbw_cascade_dark_ema.safetensors\n"],"color":"#432","bgcolor":"#653"},{"id":30,"type":"Note","pos":[796.0823364257812,3575.965576171875],"size":[315.20135498046875,88],"flags":{},"order":18,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["res_3s can be replaced with res_2s or even res_2m or res_3m (in the multistep folder in the sampler_name dropdown) if more speed is desired."],"color":"#432","bgcolor":"#653"},{"id":33,"type":"Note","pos":[-220,4190],"size":[336.9422302246094,88],"flags":{},"order":19,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["TIP: Try connecting the options nodes to the right to some of the samplers. 
It'll replace the default noise types with perlin, which can be quite good with Cascade."],"color":"#432","bgcolor":"#653"},{"id":31,"type":"SharkOptions_Beta","pos":[150,4190],"size":[234.2189178466797,130],"flags":{},"order":20,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"SharkOptions_Beta"},"widgets_values":["perlin",1,1,false]},{"id":32,"type":"ClownOptions_SDE_Beta","pos":[420,4190],"size":[281.34088134765625,266],"flags":{},"order":21,"mode":0,"inputs":[{"name":"etas","localized_name":"etas","type":"SIGMAS","shape":7,"link":null},{"name":"etas_substep","localized_name":"etas_substep","type":"SIGMAS","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownOptions_SDE_Beta"},"widgets_values":["perlin","perlin","hard","hard",0.5,0.5,-1,"fixed"]},{"id":3,"type":"SaveImage","pos":[1871.823974609375,3716.926025390625],"size":[670.7464599609375,700.1661987304688],"flags":{},"order":30,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":5}],"outputs":[],"properties":{},"widgets_values":["ComfyUI"]}],"links":[[1,16,0,1,0,"LATENT"],[2,8,0,1,1,"VAE"],[3,10,0,2,0,"MODEL"],[4,9,0,2,1,"CLIP"],[5,1,0,3,0,"IMAGE"],[6,2,1,11,0,"CLIP"],[7,2,0,12,0,"MODEL"],[8,2,1,13,0,"CLIP"],[9,12,0,14,0,"MODEL"],[10,11,0,14,1,"CONDITIONING"],[11,13,0,14,2,"CONDITIONING"],[12,5,0,14,6,"OPTIONS"],[13,12,0,15,0,"MODEL"],[14,11,0,15,1,"CONDITIONING"],[15,13,0,15,2,"CONDITIONING"],[16,14,0,15,3,"LATENT"],[17,6,0,15,6,"OPTIONS"],[18,7,0,16,0,"MODEL"],[19,11,0,16,1,"CONDITIONING"],[20,13,0,16,2,"CONDITIONING"],[21,15,0,16,3,"LATENT"],[22,4,0,16,6,"OPTIONS"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.2100000000000006,"offset":[2786.903339088035,-3170.107825364122]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0},"version":0.4}
================================================
FILE: example_workflows/wan img2vid 720p (fp8 fast).json
================================================
{"last_node_id":67,"last_link_id":138,"nodes":[{"id":56,"type":"PreviewImage","pos":[480,600],"size":[210,246],"flags":{},"order":8,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":118}],"outputs":[],"properties":{"Node name for S&R":"PreviewImage"},"widgets_values":[]},{"id":8,"type":"VAEDecode","pos":[1140,80],"size":[210,46],"flags":{},"order":12,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":121},{"name":"vae","localized_name":"vae","type":"VAE","link":137}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[56],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode"},"widgets_values":[]},{"id":6,"type":"CLIPTextEncode","pos":[30,20],"size":[422.84503173828125,164.31304931640625],"flags":{},"order":6,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":134}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[97],"slot_index":0}],"title":"CLIP Text Encode (Positive Prompt)","properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["trump and putin kissing, two men in love making out"],"color":"#232","bgcolor":"#353"},{"id":61,"type":"LoadImage","pos":[-169.0706024169922,588.6607666015625],"size":[315,314],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[128],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["pasted/image 
(371).png","image"]},{"id":55,"type":"ImageResize+","pos":[190.57818603515625,590.173583984375],"size":[251.91366577148438,218],"flags":{},"order":4,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","link":128}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[118,119,120],"slot_index":0},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"ImageResize+"},"widgets_values":[1280,720,"nearest","fill / crop","always",0]},{"id":51,"type":"CLIPVisionEncode","pos":[191.15573120117188,457.861572265625],"size":[253.60000610351562,78],"flags":{},"order":9,"mode":0,"inputs":[{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","link":94},{"name":"image","localized_name":"image","type":"IMAGE","link":120}],"outputs":[{"name":"CLIP_VISION_OUTPUT","localized_name":"CLIP_VISION_OUTPUT","type":"CLIP_VISION_OUTPUT","links":[107],"slot_index":0}],"properties":{"Node name for S&R":"CLIPVisionEncode"},"widgets_values":["none"]},{"id":7,"type":"CLIPTextEncode","pos":[29.393102645874023,230.72264099121094],"size":[425.27801513671875,180.6060791015625],"flags":{},"order":7,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":135}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[98],"slot_index":0}],"title":"CLIP Text Encode (Negative Prompt)","properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["Overexposure, static, blurred details, subtitles, paintings, pictures, still, overall gray, worst quality, low quality, JPEG compression residue, ugly, mutilated, redundant fingers, poorly painted hands, poorly painted faces, deformed, disfigured, deformed limbs, fused fingers, cluttered background, three legs, a lot of people in the background, upside 
down"],"color":"#322","bgcolor":"#533"},{"id":49,"type":"CLIPVisionLoader","pos":[-169.1327362060547,459.3064880371094],"size":[315,58],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[{"name":"CLIP_VISION","localized_name":"CLIP_VISION","type":"CLIP_VISION","links":[94],"slot_index":0}],"properties":{"Node name for S&R":"CLIPVisionLoader"},"widgets_values":["clip_vision_vit_h.safetensors"]},{"id":66,"type":"ClownModelLoader","pos":[-330.852294921875,28.57785415649414],"size":[315,266],"flags":{},"order":2,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[138],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[134,135],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[136,137],"slot_index":2}],"properties":{"Node name for S&R":"ClownModelLoader"},"widgets_values":["wan2.1_i2v_720p_14B_fp8_e4m3fn.safetensors","fp8_e4m3fn_fast","umt5_xxl_fp8_e4m3fn_scaled.safetensors",".none",".none",".none","wan","wan_2.1_vae.safetensors"]},{"id":54,"type":"ClownsharKSampler_Beta","pos":[780,190],"size":[337.16485595703125,661.9249267578125],"flags":{},"order":11,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":114},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":115},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":113},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[121],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":
"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"multistep/res_2m","beta57",30,-1,1,5.5,0,"fixed","standard",true]},{"id":65,"type":"TorchCompileModels","pos":[479.10052490234375,-32.837005615234375],"size":[273.09326171875,178],"flags":{},"order":5,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":138}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[],"slot_index":0}],"properties":{"Node name for S&R":"TorchCompileModels"},"widgets_values":["inductor",false,"default",false,64,0]},{"id":50,"type":"WanImageToVideo","pos":[478.8801574707031,204.63995361328125],"size":[269.6244201660156,210],"flags":{},"order":10,"mode":0,"inputs":[{"name":"positive","localized_name":"positive","type":"CONDITIONING","link":97},{"name":"negative","localized_name":"negative","type":"CONDITIONING","link":98},{"name":"vae","localized_name":"vae","type":"VAE","link":136},{"name":"clip_vision_output","localized_name":"clip_vision_output","type":"CLIP_VISION_OUTPUT","shape":7,"link":107},{"name":"start_image","localized_name":"start_image","type":"IMAGE","shape":7,"link":119}],"outputs":[{"name":"positive","localized_name":"positive","type":"CONDITIONING","links":[114],"slot_index":0},{"name":"negative","localized_name":"negative","type":"CONDITIONING","links":[115],"slot_index":1},{"name":"latent","localized_name":"latent","type":"LATENT","links":[113],"slot_index":2}],"properties":{"Node name for 
S&R":"WanImageToVideo"},"widgets_values":[1280,720,33,1]},{"id":28,"type":"SaveAnimatedWEBP","pos":[1140,190],"size":[595.4246215820312,665.2847290039062],"flags":{},"order":13,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":56}],"outputs":[],"properties":{},"widgets_values":["ComfyUI",16,false,100,"default"]},{"id":67,"type":"Note","pos":[208.15240478515625,-120.98509979248047],"size":[244.7659149169922,88],"flags":{},"order":3,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["TorchCompileModels may not work on older GPUs. After the first run, should lead to significant time savings with GPUs such as the 4090."],"color":"#432","bgcolor":"#653"}],"links":[[56,8,0,28,0,"IMAGE"],[94,49,0,51,0,"CLIP_VISION"],[97,6,0,50,0,"CONDITIONING"],[98,7,0,50,1,"CONDITIONING"],[107,51,0,50,3,"CLIP_VISION_OUTPUT"],[113,50,2,54,3,"LATENT"],[114,50,0,54,1,"CONDITIONING"],[115,50,1,54,2,"CONDITIONING"],[118,55,0,56,0,"IMAGE"],[119,55,0,50,4,"IMAGE"],[120,55,0,51,1,"IMAGE"],[121,54,0,8,0,"LATENT"],[128,61,0,55,0,"IMAGE"],[134,66,1,6,0,"CLIP"],[135,66,1,7,0,"CLIP"],[136,66,2,50,2,"VAE"],[137,66,2,8,1,"VAE"],[138,66,0,65,0,"MODEL"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.6105100000000012,"offset":[2635.71214060565,417.84191139269006]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0},"version":0.4}
================================================
FILE: example_workflows/wan txt2img (fp8 fast).json
================================================
{"last_node_id":698,"last_link_id":1748,"nodes":[{"id":676,"type":"CLIPTextEncode","pos":[2651.457763671875,139.2773895263672],"size":[311.1542663574219,134.35691833496094],"flags":{},"order":4,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":1745}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[1743],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["a woman picks up a coffee cup and smiles, then suddenly throws it out the window in her dirty apartment"]},{"id":7,"type":"CLIPTextEncode","pos":[2650.5888671875,336.779296875],"size":[310.6131286621094,150.69346618652344],"flags":{},"order":5,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":1746}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[1630],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走"]},{"id":666,"type":"EmptyHunyuanLatentVideo","pos":[2751.183349609375,552.1126708984375],"size":[210,130],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"LATENT","localized_name":"LATENT","type":"LATENT","links":[1631,1741],"slot_index":0}],"properties":{"Node name for S&R":"EmptyHunyuanLatentVideo"},"widgets_values":[480,480,65,1]},{"id":696,"type":"ClownModelLoader","pos":[2220,340],"size":[382.9175109863281,266],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1744],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[1745,1746],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[1747],"slot_index":2}],"properties":{"Node name for 
S&R":"ClownModelLoader"},"widgets_values":["wan2.1_t2v_14B_fp8_e4m3fn.safetensors","fp8_e4m3fn_fast","umt5_xxl_fp8_e4m3fn_scaled.safetensors",".none",".none",".none","wan","wan_2.1_vae.safetensors"]},{"id":698,"type":"Note","pos":[2347.1943359375,-37.566280364990234],"size":[244.7659149169922,88],"flags":{},"order":2,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["TorchCompileModels may not work on older GPUs. After the first run, should lead to significant time savings with GPUs such as the 4090."],"color":"#432","bgcolor":"#653"},{"id":346,"type":"ModelSamplingAdvancedResolution","pos":[2340,140],"size":[260.3999938964844,126],"flags":{},"order":3,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1744},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":1741}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1684,1748],"slot_index":0}],"properties":{"Node name for S&R":"ModelSamplingAdvancedResolution"},"widgets_values":["exponential",1.35,0.85]},{"id":665,"type":"ClownsharKSampler_Beta","pos":[3010,140],"size":[310.3046875,656.2719116210938],"flags":{},"order":7,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":1684},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":1743},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":1630},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1631},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[1643],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":
"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"multistep/res_3m","beta57",20,-1,1,5.5,896816,"fixed","standard",true]},{"id":667,"type":"SaveAnimatedWEBP","pos":[3360,140],"size":[315,366],"flags":{},"order":9,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":1632}],"outputs":[],"properties":{},"widgets_values":["ComfyUI",16,false,100,"default"]},{"id":697,"type":"TorchCompileModels","pos":[2673.776611328125,-98.98099517822266],"size":[260.8105163574219,178],"flags":{},"order":6,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1748}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":null}],"properties":{"Node name for S&R":"TorchCompileModels"},"widgets_values":["inductor",false,"default",false,64,0]},{"id":668,"type":"VAEDecode","pos":[3359.884521484375,32.89006805419922],"size":[210,46],"flags":{},"order":8,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":1643},{"name":"vae","localized_name":"vae","type":"VAE","link":1747}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[1632],"slot_index":0}],"properties":{"Node name for 
S&R":"VAEDecode"},"widgets_values":[]}],"links":[[1630,7,0,665,2,"CONDITIONING"],[1631,666,0,665,3,"LATENT"],[1632,668,0,667,0,"IMAGE"],[1643,665,0,668,0,"LATENT"],[1684,346,0,665,0,"MODEL"],[1741,666,0,346,1,"LATENT"],[1743,676,0,665,1,"CONDITIONING"],[1744,696,0,346,0,"MODEL"],[1745,696,1,676,0,"CLIP"],[1746,696,1,7,0,"CLIP"],[1747,696,2,668,1,"VAE"],[1748,346,0,697,0,"MODEL"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.6105100000000008,"offset":[-558.9420074905141,402.3405679733133]},"node_versions":{"comfy-core":"0.3.26","comfyui_controlnet_aux":"1e9eac6377c882da8bb360c7544607036904362c","ComfyUI-VideoHelperSuite":"c36626c6028faca912eafcedbc71f1d342fb4d2a"},"VHS_latentpreview":false,"VHS_latentpreviewrate":0,"VHS_MetadataImage":true,"VHS_KeepIntermediate":true},"version":0.4}
================================================
FILE: example_workflows/wan vid2vid.json
================================================
{"last_node_id":406,"last_link_id":1039,"nodes":[{"id":7,"type":"CLIPTextEncode","pos":[971.2105712890625,537.63671875],"size":[436.48480224609375,118.3749771118164],"flags":{},"order":11,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":1017}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[832],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走"]},{"id":346,"type":"ModelSamplingAdvancedResolution","pos":[1152.6932373046875,133.92713928222656],"size":[260.3999938964844,126],"flags":{},"order":13,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1018},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":1027}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1010,1011],"slot_index":0}],"properties":{"Node name for S&R":"ModelSamplingAdvancedResolution"},"widgets_values":["exponential",1.35,0.85]},{"id":391,"type":"TorchCompileModels","pos":[1438.64501953125,80.51760864257812],"size":[258.1737365722656,178],"flags":{},"order":14,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1010}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":null}],"properties":{"Node name for 
S&R":"TorchCompileModels"},"widgets_values":["inductor",false,"default",false,64,0]},{"id":365,"type":"SaveAnimatedWEBP","pos":[2500,310],"size":[315,366],"flags":{},"order":19,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":945}],"outputs":[],"properties":{},"widgets_values":["ComfyUI",16,false,100,"default",""]},{"id":393,"type":"ClownModelLoader","pos":[626.4608154296875,313.0701904296875],"size":[315,266],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1018],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[1016,1017],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[1012,1013],"slot_index":2}],"properties":{"Node name for S&R":"ClownModelLoader"},"widgets_values":["wan2.1_t2v_14B_fp8_e4m3fn.safetensors","fp8_e4m3fn","umt5_xxl_fp8_e4m3fn_scaled.safetensors",".none",".none",".none","wan","wan_2.1_vae.safetensors"]},{"id":394,"type":"ClownsharkChainsampler_Beta","pos":[1799.4302978515625,313.5021667480469],"size":[315,530],"flags":{},"order":16,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1028},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":1029},{"name":"options 
2","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[1033],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"exponential/res_2s",1,5.5,"resample",true]},{"id":324,"type":"ClownsharKSampler_Beta","pos":[1433.78466796875,314.1369934082031],"size":[337.03857421875,670],"flags":{},"order":15,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":1011},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":997},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":832},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1026},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[1028],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta"},"widgets_values":[0,"multistep/res_2m","beta57",20,12,1,1,0,"fixed","unsample",true]},{"id":395,"type":"ClownOptions_Cycles_Beta","pos":[1843.936767578125,125.13945007324219],"size":[210,130],"flags":{},"order":1,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[1029],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownOptions_Cycles_Beta"},"widgets_values":[10,1,0.5,5.5]},{"id":6,"type":"CLIPTextEncode","pos":[966.9983520507812,314.1016540527344],"size":[447.32421875,169.55857849121094],"flags":{},"order":10,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":1016}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[997],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["A pretty black woman with thick gorgeous hair walks slowly through a tall, modern colonnade of concrete and glass, cradling a sleek silver laptop under her arm. She wears a sand-colored coat with a high collar and sharp tailoring, the buttons neatly fastened, exuding a quiet, focused confidence. Her complexion is porcelain-smooth, lightly touched by the soft overcast light that filters down through the glass canopy. Dark, straight hair is neatly parted and tucked behind one ear, moving ever so slightly as she walks. Her expression is thoughtful, eyes cast downward in introspection, lips gently pressed into a faint, unreadable line.\n\nThe camera begins off-center, panning slowly to align with the corridor’s clean architectural symmetry. Repeating vertical columns frame her movement, creating a visual rhythm that guides the viewer’s eye toward the vanishing point ahead. As she walks, she shifts just slightly to the side, a natural adjustment that causes the fabric of her coat to pull gently at the seams, adding a subtle sense of motion.\n\nReflections drift along the windows beside her — faint, soft, and ghostlike. The ambient light is cool and diffused, lending the scene a contemplative, almost suspended feeling. 
Her presence is calm, deliberate, as though she’s carrying not just the laptop, but something unspoken — a sense of purpose shaped quietly in her mind."]},{"id":397,"type":"Note","pos":[639.0505981445312,128.31825256347656],"size":[301.3404235839844,112.45540618896484],"flags":{},"order":2,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Sometimes the first frame looks noisy with WAN. You can either throw it away, use more steps, use a more accurate sampler (2s > 2m, 3s > 2s), or ensure you aren't using a \"fast\" mode for the weights, such as fp8_e4m3fn_fast, which results in a significant hit to quality."],"color":"#432","bgcolor":"#653"},{"id":398,"type":"Note","pos":[1451.546142578125,1060.8258056640625],"size":[295.7769470214844,88],"flags":{},"order":3,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["More \"steps_to_run\" will increase the amount of denoise. Values between 12 and 15 are a good place to start.\n"],"color":"#432","bgcolor":"#653"},{"id":403,"type":"Frames Slice Latent","pos":[555.5986938476562,1154.378173828125],"size":[210,82],"flags":{},"order":4,"mode":0,"inputs":[{"name":"frames","localized_name":"frames","type":"LATENT","link":null}],"outputs":[{"name":"latent","localized_name":"latent","type":"LATENT","links":null}],"properties":{"Node name for S&R":"Frames Slice Latent"},"widgets_values":[0,1]},{"id":402,"type":"Frames Slice","pos":[555.5987548828125,990.6537475585938],"size":[210,82],"flags":{},"order":5,"mode":0,"inputs":[{"name":"frames","localized_name":"frames","type":"IMAGE","link":null}],"outputs":[{"name":"image","localized_name":"image","type":"IMAGE","links":null}],"properties":{"Node name for S&R":"Frames Slice"},"widgets_values":[0,1]},{"id":401,"type":"Note","pos":[550.497802734375,712.9439086914062],"size":[239.34762573242188,200.06405639648438],"flags":{},"order":6,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Can use anything that will load a video as a 
sequence of frames. The core node \"Load Image\" will work in place of this one, if you are loading an animated .webp.\n\nThis node allows you to set the number of frames loaded.\n\nThe nodes below will also allow you to pick and choose ranges of frames. Be sure to use Image Preview to verify you're picking the ones you want!"],"color":"#432","bgcolor":"#653"},{"id":316,"type":"VHS_LoadVideo","pos":[808.8834228515625,711.6345825195312],"size":[319.19403076171875,808.9393920898438],"flags":{},"order":7,"mode":0,"inputs":[{"name":"meta_batch","localized_name":"meta_batch","type":"VHS_BatchManager","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":null}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[1014],"slot_index":0},{"name":"frame_count","localized_name":"frame_count","type":"INT","links":null},{"name":"audio","localized_name":"audio","type":"AUDIO","links":null},{"name":"video_info","localized_name":"video_info","type":"VHS_VIDEOINFO","links":null}],"properties":{"Node name for S&R":"VHS_LoadVideo"},"widgets_values":{"video":"3206567-hd_1080_1920_25fps.mp4","force_rate":0,"force_size":"Disabled","custom_width":512,"custom_height":512,"frame_load_cap":35,"skip_first_frames":0,"select_every_nth":1,"choose video to 
upload":"image","videopreview":{"hidden":false,"paused":false,"params":{"force_rate":0,"frame_load_cap":35,"skip_first_frames":0,"select_every_nth":1,"filename":"3206567-hd_1080_1920_25fps.mp4","type":"input","format":"video/mp4"},"muted":false}}},{"id":392,"type":"VAEEncodeAdvanced","pos":[1157.1488037109375,712.4218139648438],"size":[244.18490600585938,278],"flags":{},"order":12,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":1014},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":1013}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[1026,1027],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":null},{"name":"mask","localized_name":"mask","type":"MASK","links":null},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[],"slot_index":3},{"name":"width","localized_name":"width","type":"INT","links":[],"slot_index":4},{"name":"height","localized_name":"height","type":"INT","links":[],"slot_index":5}],"properties":{"Node name for 
S&R":"VAEEncodeAdvanced"},"widgets_values":["false",368,640,"red",false,"16_channels"]},{"id":396,"type":"ClownsharkChainsampler_Beta","pos":[2146.74755859375,313.50225830078125],"size":[315,510],"flags":{},"order":17,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1033},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[1035],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"exponential/res_2s",-1,5.5,"resample",true]},{"id":400,"type":"Note","pos":[2090.21728515625,70.1985855102539],"size":[324.38916015625,177.81007385253906],"flags":{},"order":8,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Each full cycle reruns the node twice:\n\nresample -> unsample -> resample -> ... \n\nHigher values will change the video more.\n\nres_2m and 3m will preserve more of the initial structure. 
Res_2s and especially 3s will result in more dramatic change.\n\nIf you use more steps_to_run in ClownsharKSampler, you'll need fewer cycles here."],"color":"#432","bgcolor":"#653"},{"id":399,"type":"Note","pos":[2153.567626953125,887.000244140625],"size":[303.7249755859375,88],"flags":{},"order":9,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Using a sampler such as res_2s instead of res_2m in this node can reduce or eliminate first frame noise. It's not always necessary, mileage may vary."],"color":"#432","bgcolor":"#653"},{"id":325,"type":"VAEDecode","pos":[2496.82080078125,203.3095703125],"size":[210,46],"flags":{},"order":18,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":1035},{"name":"vae","localized_name":"vae","type":"VAE","link":1012}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[945],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode"},"widgets_values":[]}],"links":[[832,7,0,324,2,"CONDITIONING"],[945,325,0,365,0,"IMAGE"],[997,6,0,324,1,"CONDITIONING"],[1010,346,0,391,0,"MODEL"],[1011,346,0,324,0,"MODEL"],[1012,393,2,325,1,"VAE"],[1013,393,2,392,4,"VAE"],[1014,316,0,392,0,"IMAGE"],[1016,393,1,6,0,"CLIP"],[1017,393,1,7,0,"CLIP"],[1018,393,0,346,0,"MODEL"],[1026,392,0,324,3,"LATENT"],[1027,392,0,346,1,"LATENT"],[1028,324,0,394,4,"LATENT"],[1029,395,0,394,6,"OPTIONS"],[1033,394,0,396,4,"LATENT"],[1035,396,0,325,0,"LATENT"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.464100000000001,"offset":[1126.1541105871463,24.96236469373386]},"node_versions":{"comfy-core":"0.3.26","comfyui_controlnet_aux":"1e9eac6377c882da8bb360c7544607036904362c","ComfyUI-VideoHelperSuite":"c36626c6028faca912eafcedbc71f1d342fb4d2a"},"VHS_latentpreview":false,"VHS_latentpreviewrate":0,"VHS_MetadataImage":true,"VHS_KeepIntermediate":true},"version":0.4}
================================================
FILE: flux/controlnet.py
================================================
#Original code can be found on: https://github.com/XLabs-AI/x-flux/blob/main/src/flux/controlnet.py
#modified to support different types of flux controlnets
import torch
import math
from torch import Tensor, nn
from einops import rearrange, repeat
from .layers import (DoubleStreamBlock, EmbedND, LastLayer,
MLPEmbedder, SingleStreamBlock,
timestep_embedding)
from .model import Flux
import comfy.ldm.common_dit
class MistolineCondDownsamplBlock(nn.Module):
    """Conditioning-image encoder for the Mistoline Flux controlnet.

    Maps a 3-channel input image to a 16-channel feature map while
    downsampling spatially by 8x (three stride-2 convolutions), with a
    SiLU between every pair of convolutions.

    Args:
        dtype:      parameter dtype forwarded to each conv.
        device:     parameter device forwarded to each conv.
        operations: namespace providing the ``Conv2d`` implementation
                    (lets the caller swap in e.g. quantized/casted ops).
    """

    def __init__(self, dtype=None, device=None, operations=None):
        super().__init__()

        def make_conv(c_in, c_out, kernel, **extra):
            # All convs go through the injected `operations` namespace.
            return operations.Conv2d(c_in, c_out, kernel, dtype=dtype, device=device, **extra)

        # (in_channels, out_channels, kernel_size, extra kwargs) per conv,
        # in order. A SiLU is interleaved between consecutive convs, so the
        # resulting nn.Sequential has the exact same module indices (and
        # therefore the same state_dict keys) as the original layout.
        conv_specs = [
            (3,  16, 3, dict(padding=1)),
            (16, 16, 1, dict()),
            (16, 16, 3, dict(padding=1)),
            (16, 16, 3, dict(padding=1, stride=2)),
            (16, 16, 3, dict(padding=1)),
            (16, 16, 3, dict(padding=1, stride=2)),
            (16, 16, 3, dict(padding=1)),
            (16, 16, 3, dict(padding=1, stride=2)),
            (16, 16, 1, dict()),
            (16, 16, 3, dict(padding=1)),
        ]

        layers = []
        for idx, (c_in, c_out, kernel, extra) in enumerate(conv_specs):
            if idx > 0:
                layers.append(nn.SiLU())
            layers.append(make_conv(c_in, c_out, kernel, **extra))
        self.encoder = nn.Sequential(*layers)

    def forward(self, x):
        """Encode a (B, 3, H, W) image into a (B, 16, H/8, W/8) feature map."""
        return self.encoder(x)
class MistolineControlnetBlock(nn.Module):
    """Per-layer controlnet output head: Linear projection followed by SiLU."""
    def __init__(self, hidden_size, dtype=None, device=None, operations=None):
        super().__init__()
        self.linear = operations.Linear(hidden_size, hidden_size, dtype=dtype, device=device)
        self.act = nn.SiLU()

    def forward(self, x):
        projected = self.linear(x)
        return self.act(projected)
class ControlNetFlux(Flux):
    """Flux-based ControlNet.

    Runs the double/single stream transformer blocks over a hint-conditioned
    latent and returns per-layer residual tensors for the *main* Flux model
    (dict keys "input" for double blocks, "output" for single blocks) instead
    of decoding an image (hence final_layer=False in super().__init__).
    Supports three hint modes: latent input, Mistoline conv encoder, and the
    standard conv hint block.
    """
    def __init__(self, latent_input=False, num_union_modes=0, mistoline=False, control_latent_channels=None, image_model=None, dtype=None, device=None, operations=None, **kwargs):
        super().__init__(final_layer=False, dtype=dtype, device=device, operations=operations, **kwargs)
        # Depths of the full-size Flux model the residuals are broadcast onto.
        self.main_model_double = 19
        self.main_model_single = 38
        self.mistoline = mistoline
        # add ControlNet blocks: one output head per transformer block.
        if self.mistoline:
            control_block = lambda : MistolineControlnetBlock(self.hidden_size, dtype=dtype, device=device, operations=operations)
        else:
            control_block = lambda : operations.Linear(self.hidden_size, self.hidden_size, dtype=dtype, device=device)
        self.controlnet_blocks = nn.ModuleList([])
        for _ in range(self.params.depth):
            self.controlnet_blocks.append(control_block())
        self.controlnet_single_blocks = nn.ModuleList([])
        for _ in range(self.params.depth_single_blocks):
            self.controlnet_single_blocks.append(control_block())
        self.num_union_modes = num_union_modes
        self.controlnet_mode_embedder = None
        if self.num_union_modes > 0:
            # Union controlnets: embed a control-type token prepended to txt.
            self.controlnet_mode_embedder = operations.Embedding(self.num_union_modes, self.hidden_size, dtype=dtype, device=device)
        self.gradient_checkpointing = False
        self.latent_input = latent_input
        if control_latent_channels is None:
            control_latent_channels = self.in_channels
        else:
            control_latent_channels *= 2 * 2 #patch size
        self.pos_embed_input = operations.Linear(control_latent_channels, self.hidden_size, bias=True, dtype=dtype, device=device)
        if not self.latent_input:
            # Pixel-space hints are conv-downsampled 8x before patchification.
            if self.mistoline:
                self.input_cond_block = MistolineCondDownsamplBlock(dtype=dtype, device=device, operations=operations)
            else:
                self.input_hint_block = nn.Sequential(
                    operations.Conv2d(3, 16, 3, padding=1, dtype=dtype, device=device),
                    nn.SiLU(),
                    operations.Conv2d(16, 16, 3, padding=1, dtype=dtype, device=device),
                    nn.SiLU(),
                    operations.Conv2d(16, 16, 3, padding=1, stride=2, dtype=dtype, device=device),
                    nn.SiLU(),
                    operations.Conv2d(16, 16, 3, padding=1, dtype=dtype, device=device),
                    nn.SiLU(),
                    operations.Conv2d(16, 16, 3, padding=1, stride=2, dtype=dtype, device=device),
                    nn.SiLU(),
                    operations.Conv2d(16, 16, 3, padding=1, dtype=dtype, device=device),
                    nn.SiLU(),
                    operations.Conv2d(16, 16, 3, padding=1, stride=2, dtype=dtype, device=device),
                    nn.SiLU(),
                    operations.Conv2d(16, 16, 3, padding=1, dtype=dtype, device=device)
                )

    def forward_orig(
        self,
        img: Tensor,
        img_ids: Tensor,
        controlnet_cond: Tensor,
        txt: Tensor,
        txt_ids: Tensor,
        timesteps: Tensor,
        y: Tensor,
        guidance: Tensor = None,
        control_type: Tensor = None,
    ) -> Tensor:
        """Run the blocks and collect per-layer control residuals.

        :return: dict with "input" (tuple of double-block residuals, padded to
                 main_model_double entries) and, when single blocks exist,
                 "output" (padded to main_model_single entries).
        """
        if img.ndim != 3 or txt.ndim != 3:
            raise ValueError("Input img and txt tensors must have 3 dimensions.")
        # running on sequences img
        img = self.img_in(img)
        # Inject the hint by adding its projection to the image tokens.
        controlnet_cond = self.pos_embed_input(controlnet_cond)
        img = img + controlnet_cond
        vec = self.time_in(timestep_embedding(timesteps, 256))
        if self.params.guidance_embed:
            vec = vec + self.guidance_in(timestep_embedding(guidance, 256))
        vec = vec + self.vector_in(y)
        txt = self.txt_in(txt)
        if self.controlnet_mode_embedder is not None and len(control_type) > 0:
            # Union mode: prepend one embedded control-type token to txt, and
            # duplicate the first txt_id so positions stay aligned.
            control_cond = self.controlnet_mode_embedder(torch.tensor(control_type, device=img.device), out_dtype=img.dtype).unsqueeze(0).repeat((txt.shape[0], 1, 1))
            txt = torch.cat([control_cond, txt], dim=1)
            txt_ids = torch.cat([txt_ids[:,:1], txt_ids], dim=1)
        ids = torch.cat((txt_ids, img_ids), dim=1)
        pe = self.pe_embedder(ids)
        controlnet_double = ()
        for i in range(len(self.double_blocks)):
            img, txt = self.double_blocks[i](img=img, txt=txt, vec=vec, pe=pe)
            controlnet_double = controlnet_double + (self.controlnet_blocks[i](img),)
        img = torch.cat((txt, img), 1)
        controlnet_single = ()
        for i in range(len(self.single_blocks)):
            img = self.single_blocks[i](img, vec=vec, pe=pe)
            # Single-stream residuals only cover the image part of the sequence.
            controlnet_single = controlnet_single + (self.controlnet_single_blocks[i](img[:, txt.shape[1] :, ...]),)
        # NOTE: `repeat` here shadows einops.repeat within this method — it is
        # the per-layer duplication factor needed to cover the main model depth.
        repeat = math.ceil(self.main_model_double / len(controlnet_double))
        if self.latent_input:
            # Latent controlnets repeat each layer in place (a a b b ...).
            out_input = ()
            for x in controlnet_double:
                out_input += (x,) * repeat
        else:
            # Hint controlnets tile the whole sequence (a b ... a b ...).
            out_input = (controlnet_double * repeat)
        out = {"input": out_input[:self.main_model_double]}
        if len(controlnet_single) > 0:
            repeat = math.ceil(self.main_model_single / len(controlnet_single))
            out_output = ()
            if self.latent_input:
                for x in controlnet_single:
                    out_output += (x,) * repeat
            else:
                out_output = (controlnet_single * repeat)
            out["output"] = out_output[:self.main_model_single]
        return out

    def forward(self, x, timesteps, context, y, guidance=None, hint=None, **kwargs):
        """Patchify latent + hint, build position ids, and defer to forward_orig."""
        patch_size = 2
        if self.latent_input:
            hint = comfy.ldm.common_dit.pad_to_patch_size(hint, (patch_size, patch_size))
        elif self.mistoline:
            # Pixel hints arrive in [0, 1]; rescale to [-1, 1] before encoding.
            hint = hint * 2.0 - 1.0
            hint = self.input_cond_block(hint)
        else:
            hint = hint * 2.0 - 1.0
            hint = self.input_hint_block(hint)
        hint = rearrange(hint, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=patch_size, pw=patch_size)
        bs, c, h, w = x.shape
        x = comfy.ldm.common_dit.pad_to_patch_size(x, (patch_size, patch_size))
        img = rearrange(x, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=patch_size, pw=patch_size)
        # Rounded-up patch-grid dimensions (channel 0 of the ids stays zero).
        h_len = ((h + (patch_size // 2)) // patch_size)
        w_len = ((w + (patch_size // 2)) // patch_size)
        img_ids = torch.zeros((h_len, w_len, 3), device=x.device, dtype=x.dtype)
        img_ids[..., 1] = img_ids[..., 1] + torch.linspace(0, h_len - 1, steps=h_len, device=x.device, dtype=x.dtype)[:, None]
        img_ids[..., 2] = img_ids[..., 2] + torch.linspace(0, w_len - 1, steps=w_len, device=x.device, dtype=x.dtype)[None, :]
        img_ids = repeat(img_ids, "h w c -> b (h w) c", b=bs)
        txt_ids = torch.zeros((bs, context.shape[1], 3), device=x.device, dtype=x.dtype)
        return self.forward_orig(img, img_ids, hint, context, txt_ids, timesteps, y, guidance, control_type=kwargs.get("control_type", []))
================================================
FILE: flux/layers.py
================================================
# Adapted from: https://github.com/black-forest-labs/flux
import math
import torch
from torch import Tensor, nn
from typing import Optional, Callable, Tuple, Dict, Any, Union, TYPE_CHECKING, TypeVar
import torch.nn.functional as F
import einops
from einops import rearrange
from torch import Tensor
from dataclasses import dataclass
from .math import attention, rope, apply_rope
import comfy.ldm.common_dit
class EmbedND(nn.Module):
    """Multi-axis rotary position embedding.

    Builds one rope table per id axis and concatenates them along the
    frequency dimension, adding a singleton head dimension at the end.
    """
    def __init__(self, dim: int, theta: int, axes_dim: list):
        super().__init__()
        self.dim = dim
        self.theta = theta
        self.axes_dim = axes_dim

    def forward(self, ids: Tensor) -> Tensor:
        axis_tables = [
            rope(ids[..., axis], self.axes_dim[axis], self.theta)
            for axis in range(ids.shape[-1])
        ]
        return torch.cat(axis_tables, dim=-3).unsqueeze(1)
def timestep_embedding(t: Tensor, dim, max_period=10000, time_factor: float = 1000.0):
    """
    Create sinusoidal timestep embeddings.

    :param t: a 1-D Tensor of N indices, one per batch element (may be fractional).
    :param dim: the dimension of the output.
    :param max_period: controls the minimum frequency of the embeddings.
    :param time_factor: multiplier applied to t before embedding (Flux uses 1000x).
    :return: an (N, dim) Tensor of positional embeddings.
    """
    scaled = time_factor * t
    half = dim // 2
    freqs = torch.exp(-math.log(max_period) * torch.arange(start=0, end=half, dtype=torch.float32, device=t.device) / half)
    angles = scaled[:, None].float() * freqs[None]
    emb = torch.cat([torch.cos(angles), torch.sin(angles)], dim=-1)
    if dim % 2:
        # Odd dim: pad a zero column so the output is exactly `dim` wide.
        emb = torch.cat([emb, torch.zeros_like(emb[:, :1])], dim=-1)
    if torch.is_floating_point(t):
        emb = emb.to(t)
    return emb
class MLPEmbedder(nn.Module):
    """Two-layer MLP (Linear -> SiLU -> Linear) lifting a vector to hidden_dim."""
    def __init__(self, in_dim: int, hidden_dim: int, dtype=None, device=None, operations=None):
        super().__init__()
        self.in_layer = operations.Linear(in_dim, hidden_dim, bias=True, dtype=dtype, device=device)
        self.silu = nn.SiLU()
        self.out_layer = operations.Linear(hidden_dim, hidden_dim, bias=True, dtype=dtype, device=device)

    def forward(self, x: Tensor) -> Tensor:
        hidden = self.silu(self.in_layer(x))
        return self.out_layer(hidden)
class RMSNorm(torch.nn.Module):
    """RMS normalization with a learned per-channel gain (no bias), eps=1e-6."""
    def __init__(self, dim: int, dtype=None, device=None, operations=None):
        super().__init__()
        # torch.empty leaves the gain uninitialized — presumably populated from
        # a checkpoint before use (TODO confirm against the model loader).
        self.scale = nn.Parameter(torch.empty((dim), dtype=dtype, device=device))  # self.scale.shape = 128

    def forward(self, x: Tensor):
        gain = self.scale
        # Delegate to comfy's rms_norm helper.
        return comfy.ldm.common_dit.rms_norm(x, gain, 1e-6)
class QKNorm(torch.nn.Module):
    """RMS-normalizes q and k independently, then casts both to v's dtype/device."""
    def __init__(self, dim: int, dtype=None, device=None, operations=None):
        super().__init__()
        self.query_norm = RMSNorm(dim, dtype=dtype, device=device, operations=operations)
        self.key_norm = RMSNorm(dim, dtype=dtype, device=device, operations=operations)

    def forward(self, q: Tensor, k: Tensor, v: Tensor) -> tuple:
        normed_q = self.query_norm(q)
        normed_k = self.key_norm(k)
        return normed_q.to(v), normed_k.to(v)
class SelfAttention(nn.Module):
    """Holds the QKV projection, per-head QK norm, and output projection.

    Note: no forward() — DoubleStreamBlock drives these submodules directly.
    """
    def __init__(self, dim: int, num_heads: int = 8, qkv_bias: bool = False, dtype=None, device=None, operations=None):
        super().__init__()
        self.num_heads = num_heads                # e.g. 24
        per_head_dim = dim // num_heads           # e.g. 128 = 3072 / 24
        self.qkv = operations.Linear(dim, dim * 3, bias=qkv_bias, dtype=dtype, device=device)
        self.norm = QKNorm(per_head_dim, dtype=dtype, device=device, operations=operations)
        self.proj = operations.Linear(dim, dim, dtype=dtype, device=device)  # dim is usually 3072
@dataclass
class ModulationOut:
    # One adaLN modulation triple: x_mod = (1 + scale) * norm(x) + shift,
    # with `gate` scaling the branch output before the residual add.
    shift: Tensor
    scale: Tensor
    gate: Tensor
class Modulation(nn.Module):
    """Maps a conditioning vector to (shift, scale, gate) triples.

    double=True yields two triples (attention + MLP stages); otherwise the
    second element of the returned pair is None.
    """
    def __init__(self, dim: int, double: bool, dtype=None, device=None, operations=None):
        super().__init__()
        self.is_double = double
        self.multiplier = 6 if double else 3
        self.lin = operations.Linear(dim, self.multiplier * dim, bias=True, dtype=dtype, device=device)

    def forward(self, vec: Tensor) -> tuple:
        # SiLU-activate vec, project, insert a sequence dim, split into chunks.
        chunks = self.lin(nn.functional.silu(vec))[:, None, :].chunk(self.multiplier, dim=-1)
        first = ModulationOut(*chunks[:3])
        second = ModulationOut(*chunks[3:]) if self.is_double else None
        return (first, second,)
class DoubleStreamBlock(nn.Module):
    """Flux double-stream transformer block.

    Image and text tokens keep separate weights (modulation, attention
    projections, MLP) but attend jointly over the concatenated sequence.
    `style_block` is a style-transfer hook object whose .img/.txt callables
    are invoked at every named stage (e.g. "attn_norm", "ff_gated"); the hooks
    are called unconditionally, so callers must always supply one.
    `update_cross_attn`, when given, performs a closed-form edit of the fused
    txt qkv weight after the block runs (see comments in forward).
    """
    def __init__(self, hidden_size: int, num_heads: int, mlp_ratio: float, qkv_bias: bool = False, dtype=None, device=None, operations=None, idx=-1):
        super().__init__()
        self.idx = idx
        mlp_hidden_dim = int(hidden_size * mlp_ratio)
        self.num_heads = num_heads
        self.hidden_size = hidden_size
        self.img_mod = Modulation(hidden_size, double=True, dtype=dtype, device=device, operations=operations)  # in_features=3072, out_features=18432 (3072*6)
        self.txt_mod = Modulation(hidden_size, double=True, dtype=dtype, device=device, operations=operations)  # in_features=3072, out_features=18432 (3072*6)
        self.img_attn = SelfAttention(dim=hidden_size, num_heads=num_heads, qkv_bias=qkv_bias, dtype=dtype, device=device, operations=operations)  # .qkv: in_features=3072, out_features=9216   .proj: 3072,3072
        self.txt_attn = SelfAttention(dim=hidden_size, num_heads=num_heads, qkv_bias=qkv_bias, dtype=dtype, device=device, operations=operations)  # .qkv: in_features=3072, out_features=9216   .proj: 3072,3072
        self.img_norm1 = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device)
        self.txt_norm1 = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device)
        self.img_norm2 = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device)
        self.txt_norm2 = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device)
        self.img_mlp = nn.Sequential(
            operations.Linear(hidden_size, mlp_hidden_dim, bias=True, dtype=dtype, device=device),
            nn.GELU(approximate="tanh"),
            operations.Linear(mlp_hidden_dim, hidden_size, bias=True, dtype=dtype, device=device),
        )  # 3072->12288, 12288->3072   (3072*4)
        self.txt_mlp = nn.Sequential(
            operations.Linear(hidden_size, mlp_hidden_dim, bias=True, dtype=dtype, device=device),
            nn.GELU(approximate="tanh"),
            operations.Linear(mlp_hidden_dim, hidden_size, bias=True, dtype=dtype, device=device),
        )  # 3072->12288, 12288->3072   (3072*4)

    def forward(self, img: Tensor, txt: Tensor, vec: Tensor, pe: Tensor, mask=None, idx=0, update_cross_attn=None, style_block=None) -> Tuple[Tensor, Tensor]:  # vec 1,3072   # mask.shape 4608,4608   img_attn.shape 1,4096,3072   txt_attn.shape 1,512,3072
        img_len = img.shape[-2]
        txt_len = txt.shape[-2]
        # Two modulation triples per stream: *_mod1 for attention, *_mod2 for MLP.
        img_mod1, img_mod2 = self.img_mod(vec)  # -> 3072, 3072
        txt_mod1, txt_mod2 = self.txt_mod(vec)
        img_norm = self.img_norm1(img)
        txt_norm = self.txt_norm1(txt)
        img_norm = style_block.img(img_norm, "attn_norm")
        txt_norm = style_block.txt(txt_norm, "attn_norm")
        # adaLN: scale/shift the normalized activations.
        img_norm = img_norm * (1+img_mod1.scale) + img_mod1.shift
        txt_norm = txt_norm * (1+txt_mod1.scale) + txt_mod1.shift
        img_norm = style_block.img(img_norm, "attn_norm_mod")
        txt_norm = style_block.txt(txt_norm, "attn_norm_mod")
        ### ATTN ###
        # Fused qkv -> (3, B, heads, L, head_dim); style hooks between each step.
        img_qkv = self.img_attn.qkv(img_norm)
        img_q, img_k, img_v = img_qkv.view(img_qkv.shape[0], img_qkv.shape[1], 3, self.num_heads, -1).permute(2, 0, 3, 1, 4)
        img_q = style_block.img.ATTN(img_q, "q_proj")
        img_k = style_block.img.ATTN(img_k, "k_proj")
        img_v = style_block.img.ATTN(img_v, "v_proj")
        img_q, img_k = self.img_attn.norm(img_q, img_k, img_v)
        img_q = style_block.img.ATTN(img_q, "q_norm")
        img_k = style_block.img.ATTN(img_k, "k_norm")
        txt_qkv = self.txt_attn.qkv(txt_norm)
        txt_q, txt_k, txt_v = txt_qkv.view(txt_qkv.shape[0], txt_qkv.shape[1], 3, self.num_heads, -1).permute(2, 0, 3, 1, 4)
        txt_q = style_block.txt.ATTN(txt_q, "q_proj")
        txt_k = style_block.txt.ATTN(txt_k, "k_proj")
        txt_v = style_block.txt.ATTN(txt_v, "v_proj")
        txt_q, txt_k = self.txt_attn.norm(txt_q, txt_k, txt_v)
        txt_q = style_block.txt.ATTN(txt_q, "q_norm")
        txt_k = style_block.txt.ATTN(txt_k, "k_norm")
        # Joint attention over [txt, img]; split the result back per stream.
        q, k, v = torch.cat((txt_q, img_q), dim=2), torch.cat((txt_k, img_k), dim=2), torch.cat((txt_v, img_v), dim=2)
        attn = attention(q, k, v, pe=pe, mask=mask)
        txt_attn = attn[:,:txt_len]  # 1, 768,3072
        img_attn = attn[:,txt_len:]
        img_attn = style_block.img.ATTN(img_attn, "out")
        txt_attn = style_block.txt.ATTN(txt_attn, "out")
        img_attn = self.img_attn.proj(img_attn)  #to_out
        txt_attn = self.txt_attn.proj(txt_attn)
        ### ATTN ###
        img_attn = style_block.img(img_attn, "attn")
        txt_attn = style_block.txt(txt_attn, "attn")
        # Gate, then residual add (in-place on the caller's tensors).
        img_attn *= img_mod1.gate
        txt_attn *= txt_mod1.gate
        img_attn = style_block.img(img_attn, "attn_gated")
        txt_attn = style_block.txt(txt_attn, "attn_gated")
        img += img_attn
        txt += txt_attn
        img = style_block.img(img, "attn_res")
        txt = style_block.txt(txt, "attn_res")
        # Second stage: modulated MLP with the same gate/residual pattern.
        img_norm = self.img_norm2(img)
        txt_norm = self.txt_norm2(txt)
        img_norm = style_block.img(img_norm, "ff_norm")
        txt_norm = style_block.txt(txt_norm, "ff_norm")
        img_norm = img_norm * (1+img_mod2.scale) + img_mod2.shift
        txt_norm = txt_norm * (1+txt_mod2.scale) + txt_mod2.shift
        img_norm = style_block.img(img_norm, "ff_norm_mod")
        txt_norm = style_block.txt(txt_norm, "ff_norm_mod")
        img_mlp = self.img_mlp(img_norm)
        txt_mlp = self.txt_mlp(txt_norm)
        img_mlp = style_block.img(img_mlp, "ff")
        txt_mlp = style_block.txt(txt_mlp, "ff")
        img_mlp *= img_mod2.gate
        txt_mlp *= txt_mod2.gate
        img_mlp = style_block.img(img_mlp, "ff_gated")
        txt_mlp = style_block.txt(txt_mlp, "ff_gated")
        img += img_mlp
        txt += txt_mlp
        img = style_block.img(img, "ff_res")
        txt = style_block.txt(txt, "ff_res")
        if update_cross_attn is not None:
            if not update_cross_attn['skip_cross_attn']:
                # Closed-form "concept edit" of the fused txt qkv weight:
                #  - UNCOND pass: cache source token features as c_src.
                #  - cond pass: solve W_new = (lamb*W + erase*(W c_guide) c_src^T)
                #    (lamb*I + erase*c_src c_src^T)^{-1} via torch.linalg.solve,
                #    then optionally keep the original q/k/v 3072-row slices.
                # Tensors are aggressively pushed to CPU/deleted to limit VRAM.
                UNCOND = update_cross_attn['UNCOND']
                txt_update = self.txt_norm1(txt.cpu()).float()
                txt_update = (1 + txt_mod1.scale.to(txt_update)) * txt_update + txt_mod1.shift.to(txt_update)
                if UNCOND:
                    t5_start = update_cross_attn['src_t5_start']
                    t5_end = update_cross_attn['src_t5_end']
                    txt_src = txt_update[:,t5_start:t5_end,:].cpu()  #.float()
                    self.c_src = txt_src.transpose(-2,-1).squeeze(0)  # shape [C,1]
                else:
                    t5_start = update_cross_attn['tgt_t5_start']
                    t5_end = update_cross_attn['tgt_t5_end']
                    lamb = update_cross_attn['lamb']
                    erase = update_cross_attn['erase']
                    c_guide = txt_update[:,t5_start:t5_end,:].transpose(-2,-1).squeeze(0)  # [C,1]
                    Wv_old = self.txt_attn.qkv.weight.data.to(c_guide)  # [C,C]
                    v_star = Wv_old @ c_guide  # [C,1]
                    c_src = self.c_src  #.cpu()   # [C,1]
                    lamb = lamb
                    erase_scale = erase
                    d = c_src.shape[0]
                    C = c_src @ c_src.T  # [C,C]
                    I = torch.eye(d, device=C.device, dtype=C.dtype)
                    mat1_v = lamb*Wv_old + erase_scale*(v_star @ c_src.T)  # [C,C]
                    mat2_v = lamb*I + erase_scale*(C)  # [C,C]
                    I = I.to("cpu")
                    C = C.to("cpu")
                    c_src = c_src.to("cpu")
                    self.c_src = self.c_src.to("cpu")
                    v_star = v_star.to("cpu")
                    Wv_old = Wv_old.to("cpu")
                    c_guide = c_guide.to("cpu")
                    del I, C, c_src, self.c_src, v_star, Wv_old, c_guide
                    #Wv_new = mat1_v @ torch.inverse(mat2_v.float()).to(mat1_v)   # [C,C]
                    # solve(A^T, B^T)^T == B A^{-1}: avoids forming the inverse.
                    Wv_new = torch.linalg.solve(mat2_v.T, mat1_v.T).T
                    mat1_v = mat1_v.to("cpu")
                    mat2_v = mat2_v.to("cpu")
                    del mat1_v, mat2_v
                    update_q = update_cross_attn['update_q']
                    update_k = update_cross_attn['update_k']
                    update_v = update_cross_attn['update_v']
                    # Fused qkv layout: rows [0:3072]=q, [3072:6144]=k, [6144:]=v.
                    if not update_q:
                        Wv_new[:3072, :] = self.txt_attn.qkv.weight.data[:3072, :].to(Wv_new)
                    if not update_k:
                        Wv_new[3072:6144,:] = self.txt_attn.qkv.weight.data[3072:6144,:].to(Wv_new)
                    if not update_v:
                        Wv_new[6144: ,:] = self.txt_attn.qkv.weight.data[6144: ,:].to(Wv_new)
                    self.txt_attn.qkv.weight.data.copy_(Wv_new.to(self.txt_attn.qkv.weight.data.dtype))
                    Wv_new = Wv_new.to("cpu")
                    del Wv_new
                    #torch.cuda.empty_cache()
        return img, txt
class SingleStreamBlock(nn.Module):  #attn.shape = 1,4608,3072     mlp.shape = 1,4608,12288    4096*3 = 12288
    """
    A DiT block with parallel linear layers as described in
    https://arxiv.org/abs/2302.05442 and adapted modulation interface.

    Operates on the concatenated [txt, img] sequence. linear1 fuses the qkv
    projection with the MLP input; linear2 fuses the attention output
    projection with the MLP output. `style_block` hooks are invoked
    unconditionally at each named stage (see DoubleStreamBlock).
    """
    def __init__(self, hidden_size: int, num_heads: int, mlp_ratio: float = 4.0, qk_scale: float = None, dtype=None, device=None, operations=None, idx=-1):
        super().__init__()
        self.idx = idx
        self.hidden_dim = hidden_size  #3072
        self.num_heads = num_heads  #24
        head_dim = hidden_size // num_heads
        self.scale = qk_scale or head_dim**-0.5  #0.08838834764831845
        self.mlp_hidden_dim = int(hidden_size * mlp_ratio)  #12288== 3072 * 4
        # qkv and mlp_in
        self.linear1 = operations.Linear(hidden_size, 3*hidden_size + self.mlp_hidden_dim, dtype=dtype, device=device)
        # proj and mlp_out
        self.linear2 = operations.Linear(hidden_size + self.mlp_hidden_dim, hidden_size, dtype=dtype, device=device)
        self.norm = QKNorm(head_dim, dtype=dtype, device=device, operations=operations)
        self.hidden_size = hidden_size  #3072
        self.pre_norm = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device)
        self.mlp_act = nn.GELU(approximate="tanh")
        self.modulation = Modulation(hidden_size, double=False, dtype=dtype, device=device, operations=operations)

    # vec 1,3072      x 1,9984,3072
    def forward(self, img: Tensor, vec: Tensor, pe: Tensor, mask=None, idx=0, style_block=None) -> Tensor:  # x 1,9984,3072 if 2 reg embeds, 1,9472,3072 if none    # 9216x4096 = 16x1536x1536
        # Single (shift, scale, gate) triple; second tuple element is None.
        mod, _ = self.modulation(vec)
        img_norm = self.pre_norm(img)
        img_norm = style_block.img(img_norm, "attn_norm")
        img_norm = (1 + mod.scale) * img_norm + mod.shift  # mod => vec
        img_norm = style_block.img(img_norm, "attn_norm_mod")
        ### ATTN ###
        # One fused projection yields both the qkv and the MLP input.
        qkv, mlp = torch.split(self.linear1(img_norm), [3*self.hidden_size, self.mlp_hidden_dim], dim=-1)
        q, k, v = qkv.view(qkv.shape[0], qkv.shape[1], 3, self.num_heads, -1).permute(2, 0, 3, 1, 4)  #q, k, v = rearrange(qkv, "B L (K H D) -> K B H L D", K=3, H=self.num_heads)
        q = style_block.img.ATTN(q, "q_proj")
        k = style_block.img.ATTN(k, "k_proj")
        v = style_block.img.ATTN(v, "v_proj")
        q, k = self.norm(q, k, v)
        q = style_block.img.ATTN(q, "q_norm")
        k = style_block.img.ATTN(k, "k_norm")
        attn = attention(q, k, v, pe=pe, mask=mask)
        attn = style_block.img.ATTN(attn, "out")
        ### ATTN ###
        mlp = style_block.img(mlp, "ff_norm")
        mlp_act = self.mlp_act(mlp)
        mlp_act = style_block.img(mlp_act, "ff_norm_mod")
        img_ff_i = self.linear2(torch.cat((attn, mlp_act), 2))  # effectively FF smooshed into one line
        img_ff_i = style_block.img(img_ff_i, "ff")
        # Gate, then in-place residual add on the caller's tensor.
        img_ff_i *= mod.gate
        img_ff_i = style_block.img(img_ff_i, "ff_gated")
        img += img_ff_i
        img = style_block.img(img, "ff_res")
        return img
class LastLayer(nn.Module):
def __init__(self, hidden_size: int, patch_size: int, out_channels: int, dtype=None, device=None, operations=None):
super().__init__()
self.norm_final = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device)
self.linear = operations.Linear(hidden_size, patch_size * patch_size * out_channels, bias=True, dtype=dtype, device=device)
self.adaLN_modulation = nn.Sequential(nn.SiLU(), operations.Linear(hidden_size, 2 * hidden_size, bias=True, dtype=dtype, device=device))
def forward(self, x: Tensor, vec: Tensor) -> Tensor:
shift, scale = self.adaLN_modulation(vec).chunk(2, dim=1)
x = (1 + scale[:, None, :]) * self.norm_final(x) + shift[:, None, :]
x = self.linear(x)
return x
def forward_scale_shift(self, x: Tensor, vec: Tensor) -> Tensor:
shift, scale = self.adaLN_modulation(vec).chunk(2, dim=1)
x = (1 + scale[:, None, :]) * self.norm_final(x) + shift[:, None, :]
return x
def forward_linear(self, x: Tensor, vec: Tensor) -> Tensor:
x = self.linear(x)
return x
================================================
FILE: flux/math.py
================================================
import torch
from einops import rearrange
from torch import Tensor
from comfy.ldm.modules.attention import attention_pytorch
import comfy.model_management
import math
def attention(q: Tensor, k: Tensor, v: Tensor, pe: Tensor, mask=None) -> Tensor:
    """Rotate q/k by the rope table `pe`, then run comfy's SDPA wrapper.

    q/k/v are (B, heads, L, head_dim); skip_reshape=True keeps that layout.
    """
    q, k = apply_rope(q, k, pe)
    num_heads = q.shape[1]
    return attention_pytorch(q, k, v, num_heads, skip_reshape=True, mask=mask)
def rope(pos: Tensor, dim: int, theta: int) -> Tensor:
    """Precompute the 2x2 rotation matrices for rotary position embedding.

    :param pos: (..., n) tensor of positions.
    :param dim: per-axis embedding dim (must be even; dim//2 frequencies).
    :param theta: rope base frequency.
    :return: float32 tensor of shape (..., n, dim//2, 2, 2) on pos's device.
    """
    assert dim % 2 == 0
    # MPS/XPU/DirectML lack the fp64 support used below; compute on CPU there.
    if comfy.model_management.is_device_mps(pos.device) or comfy.model_management.is_intel_xpu() or comfy.model_management.is_directml_enabled():
        device = torch.device("cpu")
    else:
        device = pos.device
    exponents = torch.linspace(0, (dim - 2) / dim, steps=dim//2, dtype=torch.float64, device=device)
    inv_freq = 1.0 / (theta**exponents)
    angles = torch.einsum("...n,d->...nd", pos.to(dtype=torch.float32, device=device), inv_freq)
    mats = torch.stack([torch.cos(angles), -torch.sin(angles), torch.sin(angles), torch.cos(angles)], dim=-1)
    mats = rearrange(mats, "b n d (i j) -> b n d i j", i=2, j=2)
    return mats.to(dtype=torch.float32, device=pos.device)
def apply_rope(xq: Tensor, xk: Tensor, freqs_cis: Tensor):
    """Rotate query/key channel pairs by the precomputed matrices in freqs_cis.

    Each consecutive channel pair (x0, x1) is multiplied by a 2x2 rotation;
    math is done in float32 and cast back to the input dtype.
    """
    def _rotate(x: Tensor) -> Tensor:
        pairs = x.float().reshape(*x.shape[:-1], -1, 1, 2)
        rotated = freqs_cis[..., 0] * pairs[..., 0] + freqs_cis[..., 1] * pairs[..., 1]
        return rotated.reshape(*x.shape).type_as(x)
    return _rotate(xq), _rotate(xk)
================================================
FILE: flux/model.py
================================================
# Adapted from: https://github.com/black-forest-labs/flux
import torch
import torch.nn.functional as F
from torch import Tensor, nn
from typing import Optional, Callable, Tuple, Dict, List, Any, Union
from ..helper import ExtraOptions
from dataclasses import dataclass
import copy
from .layers import (
DoubleStreamBlock,
EmbedND,
LastLayer,
MLPEmbedder,
SingleStreamBlock,
timestep_embedding,
)
from . import layers
#from comfy.ldm.flux.layers import timestep_embedding
from comfy.ldm.flux.model import Flux as Flux
import math
import einops
from einops import rearrange, repeat
import comfy.ldm.common_dit
from ..latents import tile_latent, untile_latent, gaussian_blur_2d, median_blur_2d
from ..style_transfer import apply_scattersort_masked, apply_scattersort_tiled, adain_seq_inplace, adain_patchwise_row_batch_med, adain_patchwise_row_batch, StyleMMDiT_Model
#from ..latents import interpolate_spd
@dataclass
class FluxParams:
    # Hyperparameters for ReFlux. A typical Flux-dev configuration (from the
    # comment where self.params is assigned): in/out_channels=16,
    # vec_in_dim=768, context_in_dim=4096, hidden_size=3072, mlp_ratio=4.0,
    # num_heads=24, depth=19, depth_single_blocks=38, axes_dim=[16, 56, 56],
    # theta=10000, patch_size=2, qkv_bias=True.
    in_channels        : int    # latent channels before patchification
    out_channels       : int    # latent channels of the output
    vec_in_dim         : int    # CLIP pooled vector dim (y input)
    context_in_dim     : int    # T5 context embedding dim (txt input)
    hidden_size        : int    # transformer width
    mlp_ratio          : float  # MLP hidden dim = hidden_size * mlp_ratio
    num_heads          : int    # attention heads; head_dim = hidden_size // num_heads
    depth              : int    # number of double-stream blocks
    depth_single_blocks: int    # number of single-stream blocks
    axes_dim           : list   # per-axis rope dims; must sum to head_dim
    theta              : int    # rope base frequency
    patch_size         : int    # spatial patch size (2 for Flux)
    qkv_bias           : bool   # bias on qkv projections
    guidance_embed     : bool   # whether the model has a distilled-guidance embedder
class ReFlux(Flux):
    def __init__(self, image_model=None, final_layer=True, dtype=None, device=None, operations=None, **kwargs):
        # NOTE(review): super().__init__() is called with no arguments, so the
        # comfy Flux base __init__ runs with its defaults (if any) and every
        # submodule is rebuilt below from FluxParams(**kwargs) — confirm the
        # base class tolerates argument-less construction.
        super().__init__()
        self.dtype = dtype
        self.timestep = -1.0        # sentinel; updated during sampling
        self.threshold_inv = False
        params = FluxParams(**kwargs)
        self.params = params  #self.params FluxParams(in_channels=16, out_channels=16, vec_in_dim=768, context_in_dim=4096, hidden_size=3072, mlp_ratio=4.0, num_heads=24, depth=19, depth_single_blocks=38, axes_dim=[16, 56, 56], theta=10000, patch_size=2, qkv_bias=True, guidance_embed=False)
        self.patch_size = params.patch_size
        # Channels after patchification: c * ph * pw.
        self.in_channels = params.in_channels * params.patch_size * params.patch_size    # in_channels 64
        self.out_channels = params.out_channels * params.patch_size * params.patch_size  # out_channels 64
        if params.hidden_size % params.num_heads != 0:
            raise ValueError(f"Hidden size {params.hidden_size} must be divisible by num_heads {params.num_heads}")
        pe_dim = params.hidden_size // params.num_heads
        if sum(params.axes_dim) != pe_dim:
            raise ValueError(f"Got {params.axes_dim} but expected positional dim {pe_dim}")
        self.hidden_size = params.hidden_size  # 3072
        self.num_heads = params.num_heads      # 24
        self.pe_embedder = EmbedND(dim=pe_dim, theta=params.theta, axes_dim=params.axes_dim)
        self.img_in = operations.Linear(self.in_channels, self.hidden_size, bias=True, dtype=dtype, device=device)  # in_features=  64, out_features=3072
        self.txt_in = operations.Linear(params.context_in_dim, self.hidden_size, dtype=dtype, device=device)        # in_features=4096, out_features=3072, bias=True
        self.time_in = MLPEmbedder(in_dim=256, hidden_dim=self.hidden_size, dtype=dtype, device=device, operations=operations)
        self.vector_in = MLPEmbedder(params.vec_in_dim, self.hidden_size, dtype=dtype, device=device, operations=operations)  # in_features=768, out_features=3072 (first layer) second layer 3072,3072
        self.guidance_in = (MLPEmbedder(in_dim=256, hidden_dim=self.hidden_size, dtype=dtype, device=device, operations=operations) if params.guidance_embed else nn.Identity())
        # idx=_ records each block's position for style/debug hooks.
        self.double_blocks = nn.ModuleList([DoubleStreamBlock(self.hidden_size, self.num_heads, mlp_ratio=params.mlp_ratio, qkv_bias=params.qkv_bias, dtype=dtype, device=device, operations=operations, idx=_) for _ in range(params.depth)])
        self.single_blocks = nn.ModuleList([SingleStreamBlock(self.hidden_size, self.num_heads, mlp_ratio=params.mlp_ratio, dtype=dtype, device=device, operations=operations, idx=_) for _ in range(params.depth_single_blocks)])
        if final_layer:
            self.final_layer = layers.LastLayer(self.hidden_size, 1, self.out_channels, dtype=dtype, device=device, operations=operations)
    def forward_blocks(self,
                       img      : Tensor,
                       img_ids  : Tensor,
                       txt      : Tensor,
                       txt_ids  : Tensor,
                       timesteps: Tensor,
                       y        : Tensor,
                       guidance : Tensor = None,
                       control             = None,
                       update_cross_attn   = None,
                       transformer_options = {},  # NOTE(review): mutable default — safe only if never mutated in-place
                       UNCOND : bool = False,
                       SIGMA = None,
                       StyleMMDiT_Model = None,
                       RECON_MODE=False,
                       ) -> Tensor:
        """Run the patchified sequences through all double/single blocks.

        Regional conditioning: `weight`/`floor` (negated values from
        transformer_options) select, per layer index, whether to use the full
        regional attention mask, a text-rows/cols-only mask (mask_zero), or a
        mask with the img-img quadrant forced open (mask_tmp). Also applies
        controlnet residuals and, when present, PuLID cross-attention.
        """
        if img.ndim != 3 or txt.ndim != 3:
            raise ValueError("Input img and txt tensors must have 3 dimensions.")
        # running on sequences img    img -> 1,4096,3072
        img = self.img_in(img)  # 1,9216,64 == 768x192       # 1,9216,64   == 1,16,128,256 + 1,16,64,64    # 1,8192,64 with uncond/cond   #:,:,64 -> :,:,3072
        vec = self.time_in(timestep_embedding(timesteps, 256).to(img.dtype))  # 1 -> 1,3072
        if self.params.guidance_embed:
            if guidance is None:
                print("Guidance strength is none, not using distilled guidance.")
            else:
                vec = vec + self.guidance_in(timestep_embedding(guidance, 256).to(img.dtype))
        vec = vec + self.vector_in(y)  #y.shape=1,768  y==all 0s
        txt = self.txt_in(txt)
        ids = torch.cat((txt_ids, img_ids), dim=1)  # img_ids.shape=1,8192,3   txt_ids.shape=1,512,3   #ids.shape=1,8704,3
        pe = self.pe_embedder(ids)  # pe.shape 1,1,8704,64,2,2
        # Negate so positive option values select the "late layers" branches below.
        weight = -1 * transformer_options.get("regional_conditioning_weight", 0.0)
        floor = -1 * transformer_options.get("regional_conditioning_floor", 0.0)
        mask_zero = None
        mask = None
        text_len = txt.shape[1]
        # Pick the regional attention mask for this pass; mask_zero keeps only
        # the text rows/columns of it (img-img interactions fully open).
        # NOTE(review): mask is moved to hardcoded 'cuda' — breaks CPU/MPS runs.
        if not UNCOND and 'AttnMask' in transformer_options:
            AttnMask = transformer_options['AttnMask']
            mask = transformer_options['AttnMask'].attn_mask.mask.to('cuda')
            if mask_zero is None:
                mask_zero = torch.ones_like(mask)
                img_len = transformer_options['AttnMask'].img_len
                mask_zero[:text_len, :] = mask[:text_len, :]
                mask_zero[:, :text_len] = mask[:, :text_len]
            if weight == 0:
                mask = None
        if UNCOND and 'AttnMask_neg' in transformer_options:
            AttnMask = transformer_options['AttnMask_neg']
            mask = transformer_options['AttnMask_neg'].attn_mask.mask.to('cuda')
            if mask_zero is None:
                mask_zero = torch.ones_like(mask)
                img_len = transformer_options['AttnMask_neg'].img_len
                mask_zero[:text_len, :] = mask[:text_len, :]
                mask_zero[:, :text_len] = mask[:, :text_len]
            if weight == 0:
                mask = None
        elif UNCOND and 'AttnMask' in transformer_options:
            # Uncond pass without a dedicated negative mask: reuse the positive one.
            AttnMask = transformer_options['AttnMask']
            mask = transformer_options['AttnMask'].attn_mask.mask.to('cuda')
            if mask_zero is None:
                mask_zero = torch.ones_like(mask)
                img_len = transformer_options['AttnMask'].img_len
                mask_zero[:text_len, :] = mask[:text_len, :]
                mask_zero[:, :text_len] = mask[:, :text_len]
            if weight == 0:
                mask = None
        # Non-boolean masks must match the activation dtype for additive masking.
        if mask is not None and not type(mask[0][0].item()) == bool:
            mask = mask.to(img.dtype)
        if mask_zero is not None and not type(mask_zero[0][0].item()) == bool:
            mask_zero = mask_zero.to(img.dtype)
        total_layers = len(self.double_blocks) + len(self.single_blocks)
        ca_idx = 0  # running index into self.pulid_ca across both block types
        for i, block in enumerate(self.double_blocks):
            # Select mask variant by layer fraction i/total_layers (see docstring).
            if weight > 0 and mask is not None and weight <= i/total_layers:
                img, txt = block(img=img, txt=txt, vec=vec, pe=pe, mask=mask_zero, idx=i, update_cross_attn=update_cross_attn)
            elif (weight < 0 and mask is not None and abs(weight) <= (1 - i/total_layers)):
                # Run masked and text-only variants side by side: keep txt from the
                # masked pass and img from the mask_zero pass.
                img_tmpZ, txt_tmpZ = img.clone(), txt.clone()
                img_tmpZ, txt = block(img=img_tmpZ, txt=txt_tmpZ, vec=vec, pe=pe, mask=mask, idx=i, update_cross_attn=update_cross_attn)
                img, txt_tmpZ = block(img=img, txt=txt, vec=vec, pe=pe, mask=mask_zero, idx=i, update_cross_attn=update_cross_attn)
            elif floor > 0 and mask is not None and floor >= i/total_layers:
                # Open the img-img quadrant entirely for the early layers.
                mask_tmp = mask.clone()
                mask_tmp[text_len:, text_len:] = 1.0
                img, txt = block(img=img, txt=txt, vec=vec, pe=pe, mask=mask_tmp, idx=i, update_cross_attn=update_cross_attn)
            elif floor < 0 and mask is not None and abs(floor) >= (1 - i/total_layers):
                mask_tmp = mask.clone()
                mask_tmp[text_len:, text_len:] = 1.0
                img, txt = block(img=img, txt=txt, vec=vec, pe=pe, mask=mask_tmp, idx=i, update_cross_attn=update_cross_attn)
            else:
                img, txt = block(img=img, txt=txt, vec=vec, pe=pe, mask=mask, idx=i, update_cross_attn=update_cross_attn)
            if control is not None:
                # Controlnet residual for this double block; applied to the first
                # batch element only (the cond sample).
                control_i = control.get("input")
                if i < len(control_i):
                    add = control_i[i]
                    if add is not None:
                        img[:1] += add
            if hasattr(self, "pulid_data"):
                if self.pulid_data:
                    if i % self.pulid_double_interval == 0:
                        for _, node_data in self.pulid_data.items():
                            # Only apply within the node's configured sigma range.
                            if torch.any((node_data['sigma_start'] >= timesteps) & (timesteps >= node_data['sigma_end'])):
                                img = img + node_data['weight'] * self.pulid_ca[ca_idx](node_data['embedding'], img)
                        ca_idx += 1
        img = torch.cat((txt, img), 1)  #first 256 is txt embed
        for i, block in enumerate(self.single_blocks):
            # Same layer-fraction logic, offset by the double-block count.
            if weight > 0 and mask is not None and weight <= (i+len(self.double_blocks))/total_layers:
                img = block(img, vec=vec, pe=pe, mask=mask_zero)
            elif weight < 0 and mask is not None and abs(weight) <= (1 - (i+len(self.double_blocks))/total_layers):
                img = block(img, vec=vec, pe=pe, mask=mask_zero)
            elif floor > 0 and mask is not None and floor >= (i+len(self.double_blocks))/total_layers:
                mask_tmp = mask.clone()
                mask_tmp[text_len:, text_len:] = 1.0
                img = block(img, vec=vec, pe=pe, mask=mask_tmp)
            elif floor < 0 and mask is not None and abs(floor) >= (1 - (i+len(self.double_blocks))/total_layers):
                mask_tmp = mask.clone()
                mask_tmp[text_len:, text_len:] = 1.0
                img = block(img, vec=vec, pe=pe, mask=mask_tmp)
            else:
                img = block(img, vec=vec, pe=pe, mask=mask)
            if control is not None:  # Controlnet
                control_o = control.get("output")
                if i < len(control_o):
                    add = control_o[i]
                    if add is not None:
                        # Residual applies only to the image part of the sequence.
                        img[:1, txt.shape[1] :, ...] += add
            if hasattr(self, "pulid_data"):
                # PuLID attention
                if self.pulid_data:
                    real_img, txt = img[:, txt.shape[1]:, ...], img[:, :txt.shape[1], ...]
                    if i % self.pulid_single_interval == 0:
                        # Will calculate influence of all nodes at once
                        for _, node_data in self.pulid_data.items():
                            if torch.any((node_data['sigma_start'] >= timesteps) & (timesteps >= node_data['sigma_end'])):
                                real_img = real_img + node_data['weight'] * self.pulid_ca[ca_idx](node_data['embedding'], real_img)
                        ca_idx += 1
                    img = torch.cat((txt, real_img), 1)
        # Drop the text tokens and decode image tokens to patch pixels.
        img = img[:, txt.shape[1] :, ...]
        img = self.final_layer(img, vec)  # (N, T, patch_size ** 2 * out_channels)   1,8192,3072 -> 1,8192,64
        return img
    def process_img(self, x, index=0, h_offset=0, w_offset=0):
        # Patchify a latent image and build its 3-channel positional ids
        # (slot index, row, col) for RoPE.
        #   x        : latent tensor (b, c, h, w)
        #   index    : image-slot tag written into id channel 0 (e.g. 1 marks a
        #              Kontext reference latent vs. 0 for the main image)
        #   h_offset,
        #   w_offset : pixel-space offsets shifting the id grid so reference
        #              images occupy distinct RoPE positions
        # Returns (img, img_ids): patch tokens (b, h_len*w_len, c*ph*pw) and
        # ids (b, h_len*w_len, 3).
        bs, c, h, w = x.shape
        patch_size = self.patch_size
        x = comfy.ldm.common_dit.pad_to_patch_size(x, (patch_size, patch_size))
        img = rearrange(x, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=patch_size, pw=patch_size)
        # Rounded-up patch-grid dimensions; offsets converted to grid units the same way.
        h_len = ((h + (patch_size // 2)) // patch_size)
        w_len = ((w + (patch_size // 2)) // patch_size)
        h_offset = ((h_offset + (patch_size // 2)) // patch_size)
        w_offset = ((w_offset + (patch_size // 2)) // patch_size)
        img_ids = torch.zeros((h_len, w_len, 3), device=x.device, dtype=x.dtype)
        # Channel 1 is still all-zero at this point, so this line just writes
        # `index` into channel 0 (reading channel 1 here is a harmless quirk).
        img_ids[:, :, 0] = img_ids[:, :, 1] + index
        img_ids[:, :, 1] = img_ids[:, :, 1] + torch.linspace(h_offset, h_len - 1 + h_offset, steps=h_len, device=x.device, dtype=x.dtype).unsqueeze(1)
        img_ids[:, :, 2] = img_ids[:, :, 2] + torch.linspace(w_offset, w_len - 1 + w_offset, steps=w_len, device=x.device, dtype=x.dtype).unsqueeze(0)
        return img, repeat(img_ids, "h w c -> b (h w) c", b=bs)
def _get_img_ids(self, x, bs, h_len, w_len, h_start, h_end, w_start, w_end):
img_ids = torch.zeros( (h_len, w_len, 3), device=x.device, dtype=x.dtype)
img_ids[..., 1] += torch.linspace(h_start, h_end - 1, steps=h_len, device=x.device, dtype=x.dtype)[:, None]
img_ids[..., 2] += torch.linspace(w_start, w_end - 1, steps=w_len, device=x.device, dtype=x.dtype)[None, :]
img_ids = repeat(img_ids, "h w c -> b (h w) c", b=bs)
return img_ids
    def forward(self,
                x,
                timestep,
                context,
                y,
                guidance,
                ref_latents=None,
                control = None,
                transformer_options = {},
                mask = None,
                **kwargs
                ):
        """Patched Flux forward pass with regional attention masks, StyleMMDiT style
        transfer, Kontext reference latents, ControlNet, PuLID, and post-hoc style
        guidance (scattersort / AdaIN / WCT / WCT2) on the denoised estimate.

        Returns the noise prediction `eps` for the input latent batch `x`.
        NOTE(review): the trailing style-guidance section reads `UNCOND` after the
        cond/uncond loop, i.e. the value from the LAST iteration — presumably
        intentional (one cond type per call) but worth confirming.
        """
        t = timestep
        self.max_seq = (128 * 128) // (2 * 2)
        x_orig = x.clone()
        b, c, h, w = x.shape
        # patch-grid dimensions (rounded up to patch size)
        h_len = ((h + (self.patch_size // 2)) // self.patch_size) # h_len 96
        w_len = ((w + (self.patch_size // 2)) // self.patch_size) # w_len 96
        img_len = h_len * w_len
        # In this model the image tokens sit at the END of the sequence, text at the front.
        img_slice = slice(-img_len, None)   #slice(None, img_len)
        txt_slice = slice(None, -img_len)
        SIGMA = t[0].clone() #/ 1000
        EO = transformer_options.get("ExtraOptions", ExtraOptions(""))
        if EO is not None:
            EO.mute = True
        if EO("zero_heads"):
            HEADS = 0
        else:
            HEADS = 24
        # StyleMMDiT drives the style-transfer hooks in every block; consumed here, so
        # it is cleared from transformer_options to avoid re-use downstream.
        StyleMMDiT = transformer_options.get('StyleMMDiT', StyleMMDiT_Model())
        StyleMMDiT.set_len(h_len, w_len, img_slice, txt_slice, HEADS=HEADS)
        StyleMMDiT.Retrojector = self.Retrojector if hasattr(self, "Retrojector") else None
        transformer_options['StyleMMDiT'] = None
        x_tmp = transformer_options.get("x_tmp")
        if x_tmp is not None:
            x_tmp = x_tmp.expand(x.shape[0], -1, -1, -1).clone()
            img = comfy.ldm.common_dit.pad_to_patch_size(x_tmp, (self.patch_size, self.patch_size))
        else:
            img = comfy.ldm.common_dit.pad_to_patch_size(x, (self.patch_size, self.patch_size))
        y0_style, img_y0_style = None, None
        img_orig, t_orig, y_orig, context_orig = clone_inputs(img, t, y, context)
        # Regional conditioning strengths are negated: positive user weights select the
        # masked branches below via the sign checks.
        weight    = -1 * transformer_options.get("regional_conditioning_weight", 0.0)
        floor     = -1 * transformer_options.get("regional_conditioning_floor",  0.0)
        update_cross_attn = transformer_options.get("update_cross_attn")
        z_ = transformer_options.get("z_")   # initial noise and/or image+noise from start of rk_sampler_beta()
        rk_row = transformer_options.get("row") # for "smart noise"
        if z_ is not None:
            x_init = z_[rk_row].to(x)
        elif 'x_init' in transformer_options:
            x_init = transformer_options.get('x_init').to(x)
        # recon loop to extract exact noise pred for scattersort guide assembly
        RECON_MODE = StyleMMDiT.noise_mode == "recon"
        recon_iterations = 2 if StyleMMDiT.noise_mode == "recon" else 1
        for recon_iter in range(recon_iterations):
            y0_style = StyleMMDiT.guides
            y0_style_active = True if type(y0_style) == torch.Tensor else False
            RECON_MODE = True if StyleMMDiT.noise_mode == "recon" and recon_iter == 0 else False
            if StyleMMDiT.noise_mode == "recon" and recon_iter == 1:
                # second pass: rebuild the latent from the eps predicted in pass 0,
                # lure the denoised estimate toward the style guides, then re-noise
                x_recon = x_tmp if x_tmp is not None else x_orig
                noise_prediction = x_recon + (1-SIGMA.to(x_recon)) * eps.to(x_recon)
                denoised         = x_recon -    SIGMA.to(x_recon)  * eps.to(x_recon)
                denoised = StyleMMDiT.apply_recon_lure(denoised, y0_style)
                new_x = (1-SIGMA.to(denoised)) * denoised + SIGMA.to(denoised) * noise_prediction
                img_orig = img = comfy.ldm.common_dit.pad_to_patch_size(new_x, (self.patch_size, self.patch_size))
                x_init = noise_prediction
            elif StyleMMDiT.noise_mode == "bonanza":
                x_init = torch.randn_like(x_init)
            if y0_style_active:
                if y0_style.sum() == 0.0 and y0_style.std() == 0.0:
                    # all-zero guide acts as "use the input itself"
                    y0_style = img_orig.clone()
                else:
                    SIGMA_ADAIN       = (SIGMA * EO("eps_adain_sigma_factor", 1.0)).to(y0_style)
                    y0_style_noised   = (1-SIGMA_ADAIN) * y0_style + SIGMA_ADAIN * x_init[0:1].to(y0_style)   #always only use first batch of noise to avoid broadcasting
                    img_y0_style_orig = comfy.ldm.common_dit.pad_to_patch_size(y0_style_noised, (self.patch_size, self.patch_size))
            mask_zero = None
            out_list = []
            # one pass per cond type (0 = cond, 1 = uncond)
            for cond_iter in range(len(transformer_options['cond_or_uncond'])):
                UNCOND = transformer_options['cond_or_uncond'][cond_iter] == 1
                if update_cross_attn is not None:
                    update_cross_attn['UNCOND'] = UNCOND
                # batch grows by one row per style guide unless in recon pass 0
                bsz_style = y0_style.shape[0] if y0_style_active else 0
                bsz       = 1 if RECON_MODE else bsz_style + 1
                img, t, y, context = clone_inputs(img_orig, t_orig, y_orig, context_orig, index=cond_iter)
                mask = None
                # --- regional attention mask / regional prompt selection ---
                if not UNCOND and 'AttnMask' in transformer_options: # and weight != 0:
                    AttnMask = transformer_options['AttnMask']
                    mask = transformer_options['AttnMask'].attn_mask.mask.to('cuda')
                    if mask_zero is None:
                        # mask_zero keeps only the txt-txt quadrant of the regional mask
                        mask_zero = torch.ones_like(mask)
                        mask_zero[txt_slice, txt_slice] = mask[txt_slice, txt_slice]
                    if weight == 0:
                        context = transformer_options['RegContext'].context.to(context.dtype).to(context.device)
                        mask = None
                    else:
                        context = transformer_options['RegContext'].context.to(context.dtype).to(context.device)
                if UNCOND and 'AttnMask_neg' in transformer_options: # and weight != 0:
                    AttnMask = transformer_options['AttnMask_neg']
                    mask = transformer_options['AttnMask_neg'].attn_mask.mask.to('cuda')
                    if mask_zero is None:
                        mask_zero = torch.ones_like(mask)
                        mask_zero[txt_slice, txt_slice] = mask[txt_slice, txt_slice]
                    if weight == 0:
                        context = transformer_options['RegContext_neg'].context.to(context.dtype).to(context.device)
                        mask = None
                    else:
                        context = transformer_options['RegContext_neg'].context.to(context.dtype).to(context.device)
                elif UNCOND and 'AttnMask' in transformer_options:
                    AttnMask = transformer_options['AttnMask']
                    mask = transformer_options['AttnMask'].attn_mask.mask.to('cuda')
                    if mask_zero is None:
                        mask_zero = torch.ones_like(mask)
                        mask_zero[txt_slice, txt_slice] = mask[txt_slice, txt_slice]
                    if weight == 0:                                                                             # ADDED 5/23/2025
                        context = transformer_options['RegContext'].context.to(context.dtype).to(context.device) # ADDED 5/26/2025 14:53
                        mask = None
                    else:
                        # tile the uncond context up to the regional context's length
                        A       = context
                        B       = transformer_options['RegContext'].context
                        context = A.repeat(1, (B.shape[1] // A.shape[1]) + 1, 1)[:, :B.shape[1], :]
                # --- style-guide conditioning rows appended to the batch ---
                if y0_style_active and not RECON_MODE:
                    if mask is None:
                        context, y, _ = StyleMMDiT.apply_style_conditioning(
                            UNCOND = UNCOND,
                            base_context = context,
                            base_y = y,
                            base_llama3 = None,
                        )
                    else:
                        context = context.repeat(bsz_style + 1, 1, 1)
                        y = y.repeat(bsz_style + 1, 1) if y is not None else None
                    img_y0_style = img_y0_style_orig.clone()
                # boolean masks stay boolean; float masks get cast to the latent dtype
                if mask is not None and not type(mask[0][0].item()) == bool:
                    mask = mask.to(x.dtype)
                if mask_zero is not None and not type(mask_zero[0][0].item()) == bool:
                    mask_zero = mask_zero.to(x.dtype)
                # --- timestep / guidance / vector embeddings ---
                clip = self.time_in(timestep_embedding(t, 256).to(x.dtype)) # 1 -> 1,3072
                if self.params.guidance_embed:
                    if guidance is None:
                        print("Guidance strength is none, not using distilled guidance.")
                    else:
                        clip = clip + self.guidance_in(timestep_embedding(guidance, 256).to(x.dtype))
                clip = clip + self.vector_in(y[:,:self.params.vec_in_dim]) #y.shape=1,768  y==all 0s
                clip = clip.to(x)
                img_in_dtype = self.img_in.weight.data.dtype
                if img_in_dtype not in {torch.bfloat16, torch.float16, torch.float32, torch.float64}:
                    img_in_dtype = x.dtype
                # --- patchify + Kontext reference latents ---
                if ref_latents is not None:
                    # NOTE(review): h, w are clobbered here and later used for the
                    # final crop `output[:, :, :h, :w]` — confirm this matches the
                    # padded latent extent when references are present.
                    h = 0
                    w = 0
                    for ref in ref_latents:
                        h_offset = 0
                        w_offset = 0
                        if ref.shape[-2] + h > ref.shape[-1] + w:
                            w_offset = w
                        else:
                            h_offset = h
                        kontext, kontext_ids = self.process_img(ref, index=1, h_offset=h_offset, w_offset=w_offset)
                        #kontext = self.img_in(kontext.to(img_in_dtype))
                        img, img_ids = self.process_img(x)
                        img     = torch.cat([img,     kontext],     dim=1)
                        img_ids = torch.cat([img_ids, kontext_ids], dim=1)
                        h = max(h, ref.shape[-2] + h_offset)
                        w = max(w, ref.shape[-1] + w_offset)
                    img = self.img_in(img.to(img_in_dtype))
                    # sequence now carries main + kontext image tokens
                    img_slice = slice(-2*img_len, None)
                    StyleMMDiT.KONTEXT = 1
                    for style_block in StyleMMDiT.double_blocks + StyleMMDiT.single_blocks:
                        style_block.KONTEXT = 1
                        for style_block_imgtxt in [style_block.img, getattr(style_block, "txt")]:
                            style_block_imgtxt.KONTEXT = 1
                            style_block_imgtxt.ATTN.KONTEXT = 1
                    StyleMMDiT.datashock_ref = ref_latents[0]
                else:
                    img = rearrange(x, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=self.patch_size, pw=self.patch_size)
                    img = self.img_in(img.to(img_in_dtype))
                    img_ids = self._get_img_ids(img, bsz, h_len, w_len, 0, h_len, 0, w_len)
                # --- append style-guide image rows to the batch ---
                if y0_style_active and not RECON_MODE:
                    img_y0_style = rearrange(img_y0_style_orig, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=self.patch_size, pw=self.patch_size)
                    img_y0_style = self.img_in(img_y0_style.to(img_in_dtype))     # hidden_states 1,4032,2560  for 1024x1024: -> 1,4096,2560    ,64 -> ,2560 (x40)
                    if ref_latents is not None:
                        img_kontext = self.img_in(kontext.to(img_in_dtype))
                        #img_base = rearrange(x, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=self.patch_size, pw=self.patch_size)
                        #img_base = self.img_in(img_base.to(img_in_dtype))
                        #img_ids = self._get_img_ids(img, bsz, h_len, w_len, 0, h_len, 0, w_len)
                        img_ids      = img_ids     .repeat(bsz,1,1)
                        #img_y0_style = img_y0_style.repeat(1,bsz,1)    # torch.cat([img, img_y0_style], dim=0)
                        img_y0_style = torch.cat([img_y0_style, img_kontext.repeat(bsz-1,1,1)], dim=1)
                        StyleMMDiT.KONTEXT = 2
                        for style_block in StyleMMDiT.double_blocks + StyleMMDiT.single_blocks:
                            style_block.KONTEXT = 2
                            for style_block_imgtxt in [style_block.img, getattr(style_block, "txt")]:
                                style_block_imgtxt.KONTEXT = 2
                                style_block_imgtxt.ATTN.KONTEXT = 2
                        StyleMMDiT.datashock_ref = None
                    img = torch.cat([img, img_y0_style], dim=0)
                # txt_ids -> 1,414,3
                txt_ids = torch.zeros((bsz, context.shape[-2], 3), device=img.device, dtype=x.dtype)
                ids     = torch.cat((txt_ids, img_ids), dim=-2)   # ids -> 1,4446,3       # flipped from hidream
                rope    = self.pe_embedder(ids)                   # rope -> 1, 4446, 1, 64, 2, 2
                txt_init     = self.txt_in(context)
                txt_init_len = txt_init.shape[-2]   # 271
                img = StyleMMDiT(img, "proj_in")
                img = img.to(x) if img is not None else None
                total_layers = len(self.double_blocks) + len(self.single_blocks)
                # DOUBLE STREAM
                ca_idx = 0
                for bid, (block, style_block) in enumerate(zip(self.double_blocks, StyleMMDiT.double_blocks)):
                    txt = txt_init
                    # weight/floor gates swap in mask_zero or a floor-modified mask for
                    # a depth-dependent fraction of the blocks (regional anti-blur)
                    if   weight > 0 and mask is not None and     weight  <      bid/total_layers:
                        img, txt_init = block(img, txt, clip, rope, mask_zero, style_block=style_block)
                    elif (weight < 0 and mask is not None and abs(weight) < (1 - bid/total_layers)):
                        img_tmpZ, txt_tmpZ = img.clone(), txt.clone()
                        # more efficient than the commented lines below being used instead in the loop?
                        img_tmpZ, txt_init = block(img_tmpZ, txt_tmpZ, clip, rope, mask, style_block=style_block)
                        img     , txt_tmpZ = block(img     , txt     , clip, rope, mask_zero, style_block=style_block)
                    elif floor > 0 and mask is not None and     floor  >      bid/total_layers:
                        mask_tmp = mask.clone()
                        mask_tmp[img_slice,img_slice] = 1.0
                        img, txt_init = block(img, txt, clip, rope, mask_tmp, style_block=style_block)
                    elif floor < 0 and mask is not None and abs(floor) > (1 - bid/total_layers):
                        mask_tmp = mask.clone()
                        mask_tmp[img_slice,img_slice] = 1.0
                        img, txt_init = block(img, txt, clip, rope, mask_tmp, style_block=style_block)
                    elif update_cross_attn is not None and update_cross_attn['skip_cross_attn']:
                        img, txt_init = block(img, txt, clip, rope, mask, update_cross_attn=update_cross_attn)
                    else:
                        img, txt_init = block(img, txt, clip, rope, mask, update_cross_attn=update_cross_attn, style_block=style_block)
                    if control is not None:
                        control_i = control.get("input")
                        if bid < len(control_i):
                            add = control_i[bid]
                            if add is not None:
                                # only the primary (non-style) batch row gets the controlnet residual
                                img[:1] += add
                    if hasattr(self, "pulid_data"):
                        if self.pulid_data:
                            if bid % self.pulid_double_interval == 0:
                                for _, node_data in self.pulid_data.items():
                                    if torch.any((node_data['sigma_start'] >= timestep) & (timestep >= node_data['sigma_end'])):
                                        img = img + node_data['weight'] * self.pulid_ca[ca_idx](node_data['embedding'], img)
                                    ca_idx += 1
                # END DOUBLE STREAM
                #img      = img[0:1]
                #txt_init = txt_init[0:1]
                img = torch.cat([txt_init, img], dim=-2)   # 4032 + 271 -> 4303      # txt embed from double stream block   # flipped from hidream
                double_layers = len(self.double_blocks)
                # SINGLE STREAM
                for bid, (block, style_block) in enumerate(zip(self.single_blocks, StyleMMDiT.single_blocks)):
                    if   weight > 0 and mask is not None and     weight  <      (bid+double_layers)/total_layers:
                        img = block(img, clip, rope, mask_zero, style_block=style_block)
                    elif weight < 0 and mask is not None and abs(weight) < (1 - (bid+double_layers)/total_layers):
                        img = block(img, clip, rope, mask_zero, style_block=style_block)
                    elif floor > 0 and mask is not None and     floor  >      (bid+double_layers)/total_layers:
                        mask_tmp = mask.clone()
                        mask_tmp[img_slice,img_slice] = 1.0
                        img = block(img, clip, rope, mask_tmp, style_block=style_block)
                    elif floor < 0 and mask is not None and abs(floor) > (1 - (bid+double_layers)/total_layers):
                        mask_tmp = mask.clone()
                        mask_tmp[img_slice,img_slice] = 1.0
                        img = block(img, clip, rope, mask_tmp, style_block=style_block)
                    else:
                        img = block(img, clip, rope, mask, style_block=style_block)
                    if control is not None: # Controlnet
                        control_o = control.get("output")
                        if bid < len(control_o):
                            add = control_o[bid]
                            if add is not None:
                                img[:1, txt_slice, ...] += add
                    if hasattr(self, "pulid_data"):
                        # PuLID attention
                        if self.pulid_data:
                            real_img, txt = img[:, img_slice, ...], img[:, txt_slice, ...]
                            if bid % self.pulid_single_interval == 0:
                                # Will calculate influence of all nodes at once
                                for _, node_data in self.pulid_data.items():
                                    if torch.any((node_data['sigma_start'] >= timestep) & (timestep >= node_data['sigma_end'])):
                                        real_img = real_img + node_data['weight'] * self.pulid_ca[ca_idx](node_data['embedding'], real_img)
                                    ca_idx += 1
                            img = torch.cat((txt, real_img), 1)
                # END SINGLE STREAM
                img = img[..., img_slice, :]
                # final_layer applied in two stages so StyleMMDiT can hook "proj_out"
                # between the adaLN norm and the linear projection
                #img = self.final_layer(img, clip)   # 4096,2560 -> 4096,64
                shift, scale = self.final_layer.adaLN_modulation(clip).chunk(2,dim=1)
                img = (1 + scale[:, None, :]) * self.final_layer.norm_final(img) + shift[:, None, :]
                img = StyleMMDiT(img, "proj_out")
                if y0_style_active and not RECON_MODE:
                    # drop the style-guide rows; only the primary row is decoded
                    img = img[0:1]
                    #img = img[1:2]
                #img = self.final_layer.linear(img.to(self.final_layer.linear.weight.data))
                img = self.final_layer.linear(img)
                #img = self.unpatchify(img, img_sizes)
                img = img[:,:img_len]   # accommodate kontext
                img = rearrange(img, "b (h w) (c ph pw) -> b c (h ph) (w pw)", h=h_len, w=w_len, ph=self.patch_size, pw=self.patch_size)
                out_list.append(img)
            output = torch.cat(out_list, dim=0)
            eps = output[:, :, :h, :w]
            if recon_iter == 1:
                # convert the lured second-pass prediction back into eps w.r.t. the
                # ORIGINAL latent so the sampler sees a consistent noise estimate
                denoised = new_x - SIGMA.to(new_x) * eps.to(new_x)
                if x_tmp is not None:
                    eps = (x_tmp - denoised.to(x_tmp)) / SIGMA.to(x_tmp)
                else:
                    eps = (x_orig - denoised.to(x_orig)) / SIGMA.to(x_orig)
            # style-guidance options read each pass (values identical per pass)
            freqsep_lowpass_method = transformer_options.get("freqsep_lowpass_method")
            freqsep_sigma          = transformer_options.get("freqsep_sigma")
            freqsep_kernel_size    = transformer_options.get("freqsep_kernel_size")
            freqsep_inner_kernel_size    = transformer_options.get("freqsep_inner_kernel_size")
            freqsep_stride    = transformer_options.get("freqsep_stride")
            freqsep_lowpass_weight = transformer_options.get("freqsep_lowpass_weight")
            freqsep_highpass_weight= transformer_options.get("freqsep_highpass_weight")
            freqsep_mask           = transformer_options.get("freqsep_mask")
            y0_style_pos           = transformer_options.get("y0_style_pos")
            y0_style_neg           = transformer_options.get("y0_style_neg")
        # end recon loop
        self.style_dtype = torch.float32 if self.style_dtype is None else self.style_dtype
        dtype = eps.dtype if self.style_dtype is None else self.style_dtype
        # --- post-hoc style guidance on the positive (cond) prediction ---
        if y0_style_pos is not None:
            y0_style_pos_weight    = transformer_options.get("y0_style_pos_weight")
            y0_style_pos_synweight = transformer_options.get("y0_style_pos_synweight")
            y0_style_pos_synweight *= y0_style_pos_weight
            y0_style_pos_mask = transformer_options.get("y0_style_pos_mask")
            y0_style_pos_mask_edge = transformer_options.get("y0_style_pos_mask_edge")
            y0_style_pos = y0_style_pos.to(dtype)
            x   = x_orig.to(dtype)
            eps = eps.to(dtype)
            eps_orig = eps.clone()
            sigma = SIGMA #t_orig[0].to(torch.float32) / 1000
            denoised = x - sigma * eps
            # work in the img_in embedding space via the Retrojector (pseudo-inverse)
            denoised_embed = self.Retrojector.embed(denoised)
            y0_adain_embed = self.Retrojector.embed(y0_style_pos)
            if transformer_options['y0_style_method'] == "scattersort":
                tile_h, tile_w = transformer_options.get('y0_style_tile_height'), transformer_options.get('y0_style_tile_width')
                pad = transformer_options.get('y0_style_tile_padding')
                if pad is not None and tile_h is not None and tile_w is not None:
                    denoised_spatial = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)
                    y0_adain_spatial = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)
                    if EO("scattersort_median_LP"):
                        denoised_spatial_LP = median_blur_2d(denoised_spatial, kernel_size=EO("scattersort_median_LP",7))
                        y0_adain_spatial_LP = median_blur_2d(y0_adain_spatial, kernel_size=EO("scattersort_median_LP",7))
                        denoised_spatial_HP = denoised_spatial - denoised_spatial_LP
                        y0_adain_spatial_HP = y0_adain_spatial - y0_adain_spatial_LP
                        denoised_spatial_LP = apply_scattersort_tiled(denoised_spatial_LP, y0_adain_spatial_LP, tile_h, tile_w, pad)
                        denoised_spatial = denoised_spatial_LP + denoised_spatial_HP
                        denoised_embed = rearrange(denoised_spatial, "b c h w -> b (h w) c")
                    else:
                        denoised_spatial = apply_scattersort_tiled(denoised_spatial, y0_adain_spatial, tile_h, tile_w, pad)
                        denoised_embed = rearrange(denoised_spatial, "b c h w -> b (h w) c")
                else:
                    denoised_embed = apply_scattersort_masked(denoised_embed, y0_adain_embed, y0_style_pos_mask, y0_style_pos_mask_edge, h_len, w_len)
            elif transformer_options['y0_style_method'] == "AdaIN":
                if freqsep_mask is not None:
                    freqsep_mask = freqsep_mask.view(1, 1, *freqsep_mask.shape[-2:]).float()
                    freqsep_mask = F.interpolate(freqsep_mask.float(), size=(h_len, w_len), mode='nearest-exact')
                if hasattr(self, "adain_tile"):
                    # alternating half-tile offset between calls to hide tile seams
                    tile_h, tile_w = self.adain_tile
                    denoised_pretile = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)
                    y0_adain_pretile = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)
                    if self.adain_flag:
                        h_off = tile_h // 2
                        w_off = tile_w // 2
                        denoised_pretile = denoised_pretile[:,:,h_off:-h_off, w_off:-w_off]
                        self.adain_flag = False
                    else:
                        h_off = 0
                        w_off = 0
                        self.adain_flag = True
                    tiles,    orig_shape, grid, strides = tile_latent(denoised_pretile, tile_size=(tile_h,tile_w))
                    y0_tiles, orig_shape, grid, strides = tile_latent(y0_adain_pretile, tile_size=(tile_h,tile_w))
                    tiles_out = []
                    for i in range(tiles.shape[0]):
                        tile = tiles[i].unsqueeze(0)
                        y0_tile = y0_tiles[i].unsqueeze(0)
                        tile    = rearrange(tile,    "b c h w -> b (h w) c", h=tile_h, w=tile_w)
                        y0_tile = rearrange(y0_tile, "b c h w -> b (h w) c", h=tile_h, w=tile_w)
                        tile = adain_seq_inplace(tile, y0_tile)
                        tiles_out.append(rearrange(tile, "b (h w) c -> b c h w", h=tile_h, w=tile_w))
                    tiles_out_tensor = torch.cat(tiles_out, dim=0)
                    tiles_out_tensor = untile_latent(tiles_out_tensor, orig_shape, grid, strides)
                    if h_off == 0:
                        denoised_pretile = tiles_out_tensor
                    else:
                        denoised_pretile[:,:,h_off:-h_off, w_off:-w_off] = tiles_out_tensor
                    denoised_embed = rearrange(denoised_pretile, "b c h w -> b (h w) c", h=h_len, w=w_len)
                elif freqsep_lowpass_method is not None and freqsep_lowpass_method.endswith("pw"): #EO("adain_pw"):
                    denoised_spatial = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)
                    y0_adain_spatial = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)
                    if   freqsep_lowpass_method == "median_pw":
                        denoised_spatial_new = adain_patchwise_row_batch_med(denoised_spatial.clone(), y0_adain_spatial.clone().repeat(denoised_spatial.shape[0],1,1,1), sigma=freqsep_sigma, kernel_size=freqsep_kernel_size, use_median_blur=True, lowpass_weight=freqsep_lowpass_weight, highpass_weight=freqsep_highpass_weight)
                    elif freqsep_lowpass_method == "gaussian_pw":
                        denoised_spatial_new = adain_patchwise_row_batch(denoised_spatial.clone(), y0_adain_spatial.clone().repeat(denoised_spatial.shape[0],1,1,1), sigma=freqsep_sigma, kernel_size=freqsep_kernel_size)
                    denoised_embed = rearrange(denoised_spatial_new, "b c h w -> b (h w) c", h=h_len, w=w_len)
                elif freqsep_lowpass_method is not None:
                    # frequency-separated AdaIN: style lowpass + content highpass
                    denoised_spatial = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)
                    y0_adain_spatial = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)
                    if   freqsep_lowpass_method == "median":
                        denoised_spatial_LP = median_blur_2d(denoised_spatial, kernel_size=freqsep_kernel_size)
                        y0_adain_spatial_LP = median_blur_2d(y0_adain_spatial, kernel_size=freqsep_kernel_size)
                    elif freqsep_lowpass_method == "gaussian":
                        denoised_spatial_LP = gaussian_blur_2d(denoised_spatial, sigma=freqsep_sigma, kernel_size=freqsep_kernel_size)
                        y0_adain_spatial_LP = gaussian_blur_2d(y0_adain_spatial, sigma=freqsep_sigma, kernel_size=freqsep_kernel_size)
                    denoised_spatial_HP = denoised_spatial - denoised_spatial_LP
                    if EO("adain_fs_uhp"):
                        y0_adain_spatial_HP = y0_adain_spatial - y0_adain_spatial_LP
                        denoised_spatial_ULP = gaussian_blur_2d(denoised_spatial, sigma=EO("adain_fs_uhp_sigma", 1.0), kernel_size=EO("adain_fs_uhp_kernel_size", 3))
                        y0_adain_spatial_ULP = gaussian_blur_2d(y0_adain_spatial, sigma=EO("adain_fs_uhp_sigma", 1.0), kernel_size=EO("adain_fs_uhp_kernel_size", 3))
                        denoised_spatial_UHP = denoised_spatial_HP  - denoised_spatial_ULP
                        y0_adain_spatial_UHP = y0_adain_spatial_HP  - y0_adain_spatial_ULP
                        #denoised_spatial_HP  = y0_adain_spatial_ULP + denoised_spatial_UHP
                        denoised_spatial_HP  = denoised_spatial_ULP + y0_adain_spatial_UHP
                    denoised_spatial_new = freqsep_lowpass_weight * y0_adain_spatial_LP + freqsep_highpass_weight * denoised_spatial_HP
                    denoised_embed = rearrange(denoised_spatial_new, "b c h w -> b (h w) c", h=h_len, w=w_len)
                else:
                    denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed)
                    for adain_iter in range(EO("style_iter", 0)):
                        denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed)
                        denoised_embed = self.Retrojector.embed(self.Retrojector.unembed(denoised_embed))
                        denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed)
            elif transformer_options['y0_style_method'] == "WCT":
                self.StyleWCT.set(y0_adain_embed)
                denoised_embed = self.StyleWCT.get(denoised_embed)
                if transformer_options.get('y0_standard_guide') is not None:
                    y0_standard_guide = transformer_options.get('y0_standard_guide')
                    y0_standard_guide_embed = self.Retrojector.embed(y0_standard_guide)
                    f_cs = self.StyleWCT.get(y0_standard_guide_embed)
                    self.y0_standard_guide = self.Retrojector.unembed(f_cs)
                if transformer_options.get('y0_inv_standard_guide') is not None:
                    y0_inv_standard_guide = transformer_options.get('y0_inv_standard_guide')
                    y0_inv_standard_guide_embed = self.Retrojector.embed(y0_inv_standard_guide)
                    f_cs = self.StyleWCT.get(y0_inv_standard_guide_embed)
                    self.y0_inv_standard_guide = self.Retrojector.unembed(f_cs)
            elif transformer_options['y0_style_method'] == "WCT2":
                self.WaveletStyleWCT.set(y0_adain_embed, h_len, w_len)
                denoised_embed = self.WaveletStyleWCT.get(denoised_embed, h_len, w_len)
                if transformer_options.get('y0_standard_guide') is not None:
                    y0_standard_guide = transformer_options.get('y0_standard_guide')
                    y0_standard_guide_embed = self.Retrojector.embed(y0_standard_guide)
                    f_cs = self.WaveletStyleWCT.get(y0_standard_guide_embed, h_len, w_len)
                    self.y0_standard_guide = self.Retrojector.unembed(f_cs)
                if transformer_options.get('y0_inv_standard_guide') is not None:
                    y0_inv_standard_guide = transformer_options.get('y0_inv_standard_guide')
                    y0_inv_standard_guide_embed = self.Retrojector.embed(y0_inv_standard_guide)
                    f_cs = self.WaveletStyleWCT.get(y0_inv_standard_guide_embed, h_len, w_len)
                    self.y0_inv_standard_guide = self.Retrojector.unembed(f_cs)
            denoised_approx = self.Retrojector.unembed(denoised_embed)
            # blend the styled eps back toward the original by the user weights
            eps = (x - denoised_approx) / sigma
            if not UNCOND:
                if eps.shape[0] == 2:
                    eps[1] = eps_orig[1] + y0_style_pos_weight * (eps[1] - eps_orig[1])
                    eps[0] = eps_orig[0] + y0_style_pos_synweight * (eps[0] - eps_orig[0])
                else:
                    eps[0] = eps_orig[0] + y0_style_pos_weight * (eps[0] - eps_orig[0])
            elif eps.shape[0] == 1 and UNCOND:
                eps[0] = eps_orig[0] + y0_style_pos_synweight * (eps[0] - eps_orig[0])
            #eps = eps.float()
        # --- post-hoc style guidance on the negative (uncond) prediction ---
        if y0_style_neg is not None:
            y0_style_neg_weight    = transformer_options.get("y0_style_neg_weight")
            y0_style_neg_synweight = transformer_options.get("y0_style_neg_synweight")
            y0_style_neg_synweight *= y0_style_neg_weight
            y0_style_neg_mask = transformer_options.get("y0_style_neg_mask")
            y0_style_neg_mask_edge = transformer_options.get("y0_style_neg_mask_edge")
            y0_style_neg = y0_style_neg.to(dtype)
            x   = x_orig.to(dtype)
            eps = eps.to(dtype)
            eps_orig = eps.clone()
            sigma = SIGMA #t_orig[0].to(torch.float32) / 1000
            denoised = x - sigma * eps
            denoised_embed = self.Retrojector.embed(denoised)
            y0_adain_embed = self.Retrojector.embed(y0_style_neg)
            if transformer_options['y0_style_method'] == "scattersort":
                tile_h, tile_w = transformer_options.get('y0_style_tile_height'), transformer_options.get('y0_style_tile_width')
                pad = transformer_options.get('y0_style_tile_padding')
                if pad is not None and tile_h is not None and tile_w is not None:
                    denoised_spatial = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)
                    y0_adain_spatial = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)
                    denoised_spatial = apply_scattersort_tiled(denoised_spatial, y0_adain_spatial, tile_h, tile_w, pad)
                    denoised_embed = rearrange(denoised_spatial, "b c h w -> b (h w) c")
                else:
                    denoised_embed = apply_scattersort_masked(denoised_embed, y0_adain_embed, y0_style_neg_mask, y0_style_neg_mask_edge, h_len, w_len)
            elif transformer_options['y0_style_method'] == "AdaIN":
                denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed)
                for adain_iter in range(EO("style_iter", 0)):
                    denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed)
                    denoised_embed = self.Retrojector.embed(self.Retrojector.unembed(denoised_embed))
                    denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed)
            elif transformer_options['y0_style_method'] == "WCT":
                self.StyleWCT.set(y0_adain_embed)
                denoised_embed = self.StyleWCT.get(denoised_embed)
            elif transformer_options['y0_style_method'] == "WCT2":
                self.WaveletStyleWCT.set(y0_adain_embed, h_len, w_len)
                denoised_embed = self.WaveletStyleWCT.get(denoised_embed, h_len, w_len)
            denoised_approx = self.Retrojector.unembed(denoised_embed)
            if UNCOND:
                eps = (x - denoised_approx) / sigma
                eps[0] = eps_orig[0] + y0_style_neg_weight * (eps[0] - eps_orig[0])
                if eps.shape[0] == 2:
                    eps[1] = eps_orig[1] + y0_style_neg_synweight * (eps[1] - eps_orig[1])
            elif eps.shape[0] == 1 and not UNCOND:
                eps[0] = eps_orig[0] + y0_style_neg_synweight * (eps[0] - eps_orig[0])
            #eps = eps.float()
        if EO("model_eps_out"):
            self.eps_out = eps.clone()
        return eps
def expand_timesteps(self, t, batch_size, device):
if not torch.is_tensor(t):
is_mps = device.type == "mps"
if isinstance(t, float):
dtype = torch.float32 if is_mps else torch.float64
else:
dtype = torch.int32 if is_mps else torch.int64
t = Tensor([t], dtype=dtype, device=device)
elif len(t.shape) == 0:
t = t[None].to(device)
# broadcast to batch dimension in a way that's compatible with ONNX/Core ML
t = t.expand(batch_size)
return t
def clone_inputs(*args, index: int=None):
    """Clone each tensor argument, optionally selecting one batch row.

    With index=None every tensor is cloned whole; otherwise row `index` is
    taken and re-batched via unsqueeze(0) before cloning. Returns a tuple in
    the same order as the inputs.
    """
    cloned = []
    for arg in args:
        if index is None:
            cloned.append(arg.clone())
        else:
            cloned.append(arg[index].unsqueeze(0).clone())
    return tuple(cloned)
================================================
FILE: flux/redux.py
================================================
import torch
import comfy.ops
import torch.nn
import torch.nn.functional as F
ops = comfy.ops.manual_cast
class ReReduxImageEncoder(torch.nn.Module):
    """Flux Redux image encoder (SigLIP vision embeds -> txt-conditioning space)
    with an extra `feature_match` pass that restyles existing conditioning to
    match a CLIP-vision image via AdaIN or WCT in the pre-projection space.
    """
    def __init__(
        self,
        redux_dim: int = 1152,
        txt_in_features: int = 4096,
        device=None,
        dtype=None,
    ) -> None:
        super().__init__()
        self.redux_dim = redux_dim
        self.device = device
        self.dtype = dtype
        # dtype used for the pinv/WCT math in feature_match (None -> input dtype)
        self.style_dtype = None
        # up-projection to 3x txt width, then down to txt_in_features
        self.redux_up = ops.Linear(redux_dim, txt_in_features * 3, dtype=dtype)
        self.redux_down = ops.Linear(txt_in_features * 3, txt_in_features, dtype=dtype)

    def forward(self, sigclip_embeds) -> torch.Tensor:
        # Standard Redux projection: up -> SiLU -> down.
        projected_x = self.redux_down(torch.nn.functional.silu(self.redux_up(sigclip_embeds)))
        return projected_x

    def feature_match(self, cond, clip_vision_output, mode="WCT"):
        """Restyle `cond[0][0]` to match the statistics of a CLIP-vision image.

        Works in the intermediate (pre-redux_down) space: the conditioning is
        mapped back through a cached pseudo-inverse of redux_down, matched
        against the (resized) image features via AdaIN or WCT, then re-projected.
        Returns the modified cond wrapped in a 1-tuple (ComfyUI convention).
        NOTE(review): uses .cuda() for the pinv/eigh computations — assumes a
        CUDA device is available.
        """
        sigclip_embeds = clip_vision_output.last_hidden_state
        dense_embed = torch.nn.functional.silu(self.redux_up(sigclip_embeds))
        # assumes a square token grid (t_sqrt**2 tokens) — TODO confirm for the
        # vision models in use
        t_sqrt = int(dense_embed.shape[-2] ** 0.5)
        dense_embed_sq = dense_embed.view(dense_embed.shape[-3], t_sqrt, t_sqrt, dense_embed.shape[-1])
        # resize the image-feature grid to the conditioning's token grid
        t_cond_sqrt = int(cond[0][0].shape[-2] ** 0.5)
        dense_embed256 = F.interpolate(dense_embed_sq.transpose(-3,-1), size=(t_cond_sqrt,t_cond_sqrt), mode="bicubic")
        dense_embed256 = dense_embed256.flatten(-2,-1).transpose(-2,-1)
        dtype = self.style_dtype if hasattr(self, "style_dtype") and self.style_dtype is not None else dense_embed.dtype
        pinv_dtype = torch.float32 if dtype != torch.float64 else dtype
        W = self.redux_down.weight.data.to(dtype)   # shape [2560, 64]
        b = self.redux_down.bias.data.to(dtype)     # shape [2560]
        cond_256 = cond[0][0].clone()
        # cache the pseudo-inverse of redux_down's weight across calls
        if not hasattr(self, "W_pinv"):
            self.W_pinv = torch.linalg.pinv(W.to(pinv_dtype).cuda()).to(W)
        #cond_256_embed = (cond_256 - b) @ torch.linalg.pinv(W.to(pinv_dtype)).T.to(dtype)
        # invert the affine redux_down layer: x ≈ (y - b) @ pinv(W).T
        cond_embed256 = (cond_256 - b.to(cond_256)) @ self.W_pinv.T.to(cond_256)
        if mode == "AdaIN":
            cond_embed256 = adain_seq_inplace(cond_embed256, dense_embed256)
            #for adain_iter in range(EO("style_iter", 0)):
            #    cond_embed256 = adain_seq_inplace(cond_embed256, dense_embed256)
            #    cond_embed256 = (cond_embed256 - b) @ torch.linalg.pinv(W.to(pinv_dtype)).T.to(dtype)
            #    cond_embed256 = F.linear(cond_embed256         .to(W), W, b).to(img)
            #    cond_embed256 = adain_seq_inplace(cond_embed256, dense_embed256)
        elif mode == "WCT":
            # recompute the style coloring transform only when the style changed
            if not hasattr(self, "dense_embed256") or self.dense_embed256 is None or self.dense_embed256.shape != dense_embed256.shape or torch.norm(self.dense_embed256 - dense_embed256) > 0:
                self.dense_embed256 = dense_embed256
                f_s = dense_embed256[0].clone()
                self.mu_s = f_s.mean(dim=0, keepdim=True)
                f_s_centered = f_s - self.mu_s
                cov = (f_s_centered.T.double() @ f_s_centered.double()) / (f_s_centered.size(0) - 1)
                S_eig, U_eig = torch.linalg.eigh((cov + 1e-5 * torch.eye(cov.size(0), dtype=cov.dtype, device=cov.device)).cuda())
                S_eig = S_eig.to(cov)
                U_eig = U_eig.to(cov)
                S_eig_sqrt = S_eig.clamp(min=0).sqrt()   # eigenvalues -> singular values
                whiten = U_eig @ torch.diag(S_eig_sqrt) @ U_eig.T
                # NB: despite the local name, this is the COLORING matrix (cov^{1/2})
                self.y0_color = whiten.to(f_s_centered)
            # whiten each conditioning batch row, then color with style statistics
            for wct_i in range(cond_embed256.shape[-3]):
                f_c = cond_embed256[wct_i].clone()
                mu_c = f_c.mean(dim=0, keepdim=True)
                f_c_centered = f_c - mu_c
                cov = (f_c_centered.T.double() @ f_c_centered.double()) / (f_c_centered.size(0) - 1)
                S_eig, U_eig = torch.linalg.eigh((cov + 1e-5 * torch.eye(cov.size(0), dtype=cov.dtype, device=cov.device)).cuda())
                S_eig = S_eig.to(cov)
                U_eig = U_eig.to(cov)
                inv_sqrt_eig = S_eig.clamp(min=0).rsqrt()
                whiten = U_eig @ torch.diag(inv_sqrt_eig) @ U_eig.T
                whiten = whiten.to(f_c_centered)
                f_c_whitened = f_c_centered @ whiten.T
                f_cs = f_c_whitened @ self.y0_color.T + self.mu_s
                cond_embed256[wct_i] = f_cs
        # re-project the matched features through redux_down
        cond[0][0] = self.redux_down(cond_embed256)
        return (cond,)
def adain_seq_inplace(content: torch.Tensor, style: torch.Tensor, eps: float = 1e-7) -> torch.Tensor:
    """Adaptive instance normalization over the sequence dimension, in place.

    `content` (B, T, C) is renormalized so its per-channel mean/std along
    dim 1 match those of `style`. `content` is modified in place and also
    returned; `eps` guards against division by a zero std.
    """
    c_mean = content.mean(1, keepdim=True)
    c_std = content.std(1, keepdim=True) + eps
    s_mean = style.mean(1, keepdim=True)
    s_std = style.std(1, keepdim=True) + eps
    content.sub_(c_mean).div_(c_std)
    content.mul_(s_std).add_(s_mean)
    return content
def adain_seq(content: torch.Tensor, style: torch.Tensor, eps: float = 1e-7) -> torch.Tensor:
    """Out-of-place AdaIN over the sequence dimension (dim 1).

    Returns a new tensor whose per-channel statistics along dim 1 match
    `style`; `content` is left untouched. `eps` stabilizes both stds.
    """
    normalized = (content - content.mean(1, keepdim=True)) / (content.std(1, keepdim=True) + eps)
    return normalized * (style.std(1, keepdim=True) + eps) + style.mean(1, keepdim=True)
================================================
FILE: helper.py
================================================
import torch
import torch.nn.functional as F
from typing import Optional, Callable, Tuple, Dict, Any, Union, TYPE_CHECKING, TypeVar, List
import re
import functools
import copy
from comfy.samplers import SCHEDULER_NAMES
from .res4lyf import RESplain
# EXTRA_OPTIONS OPS
class ExtraOptions():
    """Parser for a newline-separated "extra options" string of bare flags and
    key=value pairs (e.g. "my_flag\nsteps=10\nweights=1,2,3"), used by the
    RES4LYF samplers to tweak behavior without dedicated node inputs."""
    def __init__(self, extra_options):
        self.extra_options = extra_options
        self.mute = False  # when True, suppress "Set extra_option" log lines

    # debugMode 0: Follow self.mute only
    # debugMode 1: Print with debug flag if not muted
    # debugMode 2: Never print
    def __call__(self, option, default=None, ret_type=None, match_all_flags=False, debugMode=0):
        """Look up `option` in the stored options string.

        With default=None this is a flag test and returns a bool. Otherwise the
        matched value is cast to `ret_type` (inferred from `default` when not
        given); list defaults trigger comma-separated parsing. `option` may be a
        tuple/list of aliases: any (or all, with match_all_flags=True) must match.
        """
        if isinstance(option, (tuple, list)):
            if match_all_flags:
                return all(self(single_option, default, ret_type) for single_option in option)
            else:
                return any(self(single_option, default, ret_type) for single_option in option)

        if default is None:  # get flag
            # Matches "option" alone on a line, or "option=..." (value ignored).
            pattern = rf"^(?:{re.escape(option)}\s*$|{re.escape(option)}=)"
            return bool(re.search(pattern, self.extra_options, flags=re.MULTILINE))
        elif ret_type is None:
            ret_type = type(default)
            if ret_type.__module__ != "builtins":
                # Non-builtin default: resolve the matched string as an attribute
                # of the default's module. NOTE(review): __import__ on a dotted
                # module name returns the top-level package — verify for defaults
                # from nested modules.
                mod = __import__(default.__module__)
                ret_type = lambda v: getattr(mod, v, None)

        if ret_type == list:
            pattern = rf"^{re.escape(option)}\s*=\s*([a-zA-Z0-9_.,+-]+)\s*$"
            match = re.search(pattern, self.extra_options, flags=re.MULTILINE)
            if match:
                value = match.group(1)
                if not self.mute and debugMode != 2:
                    if debugMode == 1:
                        RESplain("Set extra_option: ", option, "=", value, debug=True)
                    else:
                        RESplain("Set extra_option: ", option, "=", value)
            else:
                value = default
            if type(value) == str:
                value = value.split(',')
                # Element type comes from the default list's first entry, which
                # may be a type object itself rather than an instance.
                if type(default[0]) == type:
                    ret_type = default[0]
                else:
                    ret_type = type(default[0])
                value = [ret_type(value[_]) for _ in range(len(value))]
        else:
            pattern = rf"^{re.escape(option)}\s*=\s*([a-zA-Z0-9_.+-]+)\s*$"
            match = re.search(pattern, self.extra_options, flags=re.MULTILINE)
            if match:
                if ret_type == bool:
                    # bool("False") would be True; parse common truthy spellings.
                    value_str = match.group(1).lower()
                    value = value_str in ("true", "1", "yes", "on")
                else:
                    value = ret_type(match.group(1))
                if not self.mute and debugMode != 2:
                    if debugMode == 1:
                        RESplain("Set extra_option: ", option, "=", value, debug=True)
                    else:
                        RESplain("Set extra_option: ", option, "=", value)
            else:
                value = default

        return value
def extra_options_flag(flag, extra_options):
    """Return True when `flag` appears in `extra_options`, either alone on a
    line or as the key of a `flag=value` assignment."""
    escaped = re.escape(flag)
    pattern = rf"^(?:{escaped}\s*$|{escaped}=)"
    return re.search(pattern, extra_options, flags=re.MULTILINE) is not None
def get_extra_options_kv(key, default, extra_options, ret_type=None):
    """Find `key=value` in the multiline `extra_options` string and return the
    value cast to `ret_type` (inferred from `default` when not given); return
    the cast `default` when the key is absent."""
    if ret_type is None:
        ret_type = type(default)
    pattern = rf"^{re.escape(key)}\s*=\s*([a-zA-Z0-9_.+-]+)\s*$"
    match = re.search(pattern, extra_options, flags=re.MULTILINE)
    raw = default if match is None else match.group(1)
    return ret_type(raw)
def get_extra_options_list(key, default, extra_options, ret_type=None):
    """Parse `key=v1,v2,...` from `extra_options` into a list with elements cast
    to `ret_type` (inferred from the first default element when not given).
    A scalar `default` is promoted to a one-element list."""
    if not isinstance(default, list):
        default = [default]
    if ret_type is None:
        ret_type = type(default[0])
    pattern = rf"^{re.escape(key)}\s*=\s*([a-zA-Z0-9_.,+-]+)\s*$"
    match = re.search(pattern, extra_options, flags=re.MULTILINE)
    raw = match.group(1) if match else default
    if isinstance(raw, str):
        raw = raw.split(',')
    return [ret_type(item) for item in raw]
class OptionsManager:
    """Accumulates and merges multiple sampler option dictionaries.

    Later dictionaries take priority over earlier ones, except for keys in
    APPEND_OPTIONS (string options concatenated with newlines), nested dicts
    (deep-merged), and the FrameWeightsManager special case (weight configs
    merged). The merged view is cached and invalidated on every add.
    """
    # String-valued keys whose values are appended (newline-joined) rather
    # than overwritten during merging.
    APPEND_OPTIONS = {"extra_options"}

    def __init__(self, options, **kwargs):
        self.options_list = []
        if options is not None:
            self.options_list.append(options)
        # Accept any number of additional dicts via options*-named kwargs.
        for key, value in kwargs.items():
            if key.startswith('options') and value is not None:
                self.options_list.append(value)
        self._merged_dict = None  # cache for the `merged` property

    def add_option(self, option):
        """Add a single options dictionary"""
        if option is not None:
            self.options_list.append(option)
            self._merged_dict = None  # invalidate cached merged options

    @property
    def merged(self):
        """Get merged options with proper priority handling"""
        if self._merged_dict is None:
            self._merged_dict = {}
            # Collect appendable string options separately, joined at the end.
            special_string_options = {
                key: [] for key in self.APPEND_OPTIONS
            }
            for options_dict in self.options_list:
                if options_dict is not None:
                    for key, value in options_dict.items():
                        if key in self.APPEND_OPTIONS and value:
                            special_string_options[key].append(value)
                        elif isinstance(value, dict):
                            # Deep merge dictionaries
                            if key not in self._merged_dict:
                                self._merged_dict[key] = {}
                            if isinstance(self._merged_dict[key], dict):
                                self._deep_update(self._merged_dict[key], value)
                            else:
                                self._merged_dict[key] = value.copy()
                        # Special case for FrameWeightsManager
                        elif key == "frame_weights_mgr" and hasattr(value, "_weight_configs"):
                            if key not in self._merged_dict:
                                self._merged_dict[key] = copy.deepcopy(value)
                            else:
                                existing_mgr = self._merged_dict[key]
                                # Non-default device/dtype from the newer manager wins.
                                if hasattr(value, "device") and value.device != torch.device('cpu'):
                                    existing_mgr.device = value.device
                                if hasattr(value, "dtype") and value.dtype != torch.float64:
                                    existing_mgr.dtype = value.dtype
                                # Merge all weight_configs
                                if hasattr(value, "_weight_configs"):
                                    for name, config in value._weight_configs.items():
                                        config_kwargs = config.copy()
                                        existing_mgr.add_weight_config(name, **config_kwargs)
                        else:
                            self._merged_dict[key] = value
            # append special case string options (e.g. extra_options)
            for key, value in special_string_options.items():
                if value:
                    self._merged_dict[key] = "\n".join(value)
        return self._merged_dict

    def update(self, key_or_dict, value=None, append=False):
        """Update options with a single key-value pair or a dictionary.

        A string key may be a dotted path ("a.b.c") which is expanded into
        nested dicts; a dict argument is flattened and applied key by key.
        """
        if value is not None or isinstance(key_or_dict, (str, list)):
            # single key-value update
            key_path = key_or_dict
            if isinstance(key_path, str):
                key_path = key_path.split('.')
            update_dict = {}
            current = update_dict
            for i, key in enumerate(key_path[:-1]):
                current[key] = {}
                current = current[key]
            current[key_path[-1]] = value
            self.add_option(update_dict)
        else:
            # dictionary update
            flat_updates = {}
            def _flatten_dict(d, prefix=""):
                for key, value in d.items():
                    full_key = f"{prefix}.{key}" if prefix else key
                    if isinstance(value, dict):
                        _flatten_dict(value, full_key)
                    else:
                        flat_updates[full_key] = value
            _flatten_dict(key_or_dict)
            for key_path, value in flat_updates.items():
                self.update(key_path, value)  # Recursive call
        return self

    def get(self, key, default=None):
        # Dict-style lookup on the merged view.
        return self.merged.get(key, default)

    def _deep_update(self, target_dict, source_dict):
        # Recursively merge source_dict into target_dict in place.
        for key, value in source_dict.items():
            if isinstance(value, dict) and key in target_dict and isinstance(target_dict[key], dict):
                # recursive dict update
                self._deep_update(target_dict[key], value)
            else:
                target_dict[key] = value

    def __getitem__(self, key):
        """Allow dictionary-like access to options"""
        return self.merged[key]

    def __contains__(self, key):
        """Allow 'in' operator for options"""
        return key in self.merged

    def as_dict(self):
        """Return the merged options as a dictionary"""
        return self.merged.copy()

    def __bool__(self):
        """Return True if there are any options"""
        return len(self.options_list) > 0 and any(opt is not None for opt in self.options_list)

    def debug_print_options(self):
        # Dump every collected dict (pre-merge) through the debug logger.
        for i, options_dict in enumerate(self.options_list):
            RESplain(f"Options {i}:", debug=True)
            if options_dict is not None:
                for key, value in options_dict.items():
                    RESplain(f"  {key}: {value}", debug=True)
            else:
                RESplain("  None", "\n", debug=True)
# MISCELLANEOUS OPS
def has_nested_attr(obj, attr_path):
    """Return True if `obj` exposes the full dotted attribute chain in
    `attr_path` (e.g. "inner_model.model_config")."""
    current = obj
    for name in attr_path.split('.'):
        if not hasattr(current, name):
            return False
        current = getattr(current, name)
    return True
def safe_get_nested(d, keys, default=None):
    """Walk nested dicts following `keys`; return `default` as soon as the
    path hits a non-dict or a missing key."""
    node = d
    for key in keys:
        if not isinstance(node, dict):
            return default
        node = node.get(key, default)
    return node
class AlwaysTrueList:
    """Sentinel collection used for 'all' range specs: membership tests always
    succeed, and iteration yields an endless stream of True."""
    def __contains__(self, _item):
        # Every candidate counts as present.
        return True

    def __iter__(self):
        # Infinite generator; callers are expected to bound their iteration.
        while True:
            yield True  # kapow
def parse_range_string(s):
    """Parse a comma-separated number list into ints/floats ('.' selects float).
    The literal 'all' anywhere in the string yields an AlwaysTrueList."""
    if "all" in s:
        return AlwaysTrueList()
    values = []
    for token in s.split(','):
        token = token.strip()
        if not token:
            continue
        values.append(float(token) if '.' in token else int(token))
    return values
def parse_range_string_int(s):
    """Parse comma-separated ints and inclusive ranges ('2-5' -> 2,3,4,5).
    The literal 'all' anywhere in the string yields an AlwaysTrueList."""
    if "all" in s:
        return AlwaysTrueList()
    values = []
    for token in s.split(','):
        if '-' in token:
            lo, hi = token.split('-')
            values.extend(range(int(lo), int(hi) + 1))
        elif token.strip():
            values.append(int(token))
    return values
def parse_tile_sizes(tile_sizes: str):
    """
    Converts multiline string like:
        "1024,1024\n768,1344\n1344,768"
    into:
        [(1024, 1024), (768, 1344), (1344, 768)]
    Blank lines are skipped.
    """
    pairs = []
    for line in tile_sizes.strip().splitlines():
        line = line.strip()
        if line:
            pairs.append(tuple(int(part) for part in line.split(',')))
    return pairs
# COMFY OPS
def is_video_model(model):
    """Heuristically detect whether `model` wraps a video diffusion model.

    Inspects model.inner_model.inner_model.model_config.unet_config['image_model']
    for known video-architecture substrings ('video', 'cosmos', 'wan2', 'ltxv').

    Returns False when the attribute chain or config key is missing (e.g. for
    wrappers that don't expose a ComfyUI model_config). The original used a
    bare `except: pass`, which also swallowed KeyboardInterrupt/SystemExit;
    this catches only the expected lookup failures.
    """
    try:
        image_model = model.inner_model.inner_model.model_config.unet_config['image_model']
    except (AttributeError, KeyError, TypeError):
        return False
    return any(tag in image_model for tag in ('video', 'cosmos', 'wan2', 'ltxv'))
def is_RF_model(model):
    """Return True when the wrapped model uses rectified-flow sampling
    (ComfyUI's CONST model_sampling class)."""
    from comfy import model_sampling
    sampling = model.inner_model.inner_model.model_sampling
    return isinstance(sampling, model_sampling.CONST)
def get_res4lyf_scheduler_list():
    """Return ComfyUI's scheduler names plus RES4LYF's extra 'beta57' entry
    (appended only if not already present); the source list is not mutated."""
    names = list(SCHEDULER_NAMES)
    if "beta57" not in names:
        names.append("beta57")
    return names
def move_to_same_device(*tensors):
    """Move every tensor onto the device of the first one; returns a tuple.
    An empty call returns the empty tuple unchanged."""
    if len(tensors) == 0:
        return tensors
    anchor = tensors[0]
    return tuple(t.to(anchor.device) for t in tensors)
def conditioning_set_values(conditioning, values=None):
    """Return a copy of `conditioning` with `values` merged into each entry.

    Args:
        conditioning: list of [tensor, options_dict] pairs (ComfyUI format).
        values: mapping of keys to set on every entry's options dict.
            (The original signature used a mutable default `values={}`;
            a None sentinel is safer and backward compatible.)

    Returns:
        A new list; each options dict is shallow-copied so the input
        conditioning is never mutated.
    """
    if values is None:
        values = {}
    result = []
    for t in conditioning:
        opts = t[1].copy()
        opts.update(values)
        result.append([t[0], opts])
    return result
# MISC OPS
def initialize_or_scale(tensor, value, steps):
    """Return `value * tensor` when a tensor is provided; otherwise build a
    fresh length-`steps` tensor filled with `value`."""
    if tensor is not None:
        return value * tensor
    return torch.full((steps,), value)
def pad_tensor_list_to_max_len(tensors: List[torch.Tensor], dim: int = -2) -> List[torch.Tensor]:
    """Zero-pad each tensor in `tensors` along `dim` up to their common maximum length.

    Args:
        tensors: tensors with identical shapes except (possibly) along `dim`.
        dim: dimension to pad; negative indices allowed.

    Returns:
        A new list; tensors already at the max length are passed through
        unchanged, shorter ones are extended with zeros (same dtype/device).
        An empty input returns an empty list (previously `max()` raised
        ValueError on an empty sequence).
    """
    if not tensors:
        return []
    max_len = max(t.shape[dim] for t in tensors)
    padded = []
    for t in tensors:
        deficit = max_len - t.shape[dim]
        if deficit > 0:
            pad_shape = list(t.shape)
            pad_shape[dim] = deficit
            zeros = torch.zeros(*pad_shape, dtype=t.dtype, device=t.device)
            t = torch.cat((t, zeros), dim=dim)
        padded.append(t)
    return padded
class PrecisionTool:
    """Decorator factory that casts every tensor argument of a wrapped function
    to a configured precision ('fp64'/'fp32'/'fp16') and moves them to a common
    device (taken from the first tensor argument found)."""
    def __init__(self, cast_type='fp64'):
        self.cast_type = cast_type

    def cast_tensor(self, func):
        """Wrap `func` so all tensor args/kwargs (including tensors nested in
        dicts) are cast to self.cast_type before the call."""
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            # Unknown cast types disable casting entirely.
            if self.cast_type not in ['fp64', 'fp32', 'fp16']:
                return func(*args, **kwargs)
            # Target device comes from the first tensor positional arg,
            # falling back to the first tensor kwarg; may remain None.
            target_device = None
            for arg in args:
                if torch.is_tensor(arg):
                    target_device = arg.device
                    break
            if target_device is None:
                for v in kwargs.values():
                    if torch.is_tensor(v):
                        target_device = v.device
                        break

            # recursively zs_recast tensors in nested dictionaries
            def cast_and_move_to_device(data):
                if torch.is_tensor(data):
                    if self.cast_type == 'fp64':
                        return data.to(torch.float64).to(target_device)
                    elif self.cast_type == 'fp32':
                        return data.to(torch.float32).to(target_device)
                    elif self.cast_type == 'fp16':
                        return data.to(torch.float16).to(target_device)
                elif isinstance(data, dict):
                    return {k: cast_and_move_to_device(v) for k, v in data.items()}
                return data

            new_args = [cast_and_move_to_device(arg) for arg in args]
            new_kwargs = {k: cast_and_move_to_device(v) for k, v in kwargs.items()}
            return func(*new_args, **new_kwargs)
        return wrapper

    def set_cast_type(self, new_value):
        # Invalid values silently fall back to fp64.
        if new_value in ['fp64', 'fp32', 'fp16']:
            self.cast_type = new_value
        else:
            self.cast_type = 'fp64'
# Shared module-level instance; precision can be changed via set_cast_type().
precision_tool = PrecisionTool(cast_type='fp64')
class FrameWeightsManager:
    """Builds per-frame weight schedules for video-model guidance.

    Each named configuration describes how a weight curve over `num_frames`
    frames is produced: from an explicit tensor, from a parsed per-step custom
    string, or from a procedural schedule ('constant', 'linear', 'ease_out',
    'ease_in', 'middle', 'trough') shaped by `schedule`, `scale`, and
    `is_reversed`.
    """
    def __init__(self):
        self._weight_configs = {}
        # Template copied for every newly named configuration.
        self._default_config = {
            "frame_weights": None,          # Tensor of weights if directly specified
            "dynamics": "linear",           # Function type for dynamic period
            "schedule": "moderate_early",   # Schedule type
            "scale": 0.5,                   # Amount of change
            "is_reversed": False,           # Whether to reverse weights
            "custom_string": None,          # Per-configuration custom string
        }
        self.dtype = torch.float64
        self.device = torch.device('cpu')

    def set_device_and_dtype(self, device=None, dtype=None):
        """Set the device and dtype for generated weights"""
        if device is not None:
            self.device = device
        if dtype is not None:
            self.dtype = dtype
        return self

    def set_custom_weights(self, config_name, weights):
        """Set custom weights for a specific configuration"""
        if config_name not in self._weight_configs:
            self._weight_configs[config_name] = self._default_config.copy()
        self._weight_configs[config_name]["frame_weights"] = weights
        return self

    def add_weight_config(self, name, **kwargs):
        """Create or update the named configuration with any known keys."""
        if name not in self._weight_configs:
            self._weight_configs[name] = self._default_config.copy()
        for key, value in kwargs.items():
            if key in self._default_config:
                self._weight_configs[name][key] = value
            # ignore unknown parameters
        return self

    def get_weight_config(self, name):
        """Return a copy of the named configuration, or None if absent."""
        if name not in self._weight_configs:
            return None
        return self._weight_configs[name].copy()

    def get_frame_weights_by_name(self, name, num_frames, step=None):
        """Generate the weight tensor for configuration `name` at `step`;
        falls back to an all-ones tensor when nothing produced weights."""
        config = self.get_weight_config(name)
        if config is None:
            return None
        weights_tensor = self._generate_frame_weights(
            num_frames,
            config["dynamics"],
            config["schedule"],
            config["scale"],
            config["is_reversed"],
            config["frame_weights"],
            step=step,
            custom_string=config["custom_string"]
        )
        if config["custom_string"] is not None and config["custom_string"].strip() != "" and weights_tensor is not None:
            # ensure that the custom_string has more than just lines that begin with non-numeric characters
            custom_string = config["custom_string"].strip()
            custom_string = re.sub(r"^[^0-9].*", "", custom_string, flags=re.MULTILINE)
            custom_string = re.sub(r"^\s*$", "", custom_string, flags=re.MULTILINE)
            if custom_string.strip() != "":
                # If the custom_string is not empty, show the custom weights
                formatted_weights = [f"{w:.2f}" for w in weights_tensor.tolist()]
                RESplain(f"Custom '{name}' for step {step}: {formatted_weights}", debug=True)
        elif weights_tensor is None:
            weights_tensor = torch.ones(num_frames, dtype=self.dtype, device=self.device)
        return weights_tensor

    def _generate_custom_weights(self, num_frames, custom_string, step=None):
        """
        Generate custom weights based on the provided frame weights from a string with one line per step.

        Args:
            num_frames: Number of frames to generate weights for
            custom_string: The custom weights string to parse
            step: Specific step to use (0-indexed). If None, uses the last line.

        Features:
            - Each line represents weights for one step
            - Add *[multiplier] at the end of a line to scale those weights (e.g., "1.0, 0.8, 0.6*1.5")
            - Include "interpolate" on its own line to interpolate each line to match num_frames
            - Prefix line with the steps to apply it to (e.g. "0-5: 1.0, 0.8, 0.6")

        Example:
            0-5:1.0, 0.8, 0.6, 0.4, 0.2, 0.0
            6-10:0.0, 0.2, 0.4, 0.6, 0.8, 1.0*1.5
            11-30:0.0, 0.5, 1.0, 0.5, 0.0, 0.0*0.8
            interpolate

        Returns None when the string cannot be parsed.
        """
        if custom_string is not None:
            interpolate_frames = "interpolate" in custom_string
            lines = custom_string.strip().split('\n')
            lines = [line for line in lines if line.strip() and not line.strip().startswith("interp")]
            if not lines:
                return None
            if step is not None:
                matching_line = None
                for line in lines:
                    # Check if line has a step range prefix
                    step_range_match = re.match(r'^(\d+)-(\d+):(.*)', line.strip())
                    if step_range_match:
                        start_step = int(step_range_match.group(1))
                        end_step = int(step_range_match.group(2))
                        if start_step <= step <= end_step:
                            matching_line = step_range_match.group(3).strip()
                if matching_line is not None:
                    weights_str = matching_line
                else:
                    # if no matching line, try to use the step number line or the last line
                    if step < len(lines):
                        line_index = step
                    else:
                        line_index = len(lines) - 1
                    if line_index < 0:
                        return None
                    weights_str = lines[line_index].strip()
                    if ":" in weights_str:
                        weights_str = weights_str.split(":", 1)[1].strip()
            else:
                # When no specific step is provided, use the last line
                line_index = len(lines) - 1
                weights_str = lines[line_index].strip()
                if ":" in weights_str:
                    weights_str = weights_str.split(":", 1)[1].strip()
            if not weights_str:
                return None
            multiplier = 1.0
            if "*" in weights_str:
                parts = weights_str.rsplit("*", 1)
                if len(parts) == 2:
                    weights_str = parts[0].strip()
                    try:
                        multiplier = float(parts[1].strip())
                    except ValueError as e:
                        RESplain(f"Invalid multiplier format: {parts[1]}")
            try:
                weights = [float(w.strip()) for w in weights_str.split(',')]
                weights_tensor = torch.tensor(weights, dtype=self.dtype, device=self.device)
                if multiplier != 1.0:
                    weights_tensor = weights_tensor * multiplier
                if interpolate_frames and len(weights_tensor) != num_frames:
                    if len(weights_tensor) > 1:
                        # NOTE(review): positions computed but unused; interpolate()
                        # is driven by `size` directly.
                        orig_positions = torch.linspace(0, 1, len(weights_tensor), dtype=self.dtype, device=self.device)
                        new_positions = torch.linspace(0, 1, num_frames, dtype=self.dtype, device=self.device)
                        weights_tensor = torch.nn.functional.interpolate(
                            weights_tensor.view(1, 1, -1),
                            size=num_frames,
                            mode='linear',
                            align_corners=True
                        ).squeeze()
                    else:
                        # If only one weight, repeat it for all frames
                        weights_tensor = weights_tensor.repeat(num_frames)
                else:
                    if len(weights_tensor) < num_frames:
                        # If fewer weights than frames, repeat the last weight
                        weights_tensor = torch.cat([
                            weights_tensor,
                            torch.full((num_frames - len(weights_tensor),), weights_tensor[-1],
                                       dtype=self.dtype, device=self.device)
                        ])
                    # Trim if too many weights
                    if len(weights_tensor) > num_frames:
                        weights_tensor = weights_tensor[:num_frames]
                return weights_tensor
            except (ValueError, IndexError) as e:
                RESplain(f"Error parsing custom frame weights: {e}")
                return None
        return None

    def _generate_frame_weights(self, num_frames, dynamics, schedule, scale, is_reversed, frame_weights, step=None, custom_string=None):
        """Core generator: custom-string overrides first, then explicit tensors,
        then a procedural schedule; finally applies reverse/multiplier/clamp."""
        # Look for the multiplier= parameter in the custom string and store it as a float value
        multiplier = None
        rate_factor = None
        start_change_factor = None
        if custom_string is not None:
            if "multiplier" in custom_string:
                multiplier_match = re.search(r"multiplier\s*=\s*([0-9.]+)", custom_string)
                if multiplier_match:
                    multiplier = float(multiplier_match.group(1))
                    # Remove the multiplier= from the custom string
                    custom_string = re.sub(r"multiplier\s*=\s*[0-9.]+", "", custom_string).strip()
                    RESplain(f"Custom multiplier detected: {multiplier}", debug=True)
            if "rate_factor" in custom_string:
                rate_factor_match = re.search(r"rate_factor\s*=\s*([0-9.]+)", custom_string)
                if rate_factor_match:
                    rate_factor = float(rate_factor_match.group(1))
                    # Remove the rate_factor= from the custom string
                    custom_string = re.sub(r"rate_factor\s*=\s*[0-9.]+", "", custom_string).strip()
                    RESplain(f"Custom rate factor detected: {rate_factor}", debug=True)
            if "start_change_factor" in custom_string:
                start_change_factor_match = re.search(r"start_change_factor\s*=\s*([0-9.]+)", custom_string)
                if start_change_factor_match:
                    start_change_factor = float(start_change_factor_match.group(1))
                    # Remove the start_change_factor= from the custom string
                    custom_string = re.sub(r"start_change_factor\s*=\s*[0-9.]+", "", custom_string).strip()
                    RESplain(f"Custom start change factor detected: {start_change_factor}", debug=True)
        if custom_string is not None and custom_string.strip() != "" and step is not None:
            custom_weights = self._generate_custom_weights(num_frames, custom_string, step)
            if custom_weights is not None:
                weights = custom_weights
                weights = torch.flip(weights, [0]) if is_reversed else weights
                return weights
            else:
                RESplain("custom frame weights failed to parse, doing the normal thing...", debug=True)
        # Schedule keywords ("fast"/"slow", "early"/"late") set defaults unless
        # overridden via rate_factor=/start_change_factor= in the custom string.
        if rate_factor is None:
            if "fast" in schedule:
                rate_factor = 0.25
            elif "slow" in schedule:
                rate_factor = 1.0
            else:  # moderate
                rate_factor = 0.5
        if start_change_factor is None:
            if "early" in schedule:
                start_change_factor = 0.0
            elif "late" in schedule:
                start_change_factor = 0.2
            else:
                start_change_factor = 0.0
        change_frames = max(round(num_frames * rate_factor), 2)
        change_start = round(num_frames * start_change_factor)
        low_value = 1.0 - scale
        if frame_weights is not None:
            # Explicit tensor: extend with its last value, then trim to length.
            weights = torch.cat([frame_weights, torch.full((num_frames,), frame_weights[-1])])
            weights = weights[:num_frames]
        else:
            if dynamics == "constant":
                weights = self._generate_constant_schedule(change_start, change_frames, low_value, num_frames)
            elif dynamics == "linear":
                weights = self._generate_linear_schedule(change_start, change_frames, low_value, num_frames)
            elif dynamics == "ease_out":
                weights = self._generate_easeout_schedule(change_start, change_frames, low_value, num_frames)
            elif dynamics == "ease_in":
                weights = self._generate_easein_schedule(change_start, change_frames, low_value, num_frames)
            elif dynamics == "middle":
                weights = self._generate_middle_schedule(change_start, change_frames, low_value, num_frames)
            elif dynamics == "trough":
                weights = self._generate_trough_schedule(change_start, change_frames, low_value, num_frames)
            else:
                raise ValueError(f"Invalid schedule: {dynamics}")
        if multiplier is None:
            multiplier = 1.0
        weights = torch.flip(weights, [0]) if is_reversed else weights
        weights = weights * multiplier
        # Clamp ceiling rises with the multiplier so amplified curves survive.
        weights = torch.clamp(weights, min=0.0, max=(max(1.0, multiplier)))
        weights = weights.to(dtype=self.dtype, device=self.device)
        return weights

    def _generate_constant_schedule(self, change_start, change_frames, low_value, num_frames):
        """constant schedule with the scale as the low weight"""
        return torch.ones(num_frames) * low_value

    def _generate_linear_schedule(self, change_start, change_frames, low_value, num_frames):
        """linear schedule from 1 to the low weight"""
        weights = torch.linspace(1, low_value, change_frames)
        weights = torch.cat([torch.full((change_start,), 1.0), weights])
        # Pad with the final value, then trim to exactly num_frames.
        weights = torch.cat([weights, torch.full((num_frames,), weights[-1])])
        weights = weights[:num_frames]
        return weights

    def _generate_easeout_schedule(self, change_start, change_frames, low_value, num_frames, k=4.0):
        """exponential schedule from 1 to the low weight"""
        change_frames = max(change_frames, 4)
        t = torch.linspace(0, 1, change_frames, dtype=self.dtype, device=self.device)
        weights = 1.0 - (1.0 - low_value) * (1.0 - torch.exp(-k * t))
        weights = torch.cat([torch.full((change_start,), 1.0), weights])
        weights = torch.cat([weights, torch.full((num_frames,), weights[-1])])
        weights = weights[:num_frames]
        return weights

    def _generate_easein_schedule(self, change_start, change_frames, low_value, num_frames):
        """a monomial power schedule from 1 to the low weight"""
        change_frames = max(change_frames, 4)
        t = torch.linspace(0, 1, change_frames, dtype=self.dtype, device=self.device)
        weights = 1 - (1 - low_value) * torch.pow(t, 2)
        # Prepend with change_start frames of 1.0
        weights = torch.cat([torch.full((change_start,), 1.0), weights])
        total_frames_to_pad = num_frames - len(weights)
        if (total_frames_to_pad > 1):
            # Soften the final step before the flat tail.
            mid_value_between_low_value_and_second_to_last_value = (weights[-2] + low_value) / 2.0
            weights[-1] = mid_value_between_low_value_and_second_to_last_value
        # Fill remaining with final value
        weights = torch.cat([weights, torch.full((num_frames,), weights[-1])])
        weights = weights[:num_frames]
        return weights

    def _generate_middle_schedule(self, change_start, change_frames, low_value, num_frames):
        """gaussian middle peaking schedule from 1 to the low weight"""
        change_frames = max(change_frames, 4)
        t = torch.linspace(0, 1, change_frames, dtype=self.dtype, device=self.device)
        weights = torch.exp(-0.5 * ((t - 0.5) / 0.2) ** 2)
        weights = weights / torch.max(weights)
        weights = low_value + (1 - low_value) * weights
        # Center the peak by padding both sides with the low value.
        total_frames_to_pad = num_frames - len(weights)
        pad_left = total_frames_to_pad // 2
        pad_right = total_frames_to_pad - pad_left
        weights = torch.cat([torch.full((pad_left,), low_value), weights, torch.full((pad_right,), low_value)])
        if change_start > 0:
            # Pad the beginning with the first value, and truncate to num_frames
            weights = torch.cat([torch.full((change_start,), low_value), weights])
            weights = weights[:num_frames]
        return weights

    def _generate_trough_schedule(self, change_start, change_frames, low_value, num_frames):
        """
        Trough schedule with both ends at 1 and the middle at the low weight.
        When change_start > 0, creates asymmetry with shorter decay at beginning and longer at end.
        """
        change_frames = max(change_frames, 4)
        # Calculate sigma based on change_frames - controls overall decay rate
        sigma = max(0.2, change_frames / num_frames)
        if change_start == 0:
            t = torch.linspace(-1, 1, num_frames, dtype=self.dtype, device=self.device)
        else:
            asymmetry_factor = min(0.5, change_start / num_frames)
            split_point = 0.5 - asymmetry_factor
            first_size = int(split_point * num_frames)
            first_size = max(1, first_size)  # at least one frame
            t1 = torch.linspace(-1, 0, first_size, dtype=self.dtype, device=self.device)
            second_size = num_frames - first_size
            t2 = torch.linspace(0, 1, second_size, dtype=self.dtype, device=self.device)
            t = torch.cat([t1, t2])
        # shape using Gaussian function
        trough = 1.0 - torch.exp(-0.5 * (t / sigma) ** 2)
        weights = low_value + (1.0 - low_value) * trough
        return weights
def check_projection_consistency(x, W, b):
    """Project `x` onto the affine subspace b + row-space(W) and measure how far
    the reconstruction drifts from the original.

    Returns (error, in_subspace): the reconstruction norm and whether it falls
    below the 1e-3 tolerance."""
    pseudo_inverse = torch.linalg.pinv(W.T)
    projected = (x - b) @ pseudo_inverse
    reconstructed = projected @ W.T + b
    err = torch.norm(x - reconstructed)
    return err, err < 1e-3
def get_max_dtype(device='cpu'):
    """Return the highest-precision float dtype usable on `device`.

    Apple MPS caps out at float32; elsewhere we probe float64 allocation and
    fall back to float32 if the backend rejects it."""
    if torch.backends.mps.is_available():
        return torch.float32
    try:
        torch.tensor([0.0], dtype=torch.float64, device=device)
    except (RuntimeError, TypeError):
        return torch.float32
    return torch.float64
================================================
FILE: helper_sigma_preview_image_preproc.py
================================================
import torch
import torch.nn.functional as F
from typing import Optional, Callable, Tuple, Dict, Any, Union
import numpy as np
import folder_paths
from PIL.PngImagePlugin import PngInfo
from PIL import Image
import json
import os
import random
import copy
from io import BytesIO
import matplotlib.pyplot as plt
import matplotlib
matplotlib.use('Agg') # use the Agg backend for non-interactive rendering... prevent crashes by not using tkinter (which requires running in the main thread)
from comfy.cli_args import args
import comfy.samplers
import comfy.utils
from nodes import MAX_RESOLUTION
from .beta.rk_method_beta import RK_Method_Beta
from .beta.rk_noise_sampler_beta import RK_NoiseSampler, NOISE_MODE_NAMES
from .helper import get_res4lyf_scheduler_list
from .sigmas import get_sigmas
from .images import image_resize
from .res4lyf import RESplain
class SaveImage:
    """ComfyUI node that writes IMAGE batches to the output directory as PNGs,
    embedding prompt/workflow metadata unless disabled via CLI args (mirrors
    the stock SaveImage node so subclasses can retarget the directory)."""
    def __init__(self):
        self.output_dir = folder_paths.get_output_directory()
        self.type = "output"       # result type reported back to the UI
        self.prefix_append = ""    # subclasses append a random suffix here
        self.compress_level = 4    # PNG zlib compression level

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "images": ("IMAGE", {"tooltip": "The images to save."}),
                "filename_prefix": ("STRING", {"default": "ComfyUI", "tooltip": "The prefix for the file to save. This may include formatting information such as %date:yyyy-MM-dd% or %Empty Latent Image.width% to include values from nodes."})
            },
            "hidden": {
                "prompt": "PROMPT", "extra_pnginfo": "EXTRA_PNGINFO"
            },
        }

    RETURN_TYPES = ()
    FUNCTION = "save_images"
    OUTPUT_NODE = True
    CATEGORY = "image"
    DESCRIPTION = "Saves the input images to your ComfyUI output directory."

    def save_images(self,
            images,
            filename_prefix = "ComfyUI",
            prompt = None,
            extra_pnginfo = None
            ):
        """Save each image in the batch as a numbered PNG; returns the UI
        payload listing the written files."""
        filename_prefix += self.prefix_append
        # Note: shape[1] is width, shape[0] is height in the (H, W, C) image tensor.
        full_output_folder, filename, counter, subfolder, filename_prefix = folder_paths.get_save_image_path(filename_prefix, self.output_dir, images[0].shape[1], images[0].shape[0])
        results = list()
        for (batch_number, image) in enumerate(images):
            # Convert [0,1] float tensor to 8-bit PIL image.
            i = 255. * image.cpu().numpy()
            img = Image.fromarray(np.clip(i, 0, 255).astype(np.uint8))
            metadata = None
            if not args.disable_metadata:
                metadata = PngInfo()
                if prompt is not None:
                    metadata.add_text("prompt", json.dumps(prompt))
                if extra_pnginfo is not None:
                    for x in extra_pnginfo:
                        metadata.add_text(x, json.dumps(extra_pnginfo[x]))
            filename_with_batch_num = filename.replace("%batch_num%", str(batch_number))
            file = f"{filename_with_batch_num}_{counter:05}_.png"
            img.save(os.path.join(full_output_folder, file), pnginfo=metadata, compress_level=self.compress_level)
            results.append({
                "filename": file,
                "subfolder": subfolder,
                "type": self.type
            })
            counter += 1
        return { "ui": { "images": results } }
# adapted from https://github.com/Extraltodeus/sigmas_tools_and_the_golden_scheduler
# adapted from https://github.com/Extraltodeus/sigmas_tools_and_the_golden_scheduler
class SigmasPreview(SaveImage):
    """Node that renders a SIGMAS tensor as a matplotlib line graph, saves it
    to the temp directory, and optionally prints the schedule as a table."""
    def __init__(self):
        self.output_dir = folder_paths.get_temp_directory()
        self.type = "temp"
        # Random suffix keeps preview filenames unique across runs.
        self.prefix_append = "_temp_" + ''.join(random.choice("abcdefghijklmnopqrstupvxyz1234567890") for x in range(5))
        self.compress_level = 4

    @classmethod
    def INPUT_TYPES(self):
        return {
            "required": {
                "sigmas": ("SIGMAS",),
                "print_as_list": ("BOOLEAN", {"default": False}),
                "line_color": ("STRING", {"default": "blue"}),
            },
        }

    RETURN_TYPES = ("IMAGE",)
    FUNCTION = "sigmas_preview"
    OUTPUT_NODE = True
    CATEGORY = 'RES4LYF/sigmas'

    @staticmethod
    def tensor_to_graph_image(tensor, color='blue'):
        """Plot a 1-D tensor as a line graph and return it as a PIL image."""
        plt.figure()
        plt.plot(tensor.numpy(), marker='o', linestyle='-', color=color)
        plt.title("Graph from Tensor")
        plt.xlabel("Step Number")
        plt.ylabel("Sigma Value")
        with BytesIO() as buf:
            plt.savefig(buf, format='png')
            buf.seek(0)
            # Copy so the image survives the buffer's context exit.
            image = Image.open(buf).copy()
        plt.close()
        return image

    def sigmas_preview(self, sigmas, print_as_list, line_color):
        """Render the sigma schedule to an image (and optionally a text table);
        returns the saved preview plus the image tensor."""
        if print_as_list:
            # Convert to list with 4 decimal places
            sigmas_list = [round(float(s), 4) for s in sigmas.tolist()]
            # Print header using RESplain
            RESplain("\n" + "="*60)
            RESplain("SIGMAS PREVIEW - PRINT LIST")
            RESplain("="*60)
            # Print basic stats
            RESplain(f"Total steps: {len(sigmas_list)}")
            RESplain(f"Min sigma: {min(sigmas_list):.4f}")
            RESplain(f"Max sigma: {max(sigmas_list):.4f}")
            # Print the clean sigma values
            RESplain(f"\nSigma values ({len(sigmas_list)} steps):")
            RESplain("-" * 40)
            # Print in rows of 5 for readability
            for i in range(0, len(sigmas_list), 5):
                row = sigmas_list[i:i+5]
                row_str = "  ".join(f"{val:8.4f}" for val in row)
                RESplain(f"Step {i:2d}-{min(i+4, len(sigmas_list)-1):2d}: {row_str}")
            # Calculate and print percentages (normalized 0-1)
            # NOTE(review): divides by (max - min) — a constant sigma schedule
            # would divide by zero; confirm upstream schedules always vary.
            sigmas_percentages = ((sigmas-sigmas.min())/(sigmas.max()-sigmas.min())).tolist()
            sigmas_percentages = [round(p, 4) for p in sigmas_percentages]
            RESplain(f"\nNormalized percentages (0.0-1.0):")
            RESplain("-" * 40)
            # Print step-by-step breakdown
            RESplain("Step | Sigma    | Normalized | Step Size")
            RESplain("-----|----------|------------|----------")
            for i, (sigma, pct) in enumerate(zip(sigmas_list, sigmas_percentages)):
                if i > 0:
                    step_size = sigmas_list[i-1] - sigma
                    RESplain(f"{i:4d} | {sigma:8.4f} | {pct:10.4f} | {step_size:8.4f}")
                else:
                    RESplain(f"{i:4d} | {sigma:8.4f} | {pct:10.4f} | {'--':>8}")
            RESplain("="*60 + "\n")
        sigmas_graph = self.tensor_to_graph_image(sigmas.cpu(), line_color)
        # PIL image -> float tensor in [0,1] with a leading batch dimension.
        numpy_image = np.array(sigmas_graph)
        numpy_image = numpy_image / 255.0
        tensor_image = torch.from_numpy(numpy_image)
        tensor_image = tensor_image.unsqueeze(0)
        images_tensor = torch.cat([tensor_image], 0)
        output = self.save_images(images_tensor, "SigmasPreview")
        output["result"] = (images_tensor,)
        return output
class VAEEncodeAdvanced:
    """Node that VAE-encodes up to two images plus a mask, with the working
    resolution taken either from explicit width/height or from one of the
    inputs (image_1/image_2/mask/latent); also emits a matching empty latent."""
    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "resize_to_input": (["false", "image_1", "image_2", "mask", "latent"], {"default": "false"},),
                "width": ("INT", {"default": 1024, "min": 0, "max": MAX_RESOLUTION, "step": 1, }),
                "height": ("INT", {"default": 1024, "min": 0, "max": MAX_RESOLUTION, "step": 1, }),
                "mask_channel": (["red", "green", "blue", "alpha"],),
                "invert_mask": ("BOOLEAN", {"default": False}),
                "latent_type": (["4_channels", "16_channels"], {"default": "16_channels",}),
            },
            "optional": {
                "image_1": ("IMAGE",),
                "image_2": ("IMAGE",),
                "mask": ("IMAGE",),
                "latent": ("LATENT",),
                "vae": ("VAE", ),
            }
        }

    RETURN_TYPES = ("LATENT",
                    "LATENT",
                    "MASK",
                    "LATENT",
                    "INT",
                    "INT",
                    )
    RETURN_NAMES = ("latent_1",
                    "latent_2",
                    "mask",
                    "empty_latent",
                    "width",
                    "height",
                    )
    FUNCTION = "main"
    CATEGORY = "RES4LYF/vae"

    def main(self,
            width,
            height,
            resize_to_input = "false",
            image_1 = None,
            image_2 = None,
            mask = None,
            invert_mask = False,
            method = "stretch",
            interpolation = "lanczos",
            condition = "always",
            multiple_of = 0,
            keep_proportion = False,
            mask_channel = "red",
            latent = None,
            latent_type = "16_channels",
            vae = None
            ):
        """Resize inputs to the chosen resolution, VAE-encode them, and extract
        a single-channel mask. `method`/`interpolation`/`condition`/`multiple_of`/
        `keep_proportion` are forwarded to image_resize()."""
        ratio = 8  # latent compression factor

        # this is unfortunately required to avoid apparent non-deterministic outputs.
        # without setting the seed each time, the outputs of the VAE encode will change with every generation.
        torch.manual_seed(42)
        torch.cuda.manual_seed_all(42)

        # Clone so resizing never mutates the caller's tensors.
        image_1 = image_1.clone() if image_1 is not None else None
        image_2 = image_2.clone() if image_2 is not None else None

        # Resolve the working resolution from the selected input (pixel space).
        if latent is not None and resize_to_input == "latent":
            height, width = latent['samples'].shape[-2:]
            height, width = height * ratio, width * ratio
        elif image_1 is not None and resize_to_input == "image_1":
            height, width = image_1.shape[1:3]
        elif image_2 is not None and resize_to_input == "image_2":
            height, width = image_2.shape[1:3]
        elif mask is not None and resize_to_input == "mask":
            height, width = mask.shape[1:3]

        # Build the empty latent: channel count follows the input latent if
        # given, otherwise the latent_type selection (SD=4ch, Flux/SD3=16ch).
        if latent is not None:
            c = latent['samples'].shape[1]
        else:
            if latent_type == "4_channels":
                c = 4
            else:
                c = 16
        if image_1 is not None:
            b = image_1.shape[0]
        elif image_2 is not None:
            b = image_2.shape[0]
        else:
            b = 1
        latent = {"samples": torch.zeros((b, c, height // ratio, width // ratio))}

        latent_1, latent_2 = None, None
        if image_1 is not None:
            image_1 = image_resize(image_1, width, height, method, interpolation, condition, multiple_of, keep_proportion)
            latent_1 = {"samples": vae.encode(image_1[:,:,:,:3])}  # drop alpha before encoding
        if image_2 is not None:
            image_2 = image_resize(image_2, width, height, method, interpolation, condition, multiple_of, keep_proportion)
            latent_2 = {"samples": vae.encode(image_2[:,:,:,:3])}

        # Reduce a multi-channel mask image to the selected channel.
        if mask is not None and mask.shape[-1] > 1:
            channels = ["red", "green", "blue", "alpha"]
            mask = mask[:, :, :, channels.index(mask_channel)]
        if mask is not None:
            mask = F.interpolate(mask.unsqueeze(0), size=(height, width), mode='bilinear', align_corners=False).squeeze(0)
            if invert_mask:
                mask = 1.0 - mask

        return (latent_1,
                latent_2,
                mask,
                latent,
                width,
                height,
                )
class VAEStyleTransferLatent:
    """Latent-space style transfer.

    Embeds the content and style latents through a high-precision copy of one of the
    VAE's conv layers, matches feature statistics (AdaIN or WCT — see
    apply_style_to_latent), then maps the styled features back through the
    (pseudo-)inverse of that conv.
    """
    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "method": (["AdaIN", "WCT"], {"default": "AdaIN"}),
                "latent": ("LATENT",),
                "style_ref": ("LATENT",),
                "vae": ("VAE", ),
            },
            "optional": {
            }
        }

    RETURN_TYPES = ("LATENT",)
    RETURN_NAMES = ("latent",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/vae"

    def main(self,
            method = None,
            latent = None,
            style_ref = None,
            vae = False,
            ):
        from comfy.ldm.cascade.stage_c_coder import StageC_coder

        # this is unfortunately required to avoid apparent non-deterministic outputs.
        # without setting the seed each time, the outputs of the VAE encode will change with every generation.
        torch .manual_seed (42)
        torch.cuda.manual_seed_all(42)

        # prefer the sampler's raw state (state_info['raw_x']) when present; fall back to 'samples'
        denoised = latent .get('state_info', {}).get('raw_x')
        y0 = style_ref.get('state_info', {}).get('raw_x')
        denoised = latent['samples'] if denoised is None else denoised
        y0 = style_ref['samples'] if y0 is None else y0
        #denoised = latent.get('state_info', latent['samples'].get('raw_x', latent['samples']))
        #y0 = style_ref.get('state_info', style_ref['samples'].get('raw_x', style_ref['samples']))

        # state_info tensors may carry an extra leading dim; squeeze down to 4D (B, C, H, W)
        if denoised.ndim > 4:
            denoised = denoised.squeeze(0)
        if y0.ndim > 4:
            y0 = y0.squeeze(0)

        # Pick an "embedding" conv for this VAE family; deepcopy to float64 so the style
        # math runs at high precision without touching the live model weights.
        if hasattr(vae.first_stage_model, "up_blocks"): # probably stable cascade stage A
            x_embedder = copy.deepcopy(vae.first_stage_model.up_blocks[0][0]).to(torch.float64)
            denoised_embed = x_embedder(denoised.to(x_embedder.weight))
            y0_embed = x_embedder(y0.to(x_embedder.weight))
            denoised_embed = apply_style_to_latent(denoised_embed, y0_embed, method)
            denoised_styled = invert_conv2d(x_embedder, denoised_embed, denoised.shape).to(denoised)
        elif hasattr(vae.first_stage_model, "decoder"): # probably sd15, sdxl, sd35, flux, wan, etc. vae
            x_embedder = copy.deepcopy(vae.first_stage_model.decoder.conv_in).to(torch.float64)
            denoised_embed = x_embedder(denoised.to(x_embedder.weight))
            y0_embed = x_embedder(y0.to(x_embedder.weight))
            denoised_embed = apply_style_to_latent(denoised_embed, y0_embed, method)
            denoised_styled = invert_conv2d(x_embedder, denoised_embed, denoised.shape).to(denoised)
        elif type(vae.first_stage_model) == StageC_coder:
            # Stage C: the mapper conv goes features->latent, so the inverse is applied
            # first and the forward conv maps the styled features back.
            x_embedder = copy.deepcopy(vae.first_stage_model.encoder.mapper[0]).to(torch.float64)
            #x_embedder = copy.deepcopy(vae.first_stage_model.previewer.blocks[0]).to(torch.float64) # use with strategy for decoder above, but exploding latent problem, 1.E30 etc. quick to nan
            denoised_embed = invert_conv2d(x_embedder, denoised, denoised.shape)
            y0_embed = invert_conv2d(x_embedder, y0, y0.shape)
            denoised_embed = apply_style_to_latent(denoised_embed, y0_embed, method)
            denoised_styled = x_embedder(denoised_embed.to(x_embedder.weight))
        # NOTE(review): if the VAE matches none of the branches above, denoised_styled is
        # never assigned and the lines below raise NameError.

        latent_out = latent.copy()   # shallow copy: 'state_info' dict is still shared with the input
        #latent_out['state_info'] = copy.deepcopy(latent['state_info'])
        if latent_out.get('state_info', {}).get('raw_x') is not None:
            latent_out['state_info']['raw_x'] = denoised_styled
        latent_out['samples'] = denoised_styled
        return (latent_out, )
def apply_style_to_latent(denoised_embed, y0_embed, method="WCT"):
from einops import rearrange
import torch.nn as nn
denoised_embed_shape = denoised_embed.shape
denoised_embed = rearrange(denoised_embed, "B C H W -> B (H W) C")
y0_embed = rearrange(y0_embed, "B C H W -> B (H W) C")
if method == "AdaIN":
denoised_embed = adain_seq_inplace(denoised_embed, y0_embed)
elif method == "WCT":
f_s = y0_embed[0].clone() # batched style guides not supported
mu_s = f_s.mean(dim=0, keepdim=True)
f_s_centered = f_s - mu_s
cov = (f_s_centered.transpose(-2,-1).double() @ f_s_centered.double()) / (f_s_centered.size(0) - 1)
S_eig, U_eig = torch.linalg.eigh(cov + 1e-5 * torch.eye(cov.size(0), dtype=cov.dtype, device=cov.device))
S_eig_sqrt = S_eig.clamp(min=0).sqrt() # eigenvalues -> singular values
whiten = U_eig @ torch.diag(S_eig_sqrt) @ U_eig.transpose(-2,-1)
y0_color = whiten.to(f_s_centered)
for wct_i in range(denoised_embed_shape[0]):
f_c = denoised_embed[wct_i].clone()
mu_c = f_c.mean(dim=0, keepdim=True)
f_c_centered = f_c - mu_c
cov = (f_c_centered.transpose(-2,-1).double() @ f_c_centered.double()) / (f_c_centered.size(0) - 1)
S_eig, U_eig = torch.linalg.eigh(cov + 1e-5 * torch.eye(cov.size(0), dtype=cov.dtype, device=cov.device))
inv_sqrt_eig = S_eig.clamp(min=0).rsqrt()
whiten = U_eig @ torch.diag(inv_sqrt_eig) @ U_eig.transpose(-2,-1)
whiten = whiten.to(f_c_centered)
f_c_whitened = f_c_centered @ whiten.transpose(-2,-1)
f_cs = f_c_whitened @ y0_color.transpose(-2,-1).to(f_c_whitened) + mu_s.to(f_c_whitened)
denoised_embed[wct_i] = f_cs
denoised_embed = rearrange(denoised_embed, "B (H W) C -> B C H W", W=denoised_embed_shape[-1])
return denoised_embed
def invert_conv2d(
    conv: torch.nn.Conv2d,
    z: torch.Tensor,
    original_shape: torch.Size,
) -> torch.Tensor:
    """Approximately invert a Conv2d: recover an input of `original_shape` from output `z`.

    Maps each output pixel back to a flattened input patch via the pseudoinverse of the
    kernel matrix, folds the patches back into an image (averaging where they overlap),
    and crops away any padding.  Exact when the conv is an invertible patch embedding
    (stride == kernel size, enough output channels); least-squares otherwise.
    """
    import torch.nn.functional as F

    batch, in_ch, height, width = original_shape
    out_ch, _, k_h, k_w = conv.weight.shape
    s_h, s_w = conv.stride
    p_h, p_w = conv.padding

    # Remove the bias so only the linear part needs inverting.
    if conv.bias is None:
        z_lin = z
    else:
        z_lin = z - conv.bias.view(1, out_ch, 1, 1).to(z)

    # Pseudoinverse of the kernel viewed as an (out_ch, in_ch*kH*kW) matrix.
    kernel_flat = conv.weight.view(out_ch, -1).to(z)
    kernel_pinv = torch.linalg.pinv(kernel_flat)

    # Every output pixel becomes one flattened input patch.
    patches = kernel_pinv @ z_lin.reshape(z_lin.shape[0], z_lin.shape[1], -1)

    # Reassemble patches into the padded image; track overlap counts for averaging.
    fold_args = dict(
        output_size=(height + 2 * p_h, width + 2 * p_w),
        kernel_size=(k_h, k_w),
        stride=(s_h, s_w),
    )
    summed = F.fold(patches, **fold_args)
    counts = F.fold(torch.ones_like(patches), **fold_args)
    recon = summed / counts.clamp(min=1e-6)

    # Drop rows/columns that only existed because of padding.
    if p_h > 0 or p_w > 0:
        recon = recon[..., p_h:p_h + height, p_w:p_w + width]
    return recon
"""def invert_conv3d(conv: torch.nn.Conv3d,
z: torch.Tensor, original_shape: torch.Size, grid_sizes: Optional[Tuple[int,int,int]] = None) -> torch.Tensor:
import torch.nn.functional as F
B, C_in, D, H, W = original_shape
pD, pH, pW = 1,2,2
sD, sH, sW = pD, pH, pW
if z.ndim == 3:
# [B, S, C_out] -> reshape to [B, C_out, D', H', W']
S = z.shape[1]
if grid_sizes is None:
Dp = D // pD
Hp = H // pH
Wp = W // pW
else:
Dp, Hp, Wp = grid_sizes
C_out = z.shape[2]
z = z.transpose(1, 2).reshape(B, C_out, Dp, Hp, Wp)
else:
B2, C_out, Dp, Hp, Wp = z.shape
assert B2 == B, "Batch size mismatch... ya sharked it."
    # knock out the bias
if conv.bias is not None:
b = conv.bias.view(1, C_out, 1, 1, 1)
z_nobias = z - b
else:
z_nobias = z
# 2D filter -> pinv
w3 = conv.weight # [C_out, C_in, 1, pH, pW]
w2 = w3.squeeze(2) # [C_out, C_in, pH, pW]
out_ch, in_ch, kH, kW = w2.shape
W_flat = w2.view(out_ch, -1) # [C_out, in_ch*pH*pW]
W_pinv = torch.linalg.pinv(W_flat) # [in_ch*pH*pW, C_out]
# merge depth for 2D unfold wackiness
z2 = z_nobias.permute(0,2,1,3,4).reshape(B*Dp, C_out, Hp, Wp)
# apply pinv ... get patch vectors
z_flat = z2.reshape(B*Dp, C_out, -1) # [B*Dp, C_out, L]
x_patches = W_pinv @ z_flat # [B*Dp, in_ch*pH*pW, L]
# fold -> spatial frames
x2 = F.fold(
x_patches,
output_size=(H, W),
kernel_size=(pH, pW),
stride=(sH, sW)
) # → [B*Dp, C_in, H, W]
# un-merge depth
x2 = x2.reshape(B, Dp, in_ch, H, W) # [B, Dp, C_in, H, W]
x_recon = x2.permute(0,2,1,3,4).contiguous() # [B, C_in, D, H, W]
return x_recon
"""
def adain_seq_inplace(content: torch.Tensor, style: torch.Tensor, eps: float = 1e-7) -> torch.Tensor:
    """In-place AdaIN along dim 1: re-center and re-scale `content` so its per-channel
    mean/std (computed over dim 1) match those of `style`.  Returns the mutated
    `content` tensor itself."""
    c_mean = content.mean(1, keepdim=True)
    c_std  = content.std(1, keepdim=True).add_(eps)   # eps guards against divide-by-zero
    s_mean = style.mean(1, keepdim=True)
    s_std  = style.std(1, keepdim=True).add_(eps)

    # Normalize, then re-apply the style statistics — all in place on `content`.
    content.sub_(c_mean).div_(c_std)
    content.mul_(s_std).add_(s_mean)
    return content
class LatentUpscaleWithVAE:
    """Upscale a latent by round-tripping through the VAE: decode -> resize the image ->
    re-encode.  When sampler state ('state_info') is attached, the 'denoised' estimate
    and every entry of 'data_prev_' are round-tripped the same way so sampling can
    resume at the new resolution."""
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "latent": ("LATENT", ),
                "width" : ("INT", {"default": 1024, "min": 8, "max": 1024 ** 2, "step": 8}),
                "height": ("INT", {"default": 1024, "min": 8, "max": 1024 ** 2, "step": 8}),
                "vae": ("VAE", ),
            },
        }

    RETURN_TYPES = ("LATENT",)
    RETURN_NAMES = ("latent",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/latents"

    def main(self,
            latent,
            width,                     # target width in pixels
            height,                    # target height in pixels
            vae,
            method = "stretch",        # resize options forwarded to image_resize (not exposed in INPUT_TYPES)
            interpolation = "lanczos",
            condition = "always",
            multiple_of = 0,
            keep_proportion = False,
            ):

        ratio = 8 # latent compression factor

        # this is unfortunately required to avoid apparent non-deterministic outputs.
        # without setting the seed each time, the outputs of the VAE encode will change with every generation.
        torch .manual_seed (42)
        torch.cuda.manual_seed_all(42)

        images_prev_list, latent_prev_list = [], []

        if 'state_info' in latent:
            # decode the sampler's running 'denoised' estimate plus each stored history step
            #images = vae.decode(latent['state_info']['raw_x'] ) # .to(latent['samples']) )
            images = vae.decode(latent['state_info']['denoised'] ) # .to(latent['samples']) )
            data_prev_ = latent['state_info']['data_prev_'].squeeze(0)
            for i in range(data_prev_.shape[0]):
                images_prev_list.append( vae.decode(data_prev_[i]) ) # .to(latent['samples']) )
        else:
            images = vae.decode(latent['samples'])

        if len(images.shape) == 5: #Combine batches
            images = images.reshape(-1, images.shape[-3], images.shape[-2], images.shape[-1])

        images = image_resize(images, width, height, method, interpolation, condition, multiple_of, keep_proportion)
        latent_tensor = vae.encode(images[:,:,:,:3])   # drop alpha before encoding

        if images_prev_list:
            # round-trip the history latents at the new resolution as well
            for i in range(data_prev_.shape[0]):
                image_data_p = image_resize(images_prev_list[i], width, height, method, interpolation, condition, multiple_of, keep_proportion)
                latent_prev_list.append( vae.encode(image_data_p[:,:,:,:3]) )
            latent_prev = torch.stack(latent_prev_list).unsqueeze(0) #.view_as(latent['state_info']['data_prev_'])

        #images_prev = image_resize(images_prev, width, height, method, interpolation, condition, multiple_of, keep_proportion)
        #latent_tensor = vae.encode(image_1[:,:,:,:3])

        if 'state_info' in latent:
            # NOTE(review): latent_prev is only bound when images_prev_list was non-empty;
            # a state_info latent with an empty 'data_prev_' would raise NameError here.
            #latent['state_info']['raw_x'] = latent_tensor
            latent['state_info']['denoised'] = latent_tensor
            latent['state_info']['data_prev_'] = latent_prev

        latent['samples'] = latent_tensor.to(latent['samples'])
        return (latent,)
class SigmasSchedulePreview(SaveImage):
    """Plots a full noise schedule (sigma, sigma_next, sigma_down, sigma_up, alpha_ratio,
    step sizes, ...) for a model/scheduler/eta combination and saves the plot as a temp
    preview image via SaveImage."""
    def __init__(self):
        self.output_dir = folder_paths.get_temp_directory()
        self.type = "temp"
        # random suffix so temp previews don't collide between runs
        self.prefix_append = "_temp_" + ''.join(random.choice("abcdefghijklmnopqrstupvxyz1234567890") for x in range(5))
        self.compress_level = 4

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "model": ("MODEL",),
                "noise_mode": (NOISE_MODE_NAMES, {"default": 'hard', "tooltip": "How noise scales with the sigma schedule. Hard is the most aggressive, the others start strong and drop rapidly."}),
                "eta": ("FLOAT", {"default": 0.25, "step": 0.01, "min": -1000.0, "max": 1000.0}),
                "s_noise": ("FLOAT", {"default": 1.00, "step": 0.01, "min": -1000.0, "max": 1000.0}),
                "denoise": ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step":0.01}),
                "denoise_alt": ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step":0.01}),
                "scheduler": (get_res4lyf_scheduler_list(), {"default": "beta57"},),
                "steps": ("INT", {"default": 30, "min": 1, "max": 10000}),
                "plot_max": ("FLOAT", {"default": 2.1, "min": -10000, "max": 10000, "step":0.01, "tooltip": "Set to a negative value to have the plot scale automatically."}),
                "plot_min": ("FLOAT", {"default": 0.0, "min": -10000, "max": 10000, "step":0.01, "tooltip": "Set to a negative value to have the plot scale automatically."}),
            },
            "optional": {
                "sigmas": ("SIGMAS",),
            },
        }

    FUNCTION = "plot_schedule"
    CATEGORY = "RES4LYF/sigmas"
    OUTPUT_NODE = True

    @staticmethod
    def tensor_to_graph_image(tensors, labels, colors, plot_min, plot_max, input_params):
        # Render all curves to a PIL image on a black background.
        plt.figure(figsize=(6.4, 6.4), dpi=320)
        ax = plt.gca()
        ax.set_facecolor("black")
        ax.patch.set_alpha(1.0)
        # each curve is drawn 50x at alpha=0.1, building up to a solid, slightly soft line
        for _ in range(50):
            for tensor, color in zip(tensors, colors):
                plt.plot(tensor.numpy(), color=color, alpha=0.1)
        plt.axhline(y=1.0, color='gray', linestyle='dotted', linewidth=1.5)   # reference line at 1.0
        plt.xlabel("Step", color="white", weight="bold", antialiased=False)
        plt.ylabel("Value", color="white", weight="bold", antialiased=False)
        ax.tick_params(colors="white")
        # negative plot_max means "autoscale"
        if plot_max > 0:
            plt.ylim(plot_min, plot_max)
        # one-line summary of the inputs above the plot
        input_text = (
            f"noise_mode: {input_params['noise_mode']} | "
            f"eta: {input_params['eta']} | "
            f"s_noise: {input_params['s_noise']} | "
            f"denoise: {input_params['denoise']} | "
            f"denoise_alt: {input_params['denoise_alt']} | "
            f"scheduler: {input_params['scheduler']}"
        )
        plt.text(0.5, 1.05, input_text, ha='center', va='center', color='white', fontsize=8, transform=ax.transAxes)
        from matplotlib.lines import Line2D
        legend_handles = [Line2D([0], [0], color=color, lw=2, label=label) for label, color in zip(labels, colors)]
        plt.legend(handles=legend_handles, facecolor="black", edgecolor="white", labelcolor="white", framealpha=1.0)
        # rasterize to a PIL image and close the figure to avoid leaking matplotlib state
        with BytesIO() as buf:
            plt.savefig(buf, format='png', facecolor="black")
            buf.seek(0)
            image = Image.open(buf).copy()
        plt.close()
        return image

    def plot_schedule(self, model, noise_mode, eta, s_noise, denoise, denoise_alt, scheduler, steps, plot_min, plot_max, sigmas=None):
        # Collect per-step schedule quantities from the model's noise sampler.
        sigma_vals = []
        sigma_next_vals = []
        sigma_down_vals = []
        sigma_up_vals = []
        sigma_plus_up_vals = []
        sigma_hat_vals = []
        alpha_ratio_vals = []
        sigma_step_size_vals = []
        sigma_step_size_sde_vals = []

        eta_var = eta          # NOTE(review): assigned but unused in this method
        rk_type = "res_2s"
        noise_anchor = 1.0

        # use the provided sigmas if any, otherwise build them from the scheduler
        if sigmas is not None:
            sigmas = sigmas.clone()
        else:
            sigmas = get_sigmas(model, scheduler, steps, denoise)
        sigmas *= denoise_alt

        RK = RK_Method_Beta.create(model, rk_type, noise_anchor, model_device=sigmas.device, work_device=sigmas.device, dtype=sigmas.dtype, extra_options="")
        NS = RK_NoiseSampler(RK, model, device=sigmas.device, dtype=sigmas.dtype, extra_options="")

        for i in range(len(sigmas) - 1):
            sigma = sigmas[i]
            sigma_next = sigmas[i + 1]
            # SDE step decomposition: sigma_up (noise re-added), sigma_hat (possibly
            # inflated current sigma), sigma_down (denoise target), alpha_ratio (rescale)
            su, sigma_hat, sd, alpha_ratio = NS.get_sde_step(sigma, sigma_next, eta, noise_mode_override=noise_mode, )
            #su, sigma_hat, sd, alpha_ratio = get_res4lyf_step_with_model(model, sigma, sigma_next, eta, noise_mode)
            su = su * s_noise
            sigma_vals .append(sigma)
            sigma_next_vals .append(sigma_next)
            sigma_down_vals .append(sd)
            sigma_up_vals .append(su)
            sigma_plus_up_vals .append(sigma + su)
            alpha_ratio_vals .append(alpha_ratio)
            sigma_step_size_vals .append(sigma - sigma_next)
            sigma_step_size_sde_vals.append(sigma + su - sd)
            # sigma_hat only differs from sigma for ancestral-style "hat" noise modes
            if sigma_hat != sigma:
                sigma_hat_vals.append(sigma_hat)

        sigma_tensor = torch.tensor(sigma_vals)
        sigma_next_tensor = torch.tensor(sigma_next_vals)
        sigma_down_tensor = torch.tensor(sigma_down_vals)
        sigma_up_tensor = torch.tensor(sigma_up_vals)
        sigma_plus_up_tensor = torch.tensor(sigma_plus_up_vals)
        alpha_ratio_tensor = torch.tensor(alpha_ratio_vals)
        sigma_step_size_tensor = torch.tensor(sigma_step_size_vals)
        sigma_step_size_sde_tensor = torch.tensor(sigma_step_size_sde_vals)

        # default curve set; near-coincident curves get merged into a single legend entry
        tensors = [sigma_tensor, sigma_next_tensor, sigma_down_tensor, sigma_up_tensor]
        labels = ["$σ$", "$σ_{next}$", "$σ_{down}$", "$σ_{up}$"]
        colors = ["white", "dodgerblue", "green", "red"]

        if torch.norm(sigma_next_tensor - sigma_down_tensor) < 1e-2:
            tensors = [sigma_tensor, sigma_next_tensor, sigma_up_tensor]
            labels = ["$σ$", "$σ_{next,down}$", "$σ_{up}$"]
            colors = ["white", "cyan", "red"]
        elif torch.norm(sigma_next_tensor - sigma_up_tensor) < 1e-2:
            tensors = [sigma_tensor, sigma_next_tensor, sigma_down_tensor]
            labels = ["$σ$", "$σ_{next,up}$", "$σ_{down}$"]
            colors = ["white", "violet", "green",]

        if torch.norm(sigma_tensor - sigma_plus_up_tensor) > 1e-2:
            tensors.append(sigma_plus_up_tensor)
            labels.append("$σ + σ_{up}$")
            colors.append("brown")

        if torch.norm(sigma_step_size_tensor - sigma_step_size_sde_tensor) > 1e-2:
            tensors.append(sigma_step_size_sde_tensor)
            # NOTE(review): "\h" in this non-raw string is an invalid escape sequence
            # (emits a DeprecationWarning); consider r"$Δ \hat{t}$"
            labels.append("$Δ \hat{t}$")
            colors.append("gold")

        if sigma_hat_vals:
            sigma_hat_tensor = torch.tensor(sigma_hat_vals)
            tensors.append(sigma_hat_tensor)
            labels.append("$σ̂$")
            colors.append("maroon")
            tensors.append(sigma_step_size_tensor)
            labels.append("$σ̂ - σ_{next}$")
            colors.append("darkorange")
        else:
            tensors.append(sigma_step_size_tensor)
            #labels.append("$σ - σ_{next}$")
            labels.append("$Δt$")
            colors.append("darkorange")

        tensors.append(alpha_ratio_tensor)
        labels.append("$α_{ratio}$")
        colors.append("grey")

        graph_image = self.tensor_to_graph_image(
            tensors, labels, colors, plot_min, plot_max,
            input_params={
                "noise_mode": noise_mode,
                "eta": eta,
                "s_noise": s_noise,
                "denoise": denoise,
                "denoise_alt": denoise_alt,
                "scheduler": scheduler,
            }
        )

        # PIL -> float tensor in [0, 1], add a batch dim, then save via SaveImage
        numpy_image = np.array(graph_image)
        numpy_image = numpy_image / 255.0
        tensor_image = torch.from_numpy(numpy_image)
        tensor_image = tensor_image.unsqueeze(0)
        images_tensor = torch.cat([tensor_image], 0)
        return self.save_images(images_tensor, "SigmasSchedulePreview")
================================================
FILE: hidream/model.py
================================================
import torch
import torch.nn.functional as F
import math
import torch.nn as nn
from torch import Tensor, FloatTensor
from typing import Optional, Callable, Tuple, List, Dict, Any, Union, TYPE_CHECKING, TypeVar
from dataclasses import dataclass
import einops
from einops import repeat, rearrange
from comfy.ldm.lightricks.model import TimestepEmbedding, Timesteps
import torch.nn.functional as F
from comfy.ldm.flux.math import apply_rope, rope
#from comfy.ldm.flux.layers import LastLayer
#from ..flux.layers import LastLayer
from comfy.ldm.modules.attention import optimized_attention, attention_pytorch
import comfy.model_management
import comfy.ldm.common_dit
from ..helper import ExtraOptions
from ..latents import slerp_tensor, interpolate_spd, tile_latent, untile_latent, gaussian_blur_2d, median_blur_2d
from ..style_transfer import StyleMMDiT_Model, apply_scattersort_masked, apply_scattersort_tiled, adain_seq_inplace, adain_patchwise_row_batch_med, adain_patchwise_row_batch, adain_seq, apply_scattersort
@dataclass
class ModulationOut:
    # AdaLN-style modulation triplet produced from a conditioning vector:
    # x -> gate * f(x * (1 + scale) + shift)
    shift: Tensor
    scale: Tensor
    gate : Tensor
class BlockType:
    # Enum-like tags selecting which transformer block variant to build (see HDBlock).
    Double = 2
    Single = 1
    Zero = 0
#########################################################################################################################################################################
class HDBlock(nn.Module):
    """Wrapper that builds either a double-stream (img+txt) or single-stream HiDream
    transformer block and forwards all calls to it."""
    def __init__(
        self,
        dim : int,
        heads : int,
        head_dim : int,
        num_routed_experts : int = 4,
        num_activated_experts : int = 2,
        block_type : BlockType = BlockType.Zero,
        dtype=None, device=None, operations=None
    ):
        super().__init__()
        # NOTE(review): BlockType.Zero has no entry below, so the default block_type
        # raises KeyError — callers are expected to pass Double or Single.
        block_classes = {
            BlockType.Double : HDBlockDouble,
            BlockType.Single : HDBlockSingle,
        }
        self.block = block_classes[block_type](dim, heads, head_dim, num_routed_experts, num_activated_experts, dtype=dtype, device=device, operations=operations)

    def forward(
        self,
        img : FloatTensor,
        img_masks : Optional[FloatTensor] = None,
        txt : Optional[FloatTensor] = None,
        clip : FloatTensor = None,
        rope : FloatTensor = None,
        mask : Optional[FloatTensor] = None,
        update_cross_attn : Optional[Dict] = None,
        style_block = None,
    ) -> FloatTensor:
        # pure delegation; see HDBlockDouble / HDBlockSingle for the actual math
        return self.block(img, img_masks, txt, clip, rope, mask, update_cross_attn, style_block=style_block)
# Copied from https://github.com/black-forest-labs/flux/blob/main/src/flux/modules/layers.py
class EmbedND(nn.Module):
    """N-dimensional rotary position embedding: one RoPE table per id axis, concatenated
    along the frequency axis (copied from BFL's flux layers)."""
    def __init__(self, theta: int, axes_dim: List[int]):
        super().__init__()
        self.theta = theta
        self.axes_dim = axes_dim

    def forward(self, ids: Tensor) -> Tensor:
        # build one RoPE table per trailing axis of `ids`, then stack them
        tables = []
        for axis in range(ids.shape[-1]):
            tables.append(rope(ids[..., axis], self.axes_dim[axis], self.theta))
        emb = torch.cat(tables, dim=-3)
        return emb.unsqueeze(2)
class PatchEmbed(nn.Module):
    """Projects already-patchified latents to the model width.  Despite the name, this
    is a Linear over pre-flattened patches of size in_channels*patch_size*patch_size."""
    def __init__(
        self,
        patch_size = 2,
        in_channels = 4,
        out_channels = 1024,
        dtype=None, device=None, operations=None
    ):
        super().__init__()
        self.patch_size = patch_size
        self.out_channels = out_channels
        self.proj = operations.Linear(in_channels * patch_size * patch_size, out_channels, bias=True, dtype=dtype, device=device)

    def forward(self, latent):
        # latent: (..., in_channels*patch_size*patch_size) -> (..., out_channels)
        latent = self.proj(latent)
        return latent
class PooledEmbed(nn.Module):
    """Maps a pooled text embedding to the model's hidden size via a TimestepEmbedding MLP."""
    def __init__(self, text_emb_dim, hidden_size, dtype=None, device=None, operations=None):
        super().__init__()
        self.pooled_embedder = TimestepEmbedding(in_channels=text_emb_dim, time_embed_dim=hidden_size, dtype=dtype, device=device, operations=operations)

    def forward(self, pooled_embed):
        return self.pooled_embedder(pooled_embed)
class TimestepEmbed(nn.Module):
    """Standard diffusion timestep embedding: sinusoidal features -> MLP projection."""
    def __init__(self, hidden_size, frequency_embedding_size=256, dtype=None, device=None, operations=None):
        super().__init__()
        self.time_proj = Timesteps (num_channels=frequency_embedding_size, flip_sin_to_cos=True, downscale_freq_shift=0)
        self.timestep_embedder = TimestepEmbedding(in_channels=frequency_embedding_size, time_embed_dim=hidden_size, dtype=dtype, device=device, operations=operations)

    def forward(self, t, wdtype):
        # sinusoidal features are cast to the working dtype before the MLP
        t_emb = self.time_proj(t).to(dtype=wdtype)
        t_emb = self.timestep_embedder(t_emb)
        return t_emb
class TextProjection(nn.Module):
    """Bias-free linear projection of text encoder outputs to the model's hidden size."""
    def __init__(self, in_features, hidden_size, dtype=None, device=None, operations=None):
        super().__init__()
        self.linear = operations.Linear(in_features=in_features, out_features=hidden_size, bias=False, dtype=dtype, device=device)

    def forward(self, caption):
        hidden_states = self.linear(caption)
        return hidden_states
class HDFeedForwardSwiGLU(nn.Module):
    """SwiGLU feed-forward: w2(silu(w1(x)) * w3(x)), with optional style_block hooks
    inserted after every intermediate tensor when running a batched style pass."""
    def __init__(
        self,
        dim : int,
        hidden_dim : int,
        multiple_of : int = 256,
        ffn_dim_multiplier : Optional[float] = None,
        dtype=None, device=None, operations=None
    ):
        super().__init__()
        # LLaMA-style hidden-dim shrink (2/3), optional multiplier, round up to multiple_of
        hidden_dim = int(2 * hidden_dim / 3)
        if ffn_dim_multiplier is not None: # custom dim factor multiplier
            hidden_dim = int(ffn_dim_multiplier * hidden_dim)
        hidden_dim = multiple_of * ((hidden_dim + multiple_of - 1) // multiple_of)

        self.w1 = operations.Linear(dim, hidden_dim, bias=False, dtype=dtype, device=device)
        self.w2 = operations.Linear(hidden_dim, dim, bias=False, dtype=dtype, device=device)
        self.w3 = operations.Linear(dim, hidden_dim, bias=False, dtype=dtype, device=device)

    def forward(self, x, style_block=None): # 1,4096,2560 ->
        # styled path only applies to batched 3D inputs so the hooks see each stage
        if style_block is not None and x.shape[0] > 1 and x.ndim == 3:
            x1 = self.w1(x)
            x1 = style_block(x1, "ff_1")
            x1 = torch.nn.functional.silu(x1)
            x1 = style_block(x1, "ff_1_silu")
            x3 = self.w3(x)
            x3 = style_block(x3, "ff_3")
            x13 = x1 * x3
            x13 = style_block(x13, "ff_13")
            x2 = self.w2(x13)
            x2 = style_block(x2, "ff_2")
            return x2
        else:
            # plain SwiGLU
            return self.w2(torch.nn.functional.silu(self.w1(x)) * self.w3(x))
# Modified from https://github.com/deepseek-ai/DeepSeek-V3/blob/main/inference/model.py
class HDMoEGate(nn.Module):
    """Top-k softmax gate over routed experts (DeepSeek-V3 style)."""
    def __init__(self, dim, num_routed_experts=4, num_activated_experts=2, dtype=None, device=None):
        super().__init__()
        self.top_k = num_activated_experts # 2
        self.n_routed_experts = num_routed_experts # 4
        self.gating_dim = dim # 2560
        self.weight = nn.Parameter(torch.empty((self.n_routed_experts, self.gating_dim), dtype=dtype, device=device))

    def forward(self, x):
        dtype = self.weight.dtype
        # weights in a non-float dtype (e.g. quantized/fp8 checkpoints) can't go through
        # F.linear directly; promote to fp32
        if dtype not in {torch.bfloat16, torch.float16, torch.float32, torch.float64}:
            dtype = torch.float32
            self.weight.data = self.weight.data.to(dtype)   # NOTE: mutates the stored parameter in place
        logits = F.linear(x.to(dtype), self.weight.to(x.device), None)
        scores = logits.softmax(dim=-1).to(x) # logits.shape == 4032,4 scores.shape == 4032,4
        # returns (topk_weight, topk_idx); sorted=False keeps expert order arbitrary
        return torch.topk(scores, k=self.top_k, dim=-1, sorted=False)
class HDMOEFeedForwardSwiGLU(nn.Module):
    """Mixture-of-experts SwiGLU FFN: a shared expert plus top-k routed experts whose
    outputs are combined with the gate's softmax weights."""
    def __init__(
        self,
        dim : int,
        hidden_dim : int,
        num_routed_experts : int,
        num_activated_experts : int,
        dtype=None, device=None, operations=None
    ):
        super().__init__()
        self.shared_experts = HDFeedForwardSwiGLU(dim, hidden_dim // 2, dtype=dtype, device=device, operations=operations)
        self.experts = nn.ModuleList([HDFeedForwardSwiGLU(dim, hidden_dim , dtype=dtype, device=device, operations=operations) for i in range(num_routed_experts)])
        self.gate = HDMoEGate(dim, num_routed_experts, num_activated_experts, dtype=dtype, device=device)
        self.num_activated_experts = num_activated_experts

    def forward(self, x, style_block=None):
        # NOTE(review): despite the None default, style_block is effectively required —
        # style_block.FF_SHARED / style_block(...) are called unconditionally below.
        y_shared = self.shared_experts(x, style_block.FF_SHARED) # 1,4096,2560 -> 1,4096,2560
        y_shared = style_block(y_shared, "shared")

        topk_weight, topk_idx = self.gate(x) # -> 4096,2 4096,2
        topk_weight = style_block(topk_weight, "topk_weight")

        # style-transfer trick: force batch item 0 to reuse batch item 1's expert routing
        # (skipped in recon mode; HDModel.RECON_MODE is a global defined elsewhere in this file)
        if y_shared.shape[0] > 1 and style_block.gate[0] and not HDModel.RECON_MODE:
            topk_idx[0] = topk_idx[1]

        tk_idx_flat = topk_idx.view(topk_idx.shape[0], -1)
        # duplicate each token once per activated expert so routing can index flat
        x = x.repeat_interleave(self.num_activated_experts, dim=-2)
        y = torch.empty_like(x)
        if style_block.gate[0] and not HDModel.RECON_MODE and y_shared.shape[0] > 1:
            # batched path: keep batch items separate so per-batch style hooks apply
            for i, expert in enumerate(self.experts): # TODO: check for empty expert lists and continue if found to avoid CUBLAS errors
                x_list = []
                for b in range(x.shape[0]):
                    x_sel = x[b][tk_idx_flat[b]==i]
                    x_list.append(x_sel)
                x_list = torch.stack(x_list, dim=0)
                x_out = expert(x_list, style_block.FF_SEPARATE).to(x.dtype)
                for b in range(y.shape[0]):
                    y[b][tk_idx_flat[b]==i] = x_out[b]
        else:
            # fast path: gather all tokens routed to expert i across the whole batch
            for i, expert in enumerate(self.experts):
                x_sel = x[tk_idx_flat == i, :]
                if x_sel.shape[0] == 0:
                    continue
                y[tk_idx_flat == i, :] = expert(x_sel).to(x.dtype)
        y = style_block(y, "separate")

        # weighted sum of the top-k expert outputs, then add the shared expert output
        y_sum = torch.einsum('abk,abkd->abd', topk_weight, y.view(*topk_weight.shape, -1))
        y_sum = style_block(y_sum, "sum")
        y_sum = y_sum.view_as(y_shared) + y_shared
        y_sum = style_block(y_sum, "out")
        return y_sum
def apply_passthrough(denoised_embed, *args, **kwargs):
    """Identity stand-in for a style hook: returns its first argument unchanged.
    Extra positional/keyword arguments are accepted purely for call-site compatibility."""
    return denoised_embed
class AttentionBuffer:
    # Class-level scratch slot shared by all callers; the module-level attention()
    # helper below stores its most recent output here for later inspection/reuse.
    buffer = {}
def attention(q: Tensor, k: Tensor, v: Tensor, rope: Tensor, mask: Optional[Tensor] = None):
    # Shared attention helper for HiDream blocks.  `rope` here is the precomputed rotary
    # embedding tensor (the parameter shadows the imported rope() function).  q/k/v
    # arrive as (B, L, heads, head_dim) and are flattened back to (B, L, heads*head_dim)
    # for comfy's attention kernels; the result is also stashed on AttentionBuffer.buffer.
    q, k = apply_rope(q, k, rope)
    if mask is not None:
        # masked path: attention_pytorch supports an explicit attention mask
        AttentionBuffer.buffer = attention_pytorch(
            q.view(q.shape[0], -1, q.shape[-1] * q.shape[-2]),
            k.view(k.shape[0], -1, k.shape[-1] * k.shape[-2]),
            v.view(v.shape[0], -1, v.shape[-1] * v.shape[-2]),
            q.shape[2],   # number of heads
            mask=mask,
        )
    else:
        AttentionBuffer.buffer = optimized_attention(
            q.view(q.shape[0], -1, q.shape[-1] * q.shape[-2]),
            k.view(k.shape[0], -1, k.shape[-1] * k.shape[-2]),
            v.view(v.shape[0], -1, v.shape[-1] * v.shape[-2]),
            q.shape[2],
            mask=mask,    # always None on this path
        )
    return AttentionBuffer.buffer
class HDAttention(nn.Module):
    """Joint image/text attention for HiDream transformer blocks.

    single=True : image-only attention (one q/k/v/out projection set).
    single=False: double-stream — separate *_t projections for text, attention runs over
                  the concatenated [img, txt] sequence, and the output is split back into
                  image and text parts.

    `update_cross_attn` optionally performs a closed-form edit of the text k/v
    projection weights (see forward).
    """
    def __init__(
        self,
        query_dim : int,
        heads : int = 8,
        dim_head : int = 64,
        eps : float = 1e-5,
        out_dim : int = None,
        single : bool = False,
        dtype=None, device=None, operations=None
    ):
        super().__init__()
        self.inner_dim = out_dim if out_dim is not None else dim_head * heads
        self.query_dim = query_dim
        self.out_dim = out_dim if out_dim is not None else query_dim
        # when out_dim is given, the head count is derived from it instead of `heads`
        self.heads = out_dim // dim_head if out_dim is not None else heads
        self.single = single

        # image-stream projections; q/k get RMS norms (QK-norm)
        self.to_q = operations.Linear (self.query_dim, self.inner_dim, dtype=dtype, device=device)
        self.to_k = operations.Linear (self.inner_dim, self.inner_dim, dtype=dtype, device=device)
        self.to_v = operations.Linear (self.inner_dim, self.inner_dim, dtype=dtype, device=device)
        self.to_out = operations.Linear (self.inner_dim, self.out_dim, dtype=dtype, device=device)
        self.q_rms_norm = operations.RMSNorm(self.inner_dim, eps, dtype=dtype, device=device)
        self.k_rms_norm = operations.RMSNorm(self.inner_dim, eps, dtype=dtype, device=device)

        if not single:
            # text-stream projections (double-stream blocks only)
            self.to_q_t = operations.Linear (self.query_dim, self.inner_dim, dtype=dtype, device=device)
            self.to_k_t = operations.Linear (self.inner_dim, self.inner_dim, dtype=dtype, device=device)
            self.to_v_t = operations.Linear (self.inner_dim, self.inner_dim, dtype=dtype, device=device)
            self.to_out_t = operations.Linear (self.inner_dim, self.out_dim, dtype=dtype, device=device)
            self.q_rms_norm_t = operations.RMSNorm(self.inner_dim, eps, dtype=dtype, device=device)
            self.k_rms_norm_t = operations.RMSNorm(self.inner_dim, eps, dtype=dtype, device=device)

    def forward(
        self,
        img : FloatTensor,
        img_masks : Optional[FloatTensor] = None,
        txt : Optional[FloatTensor] = None,
        rope : FloatTensor = None,
        mask : Optional[FloatTensor] = None,
        update_cross_attn : Optional[Dict]= None,
        style_block = None,
    ) -> Tensor:
        bsz = img.shape[0]

        # image q/k/v, with style hooks after each projection and norm
        img_q = self.to_q(img)
        img_k = self.to_k(img)
        img_v = self.to_v(img)

        img_q = style_block.img.ATTN(img_q, "q_proj")
        img_k = style_block.img.ATTN(img_k, "k_proj")
        img_v = style_block.img.ATTN(img_v, "v_proj")

        img_q = self.q_rms_norm(img_q)
        img_k = self.k_rms_norm(img_k)

        img_q = style_block.img.ATTN(img_q, "q_norm")
        img_k = style_block.img.ATTN(img_k, "k_norm")

        inner_dim = img_k.shape[-1]
        head_dim = inner_dim // self.heads

        # (B, L, inner) -> (B, L, heads, head_dim) for RoPE
        img_q = img_q.view(bsz, -1, self.heads, head_dim)
        img_k = img_k.view(bsz, -1, self.heads, head_dim)
        img_v = img_v.view(bsz, -1, self.heads, head_dim)

        if img_masks is not None:
            # zero out keys at masked image positions
            img_k = img_k * img_masks.view(bsz, -1, 1, 1)

        if self.single:
            attn = attention(img_q, img_k, img_v, rope=rope, mask=mask)
            attn = style_block.img.ATTN(attn, "out")
            return self.to_out(attn)
        else:
            # text q/k/v through the dedicated *_t projections
            txt_q = self.to_q_t(txt)
            txt_k = self.to_k_t(txt)
            txt_v = self.to_v_t(txt)

            txt_q = style_block.txt.ATTN(txt_q, "q_proj")
            txt_k = style_block.txt.ATTN(txt_k, "k_proj")
            txt_v = style_block.txt.ATTN(txt_v, "v_proj")

            txt_q = self.q_rms_norm_t(txt_q)
            txt_k = self.k_rms_norm_t(txt_k)

            txt_q = style_block.txt.ATTN(txt_q, "q_norm")
            txt_k = style_block.txt.ATTN(txt_k, "k_norm")

            txt_q = txt_q.view(bsz, -1, self.heads, head_dim)
            txt_k = txt_k.view(bsz, -1, self.heads, head_dim)
            txt_v = txt_v.view(bsz, -1, self.heads, head_dim)

            img_len = img_q.shape[1]
            txt_len = txt_q.shape[1]

            # joint attention over the concatenated [img, txt] sequence, then split back
            attn = attention(torch.cat([img_q, txt_q], dim=1),
                             torch.cat([img_k, txt_k], dim=1),
                             torch.cat([img_v, txt_v], dim=1), rope=rope, mask=mask)
            img_attn, txt_attn = torch.split(attn, [img_len, txt_len], dim=1) #1, 4480, 2560

            img_attn = style_block.img.ATTN(img_attn, "out")
            txt_attn = style_block.txt.ATTN(txt_attn, "out")

            if update_cross_attn is not None:
                if not update_cross_attn['skip_cross_attn']:
                    UNCOND = update_cross_attn['UNCOND']
                    if UNCOND:
                        # uncond pass: cache the "source" concept embedding built from the
                        # t5 and llama token spans (layout appears to be
                        # [0:128 t5 | 128:256 llama | 256:384 llama] — TODO confirm)
                        llama_start = update_cross_attn['src_llama_start']
                        llama_end = update_cross_attn['src_llama_end']
                        t5_start = update_cross_attn['src_t5_start']
                        t5_end = update_cross_attn['src_t5_end']
                        txt_src = torch.cat([txt[:,t5_start:t5_end,:], txt[:,128+llama_start:128+llama_end,:], txt[:,256+llama_start:256+llama_end],], dim=-2).float()
                        self.c_src = txt_src.transpose(-2,-1).squeeze(0) # shape [C,1]
                    else:
                        # cond pass: ridge-regularized closed-form update (model-editing /
                        # UCE-style least squares) of to_k_t/to_v_t so the cached source
                        # embedding maps to the target concept's keys/values
                        llama_start = update_cross_attn['tgt_llama_start']
                        llama_end = update_cross_attn['tgt_llama_end']
                        t5_start = update_cross_attn['tgt_t5_start']
                        t5_end = update_cross_attn['tgt_t5_end']

                        lamb = update_cross_attn['lamb']    # ridge strength
                        erase = update_cross_attn['erase']  # edit strength

                        txt_guide = torch.cat([txt[:,t5_start:t5_end,:], txt[:,128+llama_start:128+llama_end,:], txt[:,256+llama_start:256+llama_end],], dim=-2).float()
                        c_guide = txt_guide.transpose(-2,-1).squeeze(0) # [C,1]

                        Wv_old = self.to_v_t.weight.data.float() # [C,C]
                        Wk_old = self.to_k_t.weight.data.float() # [C,C]

                        v_star = Wv_old @ c_guide # [C,1]
                        k_star = Wk_old @ c_guide # [C,1]

                        c_src = self.c_src # [C,1]
                        erase_scale = erase

                        d = c_src.shape[0]
                        C = c_src @ c_src.T # [C,C]
                        I = torch.eye(d, device=C.device, dtype=C.dtype)

                        # W_new = (lamb*W_old + erase*(target @ src^T)) @ (lamb*I + erase*(src @ src^T))^-1
                        mat1_v = lamb*Wv_old + erase_scale*(v_star @ c_src.T) # [C,C]
                        mat2_v = lamb*I + erase_scale*(C) # [C,C]
                        Wv_new = mat1_v @ torch.inverse(mat2_v) # [C,C]

                        mat1_k = lamb*Wk_old + erase_scale*(k_star @ c_src.T) # [C,C]
                        mat2_k = lamb*I + erase_scale*(C) # [C,C]
                        Wk_new = mat1_k @ torch.inverse(mat2_k) # [C,C]

                        # write the edited weights back into the live parameters
                        self.to_v_t.weight.data.copy_(Wv_new.to(self.to_v_t.weight.data.dtype))
                        self.to_k_t.weight.data.copy_(Wk_new.to(self.to_k_t.weight.data.dtype))

            return self.to_out(img_attn), self.to_out_t(txt_attn)
#########################################################################################################################################################################
class HDBlockDouble(nn.Module):
    """Double-stream HiDream DiT block.

    Image and text tokens are processed as two parallel streams that interact
    through joint attention (``self.attn1``). Each stream has its own adaLN
    modulation (shift/scale/gate for both the attention and MLP sub-blocks),
    its own LayerNorms, and its own feed-forward: a routed MoE SwiGLU for the
    image stream and a plain SwiGLU for the text stream.
    """

    # Class-level scratch dict shared by all instances (populated externally, if at all).
    buffer = {}

    def __init__(
        self,
        dim                   : int,
        heads                 : int,
        head_dim              : int,
        num_routed_experts    : int = 4,
        num_activated_experts : int = 2,
        dtype=None, device=None, operations=None
    ):
        super().__init__()
        # SiLU -> Linear producing 12 modulation tensors in one shot:
        # (shift, scale, gate) x (attention, MLP) x (image, text).
        self.adaLN_modulation = nn.Sequential(
            nn.SiLU(),
            operations.Linear(dim, 12*dim, bias=True, dtype=dtype, device=device)
        )
        # elementwise_affine=False: scale/shift come from adaLN instead.
        self.norm1_i = operations.LayerNorm(dim, eps = 1e-06, elementwise_affine = False, dtype=dtype, device=device)
        self.norm1_t = operations.LayerNorm(dim, eps = 1e-06, elementwise_affine = False, dtype=dtype, device=device)
        self.attn1 = HDAttention (dim, heads, head_dim, single=False, dtype=dtype, device=device, operations=operations)
        self.norm3_i = operations.LayerNorm(dim, eps = 1e-06, elementwise_affine = False, dtype=dtype, device=device)
        # Image stream uses a routed mixture-of-experts feed-forward.
        self.ff_i = HDMOEFeedForwardSwiGLU(dim, 4*dim, num_routed_experts, num_activated_experts, dtype=dtype, device=device, operations=operations)
        self.norm3_t = operations.LayerNorm(dim, eps = 1e-06, elementwise_affine = False, dtype=dtype, device=device)
        # Text stream uses a plain (non-MoE) SwiGLU feed-forward.
        self.ff_t = HDFeedForwardSwiGLU(dim, 4*dim, dtype=dtype, device=device, operations=operations)

    def forward(
        self,
        img               : FloatTensor,
        img_masks         : Optional[FloatTensor] = None,
        txt               : Optional[FloatTensor] = None,
        clip              : Optional[FloatTensor] = None,   # clip = t + p_embedder (from pooled)
        rope              : FloatTensor           = None,
        mask              : Optional[FloatTensor] = None,
        update_cross_attn : Optional[Dict]        = None,
        style_block = None,
    ) -> FloatTensor:
        """Run one double-stream block; returns the updated (img, txt) pair.

        NOTE: ``img`` and ``txt`` are updated IN PLACE via ``+=``/``*=`` below,
        so the caller's tensors are mutated. ``style_block.img``/``.txt`` are
        style-transfer tap points invoked after every named stage; their call
        order is part of the contract.
        """
        # Split the 12 adaLN modulation tensors (broadcast over the token dim).
        img_msa_shift, img_msa_scale, img_msa_gate, img_mlp_shift, img_mlp_scale, img_mlp_gate, \
        txt_msa_shift, txt_msa_scale, txt_msa_gate, txt_mlp_shift, txt_mlp_scale, txt_mlp_gate = self.adaLN_modulation(clip)[:,None].chunk(12, dim=-1) # 1,1,2560

        # --- ATTENTION: norm -> modulate -> joint attention -> gate -> residual ---
        img_norm = self.norm1_i(img)
        txt_norm = self.norm1_t(txt)

        img_norm = style_block.img(img_norm, "attn_norm")
        txt_norm = style_block.txt(txt_norm, "attn_norm")

        img_norm = img_norm * (1+img_msa_scale) + img_msa_shift
        txt_norm = txt_norm * (1+txt_msa_scale) + txt_msa_shift

        img_norm = style_block.img(img_norm, "attn_norm_mod")
        txt_norm = style_block.txt(txt_norm, "attn_norm_mod")

        img_attn, txt_attn = self.attn1(img_norm, img_masks, txt_norm, rope=rope, mask=mask, update_cross_attn=update_cross_attn, style_block=style_block)

        img_attn = style_block.img(img_attn, "attn")
        txt_attn = style_block.txt(txt_attn, "attn")

        # Gate, then apply the residual in place.
        img_attn *= img_msa_gate
        txt_attn *= txt_msa_gate

        img_attn = style_block.img(img_attn, "attn_gated")
        txt_attn = style_block.txt(txt_attn, "attn_gated")

        img += img_attn
        txt += txt_attn

        img = style_block.img(img, "attn_res")
        txt = style_block.txt(txt, "attn_res")

        # --- FEED FORWARD: norm -> modulate -> FF (MoE for img) -> gate -> residual ---
        img_norm = self.norm3_i(img)
        txt_norm = self.norm3_t(txt)

        img_norm = style_block.img(img_norm, "ff_norm")
        txt_norm = style_block.txt(txt_norm, "ff_norm")

        img_norm = img_norm * (1+img_mlp_scale) + img_mlp_shift
        txt_norm = txt_norm * (1+txt_mlp_scale) + txt_mlp_shift

        img_norm = style_block.img(img_norm, "ff_norm_mod")
        txt_norm = style_block.txt(txt_norm, "ff_norm_mod")

        img_ff_i = self.ff_i(img_norm, style_block.img.FF)
        txt_ff_t = self.ff_t(txt_norm, style_block.txt.FF)

        img_ff_i = style_block.img(img_ff_i, "ff")
        txt_ff_t = style_block.txt(txt_ff_t, "ff")

        img_ff_i *= img_mlp_gate
        txt_ff_t *= txt_mlp_gate

        img_ff_i = style_block.img(img_ff_i, "ff_gated")
        txt_ff_t = style_block.txt(txt_ff_t, "ff_gated")

        img += img_ff_i
        txt += txt_ff_t

        img = style_block.img(img, "ff_res")
        txt = style_block.txt(txt, "ff_res")

        return img, txt
#########################################################################################################################################################################
class HDBlockSingle(nn.Module):
    """Single-stream HiDream DiT block.

    Operates on one fused token sequence (image tokens with text tokens
    concatenated onto the end by the caller). Structure mirrors
    ``HDBlockDouble`` but with a single stream: 6 adaLN modulation tensors
    instead of 12, one attention, and one MoE feed-forward.
    """

    # Class-level scratch dict shared by all instances (populated externally, if at all).
    buffer = {}

    def __init__(
        self,
        dim                   : int,
        heads                 : int,
        head_dim              : int,
        num_routed_experts    : int = 4,
        num_activated_experts : int = 2,
        dtype=None, device=None, operations=None
    ):
        super().__init__()
        # SiLU -> Linear producing 6 modulation tensors:
        # (shift, scale, gate) x (attention, MLP) for the single stream.
        self.adaLN_modulation = nn.Sequential(
            nn.SiLU(),
            operations.Linear(dim, 6 * dim, bias=True, dtype=dtype, device=device)
        )
        # elementwise_affine=False: scale/shift come from adaLN instead.
        self.norm1_i = operations.LayerNorm(dim, eps = 1e-06, elementwise_affine = False, dtype=dtype, device=device)
        self.attn1 = HDAttention (dim, heads, head_dim, single=True, dtype=dtype, device=device, operations=operations)
        self.norm3_i = operations.LayerNorm(dim, eps = 1e-06, elementwise_affine = False, dtype=dtype, device=device)
        self.ff_i = HDMOEFeedForwardSwiGLU(dim, 4*dim, num_routed_experts, num_activated_experts, dtype=dtype, device=device, operations=operations)

    def forward(
        self,
        img               : FloatTensor,
        img_masks         : Optional[FloatTensor] = None,
        txt               : Optional[FloatTensor] = None,
        clip              : Optional[FloatTensor] = None,
        rope              : FloatTensor           = None,
        mask              : Optional[FloatTensor] = None,
        update_cross_attn : Optional[Dict]        = None,
        style_block = None,
    ) -> FloatTensor:
        """Run one single-stream block; returns the updated fused sequence.

        NOTE: ``txt`` and ``update_cross_attn`` are accepted for signature
        parity with the double block but are not used here. ``img`` is updated
        IN PLACE via ``+=``, mutating the caller's tensor. ``style_block.img``
        calls are ordered style-transfer tap points.
        """
        # Split the 6 adaLN modulation tensors (broadcast over the token dim).
        img_msa_shift, img_msa_scale, img_msa_gate, img_mlp_shift, img_mlp_scale, img_mlp_gate = self.adaLN_modulation(clip)[:,None].chunk(6, dim=-1)

        # --- ATTENTION: norm -> modulate -> attention -> gate -> residual ---
        img_norm = self.norm1_i(img)
        img_norm = style_block.img(img_norm, "attn_norm")
        img_norm = img_norm * (1+img_msa_scale) + img_msa_shift
        img_norm = style_block.img(img_norm, "attn_norm_mod")

        img_attn = self.attn1(img_norm, img_masks, rope=rope, mask=mask, style_block=style_block)
        img_attn = style_block.img(img_attn, "attn")
        img_attn *= img_msa_gate
        img_attn = style_block.img(img_attn, "attn_gated")

        img += img_attn
        img = style_block.img(img, "attn_res")

        # --- FEED FORWARD: norm -> modulate -> MoE FF -> gate -> residual ---
        img_norm = self.norm3_i(img)
        img_norm = style_block.img(img_norm, "ff_norm")
        img_norm = img_norm * (1+img_mlp_scale) + img_mlp_shift
        img_norm = style_block.img(img_norm, "ff_norm_mod")

        img_ff_i = self.ff_i(img_norm, style_block.img.FF)
        img_ff_i = style_block.img(img_ff_i, "ff") # fused... "ff" + "attn"
        img_ff_i *= img_mlp_gate
        img_ff_i = style_block.img(img_ff_i, "ff_gated")

        img += img_ff_i
        img = style_block.img(img, "ff_res")

        return img
#########################################################################################################################################################################
class HDModel(nn.Module):
CHANNELS = 2560
RECON_MODE = False
def __init__(
    self,
    patch_size            : Optional[int]   = None,
    in_channels           : int             = 64,
    out_channels          : Optional[int]   = None,
    num_layers            : int             = 16,
    num_single_layers     : int             = 32,
    attention_head_dim    : int             = 128,
    num_attention_heads   : int             = 20,
    caption_channels      : List[int]       = None,
    text_emb_dim          : int             = 2048,
    num_routed_experts    : int             = 4,
    num_activated_experts : int             = 2,
    axes_dims_rope        : Tuple[int, int] = ( 32, 32),
    max_resolution        : Tuple[int, int] = (128, 128),
    llama_layers          : List[int]       = None,
    image_model                             = None, # unused, what was this supposed to be??
    dtype=None, device=None, operations=None
):
    """Build the HiDream DiT: patch/timestep/pooled embedders, double- and
    single-stream transformer stacks, per-layer caption projections, and the
    final output layer. ``operations`` supplies the Linear/LayerNorm factories
    (comfy's cast-aware ops)."""
    super().__init__()

    # Plain config attributes.
    self.patch_size              = patch_size
    self.num_attention_heads     = num_attention_heads
    self.attention_head_dim      = attention_head_dim
    self.num_layers              = num_layers
    self.num_single_layers       = num_single_layers
    self.gradient_checkpointing  = False
    self.dtype                   = dtype
    self.out_channels            = out_channels or in_channels
    self.inner_dim               = self.num_attention_heads * self.attention_head_dim
    self.llama_layers            = llama_layers

    # Embedders: timestep, pooled text, image patches, rotary positions.
    self.t_embedder = TimestepEmbed( self.inner_dim, dtype=dtype, device=device, operations=operations)
    self.p_embedder = PooledEmbed(text_emb_dim, self.inner_dim, dtype=dtype, device=device, operations=operations)
    self.x_embedder = PatchEmbed(
        patch_size   = patch_size,
        in_channels  = in_channels,
        out_channels = self.inner_dim,
        dtype=dtype, device=device, operations=operations
    )
    self.pe_embedder = EmbedND(theta=10000, axes_dim=axes_dims_rope)

    def _build_block(block_type):
        # One transformer block; only the double/single type differs.
        return HDBlock(
            dim                   = self.inner_dim,
            heads                 = self.num_attention_heads,
            head_dim              = self.attention_head_dim,
            num_routed_experts    = num_routed_experts,
            num_activated_experts = num_activated_experts,
            block_type            = block_type,
            dtype=dtype, device=device, operations=operations
        )

    self.double_stream_blocks = nn.ModuleList([_build_block(BlockType.Double) for _ in range(self.num_layers)])
    self.single_stream_blocks = nn.ModuleList([_build_block(BlockType.Single) for _ in range(self.num_single_layers)])

    self.final_layer = HDLastLayer(self.inner_dim, patch_size, self.out_channels, dtype=dtype, device=device, operations=operations)

    # One projection per transformer block (llama hidden size), plus a final
    # slot for the T5 context (caption_channels[0]).
    proj_in_channels = [caption_channels[1]] * (num_layers + num_single_layers) + [caption_channels[0]]
    self.caption_projection = nn.ModuleList(
        TextProjection(in_features=ch, hidden_size=self.inner_dim, dtype=dtype, device=device, operations=operations)
        for ch in proj_in_channels
    )

    # Maximum number of image tokens at max_resolution.
    self.max_seq = max_resolution[0] * max_resolution[1] // (patch_size * patch_size)
def prepare_contexts(self, llama3, context, bsz, img_num_fea):
    """Select and project the per-layer llama hidden states, then append the
    projected T5 context as the final entry.

    llama3 has the layer axis second; it is moved to the front and indexed by
    ``self.llama_layers``. Each selected layer goes through its matching
    ``caption_projection`` entry; the last projection slot is reserved for the
    T5 ``context``. Returns a list of [bsz, seq, img_num_fea] tensors.
    """
    per_layer = llama3.movedim(1, 0)
    selected  = [per_layer[k] for k in self.llama_layers]  # e.g. 48 tensors of 1,143,4096

    if self.caption_projection is not None:
        # zip stops at len(selected); the final projection is left for T5.
        selected = [
            proj(cxt).view(bsz, -1, img_num_fea)           # 4096 -> 2560
            for proj, cxt in zip(self.caption_projection, selected)
        ]

    t5_context = self.caption_projection[-1](context).view(bsz, -1, img_num_fea)
    selected.append(t5_context)                            # last chunk is T5

    return selected
### FORWARD ... FORWARD ... FORWARD ... FORWARD ... FORWARD ... FORWARD ... FORWARD ... FORWARD ... FORWARD ... FORWARD ... FORWARD ... FORWARD ... FORWARD ###
def forward(
self,
x : Tensor,
t : Tensor,
y : Optional[Tensor] = None,
context : Optional[Tensor] = None,
encoder_hidden_states_llama3 = None, # 1,32,143,4096
image_cond = None, # HiDream E1
control = None,
transformer_options = {},
mask : Optional[Tensor] = None,
) -> Tensor:
x_orig = x.clone()
b, c, h, w = x.shape
if image_cond is not None: # HiDream E1
x = torch.cat([x, image_cond], dim=-1)
h_len = ((h + (self.patch_size // 2)) // self.patch_size) # h_len 96
w_len = ((w + (self.patch_size // 2)) // self.patch_size) # w_len 96
img_len = h_len * w_len
txt_slice = slice(img_len, None)
img_slice = slice(None, img_len)
SIGMA = t[0].clone() / 1000
EO = transformer_options.get("ExtraOptions", ExtraOptions(""))
if EO is not None:
EO.mute = True
if EO("zero_heads"):
HEADS = 0
else:
HEADS = 20
StyleMMDiT = transformer_options.get('StyleMMDiT', StyleMMDiT_Model())
StyleMMDiT.set_len(h_len, w_len, img_slice, txt_slice, HEADS=HEADS)
StyleMMDiT.Retrojector = self.Retrojector if hasattr(self, "Retrojector") else None
transformer_options['StyleMMDiT'] = None
x_tmp = transformer_options.get("x_tmp")
if x_tmp is not None:
x_tmp = x_tmp.expand(x.shape[0], -1, -1, -1).clone()
img = comfy.ldm.common_dit.pad_to_patch_size(x_tmp, (self.patch_size, self.patch_size))
else:
img = comfy.ldm.common_dit.pad_to_patch_size(x, (self.patch_size, self.patch_size))
y0_style, img_y0_style = None, None
img_orig, t_orig, y_orig, context_orig, llama3_orig = clone_inputs(img, t, y, context, encoder_hidden_states_llama3)
weight = -1 * transformer_options.get("regional_conditioning_weight", 0.0)
floor = -1 * transformer_options.get("regional_conditioning_floor", 0.0)
update_cross_attn = transformer_options.get("update_cross_attn")
z_ = transformer_options.get("z_") # initial noise and/or image+noise from start of rk_sampler_beta()
rk_row = transformer_options.get("row") # for "smart noise"
if z_ is not None:
x_init = z_[rk_row].to(x)
elif 'x_init' in transformer_options:
x_init = transformer_options.get('x_init').to(x)
# recon loop to extract exact noise pred for scattersort guide assembly
HDModel.RECON_MODE = StyleMMDiT.noise_mode == "recon"
recon_iterations = 2 if StyleMMDiT.noise_mode == "recon" else 1
for recon_iter in range(recon_iterations):
y0_style = StyleMMDiT.guides
y0_style_active = True if type(y0_style) == torch.Tensor else False
HDModel.RECON_MODE = True if StyleMMDiT.noise_mode == "recon" and recon_iter == 0 else False
if StyleMMDiT.noise_mode == "recon" and recon_iter == 1:
x_recon = x_tmp if x_tmp is not None else x_orig
noise_prediction = x_recon + (1-SIGMA.to(x_recon)) * eps.to(x_recon)
denoised = x_recon - SIGMA.to(x_recon) * eps.to(x_recon)
denoised = StyleMMDiT.apply_recon_lure(denoised, y0_style)
new_x = (1-SIGMA.to(denoised)) * denoised + SIGMA.to(denoised) * noise_prediction
img_orig = img = comfy.ldm.common_dit.pad_to_patch_size(new_x, (self.patch_size, self.patch_size))
x_init = noise_prediction
elif StyleMMDiT.noise_mode == "bonanza":
x_init = torch.randn_like(x_init)
if y0_style_active:
SIGMA_ADAIN = (SIGMA * EO("eps_adain_sigma_factor", 1.0)).to(y0_style)
y0_style_noised = (1-SIGMA_ADAIN) * y0_style + SIGMA_ADAIN * x_init[0:1].to(y0_style) #always only use first batch of noise to avoid broadcasting
img_y0_style_orig = comfy.ldm.common_dit.pad_to_patch_size(y0_style_noised, (self.patch_size, self.patch_size))
mask_zero = None
out_list = []
for cond_iter in range(len(transformer_options['cond_or_uncond'])):
UNCOND = transformer_options['cond_or_uncond'][cond_iter] == 1
if update_cross_attn is not None:
update_cross_attn['UNCOND'] = UNCOND
bsz_style = y0_style.shape[0] if y0_style_active else 0
bsz = 1 if HDModel.RECON_MODE else bsz_style + 1
img, t, y, context, llama3 = clone_inputs(img_orig, t_orig, y_orig, context_orig, llama3_orig, index=cond_iter)
mask = None
if not UNCOND and 'AttnMask' in transformer_options: # and weight != 0:
AttnMask = transformer_options['AttnMask']
mask = transformer_options['AttnMask'].attn_mask.mask.to('cuda')
if mask_zero is None:
mask_zero = torch.ones_like(mask)
#img_len = transformer_options['AttnMask'].img_len
mask_zero[img_len:, img_len:] = mask[img_len:, img_len:]
if weight == 0:
context = transformer_options['RegContext'].context.to(context.dtype).to(context.device)
context = context.view(128, -1, context.shape[-1]).sum(dim=-2) # 128 !!!
llama3 = transformer_options['RegContext'].llama3 .to(llama3 .dtype).to(llama3 .device)
mask = None
else:
context = transformer_options['RegContext'].context.to(context.dtype).to(context.device)
llama3 = transformer_options['RegContext'].llama3 .to(llama3 .dtype).to(llama3 .device)
if UNCOND and 'AttnMask_neg' in transformer_options: # and weight != 0:
AttnMask = transformer_options['AttnMask_neg']
mask = transformer_options['AttnMask_neg'].attn_mask.mask.to('cuda')
if mask_zero is None:
mask_zero = torch.ones_like(mask)
img_len = transformer_options['AttnMask_neg'].img_len
mask_zero[img_len:, img_len:] = mask[img_len:, img_len:]
if weight == 0:
context = transformer_options['RegContext_neg'].context.to(context.dtype).to(context.device)
context = context.view(128, -1, context.shape[-1]).sum(dim=-2) # 128 !!!
llama3 = transformer_options['RegContext_neg'].llama3 .to(llama3 .dtype).to(llama3 .device)
mask = None
else:
context = transformer_options['RegContext_neg'].context.to(context.dtype).to(context.device)
llama3 = transformer_options['RegContext_neg'].llama3 .to(llama3 .dtype).to(llama3 .device)
elif UNCOND and 'AttnMask' in transformer_options:
AttnMask = transformer_options['AttnMask']
mask = transformer_options['AttnMask'].attn_mask.mask.to('cuda')
if mask_zero is None:
mask_zero = torch.ones_like(mask)
#img_len = transformer_options['AttnMask'].img_len
mask_zero[img_len:, img_len:] = mask[img_len:, img_len:]
if weight == 0: # ADDED 5/23/2025
context = transformer_options['RegContext'].context.to(context.dtype).to(context.device) # ADDED 5/26/2025 14:53
context = context.view(128, -1, context.shape[-1]).sum(dim=-2) # 128 !!!
llama3 = transformer_options['RegContext'].llama3 .to(llama3 .dtype).to(llama3 .device)
mask = None
else:
A = context
B = transformer_options['RegContext'].context
context = A.repeat(1, (B.shape[1] // A.shape[1]) + 1, 1)[:, :B.shape[1], :]
A = llama3
B = transformer_options['RegContext'].llama3
llama3 = A.repeat(1, 1, (B.shape[2] // A.shape[2]) + 1, 1)[:,:, :B.shape[2], :]
if y0_style_active and not HDModel.RECON_MODE:
if mask is None:
context, y, llama3 = StyleMMDiT.apply_style_conditioning(
UNCOND = UNCOND,
base_context = context,
base_y = y,
base_llama3 = llama3,
)
else:
context = context.repeat(bsz_style + 1, 1, 1)
y = y.repeat(bsz_style + 1, 1) if y is not None else None
llama3 = llama3.repeat(bsz_style + 1, 1, 1, 1) if llama3 is not None else None
img_y0_style = img_y0_style_orig.clone()
if mask is not None and not type(mask[0][0].item()) == bool:
mask = mask.to(x.dtype)
if mask_zero is not None and not type(mask_zero[0][0].item()) == bool:
mask_zero = mask_zero.to(x.dtype)
# prep embeds
t = self.expand_timesteps(t, bsz, x.device)
t = self.t_embedder (t, x.dtype)
clip = t + self.p_embedder(y)
x_embedder_dtype = self.x_embedder.proj.weight.data.dtype
if x_embedder_dtype not in {torch.bfloat16, torch.float16, torch.float32, torch.float64}:
x_embedder_dtype = x.dtype
img_sizes = None
img, img_masks, img_sizes = self.patchify(img, self.max_seq, img_sizes) # for 1024x1024: output is 1,4096,64 None [[64,64]] hidden_states rearranged not shrunk, patch_size 1x1???
if img_masks is None:
pH, pW = img_sizes[0]
img_ids = torch.zeros(pH, pW, 3, device=img.device)
img_ids[..., 1] = img_ids[..., 1] + torch.arange(pH, device=img.device)[:, None]
img_ids[..., 2] = img_ids[..., 2] + torch.arange(pW, device=img.device)[None, :]
img_ids = repeat(img_ids, "h w c -> b (h w) c", b=bsz)
img = self.x_embedder(img.to(x_embedder_dtype))
#img_len = img.shape[-2]
if y0_style_active and not HDModel.RECON_MODE:
img_y0_style, _, _ = self.patchify(img_y0_style_orig.clone(), self.max_seq, None) # for 1024x1024: output is 1,4096,64 None [[64,64]] hidden_states rearranged not shrunk, patch_size 1x1???
img_y0_style = self.x_embedder(img_y0_style.to(x_embedder_dtype)) # hidden_states 1,4032,2560 for 1024x1024: -> 1,4096,2560 ,64 -> ,2560 (x40)
img = torch.cat([img, img_y0_style], dim=0)
contexts = self.prepare_contexts(llama3, context, bsz, img.shape[-1])
# txt_ids -> 1,414,3
txt_ids = torch.zeros(bsz, contexts[-1].shape[1] + contexts[-2].shape[1] + contexts[0].shape[1], 3, device=img_ids.device, dtype=img_ids.dtype)
ids = torch.cat((img_ids, txt_ids), dim=-2) # ids -> 1,4446,3
rope = self.pe_embedder(ids) # rope -> 1, 4446, 1, 64, 2, 2
txt_init = torch.cat([contexts[-1], contexts[-2]], dim=-2) # shape[1] == 128, 143 then on another step/call it's 128, 128...??? cuz the contexts is now 1,128,2560
txt_init_len = txt_init.shape[-2] # 271
if mask is not None:
txt_init_list = []
offset_t5_start = 0
for i in range(transformer_options['AttnMask'].num_regions):
offset_t5_end = offset_t5_start + transformer_options['AttnMask'].context_lens_list[i][0]
txt_init_list.append(contexts[-1][:,offset_t5_start:offset_t5_end,:])
offset_t5_start = offset_t5_end
offset_llama_start = 0
for i in range(transformer_options['AttnMask'].num_regions):
offset_llama_end = offset_llama_start + transformer_options['AttnMask'].context_lens_list[i][1]
txt_init_list.append(contexts[-2][:,offset_llama_start:offset_llama_end,:])
offset_llama_start = offset_llama_end
txt_init = torch.cat(txt_init_list, dim=-2) #T5,LLAMA3 (last block)
txt_init_len = txt_init.shape[-2]
img = StyleMMDiT(img, "proj_in")
img = img.to(x) if img is not None else None
# DOUBLE STREAM
for bid, (block, style_block) in enumerate(zip(self.double_stream_blocks, StyleMMDiT.double_blocks)):
txt_llama = contexts[bid]
txt = torch.cat([txt_init, txt_llama], dim=-2) # 1,384,2560 # cur_contexts = T5, LLAMA3 (last block), LLAMA3 (current block)
if weight > 0 and mask is not None and weight < bid/48:
img, txt_init = block(img, img_masks, txt, clip, rope, mask_zero, style_block=style_block)
elif (weight < 0 and mask is not None and abs(weight) < (1 - bid/48)):
img_tmpZ, txt_tmpZ = img.clone(), txt.clone()
# more efficient than the commented lines below being used instead in the loop?
img_tmpZ, txt_init = block(img_tmpZ, img_masks, txt_tmpZ, clip, rope, mask, style_block=style_block)
img , txt_tmpZ = block(img , img_masks, txt , clip, rope, mask_zero, style_block=style_block)
elif floor > 0 and mask is not None and floor > bid/48:
mask_tmp = mask.clone()
mask_tmp[:img_len,:img_len] = 1.0
img, txt_init = block(img, img_masks, txt, clip, rope, mask_tmp, style_block=style_block)
elif floor < 0 and mask is not None and abs(floor) > (1 - bid/48):
mask_tmp = mask.clone()
mask_tmp[:img_len,:img_len] = 1.0
img, txt_init = block(img, img_masks, txt, clip, rope, mask_tmp, style_block=style_block)
elif update_cross_attn is not None and update_cross_attn['skip_cross_attn']:
img, txt_init = block(img, img_masks, txt, clip, rope, mask, update_cross_attn=update_cross_attn)
else:
img, txt_init = block(img, img_masks, txt, clip, rope, mask, update_cross_attn=update_cross_attn, style_block=style_block)
txt_init = txt_init[..., :txt_init_len, :]
# END DOUBLE STREAM
img = torch.cat([img, txt_init], dim=-2) # 4032 + 271 -> 4303 # txt embed from double stream block
joint_len = img.shape[-2]
if img_masks is not None:
img_masks_ones = torch.ones( (bsz, txt_init.shape[-2] + txt_llama.shape[-2]), device=img_masks.device, dtype=img_masks.dtype) # encoder_attention_mask_ones= padding for txt embed concatted onto end of img
img_masks = torch.cat([img_masks, img_masks_ones], dim=-2)
# SINGLE STREAM
for bid, (block, style_block) in enumerate(zip(self.single_stream_blocks, StyleMMDiT.single_blocks)):
txt_llama = contexts[bid+16] # T5 pre-embedded for single stream blocks
img = torch.cat([img, txt_llama], dim=-2) # cat img,txt opposite of flux which is txt,img 4303 + 143 -> 4446
if weight > 0 and mask is not None and weight < (bid+16)/48:
img = block(img, img_masks, None, clip, rope, mask_zero, style_block=style_block)
elif weight < 0 and mask is not None and abs(weight) < (1 - (bid+16)/48):
img = block(img, img_masks, None, clip, rope, mask_zero, style_block=style_block)
elif floor > 0 and mask is not None and floor > (bid+16)/48:
mask_tmp = mask.clone()
mask_tmp[:img_len,:img_len] = 1.0
img = block(img, img_masks, None, clip, rope, mask_tmp, style_block=style_block)
elif floor < 0 and mask is not None and abs(floor) > (1 - (bid+16)/48):
mask_tmp = mask.clone()
mask_tmp[:img_len,:img_len] = 1.0
img = block(img, img_masks, None, clip, rope, mask_tmp, style_block=style_block)
else:
img = block(img, img_masks, None, clip, rope, mask, style_block=style_block)
img = img[..., :joint_len, :] # slice off txt_llama
# END SINGLE STREAM
img = img[..., :img_len, :]
#img = self.final_layer(img, clip) # 4096,2560 -> 4096,64
shift, scale = self.final_layer.adaLN_modulation(clip).chunk(2,dim=1)
img = (1 + scale[:, None, :]) * self.final_layer.norm_final(img) + shift[:, None, :]
if not EO("endojector"):
img = StyleMMDiT(img, "proj_out")
if y0_style_active and not HDModel.RECON_MODE:
img = img[0:1]
if EO("endojector"):
if EO("dumb"):
eps_style = x_init[0:1].to(y0_style) - y0_style
else:
eps_style = (x_tmp[0:1].to(y0_style) - y0_style) / SIGMA.to(y0_style)
eps_embed = self.Endojector.embed(eps_style)
img = StyleMMDiT.scattersort_(img.to(eps_embed), eps_embed)
img = self.final_layer.linear(img.to(self.final_layer.linear.weight.data))
img = self.unpatchify(img, img_sizes)
out_list.append(img)
output = torch.cat(out_list, dim=0)
eps = -output[:, :, :h, :w]
if recon_iter == 1:
denoised = new_x - SIGMA.to(new_x) * eps.to(new_x)
if x_tmp is not None:
eps = (x_tmp - denoised.to(x_tmp)) / SIGMA.to(x_tmp)
else:
eps = (x_orig - denoised.to(x_orig)) / SIGMA.to(x_orig)
freqsep_lowpass_method = transformer_options.get("freqsep_lowpass_method")
freqsep_sigma = transformer_options.get("freqsep_sigma")
freqsep_kernel_size = transformer_options.get("freqsep_kernel_size")
freqsep_inner_kernel_size = transformer_options.get("freqsep_inner_kernel_size")
freqsep_stride = transformer_options.get("freqsep_stride")
freqsep_lowpass_weight = transformer_options.get("freqsep_lowpass_weight")
freqsep_highpass_weight= transformer_options.get("freqsep_highpass_weight")
freqsep_mask = transformer_options.get("freqsep_mask")
y0_style_pos = transformer_options.get("y0_style_pos")
y0_style_neg = transformer_options.get("y0_style_neg")
# end recon loop
self.style_dtype = torch.float32 if self.style_dtype is None else self.style_dtype
dtype = eps.dtype if self.style_dtype is None else self.style_dtype
if y0_style_pos is not None:
y0_style_pos_weight = transformer_options.get("y0_style_pos_weight")
y0_style_pos_synweight = transformer_options.get("y0_style_pos_synweight")
y0_style_pos_synweight *= y0_style_pos_weight
y0_style_pos_mask = transformer_options.get("y0_style_pos_mask")
y0_style_pos_mask_edge = transformer_options.get("y0_style_pos_mask_edge")
y0_style_pos = y0_style_pos.to(dtype)
x = x_orig.to(dtype)
eps = eps.to(dtype)
eps_orig = eps.clone()
sigma = SIGMA #t_orig[0].to(torch.float32) / 1000
denoised = x - sigma * eps
denoised_embed = self.Retrojector.embed(denoised)
y0_adain_embed = self.Retrojector.embed(y0_style_pos)
if transformer_options['y0_style_method'] == "scattersort":
tile_h, tile_w = transformer_options.get('y0_style_tile_height'), transformer_options.get('y0_style_tile_width')
pad = transformer_options.get('y0_style_tile_padding')
if pad is not None and tile_h is not None and tile_w is not None:
denoised_spatial = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)
y0_adain_spatial = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)
if EO("scattersort_median_LP"):
denoised_spatial_LP = median_blur_2d(denoised_spatial, kernel_size=EO("scattersort_median_LP",7))
y0_adain_spatial_LP = median_blur_2d(y0_adain_spatial, kernel_size=EO("scattersort_median_LP",7))
denoised_spatial_HP = denoised_spatial - denoised_spatial_LP
y0_adain_spatial_HP = y0_adain_spatial - y0_adain_spatial_LP
denoised_spatial_LP = apply_scattersort_tiled(denoised_spatial_LP, y0_adain_spatial_LP, tile_h, tile_w, pad)
denoised_spatial = denoised_spatial_LP + denoised_spatial_HP
denoised_embed = rearrange(denoised_spatial, "b c h w -> b (h w) c")
else:
denoised_spatial = apply_scattersort_tiled(denoised_spatial, y0_adain_spatial, tile_h, tile_w, pad)
denoised_embed = rearrange(denoised_spatial, "b c h w -> b (h w) c")
else:
denoised_embed = apply_scattersort_masked(denoised_embed, y0_adain_embed, y0_style_pos_mask, y0_style_pos_mask_edge, h_len, w_len)
elif transformer_options['y0_style_method'] == "AdaIN":
if freqsep_mask is not None:
freqsep_mask = freqsep_mask.view(1, 1, *freqsep_mask.shape[-2:]).float()
freqsep_mask = F.interpolate(freqsep_mask.float(), size=(h_len, w_len), mode='nearest-exact')
if hasattr(self, "adain_tile"):
tile_h, tile_w = self.adain_tile
denoised_pretile = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)
y0_adain_pretile = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)
if self.adain_flag:
h_off = tile_h // 2
w_off = tile_w // 2
denoised_pretile = denoised_pretile[:,:,h_off:-h_off, w_off:-w_off]
self.adain_flag = False
else:
h_off = 0
w_off = 0
self.adain_flag = True
tiles, orig_shape, grid, strides = tile_latent(denoised_pretile, tile_size=(tile_h,tile_w))
y0_tiles, orig_shape, grid, strides = tile_latent(y0_adain_pretile, tile_size=(tile_h,tile_w))
tiles_out = []
for i in range(tiles.shape[0]):
tile = tiles[i].unsqueeze(0)
y0_tile = y0_tiles[i].unsqueeze(0)
tile = rearrange(tile, "b c h w -> b (h w) c", h=tile_h, w=tile_w)
y0_tile = rearrange(y0_tile, "b c h w -> b (h w) c", h=tile_h, w=tile_w)
tile = adain_seq_inplace(tile, y0_tile)
tiles_out.append(rearrange(tile, "b (h w) c -> b c h w", h=tile_h, w=tile_w))
tiles_out_tensor = torch.cat(tiles_out, dim=0)
tiles_out_tensor = untile_latent(tiles_out_tensor, orig_shape, grid, strides)
if h_off == 0:
denoised_pretile = tiles_out_tensor
else:
denoised_pretile[:,:,h_off:-h_off, w_off:-w_off] = tiles_out_tensor
denoised_embed = rearrange(denoised_pretile, "b c h w -> b (h w) c", h=h_len, w=w_len)
elif freqsep_lowpass_method is not None and freqsep_lowpass_method.endswith("pw"): #EO("adain_pw"):
denoised_spatial = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)
y0_adain_spatial = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)
if freqsep_lowpass_method == "median_pw":
denoised_spatial_new = adain_patchwise_row_batch_med(denoised_spatial.clone(), y0_adain_spatial.clone().repeat(denoised_spatial.shape[0],1,1,1), sigma=freqsep_sigma, kernel_size=freqsep_kernel_size, use_median_blur=True, lowpass_weight=freqsep_lowpass_weight, highpass_weight=freqsep_highpass_weight)
elif freqsep_lowpass_method == "gaussian_pw":
denoised_spatial_new = adain_patchwise_row_batch(denoised_spatial.clone(), y0_adain_spatial.clone().repeat(denoised_spatial.shape[0],1,1,1), sigma=freqsep_sigma, kernel_size=freqsep_kernel_size)
denoised_embed = rearrange(denoised_spatial_new, "b c h w -> b (h w) c", h=h_len, w=w_len)
elif freqsep_lowpass_method is not None:
denoised_spatial = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)
y0_adain_spatial = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)
if freqsep_lowpass_method == "median":
denoised_spatial_LP = median_blur_2d(denoised_spatial, kernel_size=freqsep_kernel_size)
y0_adain_spatial_LP = median_blur_2d(y0_adain_spatial, kernel_size=freqsep_kernel_size)
elif freqsep_lowpass_method == "gaussian":
denoised_spatial_LP = gaussian_blur_2d(denoised_spatial, sigma=freqsep_sigma, kernel_size=freqsep_kernel_size)
y0_adain_spatial_LP = gaussian_blur_2d(y0_adain_spatial, sigma=freqsep_sigma, kernel_size=freqsep_kernel_size)
denoised_spatial_HP = denoised_spatial - denoised_spatial_LP
if EO("adain_fs_uhp"):
y0_adain_spatial_HP = y0_adain_spatial - y0_adain_spatial_LP
denoised_spatial_ULP = gaussian_blur_2d(denoised_spatial, sigma=EO("adain_fs_uhp_sigma", 1.0), kernel_size=EO("adain_fs_uhp_kernel_size", 3))
y0_adain_spatial_ULP = gaussian_blur_2d(y0_adain_spatial, sigma=EO("adain_fs_uhp_sigma", 1.0), kernel_size=EO("adain_fs_uhp_kernel_size", 3))
denoised_spatial_UHP = denoised_spatial_HP - denoised_spatial_ULP
y0_adain_spatial_UHP = y0_adain_spatial_HP - y0_adain_spatial_ULP
#denoised_spatial_HP = y0_adain_spatial_ULP + denoised_spatial_UHP
denoised_spatial_HP = denoised_spatial_ULP + y0_adain_spatial_UHP
denoised_spatial_new = freqsep_lowpass_weight * y0_adain_spatial_LP + freqsep_highpass_weight * denoised_spatial_HP
denoised_embed = rearrange(denoised_spatial_new, "b c h w -> b (h w) c", h=h_len, w=w_len)
else:
denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed)
for adain_iter in range(EO("style_iter", 0)):
denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed)
denoised_embed = self.Retrojector.embed(self.Retrojector.unembed(denoised_embed))
denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed)
elif transformer_options['y0_style_method'] == "WCT":
self.StyleWCT.set(y0_adain_embed)
denoised_embed = self.StyleWCT.get(denoised_embed)
if transformer_options.get('y0_standard_guide') is not None:
y0_standard_guide = transformer_options.get('y0_standard_guide')
y0_standard_guide_embed = self.Retrojector.embed(y0_standard_guide)
f_cs = self.StyleWCT.get(y0_standard_guide_embed)
self.y0_standard_guide = self.Retrojector.unembed(f_cs)
if transformer_options.get('y0_inv_standard_guide') is not None:
y0_inv_standard_guide = transformer_options.get('y0_inv_standard_guide')
y0_inv_standard_guide_embed = self.Retrojector.embed(y0_inv_standard_guide)
f_cs = self.StyleWCT.get(y0_inv_standard_guide_embed)
self.y0_inv_standard_guide = self.Retrojector.unembed(f_cs)
elif transformer_options['y0_style_method'] == "WCT2":
self.WaveletStyleWCT.set(y0_adain_embed, h_len, w_len)
denoised_embed = self.WaveletStyleWCT.get(denoised_embed, h_len, w_len)
if transformer_options.get('y0_standard_guide') is not None:
y0_standard_guide = transformer_options.get('y0_standard_guide')
y0_standard_guide_embed = self.Retrojector.embed(y0_standard_guide)
f_cs = self.WaveletStyleWCT.get(y0_standard_guide_embed, h_len, w_len)
self.y0_standard_guide = self.Retrojector.unembed(f_cs)
if transformer_options.get('y0_inv_standard_guide') is not None:
y0_inv_standard_guide = transformer_options.get('y0_inv_standard_guide')
y0_inv_standard_guide_embed = self.Retrojector.embed(y0_inv_standard_guide)
f_cs = self.WaveletStyleWCT.get(y0_inv_standard_guide_embed, h_len, w_len)
self.y0_inv_standard_guide = self.Retrojector.unembed(f_cs)
denoised_approx = self.Retrojector.unembed(denoised_embed)
eps = (x - denoised_approx) / sigma
if not UNCOND:
if eps.shape[0] == 2:
eps[1] = eps_orig[1] + y0_style_pos_weight * (eps[1] - eps_orig[1])
eps[0] = eps_orig[0] + y0_style_pos_synweight * (eps[0] - eps_orig[0])
else:
eps[0] = eps_orig[0] + y0_style_pos_weight * (eps[0] - eps_orig[0])
elif eps.shape[0] == 1 and UNCOND:
eps[0] = eps_orig[0] + y0_style_pos_synweight * (eps[0] - eps_orig[0])
#eps = eps.float()
if y0_style_neg is not None:
y0_style_neg_weight = transformer_options.get("y0_style_neg_weight")
y0_style_neg_synweight = transformer_options.get("y0_style_neg_synweight")
y0_style_neg_synweight *= y0_style_neg_weight
y0_style_neg_mask = transformer_options.get("y0_style_neg_mask")
y0_style_neg_mask_edge = transformer_options.get("y0_style_neg_mask_edge")
y0_style_neg = y0_style_neg.to(dtype)
x = x_orig.to(dtype)
eps = eps.to(dtype)
eps_orig = eps.clone()
sigma = SIGMA #t_orig[0].to(torch.float32) / 1000
denoised = x - sigma * eps
denoised_embed = self.Retrojector.embed(denoised)
y0_adain_embed = self.Retrojector.embed(y0_style_neg)
if transformer_options['y0_style_method'] == "scattersort":
tile_h, tile_w = transformer_options.get('y0_style_tile_height'), transformer_options.get('y0_style_tile_width')
pad = transformer_options.get('y0_style_tile_padding')
if pad is not None and tile_h is not None and tile_w is not None:
denoised_spatial = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)
y0_adain_spatial = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)
denoised_spatial = apply_scattersort_tiled(denoised_spatial, y0_adain_spatial, tile_h, tile_w, pad)
denoised_embed = rearrange(denoised_spatial, "b c h w -> b (h w) c")
else:
denoised_embed = apply_scattersort_masked(denoised_embed, y0_adain_embed, y0_style_neg_mask, y0_style_neg_mask_edge, h_len, w_len)
elif transformer_options['y0_style_method'] == "AdaIN":
denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed)
for adain_iter in range(EO("style_iter", 0)):
denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed)
denoised_embed = self.Retrojector.embed(self.Retrojector.unembed(denoised_embed))
denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed)
elif transformer_options['y0_style_method'] == "WCT":
self.StyleWCT.set(y0_adain_embed)
denoised_embed = self.StyleWCT.get(denoised_embed)
elif transformer_options['y0_style_method'] == "WCT2":
self.WaveletStyleWCT.set(y0_adain_embed, h_len, w_len)
denoised_embed = self.WaveletStyleWCT.get(denoised_embed, h_len, w_len)
denoised_approx = self.Retrojector.unembed(denoised_embed)
if UNCOND:
eps = (x - denoised_approx) / sigma
eps[0] = eps_orig[0] + y0_style_neg_weight * (eps[0] - eps_orig[0])
if eps.shape[0] == 2:
eps[1] = eps_orig[1] + y0_style_neg_synweight * (eps[1] - eps_orig[1])
elif eps.shape[0] == 1 and not UNCOND:
eps[0] = eps_orig[0] + y0_style_neg_synweight * (eps[0] - eps_orig[0])
#eps = eps.float()
if EO("model_eps_out"):
self.eps_out = eps.clone()
return eps
def expand_timesteps(self, t, batch_size, device):
    """Coerce a timestep into a 1-D tensor broadcast to `batch_size`.

    Accepts a python number, a 0-dim tensor, or an already-batched tensor.
    MPS lacks float64/int64 support, hence the narrower dtypes there.
    Returns a tensor of shape (batch_size,) on `device`.
    """
    if not torch.is_tensor(t):
        is_mps = device.type == "mps"
        if isinstance(t, float):
            dtype = torch.float32 if is_mps else torch.float64
        else:
            dtype = torch.int32 if is_mps else torch.int64
        # BUG FIX: torch.Tensor(...) does not accept dtype/device kwargs;
        # the torch.tensor factory function must be used instead.
        t = torch.tensor([t], dtype=dtype, device=device)
    elif len(t.shape) == 0:
        t = t[None].to(device)
    # broadcast to batch dimension in a way that's compatible with ONNX/Core ML
    t = t.expand(batch_size)
    return t
def unpatchify(self, x: Tensor, img_sizes: List[Tuple[int, int]]) -> List[Tensor]:
    """Reassemble per-image patch sequences back into spatial image tensors.

    For each entry (pH, pW) in img_sizes, takes the first pH*pW tokens of the
    corresponding batch row and folds the (p1 p2 C) channels back into pixels.
    Results are concatenated along the batch dimension.
    """
    images = []
    for idx, (pH, pW) in enumerate(img_sizes):  # e.g. [[64, 64]]
        tokens = x[idx, :pH * pW].reshape(1, pH, pW, -1)
        images.append(
            einops.rearrange(tokens, 'B H W (p1 p2 C) -> B C (H p1) (W p2)',
                             p1=self.patch_size, p2=self.patch_size)
        )
    return torch.cat(images, dim=0)
def patchify(self, x, max_seq, img_sizes=None):
    """Convert images into flat patch sequences plus a validity mask.

    Two input modes:
      * img_sizes given: x is pre-patched ('B C S p'); a (B, max_seq) mask is
        built with 1s over each image's valid token span.
      * img_sizes None and x is a Tensor: x is a (B, C, H, W) image that is
        patched here; all tokens are valid so the mask is None.
    Returns (x, x_masks, img_sizes).
    """
    patch_area = self.patch_size * self.patch_size
    if isinstance(x, Tensor):
        B = x.shape[0]
        device, dtype = x.device, x.dtype
    else:
        B = len(x)
        device, dtype = x[0].device, x[0].dtype
    # mask allocated up front; only meaningful in the img_sizes branch
    x_masks = torch.zeros((B, max_seq), dtype=dtype, device=device)
    if img_sizes is not None:
        for i, (ph, pw) in enumerate(img_sizes):  # e.g. [[64, 64]]
            x_masks[i, 0:ph * pw] = 1
        x = einops.rearrange(x, 'B C S p -> B S (p C)', p=patch_area)
    elif isinstance(x, Tensor):
        pH = x.shape[-2] // self.patch_size
        pW = x.shape[-1] // self.patch_size
        x = einops.rearrange(x, 'B C (H p1) (W p2) -> B (H W) (p1 p2 C)',
                             p1=self.patch_size, p2=self.patch_size)
        img_sizes = [[pH, pW]] * B
        x_masks = None
    else:
        raise NotImplementedError
    return x, x_masks, img_sizes
def clone_inputs(*args, index: int=None):
    """Clone every tensor argument; with `index`, clone only that batch row
    (re-unsqueezed to keep a leading batch dimension of 1)."""
    if index is not None:
        return tuple(t[index].unsqueeze(0).clone() for t in args)
    return tuple(t.clone() for t in args)
def attention_rescale(
    query,
    key,
    value,
    attn_mask=None
) -> torch.Tensor:
    """Scaled dot-product attention where the mask MULTIPLIES the pre-softmax
    logits (a rescale) rather than being added as an additive bias.

    query/key/value: (..., L, D) / (..., S, D) / (..., S, Dv).
    attn_mask: optional multiplicative mask broadcastable to (..., L, S).
    Returns (..., L, Dv).
    """
    # removed unused locals L, S that were computed but never read
    scale_factor = 1 / math.sqrt(query.size(-1))
    attn_weight = query @ key.transpose(-2, -1) * scale_factor
    if attn_mask is not None:
        attn_weight *= attn_mask
    attn_weight = torch.softmax(attn_weight, dim=-1)
    return attn_weight @ value
class HDLastLayer(nn.Module):
    """Final projection layer: adaLN modulation followed by a linear map from
    hidden_size to patch_size^2 * out_channels per token.

    NOTE(review): `operations` is accepted but never used here — the norm and
    linear layers are plain nn modules; confirm whether custom ops were
    intended.
    """
    def __init__(self, hidden_size: int, patch_size: int, out_channels: int, dtype=None, device=None, operations=None):
        super().__init__()
        # non-affine LayerNorm; scale/shift come from adaLN_modulation instead
        self.norm_final = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device)
        self.linear = nn.Linear(hidden_size, patch_size * patch_size * out_channels, bias=True, dtype=dtype, device=device)
        # SiLU + Linear producing (shift, scale), chunked in forward()
        self.adaLN_modulation = nn.Sequential(nn.SiLU(), nn.Linear(hidden_size, 2 * hidden_size, bias=True, dtype=dtype, device=device))

    def forward(self, x: Tensor, vec: Tensor, modulation_dims=None) -> Tensor:
        x_dtype = x.dtype
        dtype = self.linear.weight.dtype
        # if the weights are in a quantized/exotic dtype, promote everything
        # to float32 and PERMANENTLY rewrite the parameter data in place
        if dtype not in {torch.bfloat16, torch.float16, torch.float32, torch.float64}:
            dtype = torch.float32
            self.linear.weight.data = self.linear.weight.data.to(dtype)
            self.linear.bias.data = self.linear.bias.data.to(dtype)
            self.adaLN_modulation[1].weight.data = self.adaLN_modulation[1].weight.data.to(dtype)
            self.adaLN_modulation[1].bias.data = self.adaLN_modulation[1].bias.data.to(dtype)
        x = x.to(dtype)
        vec = vec.to(dtype)
        # ensure vec broadcasts over the sequence dimension
        if vec.ndim == 2:
            vec = vec[:, None, :]
        shift, scale = self.adaLN_modulation(vec).chunk(2, dim=-1)
        x = apply_mod(self.norm_final(x), (1 + scale), shift, modulation_dims)
        x = self.linear(x)
        # output deliberately stays in the (possibly promoted) weight dtype
        return x #.to(x_dtype)
def apply_mod(tensor, m_mult, m_add=None, modulation_dims=None):
    """Apply multiplicative (and optionally additive) modulation.

    Without modulation_dims the whole tensor is modulated (out-of-place).
    With modulation_dims, each (start, end, vec_index) entry modulates the
    token span tensor[:, start:end] IN PLACE using row vec_index of the
    modulation tensors.
    """
    if modulation_dims is None:
        out = tensor * m_mult
        if m_add is not None:
            out = out + m_add
        return out
    for d in modulation_dims:
        span = slice(d[0], d[1])
        tensor[:, span] *= m_mult[:, d[2]]
        if m_add is not None:
            tensor[:, span] += m_add[:, d[2]]
    return tensor
================================================
FILE: images.py
================================================
import torch
import torch.nn.functional as F
import math
from torchvision import transforms
from torch import Tensor
from typing import Optional, Callable, Tuple, Dict, Any, Union, TYPE_CHECKING, TypeVar, List
import numpy as np
import kornia
import cv2
from PIL import Image, ImageFilter, ImageEnhance
import comfy
# tensor -> PIL
def tensor2pil(image):
    """Convert a [0,1] float tensor to a PIL image (uint8, squeezed)."""
    arr = image.cpu().numpy().squeeze()
    arr = np.clip(255. * arr, 0, 255).astype(np.uint8)
    return Image.fromarray(arr)
# PIL -> tensor
def pil2tensor(image):
    """Convert a PIL image to a [0,1] float tensor with a leading batch dim."""
    arr = np.array(image).astype(np.float32) / 255.0
    return torch.from_numpy(arr).unsqueeze(0)
def freq_sep_fft(img, cutoff=5, sigma=10):
    """Split a (B, C, H, W) image into low/high frequency components via FFT
    with a centered gaussian low-pass filter of width `sigma`.

    `cutoff` is kept for backward compatibility but is not used by the
    gaussian filter. Returns (low_pass_img, high_pass_img); their sum
    reconstructs the input.
    """
    fft_img = torch.fft.fft2(img, dim=(-2, -1))
    fft_shifted = torch.fft.fftshift(fft_img)
    _, _, h, w = img.shape
    # freq domain -> meshgrid
    # BUG FIX: use float coordinates (torch.sqrt is not implemented for
    # integer tensors) and pass indexing='ij' explicitly (the implicit
    # default is deprecated; 'ij' matches the original row/col layout).
    y, x = torch.meshgrid(
        torch.arange(h, device=img.device, dtype=torch.float32),
        torch.arange(w, device=img.device, dtype=torch.float32),
        indexing='ij',
    )
    center_y, center_x = h // 2, w // 2
    distance = torch.sqrt((x - center_x) ** 2 + (y - center_y) ** 2)
    # smoother low-pass filter via gaussian filter
    low_pass_filter = torch.exp(-distance**2 / (2 * sigma**2))
    low_pass_filter = low_pass_filter.unsqueeze(0).unsqueeze(0)
    low_pass_fft = fft_shifted * low_pass_filter
    high_pass_fft = fft_shifted * (1 - low_pass_filter)
    # inverse FFT -> return to spatial domain
    low_pass_img = torch.fft.ifft2(torch.fft.ifftshift( low_pass_fft), dim=(-2, -1)).real
    high_pass_img = torch.fft.ifft2(torch.fft.ifftshift(high_pass_fft), dim=(-2, -1)).real
    return low_pass_img, high_pass_img
def color_dodge_blend(base, blend):
    """Color dodge: brighten base by dividing by the inverted blend layer."""
    denom = (1 - blend) + 1e-8
    return (base / denom).clamp(0, 1)
def color_scorch_blend(base, blend):
    """Inverse of color dodge: darken base relative to the inverted blend."""
    result = 1 - (1 - base) / ((1 - blend) + 1e-8)
    return result.clamp(0, 1)
def divide_blend(base, blend):
    """Divide blend mode: base / blend, epsilon-guarded and clamped to [0,1]."""
    return torch.clamp(base / (blend + 1e-8), min=0, max=1)
def color_burn_blend(base, blend):
    """Color burn: darken base by dividing its inverse by blend."""
    burned = 1 - (1 - base) / (blend + 1e-8)
    return burned.clamp(0, 1)
def hard_light_blend(base, blend):
    """Hard light: multiply for dark blend values (<= 0.5), screen otherwise."""
    multiply = 2 * base * blend
    screen = 1 - 2 * (1 - base) * (1 - blend)
    return torch.where(blend <= 0.5, multiply, screen)
def hard_light_freq_sep(original, low_pass):
    """Recover the high-pass layer for hard-light frequency separation.

    Averages a color-burn of the original against the inverted low-pass with
    a divide of the original by the low-pass (the two helper blends are
    inlined here; arithmetic is identical).
    """
    burn_term = torch.clamp(1 - (1 - original) / ((1 - (1 - low_pass)) + 1e-8), 0, 1)
    divide_term = torch.clamp(original / (low_pass + 1e-8), 0, 1)
    return (burn_term + divide_term) / 2
def linear_light_blend(base, blend):
    """Linear light: linear burn for blend <= 0.5, linear dodge above.

    Note the two branches are algebraically the same expression
    (base + 2*blend - 1); the split mirrors the classic definition.
    Result is NOT clamped.
    """
    burn = base + 2 * blend - 1
    dodge = base + 2 * (blend - 0.5)
    return torch.where(blend <= 0.5, burn, dodge)
def linear_light_freq_sep(base, blend):
    """High-pass extraction for linear-light separation: midpoint of base and
    the inverted blend (low-pass) layer."""
    inverted = 1 - blend
    return (base + inverted) / 2
def scale_to_range(value, min_old, max_old, min_new, max_new):
    """Linearly remap value from [min_old, max_old] to [min_new, max_new]."""
    fraction = (value - min_old) / (max_old - min_old)
    return fraction * (max_new - min_new) + min_new
def normalize_lab(lab_image):
    """Normalize a channels-first LAB tensor so every channel lies in [0,1]:
    L is scaled from [0,100], A and B from [-128,127] (scale_to_range is
    inlined as (v + 128) / 255)."""
    L = lab_image[:, 0:1, :, :]
    A = lab_image[:, 1:2, :, :]
    B = lab_image[:, 2:3, :, :]
    L_normalized = L / 100.0
    A_normalized = (A + 128.0) / 255.0
    B_normalized = (B + 128.0) / 255.0
    return torch.cat([L_normalized, A_normalized, B_normalized], dim=1)
def denormalize_lab(lab_normalized):
    """Inverse of normalize_lab: map [0,1] channels back to LAB ranges
    (L to [0,100], A/B to [-128,127]; scale_to_range inlined as v*255-128)."""
    L_normalized, A_normalized, B_normalized = torch.split(lab_normalized, 1, dim=1)
    L = L_normalized * 100.0
    A = A_normalized * 255.0 - 128.0
    B = B_normalized * 255.0 - 128.0
    return torch.cat([L, A, B], dim=1)
def rgb_to_lab(image):
    """RGB -> CIELAB via kornia; callers in this file pass channels-first
    (B, 3, H, W) float tensors."""
    return kornia.color.rgb_to_lab(image)
def lab_to_rgb(image):
    """CIELAB -> RGB via kornia; inverse of rgb_to_lab, channels-first."""
    return kornia.color.lab_to_rgb(image)
# cv2_layer() and ImageMedianBlur adapted from: https://github.com/Nourepide/ComfyUI-Allor/
def cv2_layer(tensor, function):
"""
This function applies a given function to each channel of an input tensor and returns the result as a PyTorch tensor.
:param tensor: A PyTorch tensor of shape (H, W, C) or (N, H, W, C), where C is the number of channels, H is the height, and W is the width of the image.
:param function: A function that takes a numpy array of shape (H, W, C) as input and returns a numpy array of the same shape.
:return: A PyTorch tensor of the same shape as the input tensor, where the given function has been applied to each channel of each image in the tensor.
"""
shape_size = tensor.shape.__len__()
def produce(image):
channels = image[0, 0, :].shape[0]
rgb = image[:, :, 0:3].numpy()
result_rgb = function(rgb)
if channels <= 3:
return torch.from_numpy(result_rgb)
elif channels == 4:
alpha = image[:, :, 3:4].numpy()
result_alpha = function(alpha)[..., np.newaxis]
result_rgba = np.concatenate((result_rgb, result_alpha), axis=2)
return torch.from_numpy(result_rgba)
if shape_size == 3:
return torch.from_numpy(produce(tensor))
elif shape_size == 4:
return torch.stack([
produce(tensor[i]) for i in range(len(tensor))
])
else:
raise ValueError("Incompatible tensor dimension.")
# adapted from https://github.com/cubiq/ComfyUI_essentials
def image_resize(image,
                 width,
                 height,
                 method = "stretch",
                 interpolation = "nearest",
                 condition = "always",
                 multiple_of = 0,
                 keep_proportion = False):
    """Resize a (B, H, W, C) image tensor.

    method: 'stretch' | 'keep proportion' | 'pad' | 'fill*' — proportion/pad
        fit inside the target box (pad adds borders), 'fill' covers the box
        and center-crops the overflow.
    interpolation: F.interpolate mode, or 'lanczos' (via comfy.utils).
    condition: gates whether resizing happens at all ('always',
        'downscale if bigger', 'upscale if smaller', 'bigger area',
        'smaller area').
    multiple_of: final dimensions are center-cropped to this multiple.
    keep_proportion: legacy flag that forces method='keep proportion'.
    Returns the resized tensor clamped to [0, 1].

    NOTE(review): a width/height of 0 expands to MAX_RESOLUTION, which is
    defined elsewhere in this module — confirm it is in scope for callers.
    """
    _, oh, ow, _ = image.shape
    # crop offsets (fill) and pad amounts (pad), resolved per-method below
    x = y = x2 = y2 = 0
    pad_left = pad_right = pad_top = pad_bottom = 0

    if keep_proportion:
        method = "keep proportion"

    if multiple_of > 1:
        width = width - (width % multiple_of)
        height = height - (height % multiple_of)

    if method == 'keep proportion' or method == 'pad':
        # a 0 dimension means "unconstrained" in that axis
        if width == 0 and oh < height:
            width = MAX_RESOLUTION
        elif width == 0 and oh >= height:
            width = ow

        if height == 0 and ow < width:
            height = MAX_RESOLUTION
        elif height == 0 and ow >= width:
            height = oh

        # fit INSIDE the target box (min ratio preserves aspect)
        ratio = min(width / ow, height / oh)
        new_width = round(ow*ratio)
        new_height = round(oh*ratio)

        if method == 'pad':
            # distribute the leftover space as borders (applied after resize)
            pad_left = (width - new_width) // 2
            pad_right = width - new_width - pad_left
            pad_top = (height - new_height) // 2
            pad_bottom = height - new_height - pad_top

        width = new_width
        height = new_height
    elif method.startswith('fill'):
        width = width if width > 0 else ow
        height = height if height > 0 else oh

        # COVER the target box (max ratio), then center-crop the overflow
        ratio = max(width / ow, height / oh)
        new_width = round(ow*ratio)
        new_height = round(oh*ratio)
        x = (new_width - width) // 2
        y = (new_height - height) // 2
        x2 = x + width
        y2 = y + height
        if x2 > new_width:
            x -= (x2 - new_width)
        if x < 0:
            x = 0
        if y2 > new_height:
            y -= (y2 - new_height)
        if y < 0:
            y = 0
        width = new_width
        height = new_height
    else:
        width = width if width > 0 else ow
        height = height if height > 0 else oh

    # only resize when the condition is met; otherwise pass through
    if "always" in condition \
        or ("downscale if bigger" == condition and (oh > height or ow > width)) or ("upscale if smaller" == condition and (oh < height or ow < width)) \
        or ("bigger area" in condition and (oh * ow > height * width)) or ("smaller area" in condition and (oh * ow < height * width)):

        # interpolate in channels-first layout
        outputs = image.permute(0,3,1,2)

        if interpolation == "lanczos":
            outputs = comfy.utils.lanczos(outputs, width, height)
        else:
            outputs = F.interpolate(outputs, size=(height, width), mode=interpolation)

        if method == 'pad':
            if pad_left > 0 or pad_right > 0 or pad_top > 0 or pad_bottom > 0:
                outputs = F.pad(outputs, (pad_left, pad_right, pad_top, pad_bottom), value=0)

        outputs = outputs.permute(0,2,3,1)

        if method.startswith('fill'):
            if x > 0 or y > 0 or x2 > 0 or y2 > 0:
                outputs = outputs[:, y:y2, x:x2, :]
    else:
        outputs = image

    # final center-crop so both dims are multiples of multiple_of
    if multiple_of > 1 and (outputs.shape[2] % multiple_of != 0 or outputs.shape[1] % multiple_of != 0):
        width = outputs.shape[2]
        height = outputs.shape[1]
        x = (width % multiple_of) // 2
        y = (height % multiple_of) // 2
        x2 = width - ((width % multiple_of) - x)
        y2 = height - ((height % multiple_of) - y)
        outputs = outputs[:, y:y2, x:x2, :]

    outputs = torch.clamp(outputs, 0, 1)

    return outputs
class ImageRepeatTileToSize:
    """ComfyUI node: tile an image until it covers at least width x height,
    then either crop to the exact size or resize it down."""
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "image":  ("IMAGE",),
                "width":  ("INT", {"default": 1024, "min": 1, "max": 1048576, "step": 1,}),
                "height": ("INT", {"default": 1024, "min": 1, "max": 1048576, "step": 1,}),
                "crop":   ("BOOLEAN", {"default": True}),
            },
        }

    RETURN_TYPES = ("IMAGE",)
    RETURN_NAMES = ("image",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/images"

    def main(self, image, width, height, crop,
             method = "stretch",
             interpolation = "lanczos",
             condition = "always",
             multiple_of = 0,
             keep_proportion = False,
             ):
        """Tile `image` to cover the target size; crop exactly when `crop`
        is True, otherwise hand off to image_resize()."""
        img = image.clone().detach()
        b, h, w, c = img.shape

        # number of repeats per axis; plain math.ceil replaces the old
        # torch.ceil(torch.div(...)) round-trip through scalar tensors
        h_tgt = math.ceil(height / h)
        w_tgt = math.ceil(width / w)

        # a 3-tuple tiles the trailing (H, W, C) dims; batch is untouched
        img_tiled = torch.tile(img, (h_tgt, w_tgt, 1))

        if crop:
            img_tiled = img_tiled[:, :height, :width, :]
        else:
            img_tiled = image_resize(img_tiled, width, height, method, interpolation, condition, multiple_of, keep_proportion)

        return (img_tiled,)
# Rewrite of the WAS Film Grain node, much improved speed and efficiency (https://github.com/WASasquatch/was-node-suite-comfyui)
class Film_Grain:
    """ComfyUI node: add film grain by splattering random grayscale pixels on
    a supersampled copy, blurring, and blending back over the image."""
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "image": ("IMAGE",),
                "density": ("FLOAT", {"default": 1.0, "min": 0.01, "max": 1.0, "step": 0.01}),
                "intensity": ("FLOAT", {"default": 1.0, "min": 0.01, "max": 1.0, "step": 0.01}),
                "highlights": ("FLOAT", {"default": 1.0, "min": 0.01, "max": 255.0, "step": 0.01}),
                "supersample_factor": ("INT", {"default": 4, "min": 1, "max": 8, "step": 1}),
                "repeats": ("INT", {"default": 1, "min": 1, "max": 1000, "step": 1})
            }
        }

    RETURN_TYPES = ("IMAGE",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/images"

    def main(self, image, density, intensity, highlights, supersample_factor, repeats=1):
        """Duplicate the batch `repeats` times, then apply grain per image."""
        image = image.repeat(repeats, 1, 1, 1)
        return (self.apply_film_grain(image, density, intensity, highlights, supersample_factor), )

    def apply_film_grain(self, img, density=0.1, intensity=1.0, highlights=1.0, supersample_factor=4):
        """Apply grayscale noise with the given density/intensity/highlights
        to each image in the batch; returns a stacked float tensor."""
        img_batch = img.clone()
        img_list = []
        for i in range(img_batch.shape[0]):
            img = img_batch[i].unsqueeze(0)
            img = tensor2pil(img)
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

            img_gray = img.convert('L')
            original_size = img.size
            # work at supersampled resolution for finer grain
            img_gray = img_gray.resize(
                ((img.size[0] * supersample_factor), (img.size[1] * supersample_factor)), Image.Resampling(2))

            img_gray_tensor = torch.from_numpy(np.array(img_gray).astype(np.float32) / 255.0).to(device)
            img_gray_flat = img_gray_tensor.view(-1)
            # fraction of pixels replaced with uniform random values
            # (the earlier duplicate computation of num_pixels was removed)
            num_pixels = int(density * img_gray_flat.numel())
            indices = torch.randint(0, img_gray_flat.numel(), (num_pixels,), device=img_gray_flat.device)
            values = torch.randint(0, 256, (num_pixels,), device=img_gray_flat.device, dtype=torch.float32) / 255.0
            img_gray_flat[indices] = values
            img_gray = img_gray_flat.view(img_gray_tensor.shape)

            img_gray_np = (img_gray.cpu().numpy() * 255).astype(np.uint8)
            img_gray = Image.fromarray(img_gray_np)

            img_noise = img_gray.convert('RGB')
            img_noise = img_noise.filter(ImageFilter.GaussianBlur(radius=0.125))
            img_noise = img_noise.resize(original_size, Image.Resampling(1))
            img_noise = img_noise.filter(ImageFilter.EDGE_ENHANCE_MORE)
            img_final = Image.blend(img, img_noise, intensity)
            enhancer = ImageEnhance.Brightness(img_final)
            img_highlights = enhancer.enhance(highlights)
            img_list.append(pil2tensor(img_highlights).squeeze(dim=0))
        img_highlights = torch.stack(img_list, dim=0)
        return img_highlights
class Image_Grain_Add:
    """ComfyUI node: generate film grain and hard-light blend it over the
    image, scaled by `weight`."""
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "image": ("IMAGE",),
                "weight": ("FLOAT", {"default": 0.5, "min": -10000.0, "max": 10000.0, "step": 0.01}),
            }
        }

    RETURN_TYPES = ("IMAGE",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/images"

    # BUG FIX: supersample_factor default was 1.0 (float); PIL's resize
    # requires integer sizes, so the float default crashed at runtime.
    def main(self, image, weight=0.5, density=1.0, intensity=1.0, highlights=1.0, supersample_factor=1, repeats=1):
        """Blend generated grain over `image` via hard light, lerped by weight."""
        image = image.repeat(repeats, 1, 1, 1)
        image_grain = self.apply_film_grain(image, density, intensity, highlights, supersample_factor)
        return (image + weight * (hard_light_blend(image_grain, image) - image), )

    def apply_film_grain(self, img, density=0.1, intensity=1.0, highlights=1.0, supersample_factor=4):
        """Apply grayscale noise with the given density/intensity/highlights
        to each image in the batch; returns a stacked float tensor."""
        img_batch = img.clone()
        img_list = []
        for i in range(img_batch.shape[0]):
            img = img_batch[i].unsqueeze(0)
            img = tensor2pil(img)
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

            img_gray = img.convert('L')
            original_size = img.size
            # work at supersampled resolution for finer grain
            img_gray = img_gray.resize(
                ((img.size[0] * supersample_factor), (img.size[1] * supersample_factor)), Image.Resampling(2))

            img_gray_tensor = torch.from_numpy(np.array(img_gray).astype(np.float32) / 255.0).to(device)
            img_gray_flat = img_gray_tensor.view(-1)
            # fraction of pixels replaced with uniform random values
            # (the earlier duplicate computation of num_pixels was removed)
            num_pixels = int(density * img_gray_flat.numel())
            indices = torch.randint(0, img_gray_flat.numel(), (num_pixels,), device=img_gray_flat.device)
            values = torch.randint(0, 256, (num_pixels,), device=img_gray_flat.device, dtype=torch.float32) / 255.0
            img_gray_flat[indices] = values
            img_gray = img_gray_flat.view(img_gray_tensor.shape)

            img_gray_np = (img_gray.cpu().numpy() * 255).astype(np.uint8)
            img_gray = Image.fromarray(img_gray_np)

            img_noise = img_gray.convert('RGB')
            img_noise = img_noise.filter(ImageFilter.GaussianBlur(radius=0.125))
            img_noise = img_noise.resize(original_size, Image.Resampling(1))
            img_noise = img_noise.filter(ImageFilter.EDGE_ENHANCE_MORE)
            img_final = Image.blend(img, img_noise, intensity)
            enhancer = ImageEnhance.Brightness(img_final)
            img_highlights = enhancer.enhance(highlights)
            img_list.append(pil2tensor(img_highlights).squeeze(dim=0))
        img_highlights = torch.stack(img_list, dim=0)
        return img_highlights
class Frequency_Separation_Hard_Light:
    """ComfyUI node: hard-light frequency separation. Given any two of
    (high_pass, original, low_pass), computes the missing layer."""
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "optional": {
                "high_pass": ("IMAGE",),
                "original":  ("IMAGE",),
                "low_pass":  ("IMAGE",),
            },
            "required": {
            },
        }

    RETURN_TYPES = ("IMAGE","IMAGE","IMAGE",)
    RETURN_NAMES = ("high_pass", "original", "low_pass",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/images"

    def main(self, high_pass=None, original=None, low_pass=None):
        """Compute the missing layer in float64. Inputs pass through unchanged."""
        # BUG FIX: the device was hard-coded to 'cuda', crashing on CPU-only
        # machines; fall back to CPU when CUDA is unavailable.
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        if high_pass is None:
            high_pass = hard_light_freq_sep(original.to(torch.float64).to(device), low_pass.to(torch.float64).to(device))
        if original is None:
            original = hard_light_blend(low_pass.to(torch.float64).to(device), high_pass.to(torch.float64).to(device))
        return (high_pass, original, low_pass,)
class Frequency_Separation_Hard_Light_LAB:
    """ComfyUI node: hard-light frequency separation performed in normalized
    LAB space. Given any two of (high_pass, original, low_pass), computes the
    missing layer and converts it back to RGB.

    NOTE(review): the hard-coded .to(torch.float64) conversions assume CPU
    or CUDA availability matches the input tensors' device — verify.
    """
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "optional": {
                "high_pass": ("IMAGE",),
                "original":  ("IMAGE",),
                "low_pass":  ("IMAGE",),
            },
            "required": {
            },
        }

    RETURN_TYPES = ("IMAGE", "IMAGE", "IMAGE",)
    RETURN_NAMES = ("high_pass", "original", "low_pass",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/images"

    def main(self, high_pass=None, original=None, low_pass=None):
        # convert every provided input to normalized, channels-first LAB
        if original is not None:
            lab_original = rgb_to_lab(original.to(torch.float64).permute(0, 3, 1, 2))
            lab_original_normalized = normalize_lab(lab_original)
        if low_pass is not None:
            lab_low_pass = rgb_to_lab(low_pass.to(torch.float64).permute(0, 3, 1, 2))
            lab_low_pass_normalized = normalize_lab(lab_low_pass)
        if high_pass is not None:
            lab_high_pass = rgb_to_lab(high_pass.to(torch.float64).permute(0, 3, 1, 2))
            lab_high_pass_normalized = normalize_lab(lab_high_pass)

        #original_l = lab_original_normalized[:, :1, :, :]
        #low_pass_l = lab_low_pass_normalized[:, :1, :, :]

        # the blend helpers expect channels-last, hence the permute round-trips
        if high_pass is None:
            lab_high_pass_normalized = hard_light_freq_sep(lab_original_normalized.permute(0, 2, 3, 1), lab_low_pass_normalized.permute(0, 2, 3, 1)).permute(0, 3, 1, 2)
            lab_high_pass = denormalize_lab(lab_high_pass_normalized)
            high_pass = lab_to_rgb(lab_high_pass).permute(0, 2, 3, 1)
        if original is None:
            lab_original_normalized = hard_light_blend(lab_low_pass_normalized.permute(0, 2, 3, 1), lab_high_pass_normalized.permute(0, 2, 3, 1)).permute(0, 3, 1, 2)
            lab_original = denormalize_lab(lab_original_normalized)
            original = lab_to_rgb(lab_original).permute(0, 2, 3, 1)

        return (high_pass, original, low_pass)
class Frame_Select:
    """ComfyUI node: pick a single frame from a batch by index."""
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "frames": ("IMAGE",),
                "select": ("INT", {"default": 0, "min": 0, "max": 10000}),
            },
            "optional": {
            },
        }

    RETURN_TYPES = ("IMAGE",)
    RETURN_NAMES = ("image",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/images"

    def main(self, frames=None, select=0):
        """Return frame `select` as a cloned (1, H, W, C) tensor."""
        selected = frames[select].unsqueeze(0).clone()
        return (selected,)
class Frames_Slice:
    """ComfyUI node: take a contiguous slice [start, stop) of a frame batch."""
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "frames": ("IMAGE",),
                "start":  ("INT", {"default": 0, "min": 0, "max": 10000}),
                "stop":   ("INT", {"default": 1, "min": 1, "max": 10000}),
            },
            "optional": {
            },
        }

    RETURN_TYPES = ("IMAGE",)
    RETURN_NAMES = ("image",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/images"

    def main(self, frames=None, start=0, stop=1):
        """Return a cloned slice of the batch dimension."""
        sliced = frames[start:stop].clone()
        return (sliced,)
class Frames_Concat:
    """ComfyUI node: concatenate two frame batches along the batch dim."""
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "frames_0": ("IMAGE",),
                "frames_1": ("IMAGE",),
            },
            "optional": {
            },
        }

    RETURN_TYPES = ("IMAGE",)
    RETURN_NAMES = ("image",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/images"

    def main(self, frames_0, frames_1):
        """Concatenate along dim 0 (the squeeze is a no-op for >=2 frames,
        kept for parity with the original behavior)."""
        joined = torch.cat((frames_0, frames_1), dim=0)
        return (joined.squeeze(0).clone(),)
class Image_Channels_LAB:
    """ComfyUI node: split an RGB image into LAB channels, or rebuild RGB
    from separately supplied L/A/B channel tensors."""
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "optional": {
                "RGB": ("IMAGE",),
                "L":   ("IMAGE",),
                "A":   ("IMAGE",),
                "B":   ("IMAGE",),
            },
            "required": {
            },
        }

    RETURN_TYPES = ("IMAGE","IMAGE","IMAGE","IMAGE",)
    RETURN_NAMES = ("RGB","L","A","B",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/images"

    def main(self, RGB=None, L=None, A=None, B=None):
        """If RGB is supplied, decompose it; otherwise recompose RGB from
        the provided L/A/B channel tensors."""
        if RGB is not None:
            # channels-first for kornia, then slice out the three LAB planes
            LAB = rgb_to_lab(RGB.to(torch.float64).permute(0, 3, 1, 2))
            L = LAB[:, 0:1, :, :]
            A = LAB[:, 1:2, :, :]
            B = LAB[:, 2:3, :, :]
        else:
            LAB = torch.cat([L, A, B], dim=1)
            RGB = lab_to_rgb(LAB.to(torch.float64)).permute(0, 2, 3, 1)
        return (RGB, L, A, B,)
class Frequency_Separation_Vivid_Light:
    """ComfyUI node: frequency separation variant.

    NOTE(review): despite the name, this uses the HARD-light helpers, and the
    argument order differs from Frequency_Separation_Hard_Light —
    hard_light_freq_sep(low_pass, original) here vs (original, low_pass)
    there, and hard_light_blend(high_pass, low_pass) vs (low_pass,
    high_pass). Confirm whether the swap is intentional.
    """
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "optional": {
                "high_pass": ("IMAGE",),
                "original":  ("IMAGE",),
                "low_pass":  ("IMAGE",),
            },
            "required": {
            },
        }

    RETURN_TYPES = ("IMAGE","IMAGE","IMAGE",)
    RETURN_NAMES = ("high_pass", "original", "low_pass",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/images"

    def main(self, high_pass=None, original=None, low_pass=None):
        # compute whichever layer is missing; inputs pass through unchanged
        if high_pass is None:
            high_pass = hard_light_freq_sep(low_pass.to(torch.float64), original.to(torch.float64))
        if original is None:
            original = hard_light_blend(high_pass.to(torch.float64), low_pass.to(torch.float64))
        return (high_pass, original, low_pass,)
class Frequency_Separation_Linear_Light:
    """ComfyUI node: linear-light frequency separation. Given any two of
    (high_pass, original, low_pass), computes the missing layer."""
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "optional": {
                "high_pass": ("IMAGE",),
                "original":  ("IMAGE",),
                "low_pass":  ("IMAGE",),
            },
            "required": {
            },
        }

    RETURN_TYPES = ("IMAGE","IMAGE","IMAGE",)
    RETURN_NAMES = ("high_pass", "original", "low_pass",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/images"

    def main(self, high_pass=None, original=None, low_pass=None):
        """Compute the missing layer in float64. Inputs pass through unchanged."""
        # BUG FIX: the device was hard-coded to 'cuda', crashing on CPU-only
        # machines; fall back to CPU when CUDA is unavailable.
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        if high_pass is None:
            high_pass = linear_light_freq_sep(original.to(torch.float64).to(device), low_pass.to(torch.float64).to(device))
        if original is None:
            original = linear_light_blend(low_pass.to(torch.float64).to(device), high_pass.to(torch.float64).to(device))
        return (high_pass, original, low_pass,)
class Frequency_Separation_FFT:
    """ComfyUI node: FFT-based frequency separation using a gaussian low-pass
    filter (see freq_sep_fft).

    NOTE(review): `cutoff` is forwarded to freq_sep_fft but the gaussian
    filter there only uses `sigma` — the cutoff input currently has no effect.
    """
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "optional": {
                "high_pass": ("IMAGE",),
                "original":  ("IMAGE",),
                "low_pass":  ("IMAGE",),
            },
            "required": {
                "cutoff": ("FLOAT", {"default": 5.0, "min": -10000.0, "max": 10000.0, "step": 0.01}),
                "sigma":  ("FLOAT", {"default": 5.0, "min": -10000.0, "max": 10000.0, "step": 0.01}),
            },
        }

    RETURN_TYPES = ("IMAGE","IMAGE","IMAGE",)
    RETURN_NAMES = ("high_pass", "original", "low_pass",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/images"

    def main(self, high_pass=None, original=None, low_pass=None, cutoff=5.0, sigma=5.0):
        # decompose when the bands are missing; recombine when original is
        if high_pass is None:
            low_pass, high_pass = freq_sep_fft(original.to(torch.float64), cutoff=cutoff, sigma=sigma)
        if original is None:
            # FFT separation is exactly additive
            original = low_pass + high_pass
        return (high_pass, original, low_pass,)
class ImageSharpenFS:
    """ComfyUI node: sharpen via frequency separation — blur the image,
    extract the high-pass layer, then recombine that high-pass with the
    ORIGINAL image acting as the low-pass layer."""
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "images": ("IMAGE",),
                #"method": (["hard", "linear", "vivid"], {"default": "hard"}),
                "method": (["hard", "linear"], {"default": "hard"}),
                "type": (["median", "gaussian"], {"default": "median"}),
                "intensity": ("INT", {"default": 6, "min": 1, "step": 1,
                }),
            },
        }

    RETURN_TYPES = ("IMAGE",)
    RETURN_NAMES = ("image",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/images"

    def main(self, images, method, type, intensity):
        # pick the blur implementation; `intensity` is its kernel size
        match type:
            case "median":
                IB = ImageMedianBlur()
            case "gaussian":
                IB = ImageGaussianBlur()
        # pick the frequency-separation blend mode
        match method:
            case "hard":
                FS = Frequency_Separation_Hard_Light()
            case "linear":
                FS = Frequency_Separation_Linear_Light()
        # IB.main returns a 1-tuple, hence the * unpack into low_pass below
        img_lp = IB.main(images, intensity)
        # derive the high-pass from (original=images, low_pass=blurred)
        fs_hp, fs_orig, fs_lp = FS.main(None, images, *img_lp)
        # recombine: high-pass over the original image as the low-pass layer
        _, img_sharpened, _ = FS.main(high_pass=fs_hp, original=None, low_pass=images)
        return (img_sharpened,)
class ImageMedianBlur:
    """ComfyUI node: median-blur a batch of images with OpenCV.

    Images are converted to uint8 for cv2 and rescaled to [0,1] on return.
    """
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "images": ("IMAGE",),
                "size": ("INT", {"default": 6, "min": 1, "step": 1,}),
            },
        }

    RETURN_TYPES = ("IMAGE",)
    RETURN_NAMES = ("image",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/images"

    def main(self, images, size):
        # decrement so the UI default 6 becomes ksize 5.
        # NOTE(review): cv2.medianBlur requires an odd ksize >= 3, so even
        # `size` inputs work (odd after decrement) but odd inputs will raise
        # inside OpenCV — confirm the intended input range.
        size -= 1
        img = images.clone().detach()
        img = (img * 255).to(torch.uint8)
        return ((cv2_layer(img, lambda x: cv2.medianBlur(x, size)) / 255),)
class ImageGaussianBlur:
    """ComfyUI node: gaussian-blur a batch of images with OpenCV.

    Images are converted to uint8 for cv2 and rescaled to [0,1] on return.
    """
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "images": ("IMAGE",),
                "size": ("INT", {"default": 6, "min": 1, "step": 1,}),
            },
        }

    RETURN_TYPES = ("IMAGE",)
    RETURN_NAMES = ("image",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/images"

    def main(self, images, size):
        # decrement so the UI default 6 becomes ksize 5.
        # NOTE(review): cv2.GaussianBlur requires odd (or zero) kernel sizes,
        # so odd `size` inputs (even after decrement) will raise inside
        # OpenCV — confirm the intended input range.
        size -= 1
        img = images.clone().detach()
        img = (img * 255).to(torch.uint8)
        return ((cv2_layer(img, lambda x: cv2.GaussianBlur(x, (size, size), 0)) / 255),)
def fast_smudge_blur_comfyui(img, kernel_size=51):
    """Separable box blur for (B, H, W, C) float images.

    Returns a blurred tensor with the SAME spatial size as the input.
    Falls back to CPU when CUDA is unavailable.
    """
    # BUG FIX: device was hard-coded to 'cuda'
    device = 'cuda' if torch.cuda.is_available() else img.device
    img = img.to(device).float()

    # (b, h, w, c) to (b, c, h, w)
    img = img.permute(0, 3, 1, 2)
    num_channels = img.shape[1]

    box_kernel_1d = torch.ones(num_channels, 1, kernel_size, device=img.device, dtype=img.dtype) / kernel_size
    pad = kernel_size // 2

    # BUG FIX: the old integer padding=kernel_size//2 padded BOTH dims in
    # BOTH 1-D passes, growing the output by kernel_size-1 pixels per axis.
    # Pad only the dimension each 1-D kernel actually spans.
    # horizontal pass: kernel shape (C, 1, 1, k) -> pad width only
    blurred_img = F.conv2d(img, box_kernel_1d.unsqueeze(2), padding=(0, pad), groups=num_channels)
    # vertical pass: kernel shape (C, 1, k, 1) -> pad height only
    blurred_img = F.conv2d(blurred_img, box_kernel_1d.unsqueeze(3), padding=(pad, 0), groups=num_channels)

    # (b, c, h, w) to (b, h, w, c)
    blurred_img = blurred_img.permute(0, 2, 3, 1)
    return blurred_img
class FastSmudgeBlur:
    """ComfyUI node: fast separable box blur preserving image size."""
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "images": ("IMAGE",),
                "kernel_size": ("INT", {"default": 51, "min": 1, "step": 1,}),
            },
        }

    RETURN_TYPES = ("IMAGE",)
    RETURN_NAMES = ("image",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/images"

    def main(self, images, kernel_size):
        """Blur each image with a kernel_size box filter; spatial size is
        preserved (zero padding at the borders)."""
        # BUG FIX: device was hard-coded to 'cuda'
        device = 'cuda' if torch.cuda.is_available() else images.device
        img = images.clone().detach().to(device).float()

        # (b, h, w, c) to (b, c, h, w)
        img = img.permute(0, 3, 1, 2)
        num_channels = img.shape[1]

        # box blur kernel (separable convolution)
        box_kernel_1d = torch.ones(num_channels, 1, kernel_size, device=img.device, dtype=img.dtype) / kernel_size
        padding_size = kernel_size // 2

        # BUG FIX: the padding tuples were swapped relative to each kernel's
        # orientation (height padding on the horizontal pass and vice versa),
        # which zeroed out a border of width kernel_size//2. Pad the dim
        # each 1-D kernel spans: (padH, padW).
        # horizontal pass: kernel (C, 1, 1, k) -> pad width
        blurred_img = F.conv2d(
            img, box_kernel_1d.unsqueeze(2), padding=(0, padding_size), groups=num_channels
        )
        # vertical pass: kernel (C, 1, k, 1) -> pad height
        blurred_img = F.conv2d(
            blurred_img, box_kernel_1d.unsqueeze(3), padding=(padding_size, 0), groups=num_channels
        )

        # (b, c, h, w) to (b, h, w, c)
        blurred_img = blurred_img.permute(0, 2, 3, 1)
        return (blurred_img,)
class Image_Pair_Split:
    """ComfyUI node: split a 2-image batch into its two halves."""
    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "img_pair": ("IMAGE",),
            }
        }

    RETURN_TYPES = ("IMAGE","IMAGE",)
    RETURN_NAMES = ("img_0","img_1",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/images"

    def main(self, img_pair):
        """Chunk the batch dimension in two and return both halves."""
        first, second = img_pair.chunk(2, dim=0)
        return (first, second,)
class Image_Crop_Location_Exact:
    """ComfyUI node: crop an exact pixel region from a (B, H, W, C) image,
    clamping the region to the image bounds. Also returns crop metadata."""
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "image": ("IMAGE",),
                "x":      ("INT", {"default": 0,   "max": 10000000, "min": 0, "step": 1}),
                "y":      ("INT", {"default": 0,   "max": 10000000, "min": 0, "step": 1}),
                "width":  ("INT", {"default": 256, "max": 10000000, "min": 1, "step": 1}),
                "height": ("INT", {"default": 256, "max": 10000000, "min": 1, "step": 1}),
                "edge": (["original", "short", "long"],),
            }
        }

    RETURN_TYPES = ("IMAGE", "CROP_DATA",)
    RETURN_NAMES = ("image", "crop_data",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/images"

    def main(self, image, x=0, y=0, width=256, height=256, edge="original"):
        """Crop at (x, y) with the given size; edge='short'/'long' forces a
        square using the smaller/larger of width and height.

        Returns (cropped_image, ((crop_w, crop_h), (left, top, right, bottom))).
        Raises ValueError on bad rank or an empty crop region.
        """
        if image.dim() != 4:
            # BUG FIX: message previously claimed channels-first layout;
            # the code below indexes (batch, height, width, channels).
            raise ValueError("Expected a 4D tensor (batch, height, width, channels).")

        # square crops: pick one side length from the two inputs
        if edge == "short":
            side = width if width < height else height
            width, height = side, side
        if edge == "long":
            side = width if width > height else height
            width, height = side, side

        batch_size, img_height, img_width, channels = image.size()

        # clamp the requested rectangle to the image bounds
        crop_left   = max(x, 0)
        crop_top    = max(y, 0)
        crop_right  = min(x + width, img_width)
        crop_bottom = min(y + height, img_height)

        crop_width  = crop_right - crop_left
        crop_height = crop_bottom - crop_top
        if crop_width <= 0 or crop_height <= 0:
            raise ValueError("Invalid crop dimensions. Please check the values for x, y, width, and height.")

        cropped_image = image[:, crop_top:crop_bottom, crop_left:crop_right, :]
        crop_data = ((crop_width, crop_height), (crop_left, crop_top, crop_right, crop_bottom))

        return cropped_image, crop_data
class Masks_Unpack4:
    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "masks": ("MASK",),
            }
        }

    RETURN_TYPES = ("MASK","MASK","MASK","MASK",)
    RETURN_NAMES = ("masks","masks","masks","masks",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/masks"
    DESCRIPTION = "Unpack a list of masks into separate outputs."

    def main(self, masks,):
        """Split the batch into individual masks, one per output slot."""
        return tuple(masks)
class Masks_Unpack8:
    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "masks": ("MASK",),
            }
        }

    RETURN_TYPES = ("MASK",) * 8
    RETURN_NAMES = ("masks",) * 8
    FUNCTION = "main"
    CATEGORY = "RES4LYF/masks"
    DESCRIPTION = "Unpack a list of masks into separate outputs."

    def main(self, masks,):
        # Fan the incoming mask list out to the node's individual outputs.
        return tuple(masks)
class Masks_Unpack16:
    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "masks": ("MASK",),
            }
        }

    RETURN_TYPES = ("MASK",) * 16
    RETURN_NAMES = ("masks",) * 16
    FUNCTION = "main"
    CATEGORY = "RES4LYF/masks"
    DESCRIPTION = "Unpack a list of masks into separate outputs."

    def main(self, masks,):
        # Fan the incoming mask list out to the node's individual outputs.
        return tuple(masks)
class Image_Get_Color_Swatches:
    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "image_color_swatches": ("IMAGE",),
            }
        }

    RETURN_TYPES = ("COLOR_SWATCHES",)
    RETURN_NAMES = ("color_swatches",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/images"
    DESCRIPTION = "Get color swatches, in the order they appear, from top to bottom, in an input image. For use with color masks."

    def main(self, image_color_swatches):
        # Convert the 0..1 float image to 8-bit RGB before sampling colors.
        rgb_u8 = (image_color_swatches * 255).round().clamp(0, 255).to(torch.uint8)
        swatches = read_swatch_colors(rgb_u8.squeeze().numpy(), min_fraction=0.01)
        return (swatches,)
class Masks_From_Color_Swatches:
    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "image_color_mask": ("IMAGE",),
                "color_swatches": ("COLOR_SWATCHES",),
            }
        }

    RETURN_TYPES = ("MASK",)
    RETURN_NAMES = ("masks",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/images"
    DESCRIPTION = "Create masks from a multicolor image using color swatches to identify regions. Returns them as a list."

    def main(self, image_color_mask, color_swatches):
        # 8-bit RGB view of the color-coded mask image.
        rgb_u8 = (image_color_mask * 255).round().clamp(0, 255).to(torch.uint8)
        region_masks = build_masks_from_swatch(rgb_u8.squeeze().numpy(), color_swatches, tol=8)
        region_masks = cleanup_and_fill_masks(region_masks)
        # Stack to (num_masks, 1, H, W) for downstream consumers.
        stacked = torch.stack(region_masks, dim=0).unsqueeze(1)
        return (stacked,)
class Masks_From_Colors:
    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "image_color_swatches": ("IMAGE",),
                "image_color_mask": ("IMAGE",),
            }
        }

    RETURN_TYPES = ("MASK",)
    RETURN_NAMES = ("masks",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/images"
    DESCRIPTION = "Create masks from a multicolor image using color swatches to identify regions. Returns them as a list."

    def main(self, image_color_swatches, image_color_mask, ):
        # Read the ordered swatch colors from the swatch image.
        rgb_u8 = (image_color_swatches * 255).round().clamp(0, 255).to(torch.uint8)
        swatches = read_swatch_colors(rgb_u8.squeeze().numpy(), min_fraction=0.01)

        # Partition the color-coded mask image by those swatches.
        rgb_u8 = (image_color_mask * 255).round().clamp(0, 255).to(torch.uint8)
        region_masks = build_masks_from_swatch(rgb_u8.squeeze().numpy(), swatches, tol=8)
        region_masks = cleanup_and_fill_masks(region_masks)

        # Discard any mask that ended up with no pixels at all.
        kept = [m for m in region_masks if m.sum() != 0]
        removed = len(region_masks) - len(kept)
        print(f"Removed {removed} empty masks.")

        stacked = torch.stack(kept, dim=0).unsqueeze(1)
        return (stacked,)
from PIL import Image
import numpy as np
def read_swatch_colors(
    img,
    ignore: Tuple[int,int,int] = (-1,-1,-1),
    min_fraction: float = 0.2
) -> List[Tuple[int,int,int]]:
    """
    Read the distinct swatch colors from an H×W×3 uint8 image.

    - Every unique color is counted, except `ignore` (the default matches nothing).
    - Colors rarer than min_fraction * (count of the most common color) are dropped.
    - Survivors are returned sorted by the row where they first appear (top→bottom).
    """
    pixels = img.reshape(-1, 3)
    colors, counts = np.unique(pixels, axis=0, return_counts=True)

    # Pair each color with its count, skipping the ignored color.
    tallies = []
    for color, count in zip(colors, counts):
        rgb = tuple(color.tolist())
        if rgb != ignore:
            tallies.append((rgb, count))
    if not tallies:
        return []

    # Keep only colors large enough relative to the dominant one.
    biggest = max(count for _, count in tallies)
    kept = [rgb for rgb, count in tallies if count >= biggest * min_fraction]

    # Order by first occurrence, scanning top to bottom.
    def first_row(rgb):
        ys, _ = np.nonzero(np.all(img == rgb, axis=-1))
        return int(np.min(ys))

    kept.sort(key=first_row)
    return kept
import numpy as np
import torch
from typing import List, Tuple
from PIL import Image
def build_masks_from_swatch(
    mask_img: np.ndarray,
    swatch_colors: List[Tuple[int,int,int]],
    tol: int = 8
) -> List[torch.Tensor]:
    """
    Split a color-coded mask image into one boolean mask per swatch color.

    Steps:
      1. Normalize input to H×W×3 uint8 (accepts channel-first and float arrays,
         scaling floats in [0,1] up to [0,255]).
      2. Quantize pixels and swatch colors into buckets of size `tol`.
      3. Claim exact bucket matches per swatch, in order (first swatch wins).
      4. Any painted (non-black) pixel left unclaimed goes to the swatch with
         the smallest squared-RGB distance.
    Returns a list of BoolTensors [H, W], one per swatch color.
    """
    img = mask_img
    if img.ndim == 3 and img.shape[0] == 3:
        # channel-first → channel-last
        img = np.transpose(img, (1, 2, 0))
    if np.issubdtype(img.dtype, np.floating):
        # Floats may be 0..1 or 0..255; only the former needs scaling.
        img = (img * 255.0).round() if img.max() <= 1.01 else img.round()
    img = img.clip(0, 255).astype(np.uint8)

    H, W, _ = img.shape

    # Quantize both the image and the swatch colors into tol-sized buckets.
    binned = (img // tol) * tol
    snapped = np.array([((np.array(c) // tol) * tol).astype(np.uint8) for c in swatch_colors])

    # Pixels the user actually painted (anything non-black).
    painted = np.any(img != 0, axis=2)

    claimed = np.zeros((H, W), dtype=bool)
    masks = []
    for color in snapped:
        hit = np.all(binned == color, axis=2) & ~claimed
        masks.append(torch.from_numpy(hit))
        claimed |= hit

    # Assign leftover painted pixels to their nearest swatch color.
    leftover = painted & ~claimed
    if leftover.any():
        flat = binned.reshape(-1, 3).astype(int)                        # (H*W)×3
        d2 = np.sum((flat[:, None, :] - snapped[None, :, :]) ** 2, axis=2)
        nearest = np.argmin(d2, axis=1)                                 # (H*W,)
        flat_leftover = leftover.reshape(-1)
        for i in range(len(masks)):
            extra = (flat_leftover & (nearest == i)).reshape(H, W)
            masks[i] = masks[i] | torch.from_numpy(extra)
    return masks
import numpy as np
import torch
from typing import List
from collections import deque
def _remove_small_components(
mask: np.ndarray,
rel_thresh: float = 0.01
) -> np.ndarray:
"""
Remove connected components smaller than rel_thresh * max_component_size.
4-connectivity.
"""
H, W = mask.shape
visited = np.zeros_like(mask, bool)
comps = [] # list of (size, pixels_list)
# 1) find all components
for y in range(H):
for x in range(W):
if mask[y,x] and not visited[y,x]:
q = deque([(y,x)])
visited[y,x] = True
pix = [(y,x)]
while q:
cy,cx = q.popleft()
for dy,dx in ((1,0),(-1,0),(0,1),(0,-1)):
ny,nx = cy+dy, cx+dx
if 0<=ny= min_size:
for (y,x) in pix:
out[y,x] = True
return out
def cleanup_and_fill_masks(
    masks: List[torch.Tensor],
    rel_thresh: float = 0.01
) -> List[torch.Tensor]:
    """
    Tidy a set of region masks:
      1) drop connected components smaller than rel_thresh * (largest component)
         within each mask, then
      2) hand every now-unclaimed pixel to whichever mask has the most
         8-neighbors at that location (np.roll wraps at the borders).
    """
    stack = np.stack([m.cpu().numpy() for m in masks], axis=0)  # C×H×W
    C = stack.shape[0]

    # 1) prune small components per mask
    for idx in range(C):
        stack[idx] = _remove_small_components(stack[idx], rel_thresh)

    # 2) neighbor tally over the 8-connected neighborhood, to bias
    #    reassignment toward the largest local region
    tally = np.zeros_like(stack, int)
    for dy, dx in ((1,0), (-1,0), (0,1), (0,-1), (1,1), (1,-1), (-1,1), (-1,-1)):
        tally += np.roll(np.roll(stack, dy, axis=1), dx, axis=2)

    # 3) reassign pixels that no mask claims anymore
    unclaimed = ~stack.any(axis=0)
    if unclaimed.any():
        winner = np.argmax(tally, axis=0)  # which mask dominates each pixel
        for idx in range(C):
            stack[idx][unclaimed & (winner == idx)] = True

    return [torch.from_numpy(stack[idx]) for idx in range(C)]
import os
import folder_paths
class MaskSketch:
    """ComfyUI image-loader node.

    NOTE(review): `load_image` is currently a stub that ignores the selected
    file and returns a fixed 512x512 white image with an all-zero mask; the
    full loader lives in `load_image_orig` below — presumably kept while the
    sketch UI is developed. Confirm before relying on the stub's output.
    """
    @classmethod
    def INPUT_TYPES(s):
        # Offer every file in the ComfyUI input directory as a selectable image.
        input_dir = folder_paths.get_input_directory()
        files = [f for f in os.listdir(input_dir) if os.path.isfile(os.path.join(input_dir, f))]
        return {"required":
                    {"image": (sorted(files), {"image_upload": True})},
                }

    CATEGORY = "image"
    RETURN_TYPES = ("IMAGE", "MASK")
    FUNCTION = "load_image"

    def load_image(self, image):
        # Stub: returns a blank canvas instead of reading `image` from disk.
        width, height = 512, 512 # or whatever size you prefer

        # White image: RGB values all set to 1.0
        white_image = torch.ones((1, height, width, 3), dtype=torch.float32)

        # White mask: all ones (or zeros if you're using inverse alpha)
        white_mask = torch.zeros((1, height, width), dtype=torch.float32)

        return (white_image, white_mask)

    def load_image_orig(self, image):
        # Original loader: reads the annotated file, iterating multi-frame
        # images (e.g. animated formats) and using the alpha channel, when
        # present, as an inverted mask.
        image_path = folder_paths.get_annotated_filepath(image)

        img = node_helpers.pillow(Image.open, image_path)

        output_images = []
        output_masks = []
        w, h = None, None

        excluded_formats = ['MPO']

        for i in ImageSequence.Iterator(img):
            i = node_helpers.pillow(ImageOps.exif_transpose, i)

            if i.mode == 'I':
                # 32-bit integer frames: rescale toward float range.
                i = i.point(lambda i: i * (1 / 255))
            image = i.convert("RGB")

            if len(output_images) == 0:
                w = image.size[0]
                h = image.size[1]

            # Skip frames whose size differs from the first frame.
            if image.size[0] != w or image.size[1] != h:
                continue

            image = np.array(image).astype(np.float32) / 255.0
            image = torch.from_numpy(image)[None,]
            if 'A' in i.getbands():
                # Alpha channel becomes the (inverted) mask.
                mask = np.array(i.getchannel('A')).astype(np.float32) / 255.0
                mask = 1. - torch.from_numpy(mask)
            else:
                mask = torch.zeros((64,64), dtype=torch.float32, device="cpu")
            output_images.append(image)
            output_masks.append(mask.unsqueeze(0))

        # Batch all frames unless the format is excluded (MPO thumbnails differ).
        if len(output_images) > 1 and img.format not in excluded_formats:
            output_image = torch.cat(output_images, dim=0)
            output_mask = torch.cat(output_masks, dim=0)
        else:
            output_image = output_images[0]
            output_mask = output_masks[0]

        return (output_image, output_mask)

    @classmethod
    def IS_CHANGED(s, image):
        # Hash the file contents so on-disk edits invalidate the node cache.
        image_path = folder_paths.get_annotated_filepath(image)
        m = hashlib.sha256()
        with open(image_path, 'rb') as f:
            m.update(f.read())
        return m.digest().hex()

    @classmethod
    def VALIDATE_INPUTS(s, image):
        if not folder_paths.exists_annotated_filepath(image):
            return "Invalid image file: {}".format(image)
        return True
# based on https://github.com/cubiq/ComfyUI_essentials/blob/main/mask.py
import math
import torch
import torch.nn.functional as F
import torchvision.transforms.v2 as T
import numpy as np
from scipy.ndimage import distance_transform_edt
class MaskBoundingBoxAspectRatio:
    """Crop an image/mask to the mask's padded bounding box, then grow the box
    along one axis to reach a requested aspect ratio (clamped to the image)."""

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "padding": ("INT", { "default": 0, "min": 0, "max": 4096, "step": 1 }),
                "blur": ("INT", { "default": 0, "min": 0, "max": 256, "step": 1 }),
                "aspect_ratio": ("FLOAT", { "default": 1.0, "min": 0.01,"max": 10.0, "step": 0.01 }),
                "transpose": ("BOOLEAN",{"default": False}),
            },
            "optional": {
                "image": ("IMAGE",),
                "mask": ("MASK",),
            },
        }

    RETURN_TYPES = ("IMAGE","MASK","MASK","INT","INT","INT","INT")
    RETURN_NAMES = ("image","mask","mask_blurred","x","y","width","height")
    FUNCTION = "execute"
    CATEGORY = "essentials/mask"

    @staticmethod
    def _expand_span(lo, hi, size, req, flush_lo, flush_hi):
        """Grow the 1-D span [lo, hi) to `req` pixels, clamped to [0, size).

        A span flush against an edge stays glued to that edge; otherwise the
        growth is centered and then shifted back inside the bounds.
        (Extracted from four duplicated branches in the original.)
        """
        target = min(size, req)
        if flush_hi:
            return size - target, size
        if flush_lo:
            return 0, target
        off = (target - (hi - lo)) // 2
        new_lo = max(0, lo - off)
        new_hi = new_lo + target
        if new_hi > size:
            new_hi = size
            new_lo = size - target
        return new_lo, new_hi

    def execute(self, mask, padding, blur, aspect_ratio, transpose, image=None):
        if mask.dim() == 2:
            mask = mask.unsqueeze(0)
        B, H, W = mask.shape

        hard = mask.clone()

        # build outward-only “blurred” mask via distance transform
        # NOTE(review): only batch element 0 is used here — confirm batching intent.
        if blur > 0:
            m_bool = hard[0].cpu().numpy().astype(bool)
            d_out = distance_transform_edt(~m_bool)
            d_in = distance_transform_edt(m_bool)
            alpha = np.zeros_like(d_out, np.float32)
            alpha[d_in > 0] = 1.0
            ramp = np.clip(1.0 - (d_out / blur), 0.0, 1.0)
            alpha[d_out > 0] = ramp[d_out > 0]
            mask_blur_full = torch.from_numpy(alpha)[None, ...].to(hard.device)
        else:
            mask_blur_full = hard.clone()

        # tight bbox + padding on the "hard" mask
        ys, xs = torch.where(hard[0] > 0)
        if ys.numel() == 0:
            # Previously this crashed with an opaque min()-of-empty-tensor error.
            raise ValueError("MaskBoundingBoxAspectRatio: mask is empty, no bounding box exists.")
        x1 = max(0, int(xs.min()) - padding)
        x2 = min(W, int(xs.max()) + 1 + padding)
        y1 = max(0, int(ys.min()) - padding)
        y2 = min(H, int(ys.max()) + 1 + padding)
        w0 = x2 - x1
        h0 = y2 - y1

        # Source image: fall back to the mask rendered as RGB; resize the
        # provided image to the mask's resolution when they disagree.
        if image is None:
            img_full = hard.unsqueeze(-1).repeat(1, 1, 1, 3).to(torch.float32)
        else:
            img_full = image
            if img_full.shape[1:3] != (H, W):
                img_full = comfy.utils.common_upscale(
                    img_full.permute(0, 3, 1, 2),
                    W, H, upscale_method="bicubic", crop="center"
                ).permute(0, 2, 3, 1)

        ar = aspect_ratio
        req_w = math.ceil(h0 * ar)    # width needed to hit AR at current height
        req_h = math.floor(w0 / ar)   # height needed to hit AR at current width

        new_x1, new_x2 = x1, x2
        new_y1, new_y2 = y1, y2

        flush_left  = (x1 == 0)
        flush_right = (x2 == W)
        flush_top   = (y1 == 0)
        flush_bot   = (y2 == H)

        # Grow exactly one axis; `transpose` flips which axis is preferred.
        if not transpose:
            if req_w > w0:      # widen
                new_x1, new_x2 = self._expand_span(x1, x2, W, req_w, flush_left, flush_right)
            elif req_h > h0:    # heighten
                new_y1, new_y2 = self._expand_span(y1, y2, H, req_h, flush_top, flush_bot)
        else:
            if req_h > h0:
                new_y1, new_y2 = self._expand_span(y1, y2, H, req_h, flush_top, flush_bot)
            elif req_w > w0:
                new_x1, new_x2 = self._expand_span(x1, x2, W, req_w, flush_left, flush_right)

        final_w = new_x2 - new_x1
        final_h = new_y2 - new_y1

        # done... crop image & masks
        img_crop = img_full[:, new_y1:new_y2, new_x1:new_x2, :]
        mask_crop = hard[:, new_y1:new_y2, new_x1:new_x2]
        mask_blurred = mask_blur_full[:, new_y1:new_y2, new_x1:new_x2]

        return (
            img_crop,
            mask_crop,
            mask_blurred,
            new_x1,
            new_y1,
            final_w,
            final_h,
        )
================================================
FILE: latent_images.py
================================================
import comfy.samplers
import comfy.sample
import comfy.sampler_helpers
import comfy.utils
import itertools
import torch
import math
import re
from .beta.noise_classes import *
def initialize_or_scale(tensor, value, steps):
    """Return a constant schedule of `value` over `steps` entries when no
    tensor is given, otherwise scale the provided tensor by `value`."""
    if tensor is not None:
        return value * tensor
    return torch.full((steps,), value)
def latent_normalize_channels(x):
    """Z-score each channel over its spatial dimensions."""
    spatial = (-2, -1)
    return (x - x.mean(dim=spatial, keepdim=True)) / x.std(dim=spatial, keepdim=True)
def latent_stdize_channels(x):
    """Divide each channel by its spatial standard deviation."""
    return x / x.std(dim=(-2, -1), keepdim=True)
def latent_meancenter_channels(x):
    """Subtract each channel's spatial mean."""
    return x - x.mean(dim=(-2, -1), keepdim=True)
class latent_channelwise_match:
    # Matches per-channel statistics (mean and, optionally, std) of
    # latent_target to those of latent_source, optionally restricted to
    # masked regions. Statistics are computed in float64 on the model's
    # "processed" latent representation, then mapped back.
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "model": ("MODEL",),
                "latent_target": ("LATENT", ),
                "latent_source": ("LATENT", ),
            },
            "optional": {
                "mask_target": ("MASK", ),
                "mask_source": ("MASK", ),
                "extra_options": ("STRING", {"default": "", "multiline": True}),
            }
        }

    RETURN_TYPES = ("LATENT",)
    RETURN_NAMES = ("latent_matched",)
    CATEGORY = "RES4LYF/latents"
    FUNCTION = "main"

    def main(self, model, latent_target, mask_target, latent_source, mask_source, extra_options):
        # Remember the caller's dtype so the result can be converted back.
        dtype = latent_target['samples'].dtype

        # extra_options "exclude_channels=0,3" skips those channels entirely.
        exclude_channels_match = re.search(r"exclude_channels=([\d,]+)", extra_options)
        exclude_channels = []
        if exclude_channels_match:
            exclude_channels = [int(ch.strip()) for ch in exclude_channels_match.group(1).split(",")]

        # "disable_process_latent" works on raw samples instead of the model's
        # processed latent space.
        if re.search(r"\bdisable_process_latent\b", extra_options):
            x_target = latent_target['samples'].clone()
            x_source = latent_source['samples'].clone()
        else:
            #x_target = model.inner_model.inner_model.process_latent_in(latent_target['samples']).clone()
            #x_source = model.inner_model.inner_model.process_latent_in(latent_source['samples']).clone()
            x_target = model.model.process_latent_in(latent_target['samples']).clone().to(torch.float64)
            x_source = model.model.process_latent_in(latent_source['samples']).clone().to(torch.float64)

        # Broadcast masks to the latent's channel count and spatial size.
        # NOTE(review): F.interpolate is used but this file's visible imports
        # only show `import torch` — presumably F arrives via the star import
        # from .beta.noise_classes; confirm.
        if mask_target is None:
            mask_target = torch.ones_like(x_target)
        else:
            mask_target = mask_target.unsqueeze(1)
            mask_target = mask_target.repeat(1, x_target.shape[1], 1, 1)
            mask_target = F.interpolate(mask_target, size=(x_target.shape[2], x_target.shape[3]), mode='bilinear', align_corners=False)
            mask_target = mask_target.to(x_target.dtype).to(x_target.device)

        if mask_source is None:
            mask_source = torch.ones_like(x_target)
        else:
            mask_source = mask_source.unsqueeze(1)
            mask_source = mask_source.repeat(1, x_target.shape[1], 1, 1)
            mask_source = F.interpolate(mask_source, size=(x_target.shape[2], x_target.shape[3]), mode='bilinear', align_corners=False)
            mask_source = mask_source.to(x_target.dtype).to(x_target.device)

        # Split target into the fully-masked (mask==1) region and the rest.
        x_target_masked = x_target * ((mask_target==1)*mask_target)
        x_target_masked_inv = x_target - x_target_masked
        #x_source_masked = x_source * ((mask_source==1)*mask_source)

        x_matched = torch.zeros_like(x_target)
        for n in range(x_matched.shape[1]):
            if n in exclude_channels:
                # Excluded channels pass through untouched.
                x_matched[0][n] = x_target[0][n]
                continue
            # Per-channel statistics over the mask==1 pixels only.
            x_target_masked_values = x_target[0][n][mask_target[0][n] == 1]
            x_source_masked_values = x_source[0][n][mask_source[0][n] == 1]

            x_target_masked_values_mean = x_target_masked_values.mean()
            x_target_masked_values_std = x_target_masked_values.std()

            x_target_masked_source_mean = x_source_masked_values.mean()
            x_target_masked_source_std = x_source_masked_values.std()

            # Whole-tensor statistics (used when masks are disabled).
            x_target_mean = x_target.mean()
            x_target_std = x_target.std()
            x_source_mean = x_source.mean()
            x_source_std = x_source.std()

            # std matching is opt-in ("enable_std"); mean matching is opt-out.
            if re.search(r"\benable_std\b", extra_options) == None:
                x_target_std = x_target_masked_values_std = x_target_masked_source_std = 1
            if re.search(r"\bdisable_mean\b", extra_options):
                # NOTE(review): means are neutralized to 1, not 0 — verify intent.
                x_target_mean = x_target_masked_values_mean = x_target_masked_source_mean = 1

            if re.search(r"\bdisable_masks\b", extra_options):
                # Global match: z-score against target stats, rescale to source stats.
                x_matched[0][n] = (x_target[0][n] - x_target_mean) / x_target_std
                x_matched[0][n] = (x_matched[0][n] * x_source_std) + x_source_mean
            else:
                # Masked match, then re-insert the untouched (unmasked) region.
                x_matched[0][n] = (x_target_masked[0][n] - x_target_masked_values_mean) / x_target_masked_values_std
                x_matched[0][n] = (x_matched[0][n] * x_target_masked_source_std) + x_target_masked_source_mean
                x_matched[0][n] = x_target_masked_inv[0][n] + x_matched[0][n] * ((mask_target[0][n]==1)*mask_target[0][n])

        if re.search(r"\bdisable_process_latent\b", extra_options) == None:
            x_matched = model.model.process_latent_out(x_matched).clone()

        return ({"samples": x_matched.to(dtype)}, )
================================================
FILE: latents.py
================================================
import torch
import torch.nn.functional as F
from typing import Tuple, List, Union
import math
# TENSOR PROJECTION OPS
def get_cosine_similarity_manual(a, b):
    """Cosine similarity computed from the raw dot product and norms."""
    dot = (a * b).sum()
    return dot / (torch.norm(a) * torch.norm(b))
def get_cosine_similarity(a, b, mask=None, dim=0):
    """Flattened cosine similarity, optionally masked; a singleton temporal
    dim on a 5-D `b` is broadcast to match `a`."""
    if a.ndim == 5 and b.ndim == 5 and b.shape[2] == 1:
        b = b.expand(-1, -1, a.shape[2], -1, -1)
    if mask is None:
        return F.cosine_similarity(a.flatten(), b.flatten(), dim=dim)
    return F.cosine_similarity((mask * a).flatten(), (mask * b).flatten(), dim=dim)
def get_pearson_similarity(a, b, mask=None, dim=0, norm_dim=None):
    """Pearson correlation: cosine similarity of mean-centered tensors.

    Default centering dims depend on rank (spatial for 4-D, spatio-temporal
    for 5-D); a singleton temporal dim on a 5-D `b` is broadcast to match `a`.
    """
    if a.ndim == 5 and b.ndim == 5 and b.shape[2] == 1:
        b = b.expand(-1, -1, a.shape[2], -1, -1)
    if norm_dim is None:
        if a.ndim == 4:
            norm_dim = (-2, -1)
        elif a.ndim == 5:
            norm_dim = (-4, -2, -1)
    centered_a = a - a.mean(dim=norm_dim, keepdim=True)
    centered_b = b - b.mean(dim=norm_dim, keepdim=True)
    if mask is None:
        return F.cosine_similarity(centered_a.flatten(), centered_b.flatten(), dim=dim)
    return F.cosine_similarity((mask * centered_a).flatten(), (mask * centered_b).flatten(), dim=dim)
def get_collinear(x, y):
    """Component of x collinear with y, reshaped back to x's shape."""
    flat = get_collinear_flat(x, y)
    return flat.reshape_as(x)
def get_orthogonal(x, y):
    """Component of x orthogonal to y (computed per batch row)."""
    flat_x = x.reshape(x.size(0), -1).clone()
    residue = flat_x - get_collinear_flat(x, y)
    return residue.view_as(x)
def get_collinear_flat(x, y):
    """Projection of each flattened row of x onto the corresponding unit row of y."""
    flat_x = x.reshape(x.size(0), -1).clone()
    unit_y = y.reshape(y.size(0), -1).clone()
    unit_y = unit_y / unit_y.norm(dim=-1, keepdim=True)
    coeff = torch.sum(flat_x * unit_y, dim=-1, keepdim=True)
    return coeff * unit_y
def get_orthogonal_noise_from_channelwise(*refs, max_iter=500, max_score=1e-15):
    """Iteratively Gram-Schmidt the first tensor against the rest until the
    worst residual cosine similarity (per channel and overall) falls below
    max_score, or max_iter passes elapse."""
    noise, *refs = refs
    result = noise.clone()

    if noise.ndim == 4:
        _, ch, _, _ = noise.shape
    elif noise.ndim == 5:
        _, ch, _, _, _ = noise.shape

    for _ in range(max_iter):
        result = gram_schmidt_channels_optimized(result, *refs)
        scores = []
        for ref in refs:
            for c in range(ch):
                scores.append(get_cosine_similarity(result[0][c], ref[0][c]).abs())
            scores.append(get_cosine_similarity(result[0], ref[0]).abs())
        if max(scores) < max_score:
            break
    return result
def gram_schmidt_channels_optimized(A, *refs):
    """Subtract from A, channelwise and in place (A is modified through a
    view), its projection onto each reference tensor."""
    if A.ndim == 4:
        b, c, _, _ = A.shape
    elif A.ndim == 5:
        b, c, _, _, _ = A.shape
    A_flat = A.view(b, c, -1)
    for ref in refs:
        unit = ref.view(b, c, -1).clone()
        unit /= unit.norm(dim=-1, keepdim=True)
        # Remove the component of A along this (unit) reference.
        A_flat -= torch.sum(A_flat * unit, dim=-1, keepdim=True) * unit
    return A_flat.view_as(A)
# Efficient implementation equivalent to the following:
def attention_weights(
    query,
    key,
    attn_mask=None
) -> torch.Tensor:
    """Return softmax(QK^T / sqrt(d)) with an optional additive or boolean mask."""
    L, S = query.size(-2), key.size(-2)
    inv_sqrt_d = 1 / math.sqrt(query.size(-1))
    bias = torch.zeros(L, S, dtype=query.dtype).to(query.device)
    if attn_mask is not None:
        if attn_mask.dtype == torch.bool:
            # Boolean mask: blocked positions get -inf before softmax.
            bias.masked_fill_(attn_mask.logical_not(), float("-inf"))
        else:
            bias += attn_mask
    logits = query @ key.transpose(-2, -1) * inv_sqrt_d
    logits = logits + bias
    return torch.softmax(logits, dim=-1)
def attention_weights_orig(q, k):
    """Softmax attention weights using in-place ops to limit peak memory."""
    scores = torch.matmul(q, k.transpose(-2, -1))
    scores.div_(math.sqrt(q.size(-1)))
    scores.exp_()
    scores.div_(torch.sum(scores, dim=-1, keepdim=True))
    # Clamp any NaN/inf from degenerate rows into the fp16-representable range.
    return scores.nan_to_num_(0.0, 65504., -65504.)
# calculate slerp ratio needed to hit a target cosine similarity score
def get_slerp_weight_for_cossim(cos_sim, target_cos):
    """Solve the quadratic for the slerp blend weight that yields
    `target_cos` similarity (assumes unit-vector matrices were used to
    compute `cos_sim`).

    Returns 0.5 when inputs are nearly collinear, and None when no real
    solution exists (deliberately, so the failure surfaces loudly downstream).
    """
    import math
    c, T = cos_sim, target_cos
    K = 1 - c
    A = K**2 - 2 * T**2 * K
    B = 2 * (1 - c) * (c + T**2)
    C = c**2 - T**2

    if abs(A) < 1e-8:   # nearly collinear
        return 0.5      # just mix 50:50

    disc = B**2 - 4*A*C
    if disc < 0:
        return None     # no valid solution... blow up somewhere to get user's attention

    root = math.sqrt(disc)
    solutions = [(-B + root) / (2 * A), (-B - root) / (2 * A)]
    in_range = [w for w in solutions if 0 <= w <= 1]
    if in_range:
        return in_range[0]
    return max(0.0, min(1.0, solutions[0]))
def get_slerp_ratio(cos_sim_A, cos_sim_B, target_cos):
    """Linear interpolation in angle space: where `target_cos` falls between
    the angles implied by cos_sim_A and cos_sim_B (clamped to [0, 1])."""
    import math
    angle_a = math.acos(cos_sim_A)
    angle_b = math.acos(cos_sim_B)
    angle_t = math.acos(target_cos)
    if abs(angle_b - angle_a) < 1e-6:
        return 0.5
    ratio = (angle_t - angle_a) / (angle_b - angle_a)
    return min(1.0, max(0.0, ratio))
def find_slerp_ratio_grid(A: torch.Tensor, B: torch.Tensor, D: torch.Tensor, E: torch.Tensor,
                        target_ratio: float = 1.0, num_samples: int = 100) -> float:
    """
    Grid-search the slerp parameter t in [0,1] minimizing
        |cos(slerp(t,A,B), D) - target_ratio * cos(slerp(t,A,B), E)|.
    Uniform sampling avoids needing the sign change bisection would require.
    """
    candidates = torch.linspace(0.0, 1.0, steps=num_samples, device=A.device, dtype=A.dtype)
    best_t = 0.0
    best_err = float('inf')
    for t_val in candidates:
        t_tensor = torch.tensor(t_val, dtype=A.dtype, device=A.device)
        mixed = slerp_tensor(t_tensor, A, B)
        err = abs(get_pearson_similarity(mixed, D) - target_ratio * get_pearson_similarity(mixed, E))
        if err < best_err:
            best_err = err
            best_t = t_val
    return best_t
def compute_slerp_ratio_for_target(A: torch.Tensor, B: torch.Tensor, D: torch.Tensor, target: float) -> float:
    """
    For (assumed coplanar) vectors A, B, D, find t in [0, 1] such that
    slerp(t, A, B) has cosine similarity ≈ `target` with D.

    Works in angle space: linearly interpolates between angle(D, A) and
    angle(D, B), falling back to 0.5 when those angles coincide.
    """
    A = A / (A.norm() + 1e-8)
    B = B / (B.norm() + 1e-8)
    D = D / (D.norm() + 1e-8)

    def _angle(cos_val):
        # Clamp before acos to guard against floating-point drift.
        return math.acos(max(-1.0, min(1.0, cos_val)))

    angle_a = _angle(float(torch.dot(D, A)))   # angle between D and A
    angle_b = _angle(float(torch.dot(D, B)))   # angle between D and B
    angle_t = _angle(target)                   # angle for the target similarity

    if abs(angle_b - angle_a) < 1e-6:
        return 0.5
    t = (angle_t - angle_a) / (angle_b - angle_a)
    return min(1.0, max(0.0, t))
# TENSOR NORMALIZATION OPS
def normalize_zscore(x, channelwise=False, inplace=False):
    """Z-score normalize a tensor.

    channelwise=True normalizes each channel over its spatial dims (-2, -1);
    otherwise the whole tensor is normalized at once. inplace=True mutates x.

    Bug fix: the non-inplace channelwise branch previously computed
    `x - (mean / std)` due to a misplaced parenthesis; it now computes
    `(x - mean) / std`, matching the other three branches.
    """
    if inplace:
        if channelwise:
            return x.sub_(x.mean(dim=(-2,-1), keepdim=True)).div_(x.std(dim=(-2,-1), keepdim=True))
        else:
            return x.sub_(x.mean()).div_(x.std())
    else:
        if channelwise:
            return (x - x.mean(dim=(-2,-1), keepdim=True)) / x.std(dim=(-2,-1), keepdim=True)
        else:
            return (x - x.mean()) / x.std()
def latent_normalize_channels(x):
    """Z-score each channel over its spatial dimensions."""
    spatial = (-2, -1)
    return (x - x.mean(dim=spatial, keepdim=True)) / x.std(dim=spatial, keepdim=True)
def latent_stdize_channels(x):
    """Divide each channel by its spatial standard deviation."""
    return x / x.std(dim=(-2, -1), keepdim=True)
def latent_meancenter_channels(x):
    """Subtract each channel's spatial mean."""
    return x - x.mean(dim=(-2, -1), keepdim=True)
# TENSOR INTERPOLATION OPS
def lagrange_interpolation(x_values, y_values, x_new):
    """
    Evaluate the Lagrange interpolating polynomial through (x_values, y_values)
    at the points x_new. y_values may carry arbitrary trailing dimensions; its
    leading dimension indexes samples and must match len(x_values).
    """
    if not isinstance(x_values, torch.Tensor):
        x_values = torch.tensor(x_values, dtype=torch.get_default_dtype())
    if x_values.ndim != 1:
        raise ValueError("x_values must be a 1D tensor or a list of scalars.")

    if not isinstance(x_new, torch.Tensor):
        x_new = torch.tensor(x_new, dtype=x_values.dtype, device=x_values.device)
    if x_new.ndim == 0:
        x_new = x_new.unsqueeze(0)

    if isinstance(y_values, list):
        y_values = torch.stack(y_values, dim=0)
    if y_values.ndim < 1:
        raise ValueError("y_values must have at least one dimension (the sample dimension).")

    n = x_values.shape[0]
    if y_values.shape[0] != n:
        raise ValueError(f"Mismatch: x_values has length {n} but y_values has {y_values.shape[0]} samples.")

    m = x_new.shape[0]
    result = torch.zeros((m,) + y_values.shape[1:], dtype=y_values.dtype, device=y_values.device)

    for i in range(n):
        # Lagrange basis polynomial L_i evaluated at every point of x_new.
        basis = torch.ones_like(x_new, dtype=y_values.dtype, device=y_values.device)
        for j in range(n):
            if j != i:
                basis = basis * (x_new - x_values[j]) / (x_values[i] - x_values[j])
        # Broadcast the basis over y's trailing dimensions.
        basis = basis.view(m, *([1] * (y_values.ndim - 1)))
        result = result + basis * y_values[i]
    return result
def line_intersection(a: torch.Tensor, d1: torch.Tensor, b: torch.Tensor, d2: torch.Tensor, eps=1e-8) -> torch.Tensor:
    """
    Intersection of the lines a + t*d1 and b + s*d2 in R^D (optionally batched
    as (B, D)). For skew lines, returns the midpoint of closest approach.
    """
    # Normal equations for the closest points:
    #   t*(d1·d1) - s*(d1·d2) = (b-a)·d1
    #   t*(d1·d2) - s*(d2·d2) = (b-a)·d2
    dot11 = (d1 * d1).sum(dim=-1, keepdim=True)
    dot22 = (d2 * d2).sum(dim=-1, keepdim=True)
    dot12 = (d1 * d2).sum(dim=-1, keepdim=True)

    delta = b - a
    rd1 = (delta * d1).sum(dim=-1, keepdim=True)
    rd2 = (delta * d2).sum(dim=-1, keepdim=True)

    # Cramer's rule, guarding the (near-)parallel case against division by zero.
    det = dot11 * dot22 - dot12 * dot12
    det = torch.where(det.abs() < eps, torch.full_like(det, eps), det)

    t = (rd1 * dot22 - rd2 * dot12) / det
    s = (rd1 * dot12 - rd2 * dot11) / det

    closest_on_1 = a + t * d1
    closest_on_2 = b + s * d2
    # Identical points when the lines truly intersect; midpoint otherwise.
    return (closest_on_1 + closest_on_2) / 2
def slerp_direction(t: float, u0: torch.Tensor, u1: torch.Tensor, DOT_THRESHOLD=0.9995) -> torch.Tensor:
    """Spherical interpolation between unit vectors u0 and u1; falls back to
    lerp when they are nearly aligned."""
    cos_angle = (u0 * u1).sum(-1).clamp(-1.0, 1.0)  # unit vectors, clamp guards fp drift
    if cos_angle.item() > DOT_THRESHOLD:
        return torch.lerp(u0, u1, t)
    angle = torch.acos(cos_angle)
    sin_angle = torch.sin(angle)
    partial = angle * t
    w0 = torch.sin(angle - partial) / sin_angle
    w1 = torch.sin(partial) / sin_angle
    return w0 * u0 + w1 * u1
def magnitude_aware_interpolation(t: float, v0: torch.Tensor, v1: torch.Tensor) -> torch.Tensor:
    """Slerp the directions of v0/v1 while linearly interpolating their magnitudes."""
    mag0 = v0.norm(dim=-1, keepdim=True)
    mag1 = v1.norm(dim=-1, keepdim=True)
    direction = slerp_direction(t, v0 / (mag0 + 1e-8), v1 / (mag1 + 1e-8))
    magnitude = (1 - t) * mag0 + t * mag1
    return magnitude * direction
def slerp_tensor(val: torch.Tensor, low: torch.Tensor, high: torch.Tensor, dim=-3) -> torch.Tensor:
    """Broadcast slerp between `low` and `high` with weight `val`.

    The norm/dot reduction dim is chosen from the tensor rank; nearly
    collinear or nearly opposite pairs fall back to plain lerp elementwise.
    """
    #dim = (2,3)
    # Pick the normalization dim by rank. NOTE(review): the 5-D branch tests
    # shape[-3] but reduces over -4 — confirm this asymmetry is intentional.
    if low.ndim == 4 and low.shape[-3] > 1:
        dim=-3
    elif low.ndim == 5 and low.shape[-3] > 1:
        dim=-4
    elif low.ndim == 2:
        dim=(-2,-1)

    # Promote a python float weight to a full tensor matching `low`.
    if type(val) == float:
        val = torch.Tensor([val]).expand_as(low).to(low.dtype).to(low.device)
    if val.shape != low.shape:
        val = val.expand_as(low)

    low_norm = low / (torch.norm(low, dim=dim, keepdim=True))
    high_norm = high / (torch.norm(high, dim=dim, keepdim=True))

    dot = (low_norm * high_norm).sum(dim=dim, keepdim=True).clamp(-1.0, 1.0)
    #near = ~(-0.9995 < dot < 0.9995) #dot > 0.9995 or dot < -0.9995
    near = dot > 0.9995
    opposite = dot < -0.9995
    condition = torch.logical_or(near, opposite)

    omega = torch.acos(dot)
    so = torch.sin(omega)
    if val.ndim < low.ndim:
        # NOTE(review): unsqueeze with a tuple dim (the 2-D case) would raise —
        # presumably unreachable because val was expanded above; verify.
        val = val.unsqueeze(dim)
    factor_low = torch.sin((1 - val) * omega) / so
    factor_high = torch.sin(val * omega) / so

    res = factor_low * low + factor_high * high
    # Where the vectors are (anti)parallel, use lerp to avoid 0/0 in the
    # slerp factors.
    res = torch.where(condition, low * (1 - val) + high * val, res)
    return res
# pytorch slerp implementation from https://gist.github.com/Birch-san/230ac46f99ec411ed5907b0a3d728efa
from torch import FloatTensor, LongTensor, Tensor, Size, lerp, zeros_like
from torch.linalg import norm
# adapted to PyTorch from:
# https://gist.github.com/dvschultz/3af50c40df002da3b751efab1daddf2c
# most of the extra complexity is to support:
# - many-dimensional vectors
# - v0 or v1 with last dim all zeroes, or v0 ~colinear with v1
# - falls back to lerp()
# - conditional logic implemented with parallelism rather than Python loops
# - many-dimensional tensor for t
# - you can ask for batches of slerp outputs by making t more-dimensional than the vectors
# - slerp(
# v0: torch.Size([2,3]),
# v1: torch.Size([2,3]),
# t: torch.Size([4,1,1]),
# )
# - this makes it interface-compatible with lerp()
def slerp(v0: FloatTensor, v1: FloatTensor, t: float|FloatTensor, DOT_THRESHOLD=0.9995):
'''
Spherical linear interpolation
Args:
v0: Starting vector
v1: Final vector
t: Float value between 0.0 and 1.0
DOT_THRESHOLD: Threshold for considering the two vectors as
colinear. Not recommended to alter this.
Returns:
Interpolation vector between v0 and v1
'''
assert v0.shape == v1.shape, "shapes of v0 and v1 must match"
# Normalize the vectors to get the directions and angles
v0_norm: FloatTensor = norm(v0, dim=-1)
v1_norm: FloatTensor = norm(v1, dim=-1)
v0_normed: FloatTensor = v0 / v0_norm.unsqueeze(-1)
v1_normed: FloatTensor = v1 / v1_norm.unsqueeze(-1)
# Dot product with the normalized vectors
dot: FloatTensor = (v0_normed * v1_normed).sum(-1)
dot_mag: FloatTensor = dot.abs()
# if dp is NaN, it's because the v0 or v1 row was filled with 0s
# If absolute value of dot product is almost 1, vectors are ~colinear, so use lerp
gotta_lerp: LongTensor = dot_mag.isnan() | (dot_mag > DOT_THRESHOLD)
can_slerp: LongTensor = ~gotta_lerp
t_batch_dim_count: int = max(0, t.ndim-v0.ndim) if isinstance(t, Tensor) else 0
t_batch_dims: Size = t.shape[:t_batch_dim_count] if isinstance(t, Tensor) else Size([])
out: FloatTensor = zeros_like(v0.expand(*t_batch_dims, *[-1]*v0.ndim))
# if no elements are lerpable, our vectors become 0-dimensional, preventing broadcasting
if gotta_lerp.any():
lerped: FloatTensor = lerp(v0, v1, t)
out: FloatTensor = lerped.where(gotta_lerp.unsqueeze(-1), out)
# if no elements are slerpable, our vectors become 0-dimensional, preventing broadcasting
if can_slerp.any():
# Calculate initial angle between v0 and v1
theta_0: FloatTensor = dot.arccos().unsqueeze(-1)
sin_theta_0: FloatTensor = theta_0.sin()
# Angle at timestep t
theta_t: FloatTensor = theta_0 * t
sin_theta_t: FloatTensor = theta_t.sin()
# Finish the slerp algorithm
s0: FloatTensor = (theta_0 - theta_t).sin() / sin_theta_0
s1: FloatTensor = sin_theta_t / sin_theta_0
slerped: FloatTensor = s0 * v0 + s1 * v1
out: FloatTensor = slerped.where(can_slerp.unsqueeze(-1), out)
return out
# this is silly...
def normalize_latent(target, source=None, mean=True, std=True, set_mean=None, set_std=None, channelwise=True):
    """Standardize latent(s), optionally rescaling to reference statistics.

    The target is standardized per batch item (and per channel when
    `channelwise` is True). If `source` is given, the result is rescaled to
    the source's mean/std. `set_mean`/`set_std` override the corresponding
    source statistic and now also work without a source (the original
    crashed when `source` was None and a set_* value was missing, and
    silently ignored set_* values when `source` was None).

    Args:
        target: tensor of shape [B, C, ...], or a list/tuple of such tensors.
        source: optional reference tensor (or list/tuple matching target).
        mean: normalize (and optionally restore) the mean.
        std: normalize (and optionally restore) the std.
        set_mean: explicit mean to impose (takes precedence over source).
        set_std: explicit std to impose (takes precedence over source).
        channelwise: operate on [b][c] slices instead of whole [b] items.

    Returns:
        A new tensor (or list of tensors) with the requested statistics.
    """
    def _reference_stats(src_slice):
        # Explicit set_* overrides win; otherwise read stats from the source
        # slice when one exists; otherwise no rescale is applied.
        ref_mean = set_mean if set_mean is not None else (src_slice.mean() if src_slice is not None else None)
        ref_std = set_std if set_std is not None else (src_slice.std() if src_slice is not None else None)
        return ref_mean, ref_std

    def _normalize_slice(t_slice, src_slice):
        # Standardize one slice, then restore reference stats when available.
        ref_mean, ref_std = _reference_stats(src_slice)
        if mean and std:
            out = (t_slice - t_slice.mean()) / t_slice.std()
            if ref_std is not None:
                out = out * ref_std
            if ref_mean is not None:
                out = out + ref_mean
        elif mean:
            out = t_slice - t_slice.mean()
            if ref_mean is not None:
                out = out + ref_mean
        elif std:
            out = t_slice / t_slice.std()
            if ref_std is not None:
                out = out * ref_std
        else:
            # Preserves original behavior: with neither moment selected the
            # output slice stays all-zero.
            out = torch.zeros_like(t_slice)
        return out

    def _normalize_single(single_target, single_source=None):
        y = torch.zeros_like(single_target)
        for b in range(y.shape[0]):
            if channelwise:
                for c in range(y.shape[1]):
                    src = single_source[b][c] if single_source is not None else None
                    y[b][c] = _normalize_slice(single_target[b][c], src)
            else:
                src = single_source[b] if single_source is not None else None
                y[b] = _normalize_slice(single_target[b], src)
        return y

    if isinstance(target, (list, tuple)):
        # Clone element-wise (the original called .clone() on the list itself,
        # which raised AttributeError).
        if source is not None:
            assert isinstance(source, (list, tuple)) and len(source) == len(target), \
                "If target is a list/tuple, source must be a list/tuple of the same length."
            return [_normalize_single(t.clone(), s.clone()) for t, s in zip(target, source)]
        return [_normalize_single(t.clone()) for t in target]

    target = target.clone()
    source = source.clone() if source is not None else None
    return _normalize_single(target, source)
def hard_light_blend(base_latent, blend_latent):
if base_latent.sum() == 0 and base_latent.std() == 0:
return base_latent
blend_latent = (blend_latent - blend_latent.min()) / (blend_latent.max() - blend_latent.min())
positive_mask = base_latent >= 0
negative_mask = base_latent < 0
positive_latent = base_latent * positive_mask.float()
negative_latent = base_latent * negative_mask.float()
positive_result = torch.where(blend_latent < 0.5,
2 * positive_latent * blend_latent,
1 - 2 * (1 - positive_latent) * (1 - blend_latent))
negative_result = torch.where(blend_latent < 0.5,
2 * negative_latent.abs() * blend_latent,
1 - 2 * (1 - negative_latent.abs()) * (1 - blend_latent))
negative_result = -negative_result
combined_result = positive_result * positive_mask.float() + negative_result * negative_mask.float()
#combined_result *= base_latent.max()
ks = combined_result
ks2 = torch.zeros_like(base_latent)
for n in range(base_latent.shape[1]):
ks2[0][n] = (ks[0][n]) / ks[0][n].std()
ks2[0][n] = (ks2[0][n] * base_latent[0][n].std())
combined_result = ks2
return combined_result
def make_checkerboard(tile_size: int, num_tiles: int, dtype=torch.float16, device="cpu"):
    """Build a square checkerboard mask of `num_tiles` x `num_tiles` cells,
    each cell `tile_size` x `tile_size` pixels. The top-left cell is 0.
    """
    unit = torch.tensor([[0, 1], [1, 0]], dtype=dtype, device=device)
    # Tile the 2x2 unit enough times, then crop to exactly num_tiles cells.
    cells = unit.repeat(num_tiles // 2 + 1, num_tiles // 2 + 1)[:num_tiles, :num_tiles]
    # Blow each cell up to tile_size x tile_size pixels.
    return cells.repeat_interleave(tile_size, dim=0).repeat_interleave(tile_size, dim=1)
def get_edge_mask_slug(mask: torch.Tensor, dilation: int = 3) -> torch.Tensor:
    """Extract a dilated edge (boundary) map from a 2D mask.

    Erodes with a 3x3 min-pool, subtracts to isolate the boundary ring,
    then dilates the ring with a `dilation`-sized max-pool.
    """
    mask = mask.float()
    # Erosion: min-pool implemented as a negated max-pool of the negation.
    shrunk = -F.max_pool2d(-mask.unsqueeze(0).unsqueeze(0), kernel_size=3, stride=1, padding=1)
    shrunk = shrunk.squeeze(0).squeeze(0)
    # Boundary = pixels present in the mask but not its erosion.
    ring = ((mask - shrunk) > 0).float()
    # Dilation: plain max-pool over the boundary ring.
    grown = F.max_pool2d(ring.unsqueeze(0).unsqueeze(0), kernel_size=dilation, stride=1, padding=dilation // 2)
    return grown.squeeze(0).squeeze(0)
def get_edge_mask(mask: torch.Tensor, dilation: int = 3) -> torch.Tensor:
    """Extract a dilated edge map from `mask`, preserving shape and device.

    Args:
        mask: mask tensor; singleton leading dims are squeezed away for the
            pooling ops and restored on return.
        dilation: max-pool kernel size for the dilation step. 0 returns the
            mask unchanged (a zero kernel size would be invalid).
    Returns:
        Edge mask with the same shape, device, and layout as `mask`.
    """
    if dilation == 0: # safeguard for zero kernel size...
        return mask
    # Fix: operate on the mask's own device instead of hardcoding 'cuda',
    # which crashed on CPU-only machines; the result was moved back to
    # mask.device anyway, so compute device is an implementation detail.
    mask_tmp = mask.squeeze().float()
    # Erosion via negated max-pool, then boundary ring, then dilation.
    eroded = -F.max_pool2d(-mask_tmp.unsqueeze(0).unsqueeze(0), kernel_size=3, stride=1, padding=1)
    eroded = eroded.squeeze(0).squeeze(0)
    edge = mask_tmp - eroded
    edge = (edge > 0).float()
    dilated_edge = F.max_pool2d(edge.unsqueeze(0).unsqueeze(0), kernel_size=dilation, stride=1, padding=dilation//2)
    dilated_edge = dilated_edge.squeeze(0).squeeze(0)
    # Crop any overshoot (even kernel sizes) and restore the original shape.
    return dilated_edge[..., :mask.shape[-2], :mask.shape[-1]].view_as(mask).to(mask.device)
def checkerboard_variable(widths, dtype=torch.float16, device='cpu'):
    """Checkerboard mask with variable tile widths (same list used for both
    axes). Tile (i, j) is 1 when i + j is even, so the top-left tile is 1.
    """
    total = sum(widths)
    mask = torch.zeros((total, total), dtype=dtype, device=device)
    row = 0
    for i, height in enumerate(widths):
        col = 0
        for j, width in enumerate(widths):
            if (i + j) % 2 == 0:  # checkerboard alternation
                mask[row:row + height, col:col + width] = 1.0
            col += width
        row += height
    return mask
def interpolate_spd(cov1, cov2, t, eps=1e-5):
    """
    Geodesic interpolation on the SPD manifold between cov1 and cov2:

        cov_t = M1^{1/2} (M1^{-1/2} M2 M1^{-1/2})^t M1^{1/2}

    Args:
        cov1, cov2: [D×D] symmetric positive-definite covariances (torch.Tensor).
        t: interpolation factor in [0,1].
        eps: jitter added to diagonal for numerical stability.
    Returns:
        cov_t: the SPD matrix at fraction t along the geodesic from cov1 to
            cov2, in cov1's ORIGINAL dtype.
    """
    # Fix: capture the caller's dtype BEFORE the float64 upcast. The original
    # read cov1.dtype after `cov1 = cov1.double()`, so it always returned
    # float64 regardless of the input dtype.
    out_dtype = cov1.dtype
    cov1 = cov1.double()
    cov2 = cov2.double()

    # Jittered copies keep the eigendecompositions well-conditioned.
    M1 = cov1.clone()
    M1.diagonal().add_(eps)
    M2 = cov2.clone()
    M2.diagonal().add_(eps)

    # M1^{-1/2} via eigendecomposition (eigenvalues clamped to stay SPD).
    S1, U1 = torch.linalg.eigh(M1)
    S1_clamped = S1.clamp(min=eps)
    inv_sqrt_S1 = S1_clamped.rsqrt()
    M1_inv_sqrt = U1 @ torch.diag(inv_sqrt_S1) @ U1.T

    # Whitened middle term and its t-th matrix power.
    middle = M1_inv_sqrt @ M2 @ M1_inv_sqrt
    Sm, Um = torch.linalg.eigh(middle)
    Sm_clamped = Sm.clamp(min=eps)
    Sm_t = Sm_clamped.pow(t)
    middle_t = Um @ torch.diag(Sm_t) @ Um.T

    # Un-whiten back to the original coordinates.
    sqrt_S1 = S1_clamped.sqrt()
    M1_sqrt = U1 @ torch.diag(sqrt_S1) @ U1.T
    cov_t = M1_sqrt @ middle_t @ M1_sqrt

    return cov_t.to(out_dtype)
def tile_latent(latent: torch.Tensor,
                tile_size: Tuple[int,int]
                ) -> Tuple[torch.Tensor,
                           Tuple[int,...],
                           Tuple[int,int],
                           Tuple[List[int],List[int]]]:
    """
    Split `latent` into spatial tiles of shape (t_h, t_w).

    Works on either:
      - 4D [B,C,H,W]
      - 5D [B,C,T,H,W]

    Tiles are laid out with evenly spaced (possibly overlapping) start
    offsets so the whole spatial extent is covered.

    Returns:
        tiles:      [B*rows*cols, C, (T,), t_h, t_w]
        orig_shape: the full shape of `latent`
        tile_hw:    (t_h, t_w)
        positions:  (pos_h, pos_w) lists of start y and x positions
    """
    t_h, t_w = tile_size
    H, W = latent.shape[-2], latent.shape[-1]

    # Ceil-divide to find how many tiles cover each axis.
    rows = -(-H // t_h)
    cols = -(-W // t_w)

    def _starts(extent, tile, count):
        # Evenly spaced start offsets from 0 to extent - tile (inclusive).
        if count == 1:
            return [0]
        return [round(k * (extent - tile) / (count - 1)) for k in range(count)]

    pos_h = _starts(H, t_h, rows)
    pos_w = _starts(W, t_w, cols)

    # Ellipsis slicing handles both 4D and 5D layouts uniformly.
    crops = []
    for y in pos_h:
        for x in pos_w:
            crops.append(latent[..., y:y + t_h, x:x + t_w])

    return torch.cat(crops, dim=0), tuple(latent.shape), (t_h, t_w), (pos_h, pos_w)
def untile_latent(tiles: torch.Tensor,
                  orig_shape: Tuple[int,...],
                  tile_hw: Tuple[int,int],
                  positions: Tuple[List[int],List[int]]
                  ) -> torch.Tensor:
    """
    Reassemble a latent from tiles plus their start positions.

    Works on either 4D (B,C,H,W) or 5D (B,C,T,H,W) originals; overlapping
    regions are averaged.

    Args:
        tiles:      [B*rows*cols, C, (T,), t_h, t_w]
        orig_shape: shape of original latent (B,C,H,W) or (B,C,T,H,W)
        tile_hw:    (t_h, t_w)
        positions:  (pos_h, pos_w)
    Returns:
        reconstructed latent of shape `orig_shape`
    """
    t_h, t_w = tile_hw
    pos_h, pos_w = positions
    rows, cols = len(pos_h), len(pos_w)
    B, C = orig_shape[0], orig_shape[1]

    out = torch.zeros(orig_shape, device=tiles.device, dtype=tiles.dtype)
    count = torch.zeros_like(out)

    # Regroup the flat tile batch into a (B, rows, cols, ...) grid.
    if len(orig_shape) == 5:
        grid = tiles.view(B, rows, cols, C, orig_shape[2], t_h, t_w)
    else:
        grid = tiles.view(B, rows, cols, C, t_h, t_w)

    for bi in range(B):
        for i, y in enumerate(pos_h):
            for j, x in enumerate(pos_w):
                out[bi, ..., y:y + t_h, x:x + t_w] += grid[bi, i, j]
                count[bi, ..., y:y + t_h, x:x + t_w] += 1

    # Average wherever tiles overlapped.
    covered = count > 0
    out[covered] = out[covered] / count[covered]
    return out
def upscale_to_match_spatial(tensor_5d, ref_4d, mode='bicubic'):
    """
    Resize a 5D tensor [B, C, T, H1, W1] so its spatial dims match a 4D
    reference [1, C, H2, W2].

    Args:
        tensor_5d: input of shape [B, C, T, H1, W1]
        ref_4d: spatial reference of shape [1, C, H2, W2]
        mode: interpolation mode ('bilinear' or 'bicubic')
    Returns:
        tensor of shape [B, C, T, H2, W2]
    """
    b, c, t, h_in, w_in = tensor_5d.shape
    h_out, w_out = ref_4d.shape[-2], ref_4d.shape[-1]
    # Fold batch and channel together so T rides along as the "channel"
    # axis for 2D interpolation.
    flat = tensor_5d.reshape(b * c, t, h_in, w_in)
    resized = F.interpolate(flat, size=(h_out, w_out), mode=mode, align_corners=False)
    return resized.view(b, c, t, h_out, w_out)
def gaussian_blur_2d(img: torch.Tensor, sigma: float, kernel_size: int = None) -> torch.Tensor:
    """Depthwise Gaussian blur of a [B, C, H, W] image with reflect padding.

    When `kernel_size` is omitted it is derived from sigma (±3σ support);
    either way it is forced odd so the kernel has a well-defined center.
    """
    B, C, H, W = img.shape
    if kernel_size is None:
        kernel_size = int(2 * math.ceil(3 * sigma) + 1)
    if kernel_size % 2 == 0:
        kernel_size += 1

    # Separable 1D Gaussian, normalized to sum 1, built in float64 for
    # accuracy and cast down afterwards.
    offsets = torch.arange(kernel_size, dtype=torch.float64) - kernel_size // 2
    gauss = torch.exp(-0.5 * (offsets / sigma) ** 2)
    gauss = gauss / gauss.sum()
    kern2d = (gauss[:, None] * gauss[None, :]).to(dtype=img.dtype, device=img.device)

    # One identical kernel per channel: depthwise conv via groups=C.
    weight = kern2d.expand(C, 1, kernel_size, kernel_size)
    pad = kernel_size // 2
    padded = F.pad(img, (pad, pad, pad, pad), mode='reflect')
    return F.conv2d(padded, weight, groups=C)
def median_blur_2d(img: torch.Tensor, kernel_size: int = 3) -> torch.Tensor:
    """Per-channel median filter over a [B, C, H, W] image (reflect padding).

    Even kernel sizes are bumped to the next odd value.
    """
    if kernel_size % 2 == 0:
        kernel_size += 1
    pad = kernel_size // 2
    B, C, H, W = img.shape
    padded = F.pad(img, (pad, pad, pad, pad), mode='reflect')
    # Gather every kernel_size x kernel_size neighborhood:
    # [B, C, H, W, kH, kW] -> flattened patches.
    windows = padded.unfold(2, kernel_size, 1).unfold(3, kernel_size, 1)
    patches = windows.contiguous().view(B, C, H, W, -1)
    # The median over each patch is the filtered pixel.
    return patches.median(dim=-1).values
def apply_to_state_info_tensors(obj, ref_shape, modify_func, *args, **kwargs):
    """
    Recursively walk `obj` and run `modify_func` on every tensor whose last
    five dimensions equal `ref_shape`'s last five dimensions.

    Used to apply a function to all relevant tensors in latent state_info.
    Containers are rebuilt only when something inside them changed, so
    untouched sub-structures keep their original identity.

    Args:
        obj: structure to traverse (dict, list, tuple, tensor, etc.)
        ref_shape: reference tensor shape to match against
        modify_func: applied to matching tensors as modify_func(tensor, *args, **kwargs)
        *args, **kwargs: forwarded to modify_func
    Returns:
        the (possibly) transformed structure
    """
    import torch

    if isinstance(obj, torch.Tensor):
        if obj.ndim >= 5:
            ref_tail = ref_shape[-5:] if len(ref_shape) >= 5 else ref_shape
            if obj.shape[-5:] == ref_tail:
                return modify_func(obj, *args, **kwargs)
        return obj

    def recurse(v):
        return apply_to_state_info_tensors(v, ref_shape, modify_func, *args, **kwargs)

    if isinstance(obj, dict):
        rebuilt = {k: recurse(v) for k, v in obj.items()}
        if all(rebuilt[k] is obj[k] for k in obj):
            return obj
        return rebuilt

    if isinstance(obj, list):
        rebuilt = [recurse(v) for v in obj]
        if all(new is old for new, old in zip(rebuilt, obj)):
            return obj
        return rebuilt

    if isinstance(obj, tuple):
        rebuilt = tuple(recurse(v) for v in obj)
        if all(new is old for new, old in zip(rebuilt, obj)):
            return obj
        return rebuilt

    return obj
================================================
FILE: legacy/__init__.py
================================================
from . import legacy_samplers
from . import legacy_sampler_rk
from . import rk_sampler
from . import samplers
from . import samplers_extensions
from . import samplers_tiled
def add_legacy(NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS, extra_samplers):
    """Register the legacy sampler nodes and extra sampler functions.

    Mutates and returns the three registries supplied by the package's
    top-level __init__: ComfyUI node class mappings, display-name mappings,
    and the extra sampler callables.
    """
    NODE_CLASS_MAPPINGS.update({
        "Legacy_ClownSampler"                 : legacy_samplers.Legacy_SamplerRK,
        "Legacy_SharkSampler"                 : legacy_samplers.Legacy_SharkSampler,
        "Legacy_ClownsharKSampler"            : legacy_samplers.Legacy_ClownsharKSampler,
        "Legacy_ClownsharKSamplerGuides"      : legacy_samplers.Legacy_ClownsharKSamplerGuides,
        "ClownSampler"                        : samplers.ClownSampler,
        "ClownSamplerAdvanced"                : samplers.ClownSamplerAdvanced,
        "ClownsharKSampler"                   : samplers.ClownsharKSampler,
        "ClownsharKSamplerGuides"             : samplers_extensions.ClownsharKSamplerGuides,
        "ClownsharKSamplerGuide"              : samplers_extensions.ClownsharKSamplerGuide,
        "ClownOptions_SDE_Noise"              : samplers_extensions.ClownOptions_SDE_Noise,
        "ClownOptions_FrameWeights"           : samplers_extensions.ClownOptions_FrameWeights,
        "ClownInpaint"                        : samplers_extensions.ClownInpaint,
        "ClownInpaintSimple"                  : samplers_extensions.ClownInpaintSimple,
        "ClownsharKSamplerOptions"            : samplers_extensions.ClownsharKSamplerOptions,
        "ClownsharKSamplerAutomation"         : samplers_extensions.ClownsharKSamplerAutomation,
        "ClownsharKSamplerAutomation_Advanced": samplers_extensions.ClownsharKSamplerAutomation_Advanced,
        "SamplerOptions_TimestepScaling"      : samplers_extensions.SamplerOptions_TimestepScaling,
        "SamplerOptions_GarbageCollection"    : samplers_extensions.SamplerOptions_GarbageCollection,
        "UltraSharkSampler"                   : samplers.UltraSharkSampler,
        "UltraSharkSampler Tiled"             : samplers_tiled.UltraSharkSampler_Tiled,
    })

    # NOTE(review): several keys below ("Legacy_SamplerRK", "UltraSharkSampler_Tiled")
    # do not match the class-mapping keys registered above ("Legacy_ClownSampler",
    # "UltraSharkSampler Tiled"), so those display names may never apply — confirm.
    NODE_DISPLAY_NAME_MAPPINGS.update({
        "Legacy_SamplerRK"                     : "Legacy_ClownSampler",
        "Legacy_SharkSampler"                  : "Legacy_SharkSampler",
        "Legacy_ClownsharKSampler"             : "Legacy_ClownsharKSampler",
        "Legacy_ClownsharKSamplerGuides"       : "Legacy_ClownsharKSamplerGuides",
        "ClownSampler"                         : "Legacy2_ClownSampler",
        "ClownSamplerAdvanced"                 : "Legacy2_ClownSamplerAdvanced",
        "ClownsharKSampler"                    : "Legacy2_ClownsharKSampler",
        "ClownsharKSamplerGuides"              : "Legacy2_ClownsharKSamplerGuides",
        "ClownsharKSamplerGuide"               : "Legacy2_ClownsharKSamplerGuide",
        "ClownOptions_SDE_Noise"               : "Legacy2_ClownOptions_SDE_Noise",
        "ClownOptions_FrameWeights"            : "Legacy2_ClownOptions_FrameWeights",
        "ClownInpaint"                         : "Legacy2_ClownInpaint",
        "ClownInpaintSimple"                   : "Legacy2_ClownInpaintSimple",
        "ClownsharKSamplerOptions"             : "Legacy2_ClownsharKSamplerOptions",
        "ClownsharKSamplerAutomation"          : "Legacy2_ClownsharKSamplerAutomation",
        "ClownsharKSamplerAutomation_Advanced" : "Legacy2_ClownsharKSamplerAutomation_Advanced",
        "SamplerOptions_TimestepScaling"       : "Legacy2_SamplerOptions_TimestepScaling",
        "SamplerOptions_GarbageCollection"     : "Legacy2_SamplerOptions_GarbageCollection",
        "UltraSharkSampler"                    : "Legacy2_UltraSharkSampler",
        "UltraSharkSampler_Tiled"              : "Legacy2_UltraSharkSampler Tiled",
    })

    # Most legacy sampler entry points are kept here commented-out; only the
    # generic "rk" samplers remain registered.
    extra_samplers.update({
        #"res_2m"     : rk_sampler.sample_res_2m,
        #"res_2s"     : rk_sampler.sample_res_2s,
        #"res_3s"     : rk_sampler.sample_res_3s,
        #"res_5s"     : rk_sampler.sample_res_5s,
        #"res_6s"     : rk_sampler.sample_res_6s,
        #"res_2m_sde" : rk_sampler.sample_res_2m_sde,
        #"res_2s_sde" : rk_sampler.sample_res_2s_sde,
        #"res_3s_sde" : rk_sampler.sample_res_3s_sde,
        #"res_5s_sde" : rk_sampler.sample_res_5s_sde,
        #"res_6s_sde" : rk_sampler.sample_res_6s_sde,
        #"deis_2m"    : rk_sampler.sample_deis_2m,
        #"deis_3m"    : rk_sampler.sample_deis_3m,
        #"deis_4m"    : rk_sampler.sample_deis_4m,
        #"deis_2m_sde": rk_sampler.sample_deis_2m_sde,
        #"deis_3m_sde": rk_sampler.sample_deis_3m_sde,
        #"deis_4m_sde": rk_sampler.sample_deis_4m_sde,
        "rk"          : rk_sampler.sample_rk,
        "legacy_rk"   : legacy_sampler_rk.legacy_sample_rk,
    })

    return NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS, extra_samplers
================================================
FILE: legacy/conditioning.py
================================================
import torch
import base64
import pickle # used strictly for serializing conditioning in the ConditioningToBase64 and Base64ToConditioning nodes for API use. (Offloading T5 processing to another machine to avoid model shuffling.)
import comfy.samplers
import comfy.sample
import comfy.sampler_helpers
import node_helpers
import functools
from .noise_classes import precision_tool
from copy import deepcopy
from .helper import initialize_or_scale
import torch.nn.functional as F
import copy
from .helper import get_orthogonal, get_collinear
from ..res4lyf import RESplain
def multiply_nested_tensors(structure, scalar):
    """Recursively scale every tensor inside a nested dict/list structure.

    Non-tensor leaves (ints, strings, None, ...) pass through unchanged.
    """
    if isinstance(structure, torch.Tensor):
        return structure * scalar
    if isinstance(structure, list):
        return [multiply_nested_tensors(item, scalar) for item in structure]
    if isinstance(structure, dict):
        return {key: multiply_nested_tensors(value, scalar) for key, value in structure.items()}
    return structure
class ConditioningOrthoCollin:
    # Combines two conditionings by decomposing each into components
    # collinear with / orthogonal to the other (via the project helpers
    # get_collinear / get_orthogonal) and blending them by strength.
    @classmethod
    def INPUT_TYPES(s):
        return {"required": {
            "conditioning_0": ("CONDITIONING", ),
            "conditioning_1": ("CONDITIONING", ),
            "t5_strength": ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step":0.01}),
            "clip_strength": ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step":0.01}),
            }}

    RETURN_TYPES = ("CONDITIONING",)
    FUNCTION = "combine"
    CATEGORY = "RES4LYF/conditioning"

    def combine(self, conditioning_0, conditioning_1, t5_strength, clip_strength):
        # T5 embeddings: components of each cond along/against the other.
        t5_0_1_collin = get_collinear (conditioning_0[0][0], conditioning_1[0][0])
        t5_1_0_ortho = get_orthogonal(conditioning_1[0][0], conditioning_0[0][0])
        t5_combined = t5_0_1_collin + t5_1_0_ortho  # only used by the commented-out variants below

        t5_1_0_collin = get_collinear (conditioning_1[0][0], conditioning_0[0][0])
        t5_0_1_ortho = get_orthogonal(conditioning_0[0][0], conditioning_1[0][0])
        t5_B_combined = t5_1_0_collin + t5_0_1_ortho  # only used by the commented-out variants below

        # Pooled CLIP output: a batch dim is added temporarily because the
        # projection helpers operate on batched tensors.
        pooled_0_1_collin = get_collinear (conditioning_0[0][1]['pooled_output'].unsqueeze(0), conditioning_1[0][1]['pooled_output'].unsqueeze(0)).squeeze(0)
        pooled_1_0_ortho = get_orthogonal(conditioning_1[0][1]['pooled_output'].unsqueeze(0), conditioning_0[0][1]['pooled_output'].unsqueeze(0)).squeeze(0)
        pooled_combined = pooled_0_1_collin + pooled_1_0_ortho

        #conditioning_0[0][0] = conditioning_0[0][0] + t5_strength * (t5_combined - conditioning_0[0][0])
        #conditioning_0[0][0] = t5_strength * t5_combined + (1-t5_strength) * t5_B_combined
        # Blend the two collinear components; NOTE: mutates conditioning_0
        # in place rather than copying.
        conditioning_0[0][0] = t5_strength * t5_0_1_collin + (1-t5_strength) * t5_1_0_collin
        conditioning_0[0][1]['pooled_output'] = conditioning_0[0][1]['pooled_output'] + clip_strength * (pooled_combined - conditioning_0[0][1]['pooled_output'])

        return (conditioning_0, )
class CLIPTextEncodeFluxUnguided:
    # Encodes separate CLIP-L and T5-XXL prompts for Flux and also reports
    # where each prompt's padding begins.
    @classmethod
    def INPUT_TYPES(s):
        return {"required": {
            "clip": ("CLIP", ),
            "clip_l": ("STRING", {"multiline": True, "dynamicPrompts": True}),
            "t5xxl": ("STRING", {"multiline": True, "dynamicPrompts": True}),
            }}

    RETURN_NAMES = ("conditioning", "clip_l_end", "t5xxl_end",)
    RETURN_TYPES = ("CONDITIONING","INT","INT",)
    FUNCTION = "encode"
    CATEGORY = "RES4LYF/conditioning"

    def encode(self, clip, clip_l, t5xxl):
        """Encode both prompts and return the conditioning plus the index of
        the first end/pad token for each tokenizer (0 if none found)."""
        tokens = clip.tokenize(clip_l)
        tokens["t5xxl"] = clip.tokenize(t5xxl)["t5xxl"]

        # First CLIP-L end-of-text token (id 49407).
        clip_l_end = 0
        for i in range(len(tokens['l'][0])):
            if tokens['l'][0][i][0] == 49407:
                clip_l_end = i
                break

        # First T5-XXL end token (id 1). Fixed: the original iterated over
        # len(tokens['l'][0]) here (author-flagged "bug?"), so any end token
        # beyond CLIP-L's token count was never found.
        t5xxl_end = 0
        for i in range(len(tokens['t5xxl'][0])):
            if tokens['t5xxl'][0][i][0] == 1:
                t5xxl_end = i
                break

        output = clip.encode_from_tokens(tokens, return_pooled=True, return_dict=True)
        cond = output.pop("cond")
        conditioning = [[cond, output]]
        # Stash the end indices in the conditioning metadata as well.
        conditioning[0][1]['clip_l_end'] = clip_l_end
        conditioning[0][1]['t5xxl_end'] = t5xxl_end
        return (conditioning, clip_l_end, t5xxl_end,)
class StyleModelApplyAdvanced:
    @classmethod
    def INPUT_TYPES(s):
        return {"required": {"conditioning": ("CONDITIONING", ),
                             "style_model": ("STYLE_MODEL", ),
                             "clip_vision_output": ("CLIP_VISION_OUTPUT", ),
                             "strength": ("FLOAT", {"default": 1.0, "min": -10.0, "max": 10.0, "step": 0.001}),
                             }}

    RETURN_TYPES = ("CONDITIONING",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/conditioning"
    DESCRIPTION = "Use with Flux Redux."

    def main(self, clip_vision_output, style_model, conditioning, strength=1.0):
        """Append strength-scaled Redux image tokens to each conditioning entry."""
        image_tokens = style_model.get_cond(clip_vision_output).flatten(start_dim=0, end_dim=1).unsqueeze(dim=0)
        image_tokens = strength * image_tokens
        result = []
        for entry in conditioning:
            # Concatenate the image tokens after the text tokens; metadata
            # dict is shallow-copied so the input is not mutated.
            result.append([torch.cat((entry[0], image_tokens), dim=1), entry[1].copy()])
        return (result, )
class ConditioningZeroAndTruncate:
    # needs updating to ensure dims are correct for arbitrary models without hardcoding.
    # vanilla ConditioningZeroOut node doesn't truncate and SD3.5M degrades badly with large embeddings, even if zeroed out, as the negative conditioning
    @classmethod
    def INPUT_TYPES(s):
        return { "required": {"conditioning": ("CONDITIONING", )}}

    RETURN_TYPES = ("CONDITIONING",)
    FUNCTION = "zero_out"
    CATEGORY = "RES4LYF/conditioning"
    DESCRIPTION = "Use for negative conditioning with SD3.5. ConditioningZeroOut does not truncate the embedding, \
which results in severe degradation of image quality with SD3.5 when the token limit is exceeded."

    def zero_out(self, conditioning):
        """Replace each entry with a zeroed, truncated (1,154,4096) embedding."""
        out = []
        for entry in conditioning:
            meta = entry[1].copy()
            # Zero the pooled output too, when present.
            if meta.get("pooled_output", None) is not None:
                meta["pooled_output"] = torch.zeros((1,2048), dtype=entry[0].dtype, device=entry[0].device)
            zeroed = torch.zeros((1,154,4096), dtype=entry[0].dtype, device=entry[0].device)
            out.append([zeroed, meta])
        return (out, )
class ConditioningTruncate:
    # needs updating to ensure dims are correct for arbitrary models without hardcoding.
    @classmethod
    def INPUT_TYPES(s):
        return { "required": {"conditioning": ("CONDITIONING", )}}

    RETURN_TYPES = ("CONDITIONING",)
    FUNCTION = "zero_out"
    CATEGORY = "RES4LYF/conditioning"
    DESCRIPTION = "Use for positive conditioning with SD3.5. Tokens beyond 77 result in degradation of image quality."

    def zero_out(self, conditioning):
        """Truncate embeddings to (·,154,4096) and pooled outputs to 2048 dims."""
        out = []
        for entry in conditioning:
            meta = entry[1].copy()
            if meta.get("pooled_output", None) is not None:
                meta["pooled_output"] = meta["pooled_output"][:, :2048]
            out.append([entry[0][:, :154, :4096], meta])
        return (out, )
class ConditioningMultiply:
    @classmethod
    def INPUT_TYPES(s):
        return {"required": {"conditioning": ("CONDITIONING", ),
                             "multiplier": ("FLOAT", {"default": 1.0, "min": -1000000000.0, "max": 1000000000.0, "step": 0.01})
                             }}

    RETURN_TYPES = ("CONDITIONING",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/conditioning"

    def main(self, conditioning, multiplier):
        """Scale every tensor inside the conditioning structure by `multiplier`."""
        scaled = multiply_nested_tensors(conditioning, multiplier)
        return (scaled,)
class ConditioningAdd:
    @classmethod
    def INPUT_TYPES(s):
        return {"required": {"conditioning_1": ("CONDITIONING", ),
                             "conditioning_2": ("CONDITIONING", ),
                             "multiplier": ("FLOAT", {"default": 1.0, "min": -1000000000.0, "max": 1000000000.0, "step": 0.01})
                             }}

    RETURN_TYPES = ("CONDITIONING",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/conditioning"

    def main(self, conditioning_1, conditioning_2, multiplier):
        """Add `multiplier` * conditioning_2 into conditioning_1.

        NOTE: mutates conditioning_1's tensors in place.
        """
        head = conditioning_1[0]
        head[0] += multiplier * conditioning_2[0][0]
        head[1]['pooled_output'] += multiplier * conditioning_2[0][1]['pooled_output']
        return (conditioning_1,)
class ConditioningCombine:
    @classmethod
    def INPUT_TYPES(s):
        return {"required": {"conditioning_1": ("CONDITIONING", ), "conditioning_2": ("CONDITIONING", )}}

    RETURN_TYPES = ("CONDITIONING",)
    FUNCTION = "combine"
    CATEGORY = "RES4LYF/conditioning"

    def combine(self, conditioning_1, conditioning_2):
        """Concatenate the two conditioning lists into a new list."""
        combined = [*conditioning_1, *conditioning_2]
        return (combined, )
class ConditioningAverage :
    @classmethod
    def INPUT_TYPES(s):
        return {"required": {"conditioning_to": ("CONDITIONING", ), "conditioning_from": ("CONDITIONING", ),
                             "conditioning_to_strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01})
                             }}

    RETURN_TYPES = ("CONDITIONING",)
    FUNCTION = "addWeighted"
    CATEGORY = "RES4LYF/conditioning"

    def addWeighted(self, conditioning_to, conditioning_from, conditioning_to_strength):
        """Weighted average of two conditionings.

        Only the first entry of `conditioning_from` is used; it is cropped
        or zero-padded to each target's token length before mixing.
        """
        out = []
        if len(conditioning_from) > 1:
            RESplain("Warning: ConditioningAverage conditioning_from contains more than 1 cond, only the first one will actually be applied to conditioning_to.")

        cond_from = conditioning_from[0][0]
        pooled_from = conditioning_from[0][1].get("pooled_output", None)

        for entry in conditioning_to:
            t1 = entry[0]
            pooled_to = entry[1].get("pooled_output", pooled_from)
            # Crop to the target's token length, then zero-pad any shortfall.
            t0 = cond_from[:,:t1.shape[1]]
            if t0.shape[1] < t1.shape[1]:
                t0 = torch.cat([t0] + [torch.zeros((1, (t1.shape[1] - t0.shape[1]), t1.shape[2]))], dim=1)

            blended = torch.mul(t1, conditioning_to_strength) + torch.mul(t0, (1.0 - conditioning_to_strength))
            meta = entry[1].copy()
            if pooled_from is not None and pooled_to is not None:
                meta["pooled_output"] = torch.mul(pooled_to, conditioning_to_strength) + torch.mul(pooled_from, (1.0 - conditioning_to_strength))
            elif pooled_from is not None:
                meta["pooled_output"] = pooled_from
            out.append([blended, meta])
        return (out, )
class ConditioningSetTimestepRange:
    @classmethod
    def INPUT_TYPES(s):
        return {"required": {"conditioning": ("CONDITIONING", ),
                             "start": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.001}),
                             "end": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.001})
                             }}

    RETURN_TYPES = ("CONDITIONING",)
    FUNCTION = "set_range"
    CATEGORY = "RES4LYF/conditioning"

    def set_range(self, conditioning, start, end):
        """Restrict `conditioning` to the [start, end] sampling window."""
        ranged = node_helpers.conditioning_set_values(
            conditioning, {"start_percent": start, "end_percent": end})
        return (ranged, )
class ConditioningAverageScheduler: # don't think this is implemented correctly. needs to be reworked
    # Schedules a per-step blend between two conditionings, using one SIGMAS
    # entry as the blend ratio for each step window.
    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "conditioning_0": ("CONDITIONING", ),
                "conditioning_1": ("CONDITIONING", ),
                "ratio": ("SIGMAS", ),
            }
        }

    RETURN_TYPES = ("CONDITIONING",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/conditioning"

    @staticmethod
    def addWeighted(conditioning_to, conditioning_from, conditioning_to_strength): #this function borrowed from comfyui
        # Weighted average of two conditionings; the "from" cond is cropped
        # or zero-padded to the "to" cond's token length before mixing.
        out = []
        if len(conditioning_from) > 1:
            RESplain("Warning: ConditioningAverage conditioning_from contains more than 1 cond, only the first one will actually be applied to conditioning_to.")
        cond_from = conditioning_from[0][0]
        pooled_output_from = conditioning_from[0][1].get("pooled_output", None)
        for i in range(len(conditioning_to)):
            t1 = conditioning_to[i][0]
            pooled_output_to = conditioning_to[i][1].get("pooled_output", pooled_output_from)
            t0 = cond_from[:,:t1.shape[1]]
            if t0.shape[1] < t1.shape[1]:
                # Zero-pad missing tokens so the shapes line up.
                t0 = torch.cat([t0] + [torch.zeros((1, (t1.shape[1] - t0.shape[1]), t1.shape[2]))], dim=1)
            tw = torch.mul(t1, conditioning_to_strength) + torch.mul(t0, (1.0 - conditioning_to_strength))
            t_to = conditioning_to[i][1].copy()
            if pooled_output_from is not None and pooled_output_to is not None:
                t_to["pooled_output"] = torch.mul(pooled_output_to, conditioning_to_strength) + torch.mul(pooled_output_from, (1.0 - conditioning_to_strength))
            elif pooled_output_from is not None:
                t_to["pooled_output"] = pooled_output_from
            n = [tw, t_to]
            out.append(n)
        return out

    @staticmethod
    def create_percent_array(steps):
        # Evenly partition [0, 1] into `steps` start/end windows.
        step_size = 1.0 / steps
        return [{"start_percent": i * step_size, "end_percent": (i + 1) * step_size} for i in range(steps)]

    def main(self, conditioning_0, conditioning_1, ratio):
        # One blended conditioning per sigma value, each restricted to its
        # own slice of the schedule. NOTE(review): the author's comment above
        # flags this as possibly incorrect — verify the blend direction and
        # that `ratio` has exactly one entry per sampling step.
        steps = len(ratio)
        percents = self.create_percent_array(steps)

        cond = []
        for i in range(steps):
            average = self.addWeighted(conditioning_0, conditioning_1, ratio[i].item())
            cond += node_helpers.conditioning_set_values(average, {"start_percent": percents[i]["start_percent"], "end_percent": percents[i]["end_percent"]})
        return (cond,)
class StableCascade_StageB_Conditioning64:
    # Attaches a Stage C latent as the prior for Stage B conditioning.
    @classmethod
    def INPUT_TYPES(s):
        return {"required": { "conditioning": ("CONDITIONING",),
                              "stage_c": ("LATENT",),
                             }}

    RETURN_TYPES = ("CONDITIONING",)
    FUNCTION = "set_prior"
    CATEGORY = "RES4LYF/conditioning"

    @precision_tool.cast_tensor
    def set_prior(self, conditioning, stage_c):
        # Copy each entry's metadata dict and attach the Stage C samples as
        # 'stable_cascade_prior'; the decorator handles precision casting.
        c = []
        for t in conditioning:
            d = t[1].copy()
            d['stable_cascade_prior'] = stage_c['samples']
            n = [t[0], d]
            c.append(n)
        return (c, )
class Conditioning_Recast64:
    # Upcasts conditioning tensors (cond + pooled_output) to float64.
    @classmethod
    def INPUT_TYPES(s):
        return {"required": { "cond_0": ("CONDITIONING",),
                              },
                "optional": { "cond_1": ("CONDITIONING",),}
               }

    RETURN_TYPES = ("CONDITIONING","CONDITIONING",)
    RETURN_NAMES = ("cond_0_recast","cond_1_recast",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/precision"

    @precision_tool.cast_tensor
    def main(self, cond_0, cond_1 = None):
        # NOTE: mutates the input conditioning lists in place (only the
        # first entry of each), then returns them.
        cond_0[0][0] = cond_0[0][0].to(torch.float64)
        cond_0[0][1]["pooled_output"] = cond_0[0][1]["pooled_output"].to(torch.float64)

        if cond_1 is not None:
            cond_1[0][0] = cond_1[0][0].to(torch.float64)
            cond_1[0][1]["pooled_output"] = cond_1[0][1]["pooled_output"].to(torch.float64)

        return (cond_0, cond_1,)
class ConditioningToBase64:
    # Serializes conditioning to a base64 string (via pickle) so prompt
    # encoding can be offloaded to another machine through the API.
    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "conditioning": ("CONDITIONING",),
            },
            "hidden": {
                "unique_id": "UNIQUE_ID",
                "extra_pnginfo": "EXTRA_PNGINFO",
            },
        }

    RETURN_TYPES = ("STRING",)
    FUNCTION = "notify"
    OUTPUT_NODE = True
    OUTPUT_IS_LIST = (True,)
    CATEGORY = "RES4LYF/utilities"

    def notify(self, unique_id=None, extra_pnginfo=None, conditioning=None):
        # Pickle + base64 so the conditioning can travel as plain text.
        conditioning_pickle = pickle.dumps(conditioning)
        conditioning_base64 = base64.b64encode(conditioning_pickle).decode('utf-8')
        text = [conditioning_base64]

        # When invoked from a workflow, also write the encoded string into
        # this node's widget values in the workflow metadata so it is
        # persisted with the output image.
        if unique_id is not None and extra_pnginfo is not None:
            if not isinstance(extra_pnginfo, list):
                RESplain("Error: extra_pnginfo is not a list")
            elif (
                not isinstance(extra_pnginfo[0], dict)
                or "workflow" not in extra_pnginfo[0]
            ):
                RESplain("Error: extra_pnginfo[0] is not a dict or missing 'workflow' key")
            else:
                workflow = extra_pnginfo[0]["workflow"]
                # Locate this node by id within the workflow graph.
                node = next(
                    (x for x in workflow["nodes"] if str(x["id"]) == str(unique_id[0])),
                    None,
                )
                if node:
                    node["widgets_values"] = [text]

        return {"ui": {"text": text}, "result": (text,)}
class Base64ToConditioning:
    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "data": ("STRING", {"default": ""}),
            }
        }

    RETURN_TYPES = ("CONDITIONING",)
    RETURN_NAMES = ("conditioning",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/utilities"

    def main(self, data):
        """Deserialize conditioning produced by ConditioningToBase64.

        WARNING: pickle.loads executes arbitrary code — only feed this node
        data from a trusted source.
        """
        raw = base64.b64decode(data)
        return (pickle.loads(raw),)
class RegionalMask(torch.nn.Module):
    """Callable wrapper around a precomputed regional attention mask.

    Stores the full (txt+img) x (txt+img) attention mask plus the conditioning
    and latent context it was built from, and returns the weighted mask only
    while the sampler's progress lies in [start_percent, end_percent).
    """

    def __init__(self, mask: torch.Tensor, conditioning: torch.Tensor, conditioning_regional: torch.Tensor, latent: torch.Tensor, start_percent: float, end_percent: float, mask_type: str, img_len: int, text_len: int) -> None:
        super().__init__()
        # Fix: keep the mask on its current device rather than the previous
        # hard-coded .to('cuda'), which raised on CPU/MPS-only systems.
        # __call__ moves the mask to the sigma's device on every use anyway.
        self.mask = mask.clone()
        # Deep-copy the conditioning so later pipeline mutations don't leak in.
        self.conditioning = copy.deepcopy(conditioning)
        self.conditioning_regional = copy.deepcopy(conditioning_regional)
        self.latent = latent.clone()
        self.start_percent = start_percent
        self.end_percent = end_percent
        self.mask_type = mask_type
        self.img_len = img_len
        self.text_len = text_len

    def __call__(self, transformer_options, weight=0, dtype=torch.bfloat16, *args, **kwargs):
        # 1 - sigma serves as a denoising-progress proxy (sigma ~1 -> ~0).
        sigma = transformer_options['sigmas'][0]
        if self.start_percent <= 1 - sigma < self.end_percent:
            if self.mask_type == "gradient":
                return self.mask.clone().to(sigma.device) * weight
        # Outside the active window (or for unknown mask types): no mask.
        return None
class RegionalConditioning(torch.nn.Module):
    """Callable wrapper that injects concatenated regional text conditioning
    while the sampler's progress lies in [start_percent, end_percent)."""

    def __init__(self, conditioning: torch.Tensor, region_cond: torch.Tensor, start_percent: float, end_percent: float) -> None:
        super().__init__()
        # Fix: keep region_cond on its current device rather than the previous
        # hard-coded .to('cuda') (raised without CUDA); both call paths move it
        # to the sigma's device before use.
        self.conditioning = conditioning
        self.region_cond = region_cond.clone()
        self.start_percent = start_percent
        self.end_percent = end_percent

    def __call__(self, transformer_options, dtype=torch.bfloat16, *args, **kwargs):
        sigma = transformer_options['sigmas'][0]
        if self.start_percent <= 1 - sigma < self.end_percent:
            return self.region_cond.clone().to(sigma.device).to(dtype)
        return None

    def concat_cond(self, context, transformer_options, dtype=torch.bfloat16, *args, **kwargs):
        """Return region_cond alone (no base conditioning) or context + region_cond."""
        sigma = transformer_options['sigmas'][0]
        if not (self.start_percent <= 1 - sigma < self.end_percent):
            return None
        region_cond = self.region_cond.clone().to(sigma.device).to(dtype)
        if self.conditioning is None:
            return region_cond
        # NOTE(review): the concat path casts to bfloat16 regardless of the
        # dtype argument, matching the original behavior -- confirm intended.
        return torch.cat([context, region_cond.to(torch.bfloat16)], dim=1)
class FluxRegionalPrompt:
    """Append one (mask, cond) region to a CONDITIONING_REGIONAL list and emit
    the inverted mask for chaining the next (background) region."""

    RETURN_TYPES = ("CONDITIONING_REGIONAL","MASK",)
    RETURN_NAMES = ("cond_regional","mask_inv")
    FUNCTION = "main"
    CATEGORY = "RES4LYF/conditioning"

    @classmethod
    def INPUT_TYPES(s):
        return {"required": {
            "cond": ("CONDITIONING",),
        }, "optional": {
            "cond_regional": ("CONDITIONING_REGIONAL",),
            "mask": ("MASK",),
        }}

    def main(self, cond, mask, cond_regional=None):
        """Returns (cond_regional + new region, 1 - mask).

        cond_regional: existing region list, or None to start a new one.
        Fix: the default was a mutable `[]`; use a None sentinel instead.
        The caller's list is copied, never mutated.
        """
        cond_regional = [] if cond_regional is None else [*cond_regional]
        # Only the raw embedding tensor of the first conditioning entry is kept.
        cond_regional.append({'mask': mask, 'cond': cond[0][0]})
        mask_inv = 1 - mask
        return (cond_regional, mask_inv,)
def fp_not(tensor):
    """Fuzzy logical NOT: complement of a soft (0..1) mask."""
    complement = 1 - tensor
    return complement
def fp_or(tensor1, tensor2):
    """Fuzzy logical OR: element-wise maximum of two soft masks."""
    return torch.max(tensor1, tensor2)
def fp_and(tensor1, tensor2):
    """Fuzzy logical AND: element-wise minimum of two soft masks."""
    return torch.min(tensor1, tensor2)
class RegionalGenerateConditioningsAndMasks:
    """Builds the regional cross/self-attention mask and the regional
    conditioning wrapper for a given latent.

    Instances are attached to a conditioning dict by FluxRegionalConditioning
    and invoked later, once the latent shape is known.
    """
    def __init__(self, conditioning, conditioning_regional, weight, start_percent, end_percent, mask_type):
        # conditioning:          base CONDITIONING list, or None for regional-only use
        # conditioning_regional: list of {'mask': ..., 'cond': ...} region dicts
        # start/end_percent:     active window as a fraction of denoising progress
        self.conditioning = conditioning
        self.conditioning_regional = conditioning_regional
        self.weight = weight
        self.start_percent = start_percent
        self.end_percent = end_percent
        self.mask_type = mask_type
    def __call__(self, latent):
        b, c, h, w = latent.shape
        h //= 2 # 16x16 PE
        w //= 2
        img_len = h * w
        # All regional text embeddings concatenated along the token axis.
        cond_r = torch.cat([cond_reg['cond'] for cond_reg in self.conditioning_regional], dim=1)
        if self.conditioning is not None:
            # Reserve a leading 256-token slot for the main prompt; its mask
            # covers the whole image (placeholder ones-tensors here).
            text_len = 256 + cond_r.shape[1] # 256 = main prompt tokens... half of t5, comfy issue
            conditioning_regional = [
                {
                    'mask': torch.ones((1, h, w), dtype=torch.bfloat16),
                    'cond': torch.ones((1, 256, 4096), dtype=torch.bfloat16),
                },
                *self.conditioning_regional,
            ]
        else:
            text_len = cond_r.shape[1] # 256 = main prompt tokens... half of t5, comfy issue
            conditioning_regional = self.conditioning_regional
        # Full (txt+img) x (txt+img) mask, plus foreground/background
        # accumulators for the image-image (self-attention) quadrant.
        all_attn_mask = torch.zeros((text_len+img_len, text_len+img_len), dtype=torch.bfloat16)
        self_attn_mask = torch.zeros(( img_len, img_len), dtype=torch.bfloat16)
        self_attn_mask_bkg = torch.zeros(( img_len, img_len), dtype=torch.bfloat16)
        prev_len = 0
        for cond_reg_dict in conditioning_regional:
            cond_reg = cond_reg_dict['cond']
            region_mask = cond_reg_dict['mask'][0]
            # Region mask resampled to latent-token resolution, then broadcast:
            # one column per text token (img->txt) and its transpose (txt->img).
            img2txt_mask = torch.nn.functional.interpolate(region_mask[None, None, :, :], (h, w), mode='nearest-exact').flatten().unsqueeze(1).repeat(1, cond_reg.size(1))
            txt2img_mask = img2txt_mask.transpose(-1, -2)
            # Square (img_len x img_len) versions for the self-attention quadrant.
            img2txt_mask_sq = torch.nn.functional.interpolate(region_mask[None, None, :, :], (h, w), mode='nearest-exact').flatten().unsqueeze(1).repeat(1, img_len)
            txt2img_mask_sq = img2txt_mask_sq.transpose(-1, -2)
            curr_len = prev_len + cond_reg.shape[1]
            all_attn_mask[prev_len:curr_len, prev_len:curr_len] = 1.0 # self TXT 2 TXT
            all_attn_mask[prev_len:curr_len, text_len: ] = txt2img_mask # cross TXT 2 regional IMG
            all_attn_mask[text_len: , prev_len:curr_len] = img2txt_mask # cross regional IMG 2 TXT
            # Foreground: both pixels inside the region (fuzzy AND), OR-ed
            # across regions; background: both pixels outside it.
            self_attn_mask = fp_or(self_attn_mask , fp_and( img2txt_mask_sq, txt2img_mask_sq))
            self_attn_mask_bkg = fp_or(self_attn_mask_bkg, fp_and(img2txt_mask_sq.max()-img2txt_mask_sq, txt2img_mask_sq.max()-txt2img_mask_sq))
            prev_len = curr_len
        all_attn_mask[text_len:, text_len:] = fp_or(self_attn_mask, self_attn_mask_bkg) #combine foreground/background self-attn
        # Wrap both results in step-window-gated callables.
        all_attn_mask = RegionalMask(all_attn_mask, self.conditioning, self.conditioning_regional, latent, self.start_percent, self.end_percent, self.mask_type, img_len, text_len)
        regional_conditioning = RegionalConditioning(self.conditioning, cond_r, self.start_percent, self.end_percent)
        return regional_conditioning, all_attn_mask
class FluxRegionalConditioning:
    """Packs a regional-conditioning generator plus per-step weight/floor
    schedules onto a CONDITIONING output for the Flux regional pipeline."""
    @classmethod
    def INPUT_TYPES(s):
        return {"required": {
            "mask_weight": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.01}),
            "self_attn_floor": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.01}),
            "start_percent": ("FLOAT", {"default": 0, "min": 0.0, "max": 1.0, "step": 0.01}),
            "end_percent": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01}),
            "mask_type": (["gradient"], {"default": "gradient"}),
            },
            "optional": {
                "conditioning": ("CONDITIONING",),
                "conditioning_regional": ("CONDITIONING_REGIONAL",),
                "mask_weights": ("SIGMAS", ),
                "self_attn_floors": ("SIGMAS", ),
            }}
    RETURN_TYPES = ("CONDITIONING",)
    RETURN_NAMES = ("conditioning",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/conditioning"
    def main(self, conditioning_regional, mask_weight=1.0, start_percent=0.0, end_percent=1.0, start_step=0, end_step=10000, conditioning=None, mask_weights=None, self_attn_floors=None, self_attn_floor=0.0, mask_type="gradient", latent=None):
        # NOTE(review): start_step, end_step, and latent are accepted but unused
        # in this method -- confirm whether they are read by callers elsewhere.
        weight, weights = mask_weight, mask_weights
        floor, floors = self_attn_floor, self_attn_floors
        default_dtype = torch.float64
        max_steps = 10000
        # Turn the scalar weight/floor into per-step schedules (or scale the
        # provided SIGMAS schedules), then zero-pad so indexing never overruns.
        weights = initialize_or_scale(weights, weight, max_steps).to(default_dtype)
        weights = F.pad(weights, (0, max_steps), value=0.0)
        floors = initialize_or_scale(floors, floor, max_steps).to(default_dtype)
        floors = F.pad(floors, (0, max_steps), value=0.0)
        # Deferred generator: runs once the latent shape is known (at sampling time).
        regional_generate_conditionings_and_masks_fn = RegionalGenerateConditioningsAndMasks(conditioning, conditioning_regional, weight, start_percent, end_percent, mask_type)
        if conditioning is None:
            # Synthesize an all-zeros base conditioning matching the first
            # region's embedding dtype/device (pooled_output is 1x768 zeros).
            conditioning = [
                [
                    torch.zeros_like(conditioning_regional[0]['cond']),
                    {'pooled_output':
                        torch.zeros((1,768), dtype=conditioning_regional[0]['cond'].dtype, device=conditioning_regional[0]['cond'].device),
                    }
                ],
            ]
        # Attach the generator and schedules to the first conditioning entry.
        # NOTE(review): mutates the incoming conditioning dict before the
        # deepcopy below -- the caller's object also gains these keys.
        conditioning[0][1]['regional_generate_conditionings_and_masks_fn'] = regional_generate_conditionings_and_masks_fn
        conditioning[0][1]['regional_conditioning_weights'] = weights
        conditioning[0][1]['regional_conditioning_floors'] = floors
        return (copy.deepcopy(conditioning),)
"""
from .models import ReFluxPatcher
class ClownRegionalConditioningFlux:
@classmethod
def INPUT_TYPES(s):
return {"required": {
"regional_model": (["auto", "deactivate"], {"default": "auto"}),
"mask_weight": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.01}),
"region_bleed": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.01}),
"start_percent": ("FLOAT", {"default": 0, "min": 0.0, "max": 1.0, "step": 0.01}),
"end_percent": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01}),
"mask_type": (["gradient"], {"default": "gradient"}),
"invert_mask": ("BOOLEAN", {"default": False}),
},
"optional": {
"model": ("MODEL", ),
"positive_masked": ("CONDITIONING", ),
"positive_unmasked": ("CONDITIONING", ),
"mask": ("MASK", ),
"mask_weights": ("SIGMAS", ),
"region_bleeds": ("SIGMAS", ),
}}
RETURN_TYPES = ("MODEL", "CONDITIONING",)
RETURN_NAMES = ("model", "positive",)
FUNCTION = "main"
CATEGORY = "RES4LYF/conditioning"
def main(self, model, regional_model, mask_weight=1.0, start_percent=0.0, end_percent=1.0, positive_masked=None, positive_unmasked=None, mask_weights=None, region_bleeds=None, region_bleed=0.0, mask_type="gradient", mask=None, invert_mask=False):
if regional_model == "auto":
reflux_enable = True
else:
model, = ReFluxPatcher().main(model, enable=False)
return (model, positive_masked,)
if invert_mask and mask is not None:
mask = 1-mask
weight, weights = mask_weight, mask_weights
floor, floors = region_bleed, region_bleeds
default_dtype = torch.float64
max_steps = 10000
weights = initialize_or_scale(weights, weight, max_steps).to(default_dtype)
weights = F.pad(weights, (0, max_steps), value=0.0)
floors = initialize_or_scale(floors, floor, max_steps).to(default_dtype)
floors = F.pad(floors, (0, max_steps), value=0.0)
if (positive_masked is None) and (positive_unmasked is None):
positive = None
reflux_enable = False
elif mask is not None:
if regional_model == "auto":
reflux_enable = True
else:
reflux_enable = False
if positive_unmasked is None:
if positive_unmasked is None:
positive_unmasked = [[
torch.zeros((1, 256, 4096)),
{'pooled_output': torch.zeros((1, 768))}
]]
cond_regional, mask_inv = FluxRegionalPrompt().main(cond=positive_masked, mask=mask)
cond_regional, mask_inv_inv = FluxRegionalPrompt().main(cond=positive_unmasked , cond_regional=cond_regional, mask=mask_inv)
positive, = FluxRegionalConditioning().main(conditioning_regional=cond_regional, self_attn_floor=floor, self_attn_floors=floors, mask_weight=weight, mask_weights=weights, start_percent=start_percent, end_percent=end_percent, mask_type=mask_type)
else:
positive = positive_masked
reflux_enable = False
if not reflux_enable:
model, = ReFluxPatcher().main(model, enable=False)
return (model, positive_masked,)
else:
model, = ReFluxPatcher().main(model, enable=True)
return (model, positive,)
"""
================================================
FILE: legacy/constants.py
================================================
# Hard upper bound on sampler steps; used to size per-step schedules.
MAX_STEPS = 10000

# Display names for the implicit solver step types exposed in the UI.
IMPLICIT_TYPE_NAMES = [
    "predictor-corrector",
    "rebound",
    "retro-eta",
    "bongmath",
]
================================================
FILE: legacy/deis_coefficients.py
================================================
# Adapted from: https://github.com/zju-pi/diff-sampler/blob/main/gits-main/solver_utils.py
# fixed the calcs for "rhoab" which suffered from an off-by-one error and made some other minor corrections
import torch
import numpy as np
# A pytorch reimplementation of DEIS (https://github.com/qsh-zh/deis).
#############################
### Utils for DEIS solver ###
#############################
#----------------------------------------------------------------------------
# Transfer from the input time (sigma) used in EDM to that (t) used in DEIS.
def edm2t(edm_steps, epsilon_s=1e-3, sigma_min=0.002, sigma_max=80):
vp_sigma = lambda beta_d, beta_min: lambda t: (np.e ** (0.5 * beta_d * (t ** 2) + beta_min * t) - 1) ** 0.5
vp_sigma_inv = lambda beta_d, beta_min: lambda sigma: ((beta_min ** 2 + 2 * beta_d * (sigma ** 2 + 1).log()).sqrt() - beta_min) / beta_d
vp_beta_d = 2 * (np.log(torch.tensor(sigma_min).cpu() ** 2 + 1) / epsilon_s - np.log(torch.tensor(sigma_max).cpu() ** 2 + 1)) / (epsilon_s - 1)
vp_beta_min = np.log(torch.tensor(sigma_max).cpu() ** 2 + 1) - 0.5 * vp_beta_d
t_steps = vp_sigma_inv(vp_beta_d.clone().detach().cpu(), vp_beta_min.clone().detach().cpu())(edm_steps.clone().detach().cpu())
return t_steps, vp_beta_min, vp_beta_d + vp_beta_min
#----------------------------------------------------------------------------
def cal_poly(prev_t, j, taus):
    """Evaluate the j-th Lagrange basis polynomial over nodes prev_t at points taus."""
    result = 1
    num_nodes = prev_t.shape[0]
    for idx in range(num_nodes):
        if idx != j:
            result = result * (taus - prev_t[idx]) / (prev_t[j] - prev_t[idx])
    return result
#----------------------------------------------------------------------------
# Transfer from t to alpha_t.
def t2alpha_fn(beta_0, beta_1, t):
    """VP-SDE alpha(t) = exp(-0.5 * t^2 * (beta_1 - beta_0) - t * beta_0)."""
    log_alpha = -0.5 * t ** 2 * (beta_1 - beta_0) - t * beta_0
    return torch.exp(log_alpha)
#----------------------------------------------------------------------------
def cal_integrand(beta_0, beta_1, taus):
    """Integrand for the DEIS 'tab' coefficients:
    -0.5 * d(log alpha)/dtau / sqrt(alpha * (1 - alpha)),
    with d(log alpha)/dtau obtained via autograd rather than in closed form.
    """
    # Escape any surrounding inference_mode so autograd can record operations;
    # the clones detach the arguments from inference-mode storage.
    with torch.inference_mode(mode=False):
        taus = taus.clone()
        beta_0 = beta_0.clone()
        beta_1 = beta_1.clone()
        with torch.enable_grad():
            taus.requires_grad_(True)
            alpha = t2alpha_fn(beta_0, beta_1, taus)
            log_alpha = alpha.log()
            # Summing gives a scalar objective; its gradient w.r.t. taus is the
            # element-wise derivative d(log alpha)/dtau.
            log_alpha.sum().backward()
            d_log_alpha_dtau = taus.grad
    integrand = -0.5 * d_log_alpha_dtau / torch.sqrt(alpha * (1 - alpha))
    return integrand
#----------------------------------------------------------------------------
def get_deis_coeff_list(t_steps, max_order, N=10000, deis_mode='tab'):
    """
    Get the coefficient list for DEIS sampling.
    Args:
        t_steps: A pytorch tensor. The time steps for sampling.
        max_order: A `int`. Maximum order of the solver. 1 <= max_order <= 4
        N: A `int`. Use how many points to perform the numerical integration when deis_mode=='tab'.
        deis_mode: A `str`. Select between 'tab' and 'rhoab'. Type of DEIS.
    Returns:
        A list of lists: C[i] holds the multistep coefficients for step i
        (empty list for first-order steps).
    """
    if deis_mode == 'tab':
        # Numerical ('tab') variant: integrate the VP-SDE integrand against
        # the Lagrange basis polynomials over each [t_cur, t_next] interval.
        t_steps, beta_0, beta_1 = edm2t(t_steps)
        C = []
        for i, (t_cur, t_next) in enumerate(zip(t_steps[:-1], t_steps[1:])):
            order = min(i+1, max_order)  # warm-up: order grows with available history
            if order == 1:
                C.append([])
            else:
                taus = torch.linspace(t_cur, t_next, N) # split the interval for integral approximation
                dtau = (t_next - t_cur) / N
                # Current step plus the previous (order-1) steps, newest first.
                prev_t = t_steps[[i - k for k in range(order)]]
                coeff_temp = []
                integrand = cal_integrand(beta_0, beta_1, taus)
                for j in range(order):
                    # Riemann sum of integrand * j-th Lagrange basis polynomial.
                    poly = cal_poly(prev_t, j, taus)
                    coeff_temp.append(torch.sum(integrand * poly) * dtau)
                C.append(coeff_temp)
    elif deis_mode == 'rhoab':
        # Analytical ('rhoab') variant: closed-form integrals of the basis
        # polynomials over [start, end].
        # Analytical solution, second order
        def get_def_integral_2(a, b, start, end, c):
            coeff = (end**3 - start**3) / 3 - (end**2 - start**2) * (a + b) / 2 + (end - start) * a * b
            return coeff / ((c - a) * (c - b))
        # Analytical solution, third order
        def get_def_integral_3(a, b, c, start, end, d):
            coeff = (end**4 - start**4) / 4 - (end**3 - start**3) * (a + b + c) / 3 \
                + (end**2 - start**2) * (a*b + a*c + b*c) / 2 - (end - start) * a * b * c
            return coeff / ((d - a) * (d - b) * (d - c))
        C = []
        for i, (t_cur, t_next) in enumerate(zip(t_steps[:-1], t_steps[1:])):
            order = min(i+1, max_order) #fixed order calcs
            if order == 1:
                C.append([])
            else:
                # Current step plus the previous `order` steps, newest first.
                prev_t = t_steps[[i - k for k in range(order+1)]]
                if order == 2:
                    coeff_cur = ((t_next - prev_t[1])**2 - (t_cur - prev_t[1])**2) / (2 * (t_cur - prev_t[1]))
                    coeff_prev1 = (t_next - t_cur)**2 / (2 * (prev_t[1] - t_cur))
                    coeff_temp = [coeff_cur, coeff_prev1]
                elif order == 3:
                    coeff_cur = get_def_integral_2(prev_t[1], prev_t[2], t_cur, t_next, t_cur)
                    coeff_prev1 = get_def_integral_2(t_cur, prev_t[2], t_cur, t_next, prev_t[1])
                    coeff_prev2 = get_def_integral_2(t_cur, prev_t[1], t_cur, t_next, prev_t[2])
                    coeff_temp = [coeff_cur, coeff_prev1, coeff_prev2]
                elif order == 4:
                    coeff_cur = get_def_integral_3(prev_t[1], prev_t[2], prev_t[3], t_cur, t_next, t_cur)
                    coeff_prev1 = get_def_integral_3(t_cur, prev_t[2], prev_t[3], t_cur, t_next, prev_t[1])
                    coeff_prev2 = get_def_integral_3(t_cur, prev_t[1], prev_t[3], t_cur, t_next, prev_t[2])
                    coeff_prev3 = get_def_integral_3(t_cur, prev_t[1], prev_t[2], t_cur, t_next, prev_t[3])
                    coeff_temp = [coeff_cur, coeff_prev1, coeff_prev2, coeff_prev3]
                C.append(coeff_temp)
    return C
================================================
FILE: legacy/flux/controlnet.py
================================================
#Original code can be found on: https://github.com/XLabs-AI/x-flux/blob/main/src/flux/controlnet.py
#modified to support different types of flux controlnets
import torch
import math
from torch import Tensor, nn
from einops import rearrange, repeat
from .layers import (DoubleStreamBlock, EmbedND, LastLayer,
MLPEmbedder, SingleStreamBlock,
timestep_embedding)
from .model import Flux
import comfy.ldm.common_dit
class MistolineCondDownsamplBlock(nn.Module):
    """Conv encoder for Mistoline pixel hints: 3 -> 16 channels with SiLU
    activations and three stride-2 stages (8x spatial downsample)."""
    def __init__(self, dtype=None, device=None, operations=None):
        super().__init__()
        self.encoder = nn.Sequential(
            operations.Conv2d(3, 16, 3, padding=1, dtype=dtype, device=device),
            nn.SiLU(),
            operations.Conv2d(16, 16, 1, dtype=dtype, device=device),
            nn.SiLU(),
            operations.Conv2d(16, 16, 3, padding=1, dtype=dtype, device=device),
            nn.SiLU(),
            operations.Conv2d(16, 16, 3, padding=1, stride=2, dtype=dtype, device=device),
            nn.SiLU(),
            operations.Conv2d(16, 16, 3, padding=1, dtype=dtype, device=device),
            nn.SiLU(),
            operations.Conv2d(16, 16, 3, padding=1, stride=2, dtype=dtype, device=device),
            nn.SiLU(),
            operations.Conv2d(16, 16, 3, padding=1, dtype=dtype, device=device),
            nn.SiLU(),
            operations.Conv2d(16, 16, 3, padding=1, stride=2, dtype=dtype, device=device),
            nn.SiLU(),
            operations.Conv2d(16, 16, 1, dtype=dtype, device=device),
            nn.SiLU(),
            operations.Conv2d(16, 16, 3, padding=1, dtype=dtype, device=device)
        )
    def forward(self, x):
        # x: pixel-space hint image; returns the downsampled 16-channel feature map.
        return self.encoder(x)
class MistolineControlnetBlock(nn.Module):
    """Single Mistoline ControlNet projection: Linear followed by SiLU."""

    def __init__(self, hidden_size, dtype=None, device=None, operations=None):
        super().__init__()
        self.linear = operations.Linear(hidden_size, hidden_size, dtype=dtype, device=device)
        self.act = nn.SiLU()

    def forward(self, x):
        projected = self.linear(x)
        return self.act(projected)
class ControlNetFlux(Flux):
    """ControlNet built on the Flux backbone (instantiated without its final
    layer).  Per-block projections of the backbone activations are returned as
    "input"/"output" tuples to be injected into the main model's double/single
    stream blocks.  Supports latent inputs, Mistoline pixel hints, and plain
    conv-encoded pixel hints.
    """
    def __init__(self, latent_input=False, num_union_modes=0, mistoline=False, control_latent_channels=None, image_model=None, dtype=None, device=None, operations=None, **kwargs):
        super().__init__(final_layer=False, dtype=dtype, device=device, operations=operations, **kwargs)
        # Block counts of the full-size Flux model the controlnet outputs are
        # broadcast to (19 double blocks, 38 single blocks).
        self.main_model_double = 19
        self.main_model_single = 38
        self.mistoline = mistoline
        # add ControlNet blocks: one projection per backbone block.
        if self.mistoline:
            control_block = lambda : MistolineControlnetBlock(self.hidden_size, dtype=dtype, device=device, operations=operations)
        else:
            control_block = lambda : operations.Linear(self.hidden_size, self.hidden_size, dtype=dtype, device=device)
        self.controlnet_blocks = nn.ModuleList([])
        for _ in range(self.params.depth):
            self.controlnet_blocks.append(control_block())
        self.controlnet_single_blocks = nn.ModuleList([])
        for _ in range(self.params.depth_single_blocks):
            self.controlnet_single_blocks.append(control_block())
        # Union mode: learned embedding per control type, prepended to txt tokens.
        self.num_union_modes = num_union_modes
        self.controlnet_mode_embedder = None
        if self.num_union_modes > 0:
            self.controlnet_mode_embedder = operations.Embedding(self.num_union_modes, self.hidden_size, dtype=dtype, device=device)
        self.gradient_checkpointing = False
        self.latent_input = latent_input
        if control_latent_channels is None:
            control_latent_channels = self.in_channels
        else:
            control_latent_channels *= 2 * 2 #patch size
        self.pos_embed_input = operations.Linear(control_latent_channels, self.hidden_size, bias=True, dtype=dtype, device=device)
        # Pixel-space hints need an encoder; latent inputs are used directly.
        if not self.latent_input:
            if self.mistoline:
                self.input_cond_block = MistolineCondDownsamplBlock(dtype=dtype, device=device, operations=operations)
            else:
                self.input_hint_block = nn.Sequential(
                    operations.Conv2d(3, 16, 3, padding=1, dtype=dtype, device=device),
                    nn.SiLU(),
                    operations.Conv2d(16, 16, 3, padding=1, dtype=dtype, device=device),
                    nn.SiLU(),
                    operations.Conv2d(16, 16, 3, padding=1, stride=2, dtype=dtype, device=device),
                    nn.SiLU(),
                    operations.Conv2d(16, 16, 3, padding=1, dtype=dtype, device=device),
                    nn.SiLU(),
                    operations.Conv2d(16, 16, 3, padding=1, stride=2, dtype=dtype, device=device),
                    nn.SiLU(),
                    operations.Conv2d(16, 16, 3, padding=1, dtype=dtype, device=device),
                    nn.SiLU(),
                    operations.Conv2d(16, 16, 3, padding=1, stride=2, dtype=dtype, device=device),
                    nn.SiLU(),
                    operations.Conv2d(16, 16, 3, padding=1, dtype=dtype, device=device)
                )
    def forward_orig(
        self,
        img: Tensor,
        img_ids: Tensor,
        controlnet_cond: Tensor,
        txt: Tensor,
        txt_ids: Tensor,
        timesteps: Tensor,
        y: Tensor,
        guidance: Tensor = None,
        control_type: Tensor = None,
    ) -> Tensor:
        """Run the backbone over (img + control) tokens and collect the
        per-block projection outputs as a dict of "input"/"output" tuples."""
        if img.ndim != 3 or txt.ndim != 3:
            raise ValueError("Input img and txt tensors must have 3 dimensions.")
        # running on sequences img
        img = self.img_in(img)
        controlnet_cond = self.pos_embed_input(controlnet_cond)
        img = img + controlnet_cond  # control signal is added to the image tokens
        vec = self.time_in(timestep_embedding(timesteps, 256))
        if self.params.guidance_embed:
            vec = vec + self.guidance_in(timestep_embedding(guidance, 256))
        vec = vec + self.vector_in(y)
        txt = self.txt_in(txt)
        # Union mode: prepend control-type embeddings (duplicating the first
        # txt id to keep ids aligned with the extended txt sequence).
        if self.controlnet_mode_embedder is not None and len(control_type) > 0:
            control_cond = self.controlnet_mode_embedder(torch.tensor(control_type, device=img.device), out_dtype=img.dtype).unsqueeze(0).repeat((txt.shape[0], 1, 1))
            txt = torch.cat([control_cond, txt], dim=1)
            txt_ids = torch.cat([txt_ids[:,:1], txt_ids], dim=1)
        ids = torch.cat((txt_ids, img_ids), dim=1)
        pe = self.pe_embedder(ids)
        # One projected output per double block.
        controlnet_double = ()
        for i in range(len(self.double_blocks)):
            img, txt = self.double_blocks[i](img=img, txt=txt, vec=vec, pe=pe)
            controlnet_double = controlnet_double + (self.controlnet_blocks[i](img),)
        img = torch.cat((txt, img), 1)
        # One projected output per single block (image tokens only).
        controlnet_single = ()
        for i in range(len(self.single_blocks)):
            img = self.single_blocks[i](img, vec=vec, pe=pe)
            controlnet_single = controlnet_single + (self.controlnet_single_blocks[i](img[:, txt.shape[1] :, ...]),)
        # Broadcast the (fewer) controlnet outputs across the main model's
        # block count.  NOTE: local `repeat` shadows einops.repeat here.
        repeat = math.ceil(self.main_model_double / len(controlnet_double))
        if self.latent_input:
            # Repeat each output consecutively (x0,x0,...,x1,x1,...).
            out_input = ()
            for x in controlnet_double:
                out_input += (x,) * repeat
        else:
            # Repeat the whole sequence cyclically (x0,x1,...,x0,x1,...).
            out_input = (controlnet_double * repeat)
        out = {"input": out_input[:self.main_model_double]}
        if len(controlnet_single) > 0:
            repeat = math.ceil(self.main_model_single / len(controlnet_single))
            out_output = ()
            if self.latent_input:
                for x in controlnet_single:
                    out_output += (x,) * repeat
            else:
                out_output = (controlnet_single * repeat)
            out["output"] = out_output[:self.main_model_single]
        return out
    def forward(self, x, timesteps, context, y, guidance=None, hint=None, **kwargs):
        """Patchify x into image tokens, encode the hint for the active input
        mode, build positional ids, and delegate to forward_orig."""
        patch_size = 2
        if self.latent_input:
            hint = comfy.ldm.common_dit.pad_to_patch_size(hint, (patch_size, patch_size))
        elif self.mistoline:
            hint = hint * 2.0 - 1.0  # rescale [0,1] pixel hints to [-1,1]
            hint = self.input_cond_block(hint)
        else:
            hint = hint * 2.0 - 1.0
            hint = self.input_hint_block(hint)
        # Patchify the hint into tokens, matching the image token layout.
        hint = rearrange(hint, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=patch_size, pw=patch_size)
        bs, c, h, w = x.shape
        x = comfy.ldm.common_dit.pad_to_patch_size(x, (patch_size, patch_size))
        img = rearrange(x, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=patch_size, pw=patch_size)
        # Rounded-up token grid dimensions after padding.
        h_len = ((h + (patch_size // 2)) // patch_size)
        w_len = ((w + (patch_size // 2)) // patch_size)
        # Positional ids: channel 1 = row index, channel 2 = column index.
        img_ids = torch.zeros((h_len, w_len, 3), device=x.device, dtype=x.dtype)
        img_ids[..., 1] = img_ids[..., 1] + torch.linspace(0, h_len - 1, steps=h_len, device=x.device, dtype=x.dtype)[:, None]
        img_ids[..., 2] = img_ids[..., 2] + torch.linspace(0, w_len - 1, steps=w_len, device=x.device, dtype=x.dtype)[None, :]
        img_ids = repeat(img_ids, "h w c -> b (h w) c", b=bs)
        txt_ids = torch.zeros((bs, context.shape[1], 3), device=x.device, dtype=x.dtype)
        return self.forward_orig(img, img_ids, hint, context, txt_ids, timesteps, y, guidance, control_type=kwargs.get("control_type", []))
================================================
FILE: legacy/flux/layers.py
================================================
# Adapted from: https://github.com/black-forest-labs/flux
import math
import torch
from torch import Tensor, nn
import torch.nn.functional as F
from einops import rearrange
from torch import Tensor
from dataclasses import dataclass
from .math import attention, rope, apply_rope
import comfy.ldm.common_dit
class EmbedND(nn.Module):
    """Rotary positional embedding over several ID axes: one rope embedding per
    trailing axis of the ids tensor, concatenated and broadcast over heads."""

    def __init__(self, dim: int, theta: int, axes_dim: list):
        super().__init__()
        self.dim = dim
        self.theta = theta
        self.axes_dim = axes_dim

    def forward(self, ids: Tensor) -> Tensor:
        per_axis = [
            rope(ids[..., axis], self.axes_dim[axis], self.theta)
            for axis in range(ids.shape[-1])
        ]
        emb = torch.cat(per_axis, dim=-3)
        # Insert a singleton head axis so the embedding broadcasts over heads.
        return emb.unsqueeze(1)
def attention_weights(q, k):
    """Scaled softmax attention weights, computed with in-place ops to limit
    peak memory; NaNs/infs from overflow are clamped to fp16-safe bounds."""
    logits = torch.matmul(q, k.transpose(-2, -1))
    logits.div_(math.sqrt(q.size(-1)))   # scale by sqrt(head_dim), in place
    logits.exp_()
    logits /= logits.sum(dim=-1, keepdim=True)
    return logits.nan_to_num_(0.0, 65504., -65504.)
def timestep_embedding(t: Tensor, dim, max_period=10000, time_factor: float = 1000.0):
    """
    Create sinusoidal timestep embeddings.

    :param t: a 1-D Tensor of N indices, one per batch element (may be fractional);
        scaled by `time_factor` before embedding.
    :param dim: the dimension of the output.
    :param max_period: controls the minimum frequency of the embeddings.
    :return: an (N, dim) Tensor of positional embeddings, cast back to t's dtype
        when t is floating point.
    """
    scaled = time_factor * t
    half = dim // 2
    # Geometric frequency ladder from 1 down to 1/max_period.
    freqs = torch.exp(-math.log(max_period) * torch.arange(start=0, end=half, dtype=torch.float32, device=t.device) / half)
    args = scaled[:, None].float() * freqs[None]
    embedding = torch.cat([torch.cos(args), torch.sin(args)], dim=-1)
    if dim % 2:
        # Odd output dims get a zero-padded final column.
        embedding = torch.cat([embedding, torch.zeros_like(embedding[:, :1])], dim=-1)
    if torch.is_floating_point(t):
        embedding = embedding.to(t)
    return embedding
class MLPEmbedder(nn.Module):
    """Two-layer MLP (Linear -> SiLU -> Linear) mapping in_dim to hidden_dim."""

    def __init__(self, in_dim: int, hidden_dim: int, dtype=None, device=None, operations=None):
        super().__init__()
        self.in_layer = operations.Linear(in_dim, hidden_dim, bias=True, dtype=dtype, device=device)
        self.silu = nn.SiLU()
        self.out_layer = operations.Linear(hidden_dim, hidden_dim, bias=True, dtype=dtype, device=device)

    def forward(self, x: Tensor) -> Tensor:
        hidden = self.silu(self.in_layer(x))
        return self.out_layer(hidden)
class RMSNorm(torch.nn.Module):
    """RMS normalization with a learned per-channel scale.

    NOTE(review): the scale parameter is created with torch.empty -- presumably
    its values are loaded from the model's state dict before use; confirm no
    path runs this module uninitialized.
    """
    def __init__(self, dim: int, dtype=None, device=None, operations=None):
        super().__init__()
        self.scale = nn.Parameter(torch.empty((dim), dtype=dtype, device=device)) # self.scale.shape = 128
    def forward(self, x: Tensor):
        # Delegates to comfy's shared rms_norm helper with eps=1e-6.
        return comfy.ldm.common_dit.rms_norm(x, self.scale, 1e-6)
class QKNorm(torch.nn.Module):
    """Applies independent RMSNorm to attention queries and keys, casting both
    to v's dtype/device before returning."""

    def __init__(self, dim: int, dtype=None, device=None, operations=None):
        super().__init__()
        self.query_norm = RMSNorm(dim, dtype=dtype, device=device, operations=operations)
        self.key_norm = RMSNorm(dim, dtype=dtype, device=device, operations=operations)

    def forward(self, q: Tensor, k: Tensor, v: Tensor) -> tuple:
        normed_q = self.query_norm(q).to(v)
        normed_k = self.key_norm(k).to(v)
        return normed_q, normed_k
class SelfAttention(nn.Module):
    """QKV projection + per-head QK normalization + output projection.
    The attention computation itself is performed by callers."""

    def __init__(self, dim: int, num_heads: int = 8, qkv_bias: bool = False, dtype=None, device=None, operations=None):
        super().__init__()
        self.num_heads = num_heads
        head_dim = dim // num_heads  # per-head channel width
        # Fused projection producing query, key, and value in one pass.
        self.qkv = operations.Linear(dim, dim * 3, bias=qkv_bias, dtype=dtype, device=device)
        self.norm = QKNorm(head_dim, dtype=dtype, device=device, operations=operations)
        self.proj = operations.Linear(dim, dim, dtype=dtype, device=device)
@dataclass
class ModulationOut:
    """One shift/scale/gate triple produced by Modulation.forward."""
    shift: Tensor  # additive offset (used as x * (1 + scale) + shift downstream)
    scale: Tensor  # multiplicative term, applied as (1 + scale)
    gate: Tensor   # gating coefficient; exact usage lies outside this view
class Modulation(nn.Module):
    """Projects a conditioning vector into shift/scale/gate modulation triples:
    two triples when double=True (multiplier 6), otherwise one (multiplier 3)."""

    def __init__(self, dim: int, double: bool, dtype=None, device=None, operations=None):
        super().__init__()
        self.is_double = double
        self.multiplier = 6 if double else 3
        self.lin = operations.Linear(dim, self.multiplier * dim, bias=True, dtype=dtype, device=device)

    def forward(self, vec: Tensor) -> tuple:
        # SiLU, project, add a token axis, then split into `multiplier` chunks.
        chunks = self.lin(nn.functional.silu(vec))[:, None, :].chunk(self.multiplier, dim=-1)
        first = ModulationOut(*chunks[:3])
        second = ModulationOut(*chunks[3:]) if self.is_double else None
        return (first, second,)
class DoubleStreamBlock(nn.Module):
    """Flux double-stream transformer block.

    Keeps separate image and text streams, each with its own modulation,
    attention projections, norms, and MLP. The two streams share a single
    joint attention call (text tokens concatenated before image tokens),
    after which each stream applies its own gated residual updates.
    """

    def __init__(self, hidden_size: int, num_heads: int, mlp_ratio: float, qkv_bias: bool = False, dtype=None, device=None, operations=None, idx=-1):
        super().__init__()
        self.idx = idx  # block index within the transformer (for debugging/patching)

        mlp_hidden_dim = int(hidden_size * mlp_ratio)
        self.num_heads = num_heads
        self.hidden_size = hidden_size

        # Per-stream adaLN-style modulation (double=True -> two (shift, scale, gate) sets each).
        self.img_mod = Modulation(hidden_size, double=True, dtype=dtype, device=device, operations=operations)  # in_features=3072, out_features=18432 (3072*6)
        self.txt_mod = Modulation(hidden_size, double=True, dtype=dtype, device=device, operations=operations)  # in_features=3072, out_features=18432 (3072*6)

        self.img_attn = SelfAttention(dim=hidden_size, num_heads=num_heads, qkv_bias=qkv_bias, dtype=dtype, device=device, operations=operations)  # .qkv: in_features=3072, out_features=9216   .proj: 3072,3072
        self.txt_attn = SelfAttention(dim=hidden_size, num_heads=num_heads, qkv_bias=qkv_bias, dtype=dtype, device=device, operations=operations)  # .qkv: in_features=3072, out_features=9216   .proj: 3072,3072

        # Norms are affine-free; scale/shift come from the modulation outputs instead.
        self.img_norm1 = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device)
        self.txt_norm1 = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device)
        self.img_norm2 = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device)
        self.txt_norm2 = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device)

        self.img_mlp = nn.Sequential(
            operations.Linear(hidden_size, mlp_hidden_dim, bias=True, dtype=dtype, device=device),
            nn.GELU(approximate="tanh"),
            operations.Linear(mlp_hidden_dim, hidden_size, bias=True, dtype=dtype, device=device),
        )  # 3072->12288, 12288->3072 (3072*4)
        self.txt_mlp = nn.Sequential(
            operations.Linear(hidden_size, mlp_hidden_dim, bias=True, dtype=dtype, device=device),
            nn.GELU(approximate="tanh"),
            operations.Linear(mlp_hidden_dim, hidden_size, bias=True, dtype=dtype, device=device),
        )  # 3072->12288, 12288->3072 (3072*4)

    def img_attn_preproc(self, img, img_mod1):
        """Norm + modulate the image stream, project to QKV, apply QK norm."""
        img_modulated = self.img_norm1(img)
        img_modulated = (1 + img_mod1.scale) * img_modulated + img_mod1.shift
        img_qkv = self.img_attn.qkv(img_modulated)
        img_q, img_k, img_v = rearrange(img_qkv, "B L (K H D) -> K B H L D", K=3, H=self.num_heads)
        img_q, img_k = self.img_attn.norm(img_q, img_k, img_v)
        return img_q, img_k, img_v

    def txt_attn_preproc(self, txt, txt_mod1):
        """Norm + modulate the text stream, project to QKV, apply QK norm."""
        txt_modulated = self.txt_norm1(txt)
        txt_modulated = (1 + txt_mod1.scale) * txt_modulated + txt_mod1.shift
        txt_qkv = self.txt_attn.qkv(txt_modulated)
        txt_q, txt_k, txt_v = rearrange(txt_qkv, "B L (K H D) -> K B H L D", K=3, H=self.num_heads)  # Batch SeqLen (9216==3*3072) -> 3*1 24 SeqLen 128
        txt_q, txt_k = self.txt_attn.norm(txt_q, txt_k, txt_v)
        return txt_q, txt_k, txt_v

    def forward(self, img: Tensor, txt: Tensor, vec: Tensor, pe: Tensor, timestep, transformer_options={}, mask=None, weight=1):  # vec 1,3072
        img_mod1, img_mod2 = self.img_mod(vec)  # -> 3072, 3072
        txt_mod1, txt_mod2 = self.txt_mod(vec)

        img_q, img_k, img_v = self.img_attn_preproc(img, img_mod1)
        txt_q, txt_k, txt_v = self.txt_attn_preproc(txt, txt_mod1)

        # Joint attention over both streams: text tokens first, image tokens after.
        q, k, v = torch.cat((txt_q, img_q), dim=2), torch.cat((txt_k, img_k), dim=2), torch.cat((txt_v, img_v), dim=2)

        # Disabled experiment: blend masked/unmasked attention by `weight`.
        """if mask is None:
            attn = attention(q, k, v, pe=pe)
        else:
            attn_false = attention(q, k, v, pe=pe)
            attn = attention(q, k, v, pe=pe, mask=mask.to(torch.bool))
            attn = attn_false + weight * (attn - attn_false)"""

        #I = torch.eye(q.shape[-2], q.shape[-2], dtype=q.dtype, device=q.device).expand((1,1) + (-1, -1))
        #attn_map = attention_weights(q, k)

        # Disabled experiment: derive a spatial mask preview from txt->img attention weights.
        """mask_resized = None
        if mask is not None:
            txt_a = txt[:,:,:]
            txt_qa, txt_ka, txt_va = self.txt_attn_preproc(txt_a, txt_mod1)
            txt_q_rope, txt_k_rope = apply_rope(txt_q, txt_k, pe[:,:,:512,:,:])
            img_q_rope, img_k_rope = apply_rope(img_q, img_k, pe[:,:,512:,:,:])
            attn_weights = attention_weights(txt_q_rope, img_k_rope)
            attn_weights = attn_weights.permute(0,1,3,2)
            attn_weights_slice = attn_weights[:,:,:,:]
            test = attn_weights_slice.mean(dim=1)
            test2 = rearrange(test, "b (h w) (c ph pw) -> b c (h ph) (w pw)", h=64, w=64, ph=1, pw=1)
            test3 = test2.mean(dim=1)
            mask_resized = F.interpolate(test3[None,:,:,:], size=(1024,1024), mode='bilinear', align_corners=False).squeeze(1)"""

        attn = attention(q, k, v, pe=pe, mask=mask)

        # Split joint attention output back into the two streams.
        txt_attn = attn[:, :txt.shape[1]]  # 1, 768,3072
        img_attn = attn[:, txt.shape[1]:]

        # NOTE: in-place residual updates — the caller's img/txt tensors are modified.
        img += img_mod1.gate * self.img_attn.proj(img_attn)
        txt += txt_mod1.gate * self.txt_attn.proj(txt_attn)

        img += img_mod2.gate * self.img_mlp((1 + img_mod2.scale) * self.img_norm2(img) + img_mod2.shift)
        txt += txt_mod2.gate * self.txt_mlp((1 + txt_mod2.scale) * self.txt_norm2(txt) + txt_mod2.shift)

        return img, txt  #, mask_resized
class SingleStreamBlock(nn.Module):
    """
    A DiT block with parallel linear layers as described in
    https://arxiv.org/abs/2302.05442 and adapted modulation interface.

    Operates on the fused (txt + img) token sequence: a single linear
    produces QKV and the MLP input in parallel; a second linear fuses
    attention output and MLP activation back to hidden size.
    """

    def __init__(self, hidden_size: int, num_heads: int, mlp_ratio: float = 4.0, qk_scale: float = None, dtype=None, device=None, operations=None, idx=-1):
        super().__init__()
        self.idx = idx  # block index within the transformer (for debugging/patching)
        self.hidden_dim = hidden_size  #3072
        self.num_heads = num_heads  #24
        head_dim = hidden_size // num_heads
        self.scale = qk_scale or head_dim**-0.5  #0.08838834764831845
        self.mlp_hidden_dim = int(hidden_size * mlp_ratio)  #12288 == 3072 * 4
        # qkv and mlp_in
        self.linear1 = operations.Linear(hidden_size, hidden_size * 3 + self.mlp_hidden_dim, dtype=dtype, device=device)
        # proj and mlp_out
        self.linear2 = operations.Linear(hidden_size + self.mlp_hidden_dim, hidden_size, dtype=dtype, device=device)
        self.norm = QKNorm(head_dim, dtype=dtype, device=device, operations=operations)
        self.hidden_size = hidden_size  #3072
        self.pre_norm = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device)
        self.mlp_act = nn.GELU(approximate="tanh")
        self.modulation = Modulation(hidden_size, double=False, dtype=dtype, device=device, operations=operations)

    def img_attn(self, img, mod, pe, mask, weight):
        """Modulate + project to QKV/MLP-in in one matmul; return (attn_out, mlp_hidden)."""
        img_mod = (1 + mod.scale) * self.pre_norm(img) + mod.shift  # mod => vec
        qkv, mlp = torch.split(self.linear1(img_mod), [3 * self.hidden_size, self.mlp_hidden_dim], dim=-1)
        q, k, v = rearrange(qkv, "B L (K H D) -> K B H L D", K=3, H=self.num_heads)
        q, k = self.norm(q, k, v)
        # Disabled experiment: blend masked/unmasked attention by `weight`.
        """if mask is None:
            attn = attention(q, k, v, pe=pe)
        else:
            attn_false = attention(q, k, v, pe=pe)
            attn = attention(q, k, v, pe=pe, mask=mask.to(torch.bool))
            attn = attn_false + weight * (attn - attn_false)"""
        attn = attention(q, k, v, pe=pe, mask=mask)
        return attn, mlp

    # vec 1,3072       x 1,9984,3072
    def forward(self, img: Tensor, vec: Tensor, pe: Tensor, timestep, transformer_options={}, mask=None, weight=1) -> Tensor:  # x 1,9984,3072 if 2 reg embeds, 1,9472,3072 if none    # 9216x4096 = 16x1536x1536
        mod, _ = self.modulation(vec)
        attn, mlp = self.img_attn(img, mod, pe, mask, weight)
        output = self.linear2(torch.cat((attn, self.mlp_act(mlp)), 2))
        # NOTE: in-place gated residual update — the caller's tensor is modified and returned.
        img += mod.gate * output
        return img
class LastLayer(nn.Module):
def __init__(self, hidden_size: int, patch_size: int, out_channels: int, dtype=None, device=None, operations=None):
super().__init__()
self.norm_final = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device)
self.linear = operations.Linear(hidden_size, patch_size * patch_size * out_channels, bias=True, dtype=dtype, device=device)
self.adaLN_modulation = nn.Sequential(nn.SiLU(), operations.Linear(hidden_size, 2 * hidden_size, bias=True, dtype=dtype, device=device))
def forward(self, x: Tensor, vec: Tensor) -> Tensor:
shift, scale = self.adaLN_modulation(vec).chunk(2, dim=1)
x = (1 + scale[:, None, :]) * self.norm_final(x) + shift[:, None, :]
x = self.linear(x)
return x
================================================
FILE: legacy/flux/math.py
================================================
import torch
from einops import rearrange
from torch import Tensor
from comfy.ldm.modules.attention import optimized_attention
import comfy.model_management
def attention(q: Tensor, k: Tensor, v: Tensor, pe: Tensor, mask=None) -> Tensor:
    """Rotary-embedded attention over pre-shaped (B, H, L, D) q/k/v tensors."""
    q, k = apply_rope(q, k, pe)
    num_heads = q.shape[1]
    return optimized_attention(q, k, v, num_heads, skip_reshape=True, mask=mask)
def rope(pos: Tensor, dim: int, theta: int) -> Tensor:
    """Build 2x2 RoPE rotation matrices for each position/frequency pair.

    Returns a float32 tensor of shape (..., dim // 2, 2, 2) on pos's device.
    """
    assert dim % 2 == 0
    # MPS / Intel XPU cannot do the float64 math below, so compute on CPU there.
    if comfy.model_management.is_device_mps(pos.device) or comfy.model_management.is_intel_xpu():
        device = torch.device("cpu")
    else:
        device = pos.device

    scale = torch.linspace(0, (dim - 2) / dim, steps=dim // 2, dtype=torch.float64, device=device)
    omega = 1.0 / (theta ** scale)
    angles = torch.einsum("...n,d->...nd", pos.to(dtype=torch.float32, device=device), omega)
    cos_a = torch.cos(angles)
    sin_a = torch.sin(angles)
    # Row-major [[cos, -sin], [sin, cos]] rotation matrix per (position, frequency).
    rot = torch.stack([cos_a, -sin_a, sin_a, cos_a], dim=-1)
    rot = rearrange(rot, "b n d (i j) -> b n d i j", i=2, j=2)
    return rot.to(dtype=torch.float32, device=pos.device)
def apply_rope(xq: Tensor, xk: Tensor, freqs_cis: Tensor):
xq_ = xq.float().reshape(*xq.shape[:-1], -1, 1, 2)
xk_ = xk.float().reshape(*xk.shape[:-1], -1, 1, 2)
xq_out = freqs_cis[..., 0] * xq_[..., 0] + freqs_cis[..., 1] * xq_[..., 1]
xk_out = freqs_cis[..., 0] * xk_[..., 0] + freqs_cis[..., 1] * xk_[..., 1]
return xq_out.reshape(*xq.shape).type_as(xq), xk_out.reshape(*xk.shape).type_as(xk)
================================================
FILE: legacy/flux/model.py
================================================
# Adapted from: https://github.com/black-forest-labs/flux
import torch
from torch import Tensor, nn
from dataclasses import dataclass
import copy
from .layers import (
DoubleStreamBlock,
EmbedND,
LastLayer,
MLPEmbedder,
SingleStreamBlock,
timestep_embedding,
)
from comfy.ldm.flux.layers import timestep_embedding
from comfy.ldm.flux.model import Flux as Flux
from einops import rearrange, repeat
import comfy.ldm.common_dit
@dataclass
class FluxParams:
    """Static configuration for the Flux/ReFlux transformer."""
    in_channels: int           # latent channels before patchification (e.g. 16)
    out_channels: int          # latent channels produced by the final layer
    vec_in_dim: int            # pooled conditioning vector width (e.g. 768)
    context_in_dim: int        # text context embedding width (e.g. 4096)
    hidden_size: int           # transformer width (e.g. 3072); must be divisible by num_heads
    mlp_ratio: float           # MLP hidden dim = hidden_size * mlp_ratio
    num_heads: int             # attention heads (e.g. 24)
    depth: int                 # number of double-stream blocks
    depth_single_blocks: int   # number of single-stream blocks
    axes_dim: list             # per-axis RoPE dims; must sum to hidden_size // num_heads
    theta: int                 # RoPE base frequency (e.g. 10000)
    patch_size: int            # spatial patch size (e.g. 2)
    qkv_bias: bool             # whether attention QKV projections carry bias
    guidance_embed: bool       # True for guidance-distilled checkpoints
class ReFlux(Flux):
    """RES4LYF variant of the Flux transformer.

    Rebuilds all submodules itself (bypassing comfy's Flux __init__ args)
    and adds per-sample cond/uncond dispatch plus regional-conditioning
    attention masking driven by `transformer_options`.
    """

    def __init__(self, image_model=None, final_layer=True, dtype=None, device=None, operations=None, **kwargs):
        super().__init__()
        self.dtype = dtype
        self.timestep = -1.0
        self.threshold_inv = False
        params = FluxParams(**kwargs)

        self.params = params  #self.params FluxParams(in_channels=16, out_channels=16, vec_in_dim=768, context_in_dim=4096, hidden_size=3072, mlp_ratio=4.0, num_heads=24, depth=19, depth_single_blocks=38, axes_dim=[16, 56, 56], theta=10000, patch_size=2, qkv_bias=True, guidance_embed=False)
        self.patch_size = params.patch_size
        self.in_channels = params.in_channels * params.patch_size * params.patch_size    # in_channels 64
        self.out_channels = params.out_channels * params.patch_size * params.patch_size  # out_channels 64

        if params.hidden_size % params.num_heads != 0:
            raise ValueError(f"Hidden size {params.hidden_size} must be divisible by num_heads {params.num_heads}")

        pe_dim = params.hidden_size // params.num_heads
        if sum(params.axes_dim) != pe_dim:
            raise ValueError(f"Got {params.axes_dim} but expected positional dim {pe_dim}")

        self.hidden_size = params.hidden_size  # 3072
        self.num_heads = params.num_heads      # 24
        self.pe_embedder = EmbedND(dim=pe_dim, theta=params.theta, axes_dim=params.axes_dim)

        self.img_in = operations.Linear( self.in_channels, self.hidden_size, bias=True, dtype=dtype, device=device)  # in_features=  64, out_features=3072
        self.txt_in = operations.Linear(params.context_in_dim, self.hidden_size, dtype=dtype, device=device)         # in_features=4096, out_features=3072, bias=True

        self.time_in = MLPEmbedder( in_dim=256, hidden_dim=self.hidden_size, dtype=dtype, device=device, operations=operations)
        self.vector_in = MLPEmbedder(params.vec_in_dim, self.hidden_size, dtype=dtype, device=device, operations=operations)  # in_features=768, out_features=3072 (first layer) second layer 3072,3072
        self.guidance_in = (MLPEmbedder( in_dim=256, hidden_dim=self.hidden_size, dtype=dtype, device=device, operations=operations) if params.guidance_embed else nn.Identity())

        self.double_blocks = nn.ModuleList([DoubleStreamBlock(self.hidden_size, self.num_heads, mlp_ratio=params.mlp_ratio, qkv_bias=params.qkv_bias, dtype=dtype, device=device, operations=operations, idx=_) for _ in range(params.depth)])
        self.single_blocks = nn.ModuleList([SingleStreamBlock(self.hidden_size, self.num_heads, mlp_ratio=params.mlp_ratio, dtype=dtype, device=device, operations=operations, idx=_) for _ in range(params.depth_single_blocks)])

        if final_layer:
            self.final_layer = LastLayer(self.hidden_size, 1, self.out_channels, dtype=dtype, device=device, operations=operations)

    def forward_blocks(self, img: Tensor, img_ids: Tensor, txt: Tensor, txt_ids: Tensor, timesteps: Tensor, y: Tensor, guidance: Tensor = None, control=None, transformer_options = {},) -> Tensor:
        """Run embedded img/txt sequences through all double- and single-stream
        blocks (with optional ControlNet residuals and regional attention mask)
        and project through the final layer."""
        if img.ndim != 3 or txt.ndim != 3:
            raise ValueError("Input img and txt tensors must have 3 dimensions.")

        # running on sequences img
        img = self.img_in(img)  # 1,9216,64 == 768x192       # 1,9216,64 == 1,16,128,256 + 1,16,64,64    # 1,8192,64 with uncond/cond     #:,:,64 -> :,:,3072
        vec = self.time_in(timestep_embedding(timesteps, 256).to(img.dtype))  # 1 -> 1,3072
        if self.params.guidance_embed:
            if guidance is None:
                print("Guidance strength is none, not using distilled guidance.")
            else:
                vec = vec + self.guidance_in(timestep_embedding(guidance, 256).to(img.dtype))

        vec = vec + self.vector_in(y)  #y.shape=1,768  y==all 0s
        txt = self.txt_in(txt)

        # Positional IDs: text tokens first, then image tokens (matches joint attention order).
        ids = torch.cat((txt_ids, img_ids), dim=1)  # img_ids.shape=1,8192,3   txt_ids.shape=1,512,3   #ids.shape=1,8704,3
        pe = self.pe_embedder(ids)                  # pe.shape 1,1,8704,64,2,2

        # Regional-conditioning controls; weight/floor appear to be tensors
        # (`.item()` / `.to()` are called on them) — supplied by the sampler.
        weight = transformer_options['reg_cond_weight'] if 'reg_cond_weight' in transformer_options else 0.0
        floor = transformer_options['reg_cond_floor'] if 'reg_cond_floor' in transformer_options else 0.0

        # NOTE(review): mask_orig / mask_self are computed here but never used
        # below — looks like leftovers from an experiment.
        mask_orig, mask_self = None, None
        mask_obj = transformer_options.get('patches', {}).get('regional_conditioning_mask', None)
        if mask_obj is not None and weight >= 0:
            mask_orig = mask_obj[0](transformer_options, weight.item())
            mask_self = mask_orig.clone()
            mask_self[mask_obj[0].text_len:, mask_obj[0].text_len:] = mask_self.max()

        mask_resized_list = []

        # Build the actual attention mask; the img-img quadrant is clamped to `floor`.
        mask = None
        mask_obj = transformer_options.get('patches', {}).get('regional_conditioning_mask', None)
        if mask_obj is not None and weight >= 0:
            mask = mask_obj[0](transformer_options, weight.item())
            text_len = mask_obj[0].text_len
            mask[text_len:,text_len:] = torch.clamp(mask[text_len:,text_len:], min=floor.to(mask.device))

        for i, block in enumerate(self.double_blocks):
            #img, txt, mask_resized = block(img=img, txt=txt, vec=vec, pe=pe, timestep=timesteps, transformer_options=transformer_options, mask=mask, weight=weight)   #, mask=mask)
            img, txt = block(img=img, txt=txt, vec=vec, pe=pe, timestep=timesteps, transformer_options=transformer_options, mask=mask, weight=weight)   #, mask=mask)
            #if mask is not None:
            #    mask_resized_list.append(mask_resized)

            if control is not None:  # Controlnet
                control_i = control.get("input")
                if i < len(control_i):
                    add = control_i[i]
                    if add is not None:
                        # Residual applied to the first sample only.
                        img[:1] += add

        img = torch.cat((txt, img), 1)  #first 256 is txt embed
        for i, block in enumerate(self.single_blocks):
            img = block(img, vec=vec, pe=pe, timestep=timesteps, transformer_options=transformer_options, mask=mask, weight=weight)

            if control is not None:  # Controlnet
                control_o = control.get("output")
                if i < len(control_o):
                    add = control_o[i]
                    if add is not None:
                        # Only the image portion of the fused sequence gets the residual.
                        img[:1, txt.shape[1] :, ...] += add

        # Drop the text tokens before the final projection.
        img = img[:, txt.shape[1] :, ...]
        img = self.final_layer(img, vec)  # (N, T, patch_size ** 2 * out_channels)   1,8192,3072 -> 1,8192,64
        return img

    def _get_img_ids(self, x, bs, h_len, w_len, h_start, h_end, w_start, w_end):
        """Build (b, h*w, 3) positional IDs: channel 0 unused, 1 = row, 2 = column."""
        img_ids = torch.zeros((h_len, w_len, 3), device=x.device, dtype=x.dtype)
        img_ids[..., 1] = img_ids[..., 1] + torch.linspace(h_start, h_end - 1, steps=h_len, device=x.device, dtype=x.dtype)[:, None]
        img_ids[..., 2] = img_ids[..., 2] + torch.linspace(w_start, w_end - 1, steps=w_len, device=x.device, dtype=x.dtype)[None, :]
        img_ids = repeat(img_ids, "h w c -> b (h w) c", b=bs)
        return img_ids

    def forward(self, x, timestep, context, y, guidance, control=None, transformer_options={}, **kwargs):
        """Patchify, dispatch each cond/uncond sample separately through
        forward_blocks (regional conditioning only applies to cond), then
        unpatchify and crop back to the input size."""
        out_list = []
        for i in range(len(transformer_options['cond_or_uncond'])):
            UNCOND = transformer_options['cond_or_uncond'][i] == 1

            bs, c, h, w = x.shape
            transformer_options['original_shape'] = x.shape
            patch_size = 2
            x = comfy.ldm.common_dit.pad_to_patch_size(x, (patch_size, patch_size))  # 1,16,192,192
            transformer_options['patch_size'] = patch_size

            #if 'regional_conditioning_weight' not in transformer_options:   # this breaks the graph
            #    transformer_options['regional_conditioning_weight'] = timestep[0] / 1.5

            h_len = ((h + (patch_size // 2)) // patch_size)  # h_len 96
            w_len = ((w + (patch_size // 2)) // patch_size)  # w_len 96
            img = rearrange(x, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=patch_size, pw=patch_size)  # img 1,9216,64

            if UNCOND:
                # Negative weight disables regional conditioning for the uncond pass.
                transformer_options['reg_cond_weight'] = -1
                context_tmp = context[i][None,...].clone()
            elif UNCOND == False:
                transformer_options['reg_cond_weight'] = transformer_options['regional_conditioning_weight']
                transformer_options['reg_cond_floor'] = transformer_options['regional_conditioning_floor']  #if "regional_conditioning_floor" in transformer_options else 0.0
                regional_conditioning_positive = transformer_options.get('patches', {}).get('regional_conditioning_positive', None)
                # Regional patch concatenates its embeds onto this sample's context.
                context_tmp = regional_conditioning_positive[0].concat_cond(context[i][None,...], transformer_options)

            txt_ids = torch.zeros((bs, context_tmp.shape[1], 3), device=x.device, dtype=x.dtype)  # txt_ids 1, 256,3
            img_ids_orig = self._get_img_ids(x, bs, h_len, w_len, 0, h_len, 0, w_len)             # img_ids_orig = 1,9216,3

            out_tmp = self.forward_blocks(img         [i][None,...].clone(),
                                          img_ids_orig[i][None,...].clone(),
                                          context_tmp,
                                          txt_ids     [i][None,...].clone(),
                                          timestep    [i][None,...].clone(),
                                          y           [i][None,...].clone(),
                                          guidance    [i][None,...].clone(),
                                          control, transformer_options=transformer_options)  # context 1,256,4096   y 1,768
            out_list.append(out_tmp)

        out = torch.stack(out_list, dim=0).squeeze(dim=1)
        # Unpatchify and crop away any padding added above.
        return rearrange(out, "b (h w) (c ph pw) -> b c (h ph) (w pw)", h=h_len, w=w_len, ph=2, pw=2)[:,:,:h,:w]
================================================
FILE: legacy/flux/redux.py
================================================
import torch
import comfy.ops
ops = comfy.ops.manual_cast
class ReduxImageEncoder(torch.nn.Module):
    """Projects SigLIP image embeddings into the text-conditioning space.

    A two-layer bottleneck-in-reverse: up-project to 3x the text width,
    SiLU, then down-project to the text embedding width.
    """

    def __init__(
        self,
        redux_dim: int = 1152,
        txt_in_features: int = 4096,
        device=None,
        dtype=None,
    ) -> None:
        super().__init__()
        self.redux_dim = redux_dim
        self.device = device
        self.dtype = dtype
        self.redux_up = ops.Linear(redux_dim, txt_in_features * 3, dtype=dtype)
        self.redux_down = ops.Linear(txt_in_features * 3, txt_in_features, dtype=dtype)

    def forward(self, sigclip_embeds) -> torch.Tensor:
        expanded = self.redux_up(sigclip_embeds)
        return self.redux_down(torch.nn.functional.silu(expanded))
================================================
FILE: legacy/helper.py
================================================
import re
import torch
from comfy.samplers import SCHEDULER_NAMES
import torch.nn.functional as F
from ..res4lyf import RESplain
def get_extra_options_kv(key, default, extra_options):
    """Return the value of a `key=value` entry in extra_options, else default.

    Values may contain letters, digits, '_', '.', '+', and '-'.
    """
    found = re.search(rf"{key}\s*=\s*([a-zA-Z0-9_.+-]+)", extra_options)
    return found.group(1) if found else default
def get_extra_options_list(key, default, extra_options):
    """Return the value of a `key=v1,v2,...` entry in extra_options, else default.

    Like get_extra_options_kv, but the captured value may also contain commas.
    """
    found = re.search(rf"{key}\s*=\s*([a-zA-Z0-9_.,+-]+)", extra_options)
    return found.group(1) if found else default
def extra_options_flag(flag, extra_options):
    """True when `flag` (treated as a regex pattern) occurs anywhere in extra_options."""
    return re.search(rf"{flag}", extra_options) is not None
def safe_get_nested(d, keys, default=None):
    """Walk nested dicts by the sequence `keys`; return default on any
    missing key or non-dict intermediate value."""
    current = d
    for key in keys:
        if not isinstance(current, dict):
            return default
        current = current.get(key, default)
    return current
def is_video_model(model):
    """Best-effort check for whether `model` wraps a video diffusion model.

    Looks up model.inner_model.inner_model.model_config.unet_config['image_model']
    and reports whether it names a 'video' or 'cosmos' architecture.
    Returns False (instead of raising) when the nested attributes or the
    config key are absent — e.g. for wrappers with a different layout.

    Fix: the bare `except: pass` swallowed *every* exception (including
    KeyboardInterrupt); it now catches only the lookup failures that can
    legitimately occur here.
    """
    try:
        image_model = model.inner_model.inner_model.model_config.unet_config['image_model']
        return 'video' in image_model or 'cosmos' in image_model
    except (AttributeError, KeyError, TypeError):
        return False
def is_RF_model(model):
    """True when the wrapped model uses rectified-flow (CONST) model sampling."""
    from comfy import model_sampling
    sampling = model.inner_model.inner_model.model_sampling
    return isinstance(sampling, model_sampling.CONST)
def lagrange_interpolation(x_values, y_values, x_new):
    """Evaluate the Lagrange interpolating polynomial at the points x_new.

    x_values: 1D tensor (or list of scalars) of n distinct sample locations.
    y_values: tensor or list of tensors with leading dimension n.
    x_new:    scalar or 1D tensor of m query locations.

    Returns a tensor of shape (m, *y_values.shape[1:]).
    Raises ValueError on malformed shapes.
    """
    if not isinstance(x_values, torch.Tensor):
        x_values = torch.tensor(x_values, dtype=torch.get_default_dtype())
    if x_values.ndim != 1:
        raise ValueError("x_values must be a 1D tensor or a list of scalars.")

    if not isinstance(x_new, torch.Tensor):
        x_new = torch.tensor(x_new, dtype=x_values.dtype, device=x_values.device)
    if x_new.ndim == 0:
        x_new = x_new.unsqueeze(0)

    if isinstance(y_values, list):
        y_values = torch.stack(y_values, dim=0)
    if y_values.ndim < 1:
        raise ValueError("y_values must have at least one dimension (the sample dimension).")

    n = x_values.shape[0]
    if y_values.shape[0] != n:
        raise ValueError(f"Mismatch: x_values has length {n} but y_values has {y_values.shape[0]} samples.")

    m = x_new.shape[0]
    result = torch.zeros((m,) + y_values.shape[1:], dtype=y_values.dtype, device=y_values.device)
    trailing = (1,) * (y_values.ndim - 1)

    for i in range(n):
        # Basis polynomial L_i evaluated at every query point.
        basis = torch.ones_like(x_new, dtype=y_values.dtype, device=y_values.device)
        for j in range(n):
            if j == i:
                continue
            basis = basis * ((x_new - x_values[j]) / (x_values[i] - x_values[j]))
        result = result + basis.view(m, *trailing) * y_values[i]

    return result
def get_cosine_similarity_manual(a, b):
    """Cosine similarity of two tensors treated as single flat vectors."""
    dot = (a * b).sum()
    return dot / (torch.norm(a) * torch.norm(b))
def get_cosine_similarity(a, b):
    """Flat cosine similarity; for 5D inputs, b's singleton dim-2 (frame dim)
    is broadcast to match a before flattening."""
    needs_expand = a.dim() == 5 and b.dim() == 5 and b.shape[2] == 1
    if needs_expand:
        b = b.expand(-1, -1, a.shape[2], -1, -1)
    return F.cosine_similarity(a.flatten(), b.flatten(), dim=0)
def get_pearson_similarity(a, b):
    """Cosine similarity of spatially averaged tensors.

    Reduces the last two (spatial) dims by mean, then takes the flat cosine
    similarity. Note: despite the name, no mean-centering is applied, so
    this is not a true Pearson correlation.

    Fix: the singleton-frame broadcast for 5D inputs previously ran *after*
    the spatial mean — at which point the tensors are no longer 5D — so the
    branch was dead and mismatched 5D inputs raised a shape error. The
    expand now happens before the reduction (mirroring get_cosine_similarity).
    """
    if a.dim() == 5 and b.dim() == 5 and b.shape[2] == 1:
        b = b.expand(-1, -1, a.shape[2], -1, -1)
    a = a.mean(dim=(-2, -1))
    b = b.mean(dim=(-2, -1))
    return F.cosine_similarity(a.flatten(), b.flatten(), dim=0)
def initialize_or_scale(tensor, value, steps):
    """Return value * tensor, or a length-`steps` tensor filled with value
    when tensor is None."""
    if tensor is not None:
        return value * tensor
    return torch.full((steps,), value)
def has_nested_attr(obj, attr_path):
    """True when every attribute in the dotted `attr_path` resolves on obj."""
    current = obj
    for name in attr_path.split('.'):
        if not hasattr(current, name):
            return False
        current = getattr(current, name)
    return True
def get_res4lyf_scheduler_list():
    """Comfy's scheduler names plus RES4LYF's 'beta57' (appended at most once)."""
    names = SCHEDULER_NAMES.copy()
    if "beta57" not in names:
        names.append("beta57")
    return names
def conditioning_set_values(conditioning, values={}):
    """Return a copy of `conditioning` with `values` merged into each entry's
    options dict. The original entries' dicts are not modified."""
    updated = []
    for entry in conditioning:
        merged_opts = entry[1].copy()
        merged_opts.update(values)
        updated.append([entry[0], merged_opts])
    return updated
def get_collinear_alt(x, y):
    """Component of x parallel to y (per batch element), in x's shape.

    NOTE: duplicate of get_collinear below — kept for API compatibility.
    """
    y_dir = y.view(y.size(0), -1).clone()
    y_dir /= y_dir.norm(dim=-1, keepdim=True)
    x_flat = x.view(x.size(0), -1)
    coeff = (x_flat * y_dir).sum(dim=-1, keepdim=True)
    return (coeff * y_dir).view_as(x)
def get_collinear(x, y):
    """Project x onto the direction of y, batchwise, returned in x's shape."""
    direction = y.reshape(y.size(0), -1).clone()
    direction = direction / direction.norm(dim=-1, keepdim=True)
    flattened = x.reshape(x.size(0), -1)
    magnitude = torch.sum(flattened * direction, dim=-1, keepdim=True)
    return (magnitude * direction).view_as(x)
def get_orthogonal(x, y):
    """Component of x orthogonal to y (per batch element), in x's shape."""
    direction = y.reshape(y.size(0), -1).clone()
    direction = direction / direction.norm(dim=-1, keepdim=True)
    flattened = x.reshape(x.size(0), -1)
    parallel = torch.sum(flattened * direction, dim=-1, keepdim=True) * direction
    return (flattened - parallel).view_as(x)
# pytorch slerp implementation from https://gist.github.com/Birch-san/230ac46f99ec411ed5907b0a3d728efa
from torch import FloatTensor, LongTensor, Tensor, Size, lerp, zeros_like
from torch.linalg import norm
# adapted to PyTorch from:
# https://gist.github.com/dvschultz/3af50c40df002da3b751efab1daddf2c
# most of the extra complexity is to support:
# - many-dimensional vectors
# - v0 or v1 with last dim all zeroes, or v0 ~colinear with v1
# - falls back to lerp()
# - conditional logic implemented with parallelism rather than Python loops
# - many-dimensional tensor for t
# - you can ask for batches of slerp outputs by making t more-dimensional than the vectors
# - slerp(
# v0: torch.Size([2,3]),
# v1: torch.Size([2,3]),
# t: torch.Size([4,1,1]),
# )
# - this makes it interface-compatible with lerp()
def slerp(v0: FloatTensor, v1: FloatTensor, t: float|FloatTensor, DOT_THRESHOLD=0.9995):
    '''
    Spherical linear interpolation
    Args:
        v0: Starting vector
        v1: Final vector
        t: Float value between 0.0 and 1.0
        DOT_THRESHOLD: Threshold for considering the two vectors as
                       colinear. Not recommended to alter this.
    Returns:
        Interpolation vector between v0 and v1

    Elements whose vectors are ~colinear (or zero) fall back to lerp();
    all masking is done with parallel `where` selects rather than loops,
    and t may carry extra leading batch dims to request batched outputs.
    '''
    assert v0.shape == v1.shape, "shapes of v0 and v1 must match"

    # Normalize the vectors to get the directions and angles
    v0_norm: FloatTensor = norm(v0, dim=-1)
    v1_norm: FloatTensor = norm(v1, dim=-1)

    v0_normed: FloatTensor = v0 / v0_norm.unsqueeze(-1)
    v1_normed: FloatTensor = v1 / v1_norm.unsqueeze(-1)

    # Dot product with the normalized vectors
    dot: FloatTensor = (v0_normed * v1_normed).sum(-1)
    dot_mag: FloatTensor = dot.abs()

    # if dp is NaN, it's because the v0 or v1 row was filled with 0s
    # If absolute value of dot product is almost 1, vectors are ~colinear, so use lerp
    gotta_lerp: LongTensor = dot_mag.isnan() | (dot_mag > DOT_THRESHOLD)
    can_slerp: LongTensor = ~gotta_lerp

    # Extra leading dims on t (beyond v0's rank) become batch dims of the output.
    t_batch_dim_count: int = max(0, t.dim()-v0.dim()) if isinstance(t, Tensor) else 0
    t_batch_dims: Size = t.shape[:t_batch_dim_count] if isinstance(t, Tensor) else Size([])
    out: FloatTensor = zeros_like(v0.expand(*t_batch_dims, *[-1]*v0.dim()))

    # if no elements are lerpable, our vectors become 0-dimensional, preventing broadcasting
    if gotta_lerp.any():
        lerped: FloatTensor = lerp(v0, v1, t)

        out: FloatTensor = lerped.where(gotta_lerp.unsqueeze(-1), out)

    # if no elements are slerpable, our vectors become 0-dimensional, preventing broadcasting
    if can_slerp.any():

        # Calculate initial angle between v0 and v1
        theta_0: FloatTensor = dot.arccos().unsqueeze(-1)
        sin_theta_0: FloatTensor = theta_0.sin()

        # Angle at timestep t
        theta_t: FloatTensor = theta_0 * t
        sin_theta_t: FloatTensor = theta_t.sin()

        # Finish the slerp algorithm
        s0: FloatTensor = (theta_0 - theta_t).sin() / sin_theta_0
        s1: FloatTensor = sin_theta_t / sin_theta_0
        slerped: FloatTensor = s0 * v0 + s1 * v1

        out: FloatTensor = slerped.where(can_slerp.unsqueeze(-1), out)

    return out
class OptionsManager:
    """Collects multiple options dictionaries and merges them lazily.

    Later dictionaries in the list win on conflicts; nested dicts are
    deep-merged; keys in APPEND_OPTIONS are string-joined with newlines
    instead of overwritten.
    """

    # Keys whose string values are accumulated (newline-joined) across dicts
    # rather than replaced.
    APPEND_OPTIONS = {"extra_options"}

    def __init__(self, options_inputs=None):
        self.options_list = options_inputs or []
        self._merged_dict = None  # cache of the merged view; rebuilt on demand

    def add_option(self, option):
        """Add a single options dictionary"""
        if option is not None:
            self.options_list.append(option)
            self._merged_dict = None  # invalidate cached merged options

    @property
    def merged(self):
        """Get merged options with proper priority handling"""
        if self._merged_dict is None:
            self._merged_dict = {}

            special_string_options = {
                key: [] for key in self.APPEND_OPTIONS
            }

            for options_dict in self.options_list:
                if options_dict is not None:
                    for key, value in options_dict.items():
                        if key in self.APPEND_OPTIONS and value:
                            special_string_options[key].append(value)
                        elif isinstance(value, dict):
                            # Deep merge dictionaries
                            if key not in self._merged_dict:
                                self._merged_dict[key] = {}
                            if isinstance(self._merged_dict[key], dict):
                                self._deep_update(self._merged_dict[key], value)
                            else:
                                # Earlier value was not a dict; replace it with a copy.
                                self._merged_dict[key] = value.copy()
                        else:
                            self._merged_dict[key] = value

            # append special case string options (e.g. extra_options)
            for key, value in special_string_options.items():
                if value:
                    self._merged_dict[key] = "\n".join(value)

        return self._merged_dict

    def get(self, key, default=None):
        # Dict-style .get against the merged view.
        return self.merged.get(key, default)

    def _deep_update(self, target_dict, source_dict):
        # Recursively merge source_dict into target_dict (in place); non-dict
        # values are overwritten.
        for key, value in source_dict.items():
            if isinstance(value, dict) and key in target_dict and isinstance(target_dict[key], dict):
                # recursive dict update
                self._deep_update(target_dict[key], value)
            else:
                target_dict[key] = value

    def __getitem__(self, key):
        """Allow dictionary-like access to options"""
        return self.merged[key]

    def __contains__(self, key):
        """Allow 'in' operator for options"""
        return key in self.merged

    def as_dict(self):
        """Return the merged options as a dictionary"""
        return self.merged.copy()

    def __bool__(self):
        """Return True if there are any options"""
        return len(self.options_list) > 0 and any(opt is not None for opt in self.options_list)

    def debug_print_options(self):
        # Dump each collected dict (pre-merge) through RESplain at debug level.
        for i, options_dict in enumerate(self.options_list):
            RESplain(f"Options {i}:", debug=True)
            if options_dict is not None:
                for key, value in options_dict.items():
                    RESplain(f"  {key}: {value}", debug=True)
            else:
                RESplain("  None", "\n", debug=True)
================================================
FILE: legacy/latents.py
================================================
import torch
import torch.nn.functional as F
import math
import itertools
import comfy.samplers
import comfy.sample
import comfy.sampler_helpers
import comfy.utils
from .noise_classes import NOISE_GENERATOR_NAMES, NOISE_GENERATOR_CLASSES, precision_tool, prepare_noise
def initialize_or_scale(tensor, value, steps):
    """Scale `tensor` by `value`, or build a constant schedule of length
    `steps` when no tensor is given."""
    if tensor is None:
        return torch.full((steps,), value)
    return value * tensor
def latent_normalize_channels(x):
    """Zero-mean, unit-std normalization per (batch, channel) over the
    spatial dims of a (B, C, H, W) latent."""
    spatial = (2, 3)
    centered = x - x.mean(dim=spatial, keepdim=True)
    return centered / x.std(dim=spatial, keepdim=True)
def latent_stdize_channels(x):
    """Divide each (batch, channel) slice of a (B, C, H, W) latent by its
    spatial standard deviation (mean is left untouched)."""
    return x / x.std(dim=(2, 3), keepdim=True)
def latent_meancenter_channels(x):
    """Subtract the spatial mean from each (batch, channel) slice of a
    (B, C, H, W) latent (std is left untouched)."""
    return x - x.mean(dim=(2, 3), keepdim=True)
def initialize_or_scale(tensor, value, steps):
    """Return value * tensor, or a length-`steps` constant tensor when
    tensor is None.

    NOTE(review): duplicate re-definition of initialize_or_scale above;
    this later definition wins at import time.
    """
    return torch.full((steps,), value) if tensor is None else value * tensor
def normalize_latent(target, source=None, mean=True, std=True, set_mean=None, set_std=None, channelwise=True):
    """Normalize latent statistics, optionally matching a source latent.

    target:    latent tensor (B, C, H, W), or a list/tuple of such tensors.
    source:    optional latent(s) whose mean/std the normalized target adopts.
    mean, std: which statistics to normalize (both False yields zeros, as before).
    set_mean, set_std: explicit values overriding the source statistics.
    channelwise: compute statistics per channel instead of per whole sample.

    Returns new tensor(s); the inputs are never modified.

    Fixes: (1) source statistics were computed unconditionally, so calling
    with source=None and no set_mean/set_std raised a TypeError even though
    plain standardization is a valid use — they are now evaluated lazily,
    only when a source is present; (2) `target.clone()` crashed for the
    documented list/tuple inputs — cloning is now type-aware.
    """
    def _clone(x):
        # Lists/tuples of latents are cloned element-wise.
        return [v.clone() for v in x] if isinstance(x, (list, tuple)) else x.clone()

    target = _clone(target)
    source = _clone(source) if source is not None else None

    def _source_stats(src_slice):
        # Explicit overrides win; otherwise read the stats off the source slice.
        m = src_slice.mean() if set_mean is None else set_mean
        s = src_slice.std() if set_std is None else set_std
        return m, s

    def _normalize_slice(t_slice, s_slice):
        # Normalize one (channel or sample) slice; rescale to source stats
        # only when a source slice is available (matches prior behavior).
        if mean and std:
            out = (t_slice - t_slice.mean()) / t_slice.std()
            if s_slice is not None:
                m, s = _source_stats(s_slice)
                out = out * s + m
        elif mean:
            out = t_slice - t_slice.mean()
            if s_slice is not None:
                m, _ = _source_stats(s_slice)
                out = out + m
        elif std:
            out = t_slice / t_slice.std()
            if s_slice is not None:
                _, s = _source_stats(s_slice)
                out = out * s
        else:
            # Preserve the original behavior: neither flag set -> zeros.
            out = torch.zeros_like(t_slice)
        return out

    def normalize_single_latent(single_target, single_source=None):
        y = torch.zeros_like(single_target)
        for b in range(y.shape[0]):
            if channelwise:
                for c in range(y.shape[1]):
                    src = single_source[b][c] if single_source is not None else None
                    y[b][c] = _normalize_slice(single_target[b][c], src)
            else:
                src = single_source[b] if single_source is not None else None
                y[b] = _normalize_slice(single_target[b], src)
        return y

    if isinstance(target, (list, tuple)):
        if source is not None:
            assert isinstance(source, (list, tuple)) and len(source) == len(target), \
                "If target is a list/tuple, source must be a list/tuple of the same length."
            return [normalize_single_latent(t, s) for t, s in zip(target, source)]
        return [normalize_single_latent(t) for t in target]
    return normalize_single_latent(target, source)
class AdvancedNoise:
    """ComfyUI node: wraps seed/type/alpha/k into a Noise_RandomNoise object
    for samplers that accept a NOISE input."""

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "alpha": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step":0.1, "round": 0.01}),
                "k": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step":2.0, "round": 0.01}),
                "noise_seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}),
                "noise_type": (NOISE_GENERATOR_NAMES, ),
            },
        }

    RETURN_TYPES = ("NOISE",)
    FUNCTION = "get_noise"
    CATEGORY = "RES4LYF/noise"

    def get_noise(self, noise_seed, noise_type, alpha, k):
        # Defer actual generation to the returned object (called by the sampler).
        return (Noise_RandomNoise(noise_seed, noise_type, alpha, k),)
class Noise_RandomNoise:
    """Noise provider: generates seeded noise of the requested type for a
    latent dict when the sampler calls generate_noise."""

    def __init__(self, seed, noise_type, alpha, k):
        self.seed = seed              # RNG seed
        self.noise_type = noise_type  # one of NOISE_GENERATOR_NAMES
        self.alpha = alpha            # generator-specific shape parameter
        self.k = k                    # generator-specific shape parameter

    def generate_noise(self, input_latent):
        """Produce noise matching input_latent['samples'] via prepare_noise."""
        latent_image = input_latent["samples"]
        batch_inds = input_latent["batch_index"] if "batch_index" in input_latent else None
        return prepare_noise(latent_image, self.seed, self.noise_type, batch_inds, self.alpha, self.k)
class LatentNoised:
    """ComfyUI node: adds (optionally masked/normalized) noise to a latent.

    Noise can be generated from seed/type/alpha/k, supplied directly via
    `latent_noise`, or disabled entirely; a mask restricts where noise is
    applied.
    """

    @classmethod
    def INPUT_TYPES(s):
        return {"required":
                    {
                    "add_noise": ("BOOLEAN", {"default": True}),
                    "noise_is_latent": ("BOOLEAN", {"default": False}),
                    "noise_type": (NOISE_GENERATOR_NAMES, ),
                    "alpha": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step":0.1, "round": 0.01}),
                    "k": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step":2.0, "round": 0.01}),
                    "noise_seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}),
                    "latent_image": ("LATENT", ),
                    "noise_strength": ("FLOAT", {"default": 1.0, "min": -20.0, "max": 20.0, "step": 0.01, "round": 0.01}),
                    "normalize": (["false", "true"], {"default": "false"}),
                    },
                "optional":
                    {
                    "latent_noise": ("LATENT", ),
                    "mask": ("MASK", ),
                    }
                }

    RETURN_TYPES = ("LATENT",)
    RETURN_NAMES = ("latent_noised",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/noise"

    def main(self, add_noise, noise_is_latent, noise_type, noise_seed, alpha, k, latent_image, noise_strength, normalize, latent_noise=None, mask=None):
        latent_out = latent_image.copy()
        samples = latent_out["samples"].clone()

        torch.manual_seed(noise_seed)

        # Choose the noise source: zeros (disabled), generated, or supplied.
        if not add_noise:
            noise = torch.zeros(samples.size(), dtype=samples.dtype, layout=samples.layout, device="cpu")
        elif latent_noise is None:
            batch_inds = latent_out["batch_index"] if "batch_index" in latent_out else None
            noise = prepare_noise(samples, noise_seed, noise_type, batch_inds, alpha, k)
        else:
            noise = latent_noise["samples"]

        # Optionally match the noise statistics to the latent's mean/std.
        if normalize == "true":
            latent_mean = samples.mean()
            latent_std = samples.std()
            noise = noise * latent_std + latent_mean

        # Treat the noise as a latent to blend with, then re-standardize in place.
        if noise_is_latent:
            noise += samples.cpu()
            noise.sub_(noise.mean()).div_(noise.std())

        noise = noise * noise_strength

        if mask is not None:
            # Resize the mask to the latent's spatial size and broadcast over channels.
            mask = F.interpolate(mask.reshape((-1, 1, mask.shape[-2], mask.shape[-1])),
                                 size=(samples.shape[2], samples.shape[3]),
                                 mode="bilinear")
            mask = mask.expand((-1, samples.shape[1], -1, -1)).to(samples.device)
            # Tile or trim the mask batch to match the latent batch.
            if mask.shape[0] < samples.shape[0]:
                mask = mask.repeat((samples.shape[0] - 1) // mask.shape[0] + 1, 1, 1, 1)[:samples.shape[0]]
            elif mask.shape[0] > samples.shape[0]:
                mask = mask[:samples.shape[0]]
            # Noise only where the mask is set.
            noise = mask * noise + (1 - mask) * torch.zeros_like(noise)

        latent_out["samples"] = samples.cpu() + noise

        return (latent_out,)
class MaskToggle:
    """Passes the mask through when enabled, otherwise yields None."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "enable": ("BOOLEAN", {"default": True}),
                "mask":   ("MASK", ),
            },
        }

    RETURN_TYPES = ("MASK",)
    RETURN_NAMES = ("mask",)
    CATEGORY     = "RES4LYF/masks"
    FUNCTION     = "main"

    def main(self, enable=True, mask=None):
        # Disabling the toggle drops the mask entirely.
        result = None if enable == False else mask
        return (result, )
class set_precision:
def __init__(self):
pass
@classmethod
def INPUT_TYPES(s):
return {
"required": {
"latent_image": ("LATENT", ),
"precision": (["16", "32", "64"], ),
"set_default": ("BOOLEAN", {"default": False})
},
}
RETURN_TYPES = ("LATENT",)
RETURN_NAMES = ("passthrough",)
CATEGORY = "RES4LYF/precision"
FUNCTION = "main"
def main(self, precision="32", latent_image=None, set_default=False):
match precision:
case "16":
if set_default is True:
torch.set_default_dtype(torch.float16)
x = latent_image["samples"].to(torch.float16)
case "32":
if set_default is True:
torch.set_default_dtype(torch.float32)
x = latent_image["samples"].to(torch.float32)
case "64":
if set_default is True:
torch.set_default_dtype(torch.float64)
x = latent_image["samples"].to(torch.float64)
return ({"samples": x}, )
class set_precision_universal:
    """Casts conditioning, sigmas, and a latent to one chosen float precision,
    with a "passthrough" option that leaves every input untouched."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "precision":   (["bf16", "fp16", "fp32", "fp64", "passthrough"], {"default": "fp32"}),
                "set_default": ("BOOLEAN", {"default": False})
            },
            "optional": {
                "cond_pos":     ("CONDITIONING",),
                "cond_neg":     ("CONDITIONING",),
                "sigmas":       ("SIGMAS", ),
                "latent_image": ("LATENT", ),
            },
        }

    RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "SIGMAS", "LATENT",)
    RETURN_NAMES = ("cond_pos","cond_neg","sigmas","latent_image",)
    CATEGORY     = "RES4LYF/precision"
    FUNCTION     = "main"

    def main(self, precision="fp32", cond_pos=None, cond_neg=None, sigmas=None, latent_image=None, set_default=False):
        if precision == "passthrough":
            # No conversion requested: hand everything back unchanged.
            return (cond_pos, cond_neg, sigmas, latent_image, )

        dtype_by_name = {
            "bf16": torch.bfloat16,
            "fp16": torch.float16,
            "fp32": torch.float32,
            "fp64": torch.float64,
        }
        dtype = dtype_by_name[precision]

        # Conditioning lists are updated in place (first entry only), so the
        # same list objects flow on to downstream nodes.
        for cond in (cond_pos, cond_neg):
            if cond is not None:
                cond[0][0] = cond[0][0].clone().to(dtype)
                cond[0][1]["pooled_output"] = cond[0][1]["pooled_output"].clone().to(dtype)

        if sigmas is not None:
            sigmas = sigmas.clone().to(dtype)

        if latent_image is not None:
            latent_image = {"samples": latent_image["samples"].clone().to(dtype)}

        if set_default is True:
            # Global side effect, applied last as in the original flow.
            torch.set_default_dtype(dtype)

        return (cond_pos, cond_neg, sigmas, latent_image, )
class set_precision_advanced:
    """Sets torch's global default dtype and the project-wide cast precision,
    and emits the latent simultaneously in fp16/fp32/fp64 plus a copy cast
    to the global dtype."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "latent_image":     ("LATENT", ),
                "global_precision": (["64", "32", "16"], ),
                "shark_precision":  (["64", "32", "16"], ),
            },
        }

    RETURN_TYPES = ("LATENT","LATENT","LATENT","LATENT","LATENT",)
    RETURN_NAMES = ("PASSTHROUGH","LATENT_CAST_TO_GLOBAL","LATENT_16","LATENT_32","LATENT_64",)
    CATEGORY     = "RES4LYF/precision"
    FUNCTION     = "main"

    def main(self, global_precision="32", shark_precision="64", latent_image=None):
        dtype_map     = {"16": torch.float16, "32": torch.float32, "64": torch.float64}
        precision_map = {"16": 'fp16', "32": 'fp32', "64": 'fp64'}

        target_dtype = dtype_map[global_precision]
        torch.set_default_dtype(target_dtype)                          # global side effect
        precision_tool.set_cast_type(precision_map[shark_precision])   # project-wide cast hint

        # Capture the original tensor *before* any in-place dict mutation so
        # the passthrough output keeps the caller's dtype.
        samples            = latent_image["samples"]
        latent_passthrough = samples
        latent_out16       = samples.to(torch.float16)
        latent_out32       = samples.to(torch.float32)
        latent_out64       = samples.to(torch.float64)

        # Cast the *input* latent dict in place to the global dtype if needed.
        if latent_image["samples"].dtype != target_dtype:
            latent_image["samples"] = latent_image["samples"].to(target_dtype)
        latent_cast_to_global = latent_image["samples"]

        return ({"samples": latent_passthrough}, {"samples": latent_cast_to_global}, {"samples": latent_out16}, {"samples": latent_out32}, {"samples": latent_out64})
class latent_to_cuda:
    """Moves a latent's samples tensor to CUDA, or back to the CPU.

    Returns the latent dict (shallow-copied) with its "samples" tensor on the
    requested device.
    """

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "latent":  ("LATENT", ),
                "to_cuda": ("BOOLEAN", {"default": True}),
            },
        }

    RETURN_TYPES = ("LATENT",)
    RETURN_NAMES = ("passthrough",)
    CATEGORY     = "RES4LYF/latents"
    FUNCTION     = "main"

    def main(self, latent, to_cuda):
        # BUGFIX: `to_cuda` is a BOOLEAN input, but the old code matched it
        # against the strings "True"/"False", so neither case ever fired and
        # the node silently did nothing.  It also called .to() on the LATENT
        # dict itself rather than on the tensor stored under "samples".
        device = 'cuda' if to_cuda else 'cpu'
        latent = latent.copy()  # don't mutate the caller's dict
        latent["samples"] = latent["samples"].to(device)
        return (latent,)
class latent_batch:
    """Repeats a single latent along the batch dimension to build a batch of
    the requested size, preserving channel count, dtype, and device."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "latent":     ("LATENT", ),
                "batch_size": ("INT", {"default": 0, "min": -10000, "max": 10000}),
            },
        }

    RETURN_TYPES = ("LATENT",)
    RETURN_NAMES = ("latent_batch",)
    CATEGORY     = "RES4LYF/latents"
    FUNCTION     = "main"

    def main(self, latent, batch_size):
        samples = latent["samples"]
        b, c, h, w = samples.shape
        # BUGFIX: the old code allocated a hard-coded 4-channel float32 buffer
        # on the latent's device and copied the latent into each slot, which
        # broke 16-channel latents (e.g. SD3/Flux/Cascade) and silently
        # dropped the input dtype.  repeat() preserves channels, dtype, and
        # device.  Only the first frame of a multi-frame input is tiled.
        batch_latents = samples[:1].repeat(batch_size, 1, 1, 1)
        return ({"samples": batch_latents}, )
class LatentPhaseMagnitude:
    # Frequency-domain blend of two latent batches: each of the first four
    # channels has its FFT phase and magnitude combined under a power-mean
    # controlled by phase_mix_power / magnitude_mix_power.  Channel labels
    # (luminosity / cyan_red / lime_purple / pattern_structure) name the four
    # SD latent channels.
    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "latent_0_batch": ("LATENT",),
                "latent_1_batch": ("LATENT",),
                "phase_mix_power": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.001}),
                "magnitude_mix_power": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.001}),
                "phase_luminosity": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.001}),
                "phase_cyan_red": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.001}),
                "phase_lime_purple": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.001}),
                "phase_pattern_structure": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.001}),
                "magnitude_luminosity": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.001}),
                "magnitude_cyan_red": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.001}),
                "magnitude_lime_purple": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.001}),
                "magnitude_pattern_structure": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.001}),
                "latent_0_normal": ("BOOLEAN", {"default": True}),
                "latent_1_normal": ("BOOLEAN", {"default": True}),
                "latent_out_normal": ("BOOLEAN", {"default": True}),
                "latent_0_stdize": ("BOOLEAN", {"default": True}),
                "latent_1_stdize": ("BOOLEAN", {"default": True}),
                "latent_out_stdize": ("BOOLEAN", {"default": True}),
                "latent_0_meancenter": ("BOOLEAN", {"default": True}),
                "latent_1_meancenter": ("BOOLEAN", {"default": True}),
                "latent_out_meancenter": ("BOOLEAN", {"default": True}),
            },
            "optional": {
                # Optional per-frame schedules (one value per batch entry);
                # when absent, the scalar widget value is broadcast instead.
                "phase_mix_powers": ("SIGMAS", ),
                "magnitude_mix_powers": ("SIGMAS", ),
                "phase_luminositys": ("SIGMAS", ),
                "phase_cyan_reds": ("SIGMAS", ),
                "phase_lime_purples": ("SIGMAS", ),
                "phase_pattern_structures": ("SIGMAS", ),
                "magnitude_luminositys": ("SIGMAS", ),
                "magnitude_cyan_reds": ("SIGMAS", ),
                "magnitude_lime_purples": ("SIGMAS", ),
                "magnitude_pattern_structures": ("SIGMAS", ),
            }
        }
    RETURN_TYPES = ("LATENT",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/latents"

    @staticmethod
    def latent_repeat(latent, batch_size):
        # Tile a single latent along the batch dimension.
        b, c, h, w = latent.shape
        batch_latents = torch.zeros((batch_size, c, h, w), dtype=latent.dtype, layout=latent.layout, device=latent.device)
        for i in range(batch_size):
            batch_latents[i] = latent
        return batch_latents

    @staticmethod
    def mix_latent_phase_magnitude(latent_0, latent_1, power_phase, power_magnitude,
            phase_luminosity, phase_cyan_red, phase_lime_purple, phase_pattern_structure,
            magnitude_luminosity, magnitude_cyan_red, magnitude_lime_purple, magnitude_pattern_structure
            ):
        # Blend one pair of frames in the frequency domain.  For channel i
        # with weight w and power p the mix is ((a*(1-w))**p + (b*w)**p)**(1/p),
        # a power-mean that reduces to a linear crossfade at p == 1.
        # NOTE(review): phases lie in [-pi, pi]; a negative base raised to a
        # non-integer power yields NaN under torch.pow — presumably callers
        # keep the powers integer-valued.  Confirm.
        dtype = torch.promote_types(latent_0.dtype, latent_1.dtype)

        # big accuracy problems with fp32 FFT! let's avoid that
        latent_0 = latent_0.double()
        latent_1 = latent_1.double()

        latent_0_fft = torch.fft.fft2(latent_0)
        latent_1_fft = torch.fft.fft2(latent_1)
        latent_0_phase = torch.angle(latent_0_fft)
        latent_1_phase = torch.angle(latent_1_fft)
        latent_0_magnitude = torch.abs(latent_0_fft)
        latent_1_magnitude = torch.abs(latent_1_fft)

        # DC corruption...? handle separately??
        #dc_index = (0, 0)
        #dc_0 = latent_0_fft[:, :, dc_index[0], dc_index[1]]
        #dc_1 = latent_1_fft[:, :, dc_index[0], dc_index[1]]
        #mixed_dc = dc_0 * 0.5 + dc_1 * 0.5
        #mixed_dc = dc_0 * (1 - phase_weight) + dc_1 * phase_weight

        # create complex FFT using a weighted mix of phases
        chan_weights_phase = [w for w in [phase_luminosity, phase_cyan_red, phase_lime_purple, phase_pattern_structure ]]
        chan_weights_magnitude = [w for w in [magnitude_luminosity, magnitude_cyan_red, magnitude_lime_purple, magnitude_pattern_structure]]

        mixed_phase = torch.zeros_like(latent_0, dtype=latent_0.dtype, layout=latent_0.layout, device=latent_0.device)
        mixed_magnitude = torch.zeros_like(latent_0, dtype=latent_0.dtype, layout=latent_0.layout, device=latent_0.device)
        # Only the first 4 channels are mixed; any additional channels stay
        # zero (zero phase AND zero magnitude -> zero output there).
        for i in range(4):
            mixed_phase[:, i] = ( (latent_0_phase[:,i] * (1-chan_weights_phase[i])) ** power_phase + (latent_1_phase[:,i] * chan_weights_phase[i]) ** power_phase) ** (1/power_phase)
            mixed_magnitude[:, i] = ( (latent_0_magnitude[:,i] * (1-chan_weights_magnitude[i])) ** power_magnitude + (latent_1_magnitude[:,i] * chan_weights_magnitude[i]) ** power_magnitude) ** (1/power_magnitude)

        new_fft = mixed_magnitude * torch.exp(1j * mixed_phase)
        #new_fft[:, :, dc_index[0], dc_index[1]] = mixed_dc

        # inverse FFT to convert back to spatial domain
        mixed_phase_magnitude = torch.fft.ifft2(new_fft).real

        return mixed_phase_magnitude.to(dtype)

    def main(self, #batch_size, latent_1_repeat,
            latent_0_batch, latent_1_batch, latent_0_normal, latent_1_normal, latent_out_normal,
            latent_0_stdize, latent_1_stdize, latent_out_stdize,
            latent_0_meancenter, latent_1_meancenter, latent_out_meancenter,
            phase_mix_power, magnitude_mix_power,
            phase_luminosity, phase_cyan_red, phase_lime_purple, phase_pattern_structure,
            magnitude_luminosity, magnitude_cyan_red, magnitude_lime_purple, magnitude_pattern_structure,
            phase_mix_powers=None, magnitude_mix_powers=None,
            phase_luminositys=None, phase_cyan_reds=None, phase_lime_purples=None, phase_pattern_structures=None,
            magnitude_luminositys=None, magnitude_cyan_reds=None, magnitude_lime_purples=None, magnitude_pattern_structures=None
            ):
        # Work in fp64 throughout (FFT accuracy); latent_1 is moved onto
        # latent_0's device.
        latent_0_batch = latent_0_batch["samples"].double()
        latent_1_batch = latent_1_batch["samples"].double().to(latent_0_batch.device)

        #if batch_size == 0:
        batch_size = latent_0_batch.shape[0]

        # A single latent_1 frame is tiled to match latent_0's batch size.
        if latent_1_batch.shape[0] == 1:
            latent_1_batch = self.latent_repeat(latent_1_batch, batch_size)

        # Each scalar widget becomes a per-frame schedule unless a SIGMAS
        # tensor was supplied for it.
        magnitude_mix_powers = initialize_or_scale(magnitude_mix_powers, magnitude_mix_power, batch_size)
        phase_mix_powers = initialize_or_scale(phase_mix_powers, phase_mix_power, batch_size)
        phase_luminositys = initialize_or_scale(phase_luminositys, phase_luminosity, batch_size)
        phase_cyan_reds = initialize_or_scale(phase_cyan_reds, phase_cyan_red, batch_size)
        phase_lime_purples = initialize_or_scale(phase_lime_purples, phase_lime_purple, batch_size)
        phase_pattern_structures = initialize_or_scale(phase_pattern_structures, phase_pattern_structure, batch_size)
        magnitude_luminositys = initialize_or_scale(magnitude_luminositys, magnitude_luminosity, batch_size)
        magnitude_cyan_reds = initialize_or_scale(magnitude_cyan_reds, magnitude_cyan_red, batch_size)
        magnitude_lime_purples = initialize_or_scale(magnitude_lime_purples, magnitude_lime_purple, batch_size)
        magnitude_pattern_structures = initialize_or_scale(magnitude_pattern_structures, magnitude_pattern_structure, batch_size)

        mixed_phase_magnitude_batch = torch.zeros(latent_0_batch.shape, device=latent_0_batch.device)

        # Optional per-input conditioning.  Order matters and is applied as:
        # normalize -> mean-center -> standardize, for each input separately.
        if latent_0_normal == True:
            latent_0_batch = latent_normalize_channels(latent_0_batch)
        if latent_1_normal == True:
            latent_1_batch = latent_normalize_channels(latent_1_batch)
        if latent_0_meancenter == True:
            latent_0_batch = latent_meancenter_channels(latent_0_batch)
        if latent_1_meancenter == True:
            latent_1_batch = latent_meancenter_channels(latent_1_batch)
        if latent_0_stdize == True:
            latent_0_batch = latent_stdize_channels(latent_0_batch)
        if latent_1_stdize == True:
            latent_1_batch = latent_stdize_channels(latent_1_batch)

        # Blend frame-by-frame, then apply the optional output conditioning
        # (normalize -> standardize -> mean-center) to each result.
        for i in range(batch_size):
            mixed_phase_magnitude = self.mix_latent_phase_magnitude(latent_0_batch[i:i+1], latent_1_batch[i:i+1], phase_mix_powers[i].item(), magnitude_mix_powers[i].item(),
                    phase_luminositys[i].item(), phase_cyan_reds[i].item(),phase_lime_purples[i].item(),phase_pattern_structures[i].item(),
                    magnitude_luminositys[i].item(), magnitude_cyan_reds[i].item(),magnitude_lime_purples[i].item(),magnitude_pattern_structures[i].item()
                    )
            if latent_out_normal == True:
                mixed_phase_magnitude = latent_normalize_channels(mixed_phase_magnitude)
            if latent_out_stdize == True:
                mixed_phase_magnitude = latent_stdize_channels(mixed_phase_magnitude)
            if latent_out_meancenter == True:
                mixed_phase_magnitude = latent_meancenter_channels(mixed_phase_magnitude)
            mixed_phase_magnitude_batch[i, :, :, :] = mixed_phase_magnitude

        return ({"samples": mixed_phase_magnitude_batch}, )
class LatentPhaseMagnitudeMultiply:
    """Scales the FFT phase and magnitude of each of a latent's first four
    channels by per-channel factors, then returns to the spatial domain."""

    @classmethod
    def INPUT_TYPES(s):
        float_spec    = ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.001})
        channel_names = ["luminosity", "cyan_red", "lime_purple", "pattern_structure"]

        required = {"latent_0_batch": ("LATENT",)}
        for prefix in ("phase", "magnitude"):
            for chan in channel_names:
                required[f"{prefix}_{chan}"] = float_spec
        required["latent_0_normal"]   = ("BOOLEAN", {"default": False})
        required["latent_out_normal"] = ("BOOLEAN", {"default": False})

        # SIGMAS inputs provide optional per-frame schedules for each widget.
        optional = {f"{prefix}_{chan}s": ("SIGMAS", )
                    for prefix in ("phase", "magnitude") for chan in channel_names}
        return {"required": required, "optional": optional}

    RETURN_TYPES = ("LATENT",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/latents"

    @staticmethod
    def latent_repeat(latent, batch_size):
        # Tile a single latent along the batch dimension.
        b, c, h, w = latent.shape
        tiled = torch.zeros((batch_size, c, h, w), dtype=latent.dtype, layout=latent.layout, device=latent.device)
        for idx in range(batch_size):
            tiled[idx] = latent
        return tiled

    @staticmethod
    def mix_latent_phase_magnitude(latent_0,
            phase_luminosity, phase_cyan_red, phase_lime_purple, phase_pattern_structure,
            magnitude_luminosity, magnitude_cyan_red, magnitude_lime_purple, magnitude_pattern_structure
            ):
        original_dtype = latent_0.dtype
        # fp32 FFT is too lossy here; do everything in double precision.
        latent_0 = latent_0.double()

        spectrum  = torch.fft.fft2(latent_0)
        phase     = torch.angle(spectrum)
        magnitude = torch.abs(spectrum)

        phase_weights     = [phase_luminosity, phase_cyan_red, phase_lime_purple, phase_pattern_structure]
        magnitude_weights = [magnitude_luminosity, magnitude_cyan_red, magnitude_lime_purple, magnitude_pattern_structure]

        # Only the first 4 channels are scaled; extra channels stay zero.
        scaled_phase     = torch.zeros_like(latent_0, dtype=latent_0.dtype, layout=latent_0.layout, device=latent_0.device)
        scaled_magnitude = torch.zeros_like(latent_0, dtype=latent_0.dtype, layout=latent_0.layout, device=latent_0.device)
        for ch in range(4):
            scaled_phase[:, ch]     = phase[:, ch] * phase_weights[ch]
            scaled_magnitude[:, ch] = magnitude[:, ch] * magnitude_weights[ch]

        # Rebuild the complex spectrum and return to the spatial domain.
        rebuilt = scaled_magnitude * torch.exp(1j * scaled_phase)
        return torch.fft.ifft2(rebuilt).real.to(original_dtype)

    def main(self,
            latent_0_batch, latent_0_normal, latent_out_normal,
            phase_luminosity, phase_cyan_red, phase_lime_purple, phase_pattern_structure,
            magnitude_luminosity, magnitude_cyan_red, magnitude_lime_purple, magnitude_pattern_structure,
            phase_luminositys=None, phase_cyan_reds=None, phase_lime_purples=None, phase_pattern_structures=None,
            magnitude_luminositys=None, magnitude_cyan_reds=None, magnitude_lime_purples=None, magnitude_pattern_structures=None
            ):
        samples    = latent_0_batch["samples"].double()
        batch_size = samples.shape[0]

        # Broadcast each scalar to a per-frame schedule unless one was supplied.
        phase_luminositys            = initialize_or_scale(phase_luminositys, phase_luminosity, batch_size)
        phase_cyan_reds              = initialize_or_scale(phase_cyan_reds, phase_cyan_red, batch_size)
        phase_lime_purples           = initialize_or_scale(phase_lime_purples, phase_lime_purple, batch_size)
        phase_pattern_structures     = initialize_or_scale(phase_pattern_structures, phase_pattern_structure, batch_size)
        magnitude_luminositys        = initialize_or_scale(magnitude_luminositys, magnitude_luminosity, batch_size)
        magnitude_cyan_reds          = initialize_or_scale(magnitude_cyan_reds, magnitude_cyan_red, batch_size)
        magnitude_lime_purples       = initialize_or_scale(magnitude_lime_purples, magnitude_lime_purple, batch_size)
        magnitude_pattern_structures = initialize_or_scale(magnitude_pattern_structures, magnitude_pattern_structure, batch_size)

        output = torch.zeros(samples.shape, device=samples.device)

        if latent_0_normal == True:
            samples = latent_normalize_channels(samples)

        # Process frame-by-frame so each frame can use its scheduled weights.
        for frame in range(batch_size):
            result = self.mix_latent_phase_magnitude(samples[frame:frame+1],
                    phase_luminositys[frame].item(), phase_cyan_reds[frame].item(), phase_lime_purples[frame].item(), phase_pattern_structures[frame].item(),
                    magnitude_luminositys[frame].item(), magnitude_cyan_reds[frame].item(), magnitude_lime_purples[frame].item(), magnitude_pattern_structures[frame].item()
                    )
            if latent_out_normal == True:
                result = latent_normalize_channels(result)
            output[frame, :, :, :] = result

        return ({"samples": output}, )
class LatentPhaseMagnitudeOffset:
    """Adds per-channel offsets to the FFT phase and magnitude of a latent's
    first four channels, then returns to the spatial domain."""

    @classmethod
    def INPUT_TYPES(s):
        float_spec    = ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.001})
        channel_names = ["luminosity", "cyan_red", "lime_purple", "pattern_structure"]

        required = {"latent_0_batch": ("LATENT",)}
        for prefix in ("phase", "magnitude"):
            for chan in channel_names:
                required[f"{prefix}_{chan}"] = float_spec
        required["latent_0_normal"]   = ("BOOLEAN", {"default": False})
        required["latent_out_normal"] = ("BOOLEAN", {"default": False})

        # SIGMAS inputs provide optional per-frame schedules for each widget.
        optional = {f"{prefix}_{chan}s": ("SIGMAS", )
                    for prefix in ("phase", "magnitude") for chan in channel_names}
        return {"required": required, "optional": optional}

    RETURN_TYPES = ("LATENT",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/latents"

    @staticmethod
    def latent_repeat(latent, batch_size):
        # Tile a single latent along the batch dimension.
        b, c, h, w = latent.shape
        tiled = torch.zeros((batch_size, c, h, w), dtype=latent.dtype, layout=latent.layout, device=latent.device)
        for idx in range(batch_size):
            tiled[idx] = latent
        return tiled

    @staticmethod
    def mix_latent_phase_magnitude(latent_0,
            phase_luminosity, phase_cyan_red, phase_lime_purple, phase_pattern_structure,
            magnitude_luminosity, magnitude_cyan_red, magnitude_lime_purple, magnitude_pattern_structure
            ):
        original_dtype = latent_0.dtype
        # fp32 FFT is too lossy here; do everything in double precision.
        latent_0 = latent_0.double()

        spectrum  = torch.fft.fft2(latent_0)
        phase     = torch.angle(spectrum)
        magnitude = torch.abs(spectrum)

        phase_offsets     = [phase_luminosity, phase_cyan_red, phase_lime_purple, phase_pattern_structure]
        magnitude_offsets = [magnitude_luminosity, magnitude_cyan_red, magnitude_lime_purple, magnitude_pattern_structure]

        # Only the first 4 channels are offset; extra channels stay zero.
        shifted_phase     = torch.zeros_like(latent_0, dtype=latent_0.dtype, layout=latent_0.layout, device=latent_0.device)
        shifted_magnitude = torch.zeros_like(latent_0, dtype=latent_0.dtype, layout=latent_0.layout, device=latent_0.device)
        for ch in range(4):
            shifted_phase[:, ch]     = phase[:, ch] + phase_offsets[ch]
            shifted_magnitude[:, ch] = magnitude[:, ch] + magnitude_offsets[ch]

        # Rebuild the complex spectrum and return to the spatial domain.
        rebuilt = shifted_magnitude * torch.exp(1j * shifted_phase)
        return torch.fft.ifft2(rebuilt).real.to(original_dtype)

    def main(self,
            latent_0_batch, latent_0_normal, latent_out_normal,
            phase_luminosity, phase_cyan_red, phase_lime_purple, phase_pattern_structure,
            magnitude_luminosity, magnitude_cyan_red, magnitude_lime_purple, magnitude_pattern_structure,
            phase_luminositys=None, phase_cyan_reds=None, phase_lime_purples=None, phase_pattern_structures=None,
            magnitude_luminositys=None, magnitude_cyan_reds=None, magnitude_lime_purples=None, magnitude_pattern_structures=None
            ):
        samples    = latent_0_batch["samples"].double()
        batch_size = samples.shape[0]

        # Broadcast each scalar to a per-frame schedule unless one was supplied.
        phase_luminositys            = initialize_or_scale(phase_luminositys, phase_luminosity, batch_size)
        phase_cyan_reds              = initialize_or_scale(phase_cyan_reds, phase_cyan_red, batch_size)
        phase_lime_purples           = initialize_or_scale(phase_lime_purples, phase_lime_purple, batch_size)
        phase_pattern_structures     = initialize_or_scale(phase_pattern_structures, phase_pattern_structure, batch_size)
        magnitude_luminositys        = initialize_or_scale(magnitude_luminositys, magnitude_luminosity, batch_size)
        magnitude_cyan_reds          = initialize_or_scale(magnitude_cyan_reds, magnitude_cyan_red, batch_size)
        magnitude_lime_purples       = initialize_or_scale(magnitude_lime_purples, magnitude_lime_purple, batch_size)
        magnitude_pattern_structures = initialize_or_scale(magnitude_pattern_structures, magnitude_pattern_structure, batch_size)

        output = torch.zeros(samples.shape, device=samples.device)

        if latent_0_normal == True:
            samples = latent_normalize_channels(samples)

        # Process frame-by-frame so each frame can use its scheduled offsets.
        for frame in range(batch_size):
            result = self.mix_latent_phase_magnitude(samples[frame:frame+1],
                    phase_luminositys[frame].item(), phase_cyan_reds[frame].item(), phase_lime_purples[frame].item(), phase_pattern_structures[frame].item(),
                    magnitude_luminositys[frame].item(), magnitude_cyan_reds[frame].item(), magnitude_lime_purples[frame].item(), magnitude_pattern_structures[frame].item()
                    )
            if latent_out_normal == True:
                result = latent_normalize_channels(result)
            output[frame, :, :, :] = result

        return ({"samples": output}, )
class LatentPhaseMagnitudePower:
    """Raises the FFT phase and magnitude of a latent's first four channels
    to per-channel exponents, then returns to the spatial domain."""

    @classmethod
    def INPUT_TYPES(s):
        float_spec    = ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.001})
        channel_names = ["luminosity", "cyan_red", "lime_purple", "pattern_structure"]

        required = {"latent_0_batch": ("LATENT",)}
        for prefix in ("phase", "magnitude"):
            for chan in channel_names:
                required[f"{prefix}_{chan}"] = float_spec
        required["latent_0_normal"]   = ("BOOLEAN", {"default": False})
        required["latent_out_normal"] = ("BOOLEAN", {"default": False})

        # SIGMAS inputs provide optional per-frame schedules for each widget.
        optional = {f"{prefix}_{chan}s": ("SIGMAS", )
                    for prefix in ("phase", "magnitude") for chan in channel_names}
        return {"required": required, "optional": optional}

    RETURN_TYPES = ("LATENT",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/latents"

    @staticmethod
    def latent_repeat(latent, batch_size):
        # Tile a single latent along the batch dimension.
        b, c, h, w = latent.shape
        tiled = torch.zeros((batch_size, c, h, w), dtype=latent.dtype, layout=latent.layout, device=latent.device)
        for idx in range(batch_size):
            tiled[idx] = latent
        return tiled

    @staticmethod
    def mix_latent_phase_magnitude(latent_0,
            phase_luminosity, phase_cyan_red, phase_lime_purple, phase_pattern_structure,
            magnitude_luminosity, magnitude_cyan_red, magnitude_lime_purple, magnitude_pattern_structure
            ):
        original_dtype = latent_0.dtype
        # fp32 FFT is too lossy here; do everything in double precision.
        latent_0 = latent_0.double()

        spectrum  = torch.fft.fft2(latent_0)
        phase     = torch.angle(spectrum)
        magnitude = torch.abs(spectrum)

        phase_exponents     = [phase_luminosity, phase_cyan_red, phase_lime_purple, phase_pattern_structure]
        magnitude_exponents = [magnitude_luminosity, magnitude_cyan_red, magnitude_lime_purple, magnitude_pattern_structure]

        # Only the first 4 channels are transformed; extra channels stay zero.
        # NOTE(review): negative phase values raised to non-integer exponents
        # produce NaN under torch.pow — presumably exponents are kept
        # integer-valued in practice.
        powered_phase     = torch.zeros_like(latent_0, dtype=latent_0.dtype, layout=latent_0.layout, device=latent_0.device)
        powered_magnitude = torch.zeros_like(latent_0, dtype=latent_0.dtype, layout=latent_0.layout, device=latent_0.device)
        for ch in range(4):
            powered_phase[:, ch]     = phase[:, ch] ** phase_exponents[ch]
            powered_magnitude[:, ch] = magnitude[:, ch] ** magnitude_exponents[ch]

        # Rebuild the complex spectrum and return to the spatial domain.
        rebuilt = powered_magnitude * torch.exp(1j * powered_phase)
        return torch.fft.ifft2(rebuilt).real.to(original_dtype)

    def main(self,
            latent_0_batch, latent_0_normal, latent_out_normal,
            phase_luminosity, phase_cyan_red, phase_lime_purple, phase_pattern_structure,
            magnitude_luminosity, magnitude_cyan_red, magnitude_lime_purple, magnitude_pattern_structure,
            phase_luminositys=None, phase_cyan_reds=None, phase_lime_purples=None, phase_pattern_structures=None,
            magnitude_luminositys=None, magnitude_cyan_reds=None, magnitude_lime_purples=None, magnitude_pattern_structures=None
            ):
        samples    = latent_0_batch["samples"].double()
        batch_size = samples.shape[0]

        # Broadcast each scalar to a per-frame schedule unless one was supplied.
        phase_luminositys            = initialize_or_scale(phase_luminositys, phase_luminosity, batch_size)
        phase_cyan_reds              = initialize_or_scale(phase_cyan_reds, phase_cyan_red, batch_size)
        phase_lime_purples           = initialize_or_scale(phase_lime_purples, phase_lime_purple, batch_size)
        phase_pattern_structures     = initialize_or_scale(phase_pattern_structures, phase_pattern_structure, batch_size)
        magnitude_luminositys        = initialize_or_scale(magnitude_luminositys, magnitude_luminosity, batch_size)
        magnitude_cyan_reds          = initialize_or_scale(magnitude_cyan_reds, magnitude_cyan_red, batch_size)
        magnitude_lime_purples       = initialize_or_scale(magnitude_lime_purples, magnitude_lime_purple, batch_size)
        magnitude_pattern_structures = initialize_or_scale(magnitude_pattern_structures, magnitude_pattern_structure, batch_size)

        output = torch.zeros(samples.shape, device=samples.device)

        if latent_0_normal == True:
            samples = latent_normalize_channels(samples)

        # Process frame-by-frame so each frame can use its scheduled exponents.
        for frame in range(batch_size):
            result = self.mix_latent_phase_magnitude(samples[frame:frame+1],
                    phase_luminositys[frame].item(), phase_cyan_reds[frame].item(), phase_lime_purples[frame].item(), phase_pattern_structures[frame].item(),
                    magnitude_luminositys[frame].item(), magnitude_cyan_reds[frame].item(), magnitude_lime_purples[frame].item(), magnitude_pattern_structures[frame].item()
                    )
            if latent_out_normal == True:
                result = latent_normalize_channels(result)
            output[frame, :, :, :] = result

        return ({"samples": output}, )
class StableCascade_StageC_VAEEncode_Exact:
    """Encodes an image to a Stage-C latent of an exact (width, height)
    latent resolution by resizing the image first."""

    def __init__(self, device="cpu"):
        self.device = device

    @classmethod
    def INPUT_TYPES(s):
        return {"required": {
            "image":  ("IMAGE",),
            "vae":    ("VAE", ),
            "width":  ("INT", {"default": 24, "min": 1, "max": 1024, "step": 1}),
            "height": ("INT", {"default": 24, "min": 1, "max": 1024, "step": 1}),
        }}

    RETURN_TYPES = ("LATENT",)
    RETURN_NAMES = ("stage_c",)
    FUNCTION     = "generate"
    CATEGORY     = "RES4LYF/vae"

    def generate(self, image, vae, width, height):
        # Pixel size that lands exactly on (width, height) latents after the
        # VAE's fixed downscale (downscale_ratio is 32 here).
        out_width  = width * vae.downscale_ratio
        out_height = height * vae.downscale_ratio
        # movedim(-1, 1): BHWC -> BCHW for the upscaler, then back afterwards.
        resized = comfy.utils.common_upscale(image.movedim(-1, 1), out_width, out_height, "lanczos", "center").movedim(1, -1)
        # Keep only RGB — slices off an alpha channel if one is present.
        c_latent = vae.encode(resized[:, :, :, :3])
        return ({
            "samples": c_latent,
        },)
class StableCascade_StageC_VAEEncode_Exact_Tiled:
    """Tiled Stage-C VAE encode for images too large to encode in one pass;
    tiles are feather-blended by tiled_scale_multidim."""

    def __init__(self, device="cpu"):
        self.device = device

    @classmethod
    def INPUT_TYPES(s):
        return {"required": {
            "image":     ("IMAGE",),
            "vae":       ("VAE", ),
            "tile_size": ("INT", {"default": 512, "min": 320, "max": 4096, "step": 64}),
            "overlap":   ("INT", {"default": 16, "min": 8, "max": 128, "step": 8}),
        }}

    RETURN_TYPES = ("LATENT",)
    RETURN_NAMES = ("stage_c",)
    FUNCTION     = "generate"
    CATEGORY     = "RES4LYF/vae"

    def generate(self, image, vae, tile_size, overlap):
        img_width  = image.shape[-2]  # currently unused, kept for reference
        img_height = image.shape[-3]
        scale = vae.downscale_ratio  # 32 for Stage C
        # NOTE(review): `upscale_amount` is passed as the downscale ratio,
        # which sizes the output grid as input * ratio — verify this matches
        # tiled_scale_multidim's expectations for the encode direction.
        image = image.movedim(-1, 1)  # BHWC -> BCHW
        encode_fn = lambda img: vae.encode(img.to(vae.device)).to("cpu")
        c_latent = tiled_scale_multidim(
            image, encode_fn,
            tile=(tile_size // 8, tile_size // 8),
            overlap=overlap,
            upscale_amount=scale,
            out_channels=16,
            output_device=self.device
        )
        return ({
            "samples": c_latent,
        },)
@torch.inference_mode()
def tiled_scale_multidim(samples, function, tile=(64, 64), overlap=8, upscale_amount=4, out_channels=3, output_device="cpu", pbar=None):
dims = len(tile)
output_shape = [samples.shape[0], out_channels] + list(map(lambda a: round(a * upscale_amount), samples.shape[2:]))
output = torch.zeros(output_shape, device=output_device)
for b in range(samples.shape[0]):
for it in itertools.product(*map(lambda a: range(0, a[0], a[1] - overlap), zip(samples.shape[2:], tile))):
s_in = samples[b:b+1]
upscaled = []
for d in range(dims):
pos = max(0, min(s_in.shape[d + 2] - overlap, it[d]))
l = min(tile[d], s_in.shape[d + 2] - pos)
s_in = s_in.narrow(d + 2, pos, l)
upscaled.append(round(pos * upscale_amount))
ps = function(s_in).to(output_device)
mask = torch.ones_like(ps)
feather = round(overlap * upscale_amount)
for t in range(feather):
for d in range(2, dims + 2):
mask.narrow(d, t, 1).mul_((1.0 / feather) * (t + 1))
mask.narrow(d, mask.shape[d] - 1 - t, 1).mul_((1.0 / feather) * (t + 1))
o = output[b:b+1]
for d in range(dims):
o = o.narrow(d + 2, upscaled[d], mask.shape[d + 2])
o.add_(ps * mask)
if pbar is not None:
pbar.update(1)
return output
class EmptyLatentImageCustom:
def __init__(self):
self.device = comfy.model_management.intermediate_device()
@classmethod
def INPUT_TYPES(s):
return {"required": {
"width": ("INT", {"default": 24, "min": 1, "max": MAX_RESOLUTION, "step": 1}),
"height": ("INT", {"default": 24, "min": 1, "max": MAX_RESOLUTION, "step": 1}),
"batch_size": ("INT", {"default": 1, "min": 1, "max": 4096}),
"channels": (['4', '16'], {"default": '4'}),
"mode": (['sdxl', 'cascade_b', 'cascade_c', 'exact'], {"default": 'default'}),
"compression": ("INT", {"default": 42, "min": 4, "max": 128, "step": 1}),
"precision": (['fp16', 'fp32', 'fp64'], {"default": 'fp32'}),
}}
RETURN_TYPES = ("LATENT",)
FUNCTION = "generate"
CATEGORY = "RES4LYF/latents"
def generate(self, width, height, batch_size, channels, mode, compression, precision):
c = int(channels)
ratio = 1
match mode:
case "sdxl":
ratio = 8
case "cascade_b":
ratio = 4
case "cascade_c":
ratio = compression
case "exact":
ratio = 1
dtype=torch.float32
match precision:
case "fp16":
dtype=torch.float16
case "fp32":
dtype=torch.float32
case "fp64":
dtype=torch.float64
latent = torch.zeros([batch_size, c, height // ratio, width // ratio], dtype=dtype, device=self.device)
return ({"samples":latent}, )
class EmptyLatentImage64:
    """Create an empty SD-style latent (4 channels, 8x downscale) in float64."""

    def __init__(self):
        self.device = comfy.model_management.intermediate_device()

    @classmethod
    def INPUT_TYPES(s):
        return {"required": {
            "width":      ("INT", {"default": 1024, "min": 16, "max": MAX_RESOLUTION, "step": 8}),
            "height":     ("INT", {"default": 1024, "min": 16, "max": MAX_RESOLUTION, "step": 8}),
            "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096}),
        }}

    RETURN_TYPES = ("LATENT",)
    FUNCTION = "generate"
    CATEGORY = "RES4LYF/latents"

    def generate(self, width, height, batch_size=1):
        """Return a zero latent of shape [batch, 4, height//8, width//8] in fp64."""
        shape = [batch_size, 4, height // 8, width // 8]
        return ({"samples": torch.zeros(shape, dtype=torch.float64, device=self.device)}, )
"""class CheckpointLoader32:
@classmethod
def INPUT_TYPES(s):
return {"required": { "config_name": (folder_paths.get_filename_list("configs"), ),
"ckpt_name": (folder_paths.get_filename_list("checkpoints"), )}}
RETURN_TYPES = ("MODEL", "CLIP", "VAE")
FUNCTION = "load_checkpoint"
CATEGORY = "advanced/loaders"
def load_checkpoint(self, config_name, ckpt_name, output_vae=True, output_clip=True):
#torch.set_default_dtype(torch.float64)
config_path = folder_paths.get_full_path("configs", config_name)
ckpt_path = folder_paths.get_full_path("checkpoints", ckpt_name)
return comfy.sd.load_checkpoint(config_path, ckpt_path, output_vae=True, output_clip=True, embedding_directory=folder_paths.get_folder_paths("embeddings"))"""
MAX_RESOLUTION=8192
class LatentNoiseBatch_perlin:
    """Generate a batch of perlin-derived latent noise, pushed through an
    erfinv (probit) transform so the per-pixel distribution is roughly Gaussian."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {"required": {
            "seed":         ("INT",   {"default": 0, "min": 0, "max": 0xffffffffffffffff}),
            "width":        ("INT",   {"default": 1024, "min": 8, "max": MAX_RESOLUTION, "step": 8}),
            "height":       ("INT",   {"default": 1024, "min": 8, "max": MAX_RESOLUTION, "step": 8}),
            "batch_size":   ("INT",   {"default": 1, "min": 1, "max": 256}),
            "detail_level": ("FLOAT", {"default": 0, "min": -1, "max": 1.0, "step": 0.1}),
        },
        "optional": {
            "details": ("SIGMAS", ),
        }}

    RETURN_TYPES = ("LATENT",)
    FUNCTION = "create_noisy_latents_perlin"
    CATEGORY = "RES4LYF/noise"

    # found at https://gist.github.com/vadimkantorov/ac1b097753f217c5c11bc2ff396e0a57
    # which was ported from https://github.com/pvigier/perlin-numpy/blob/master/perlin2d.py
    def rand_perlin_2d(self, shape, res, fade=lambda t: 6*t**5 - 15*t**4 + 10*t**3):
        """Single-octave 2D perlin noise of size `shape` using `res` gradient cells."""
        delta = (res[0] / shape[0], res[1] / shape[1])
        d = (shape[0] // res[0], shape[1] // res[1])
        # Fix: pass indexing="ij" explicitly — this matches the historical
        # default and silences the torch.meshgrid deprecation warning
        # (behavior unchanged).
        grid = torch.stack(torch.meshgrid(torch.arange(0, res[0], delta[0]), torch.arange(0, res[1], delta[1]), indexing="ij"), dim=-1) % 1
        angles = 2*math.pi*torch.rand(res[0]+1, res[1]+1)
        gradients = torch.stack((torch.cos(angles), torch.sin(angles)), dim=-1)
        tile_grads = lambda slice1, slice2: gradients[slice1[0]:slice1[1], slice2[0]:slice2[1]].repeat_interleave(d[0], 0).repeat_interleave(d[1], 1)
        dot = lambda grad, shift: (torch.stack((grid[:shape[0], :shape[1], 0] + shift[0], grid[:shape[0], :shape[1], 1] + shift[1]), dim=-1) * grad[:shape[0], :shape[1]]).sum(dim=-1)
        # Dot products against the four surrounding cell-corner gradients,
        # blended bilinearly with the fade curve.
        n00 = dot(tile_grads([0, -1], [0, -1]), [0, 0])
        n10 = dot(tile_grads([1, None], [0, -1]), [-1, 0])
        n01 = dot(tile_grads([0, -1], [1, None]), [0, -1])
        n11 = dot(tile_grads([1, None], [1, None]), [-1, -1])
        t = fade(grid[:shape[0], :shape[1]])
        return math.sqrt(2) * torch.lerp(torch.lerp(n00, n10, t[..., 0]), torch.lerp(n01, n11, t[..., 0]), t[..., 1])

    def rand_perlin_2d_octaves(self, shape, res, octaves=1, persistence=0.5):
        """Sum `octaves` perlin layers, then fold the result into [0, 1)."""
        noise = torch.zeros(shape)
        frequency = 1
        amplitude = 1
        for _ in range(octaves):
            noise += amplitude * self.rand_perlin_2d(shape, (frequency*res[0], frequency*res[1]))
            frequency *= 2
            amplitude *= persistence
        # Pseudo-hash: magnify and wrap the values into [0, 1) to decorrelate them.
        noise = torch.remainder(torch.abs(noise)*1000000, 11)/11
        # noise = (torch.sin(torch.remainder(noise*1000000,83))+1)/2
        return noise

    def scale_tensor(self, x):
        """Min-max normalize `x` into [0, 1]."""
        min_value = x.min()
        max_value = x.max()
        x = (x - min_value) / (max_value - min_value)
        return x

    def create_noisy_latents_perlin(self, seed, width, height, batch_size, detail_level, details=None):
        """Build a (batch, 4, height//8, width//8) noise latent.

        `details` (SIGMAS) optionally scales the detail level per batch item;
        note it is indexed by batch position, so a connected schedule must be
        at least `batch_size` long.
        """
        if details is None:
            details = torch.full((10000,), detail_level)
        else:
            details = detail_level * details
        torch.manual_seed(seed)
        noise = torch.zeros((batch_size, 4, height // 8, width // 8), dtype=torch.float32, device="cpu").cpu()
        for i in range(batch_size):
            for j in range(4):
                noise_values = self.rand_perlin_2d_octaves((height // 8, width // 8), (1, 1), 1, 1)
                # Probit transform: map uniform-ish [0,1) values toward a
                # standard normal, scaled by the per-item detail level.
                result = (1+details[i]/10)*torch.erfinv(2 * noise_values - 1) * (2 ** 0.5)
                result = torch.clamp(result, -5, 5)
                noise[i, j, :, :] = result
        return ({"samples": noise},)
class LatentNoiseBatch_gaussian_channels:
    """Produce a batch of gaussian-noise latents added to the input latent,
    with independent mean offsets for the four SDXL-style latent channels
    (luminosity, cyan/red, lime/purple, pattern/structure)."""

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "latent": ("LATENT",),
                "mean": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.001}),
                "mean_luminosity": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.001}),
                "mean_cyan_red": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.001}),
                "mean_lime_purple": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.001}),
                "mean_pattern_structure": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.001}),
                "std": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.001}),
                "steps": ("INT", {"default": 0, "min": -10000, "max": 10000}),
                "seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}),
            },
            "optional": {
                "means": ("SIGMAS", ),
                "mean_luminositys": ("SIGMAS", ),
                "mean_cyan_reds": ("SIGMAS", ),
                "mean_lime_purples": ("SIGMAS", ),
                "mean_pattern_structures": ("SIGMAS", ),
                "stds": ("SIGMAS", ),
            }
        }

    RETURN_TYPES = ("LATENT",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/noise"

    @staticmethod
    def gaussian_noise_channels(x, mean_luminosity=-0.1, mean_cyan_red=0.0, mean_lime_purple=0.0, mean_pattern_structure=0.0):
        """Add a per-channel mean offset to the first four channels of `x`;
        returns a tensor of shape (1, 4, h, w)."""
        x = x.squeeze(0)
        luminosity = x[0:1] + mean_luminosity
        cyan_red = x[1:2] + mean_cyan_red
        lime_purple = x[2:3] + mean_lime_purple
        pattern_structure = x[3:4] + mean_pattern_structure
        x = torch.unsqueeze(torch.cat([luminosity, cyan_red, lime_purple, pattern_structure]), 0)
        return x

    def main(self, latent, steps, seed,
             mean, mean_luminosity, mean_cyan_red, mean_lime_purple, mean_pattern_structure, std,
             means=None, mean_luminositys=None, mean_cyan_reds=None, mean_lime_purples=None, mean_pattern_structures=None, stds=None):
        """Generate `steps` noised copies of the latent; per-step parameters
        may be scheduled via the optional SIGMAS inputs."""
        if steps == 0:
            # Fix: previously this crashed with an opaque "len(None)" TypeError
            # when steps == 0 and no schedule was connected.
            if means is None:
                raise ValueError("LatentNoiseBatch_gaussian_channels: steps == 0 requires a 'means' input to infer the step count.")
            steps = len(means)

        x = latent["samples"]
        b, c, h, w = x.shape
        # NOTE(review): output is hard-coded to 4 channels; latents with c != 4
        # would fail on the `x + noise` broadcast below — confirm intended.
        noise_latents = torch.zeros([steps, 4, h, w], dtype=x.dtype, layout=x.layout, device=x.device)
        noise_sampler = NOISE_GENERATOR_CLASSES.get('gaussian')(x=x, seed=seed)

        means = initialize_or_scale(means, mean, steps)
        mean_luminositys = initialize_or_scale(mean_luminositys, mean_luminosity, steps)
        mean_cyan_reds = initialize_or_scale(mean_cyan_reds, mean_cyan_red, steps)
        mean_lime_purples = initialize_or_scale(mean_lime_purples, mean_lime_purple, steps)
        mean_pattern_structures = initialize_or_scale(mean_pattern_structures, mean_pattern_structure, steps)
        stds = initialize_or_scale(stds, std, steps)

        for i in range(steps):
            noise = noise_sampler(mean=means[i].item(), std=stds[i].item())
            noise = self.gaussian_noise_channels(noise, mean_luminositys[i].item(), mean_cyan_reds[i].item(), mean_lime_purples[i].item(), mean_pattern_structures[i].item())
            noise_latents[i] = x + noise
        return ({"samples": noise_latents}, )
class LatentNoiseBatch_gaussian:
    """Emit a batch of pure gaussian-noise latents shaped like the input latent."""

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "latent": ("LATENT",),
                "mean":   ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.001}),
                "std":    ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.001}),
                "steps":  ("INT", {"default": 0, "min": -10000, "max": 10000}),
                "seed":   ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}),
            },
            "optional": {
                "means":  ("SIGMAS", ),
                "stds":   ("SIGMAS", ),
                "steps_": ("SIGMAS", ),
            }
        }

    RETURN_TYPES = ("LATENT",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/noise"

    def main(self, latent, mean, std, steps, seed, means=None, stds=None, steps_=None):
        """Sample `steps` gaussian noise latents; per-step mean/std may be
        scheduled through the optional SIGMAS inputs."""
        # A connected steps_ schedule overrides the integer step count.
        if steps_ is not None:
            steps = len(steps_)
        means = initialize_or_scale(means, mean, steps)
        stds = initialize_or_scale(stds, std, steps)

        x = latent["samples"]
        _, c, h, w = x.shape
        batch = torch.zeros([steps, c, h, w], dtype=x.dtype, layout=x.layout, device=x.device)
        sampler = NOISE_GENERATOR_CLASSES.get('gaussian')(x=x, seed=seed)
        for step in range(steps):
            batch[step] = sampler(mean=means[step].item(), std=stds[step].item())
        return ({"samples": batch}, )
class LatentNoiseBatch_fractal:
    """Emit a batch of 1/f ("fractal") noise latents shaped like the input latent."""

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "latent": ("LATENT",),
                "alpha":  ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.001}),
                "k_flip": ("BOOLEAN", {"default": False}),
                "steps":  ("INT", {"default": 0, "min": -10000, "max": 10000}),
                "seed":   ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}),
            },
            "optional": {
                "alphas": ("SIGMAS", ),
                "ks":     ("SIGMAS", ),
                "steps_": ("SIGMAS", ),
            }
        }

    RETURN_TYPES = ("LATENT",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/noise"

    def main(self, latent, alpha, k_flip, steps, seed=42, alphas=None, ks=None, sigmas_=None, steps_=None):
        """Sample `steps` fractal-noise latents; alpha/k may be scheduled via SIGMAS.
        (sigmas_ is accepted for backward compatibility but unused.)"""
        # A connected steps_ schedule overrides the integer step count.
        if steps_ is not None:
            steps = len(steps_)
        alphas = initialize_or_scale(alphas, alpha, steps)
        # k_flip toggles the sign of the spectral exponent k.
        k_sign = -1 if k_flip else 1
        ks = initialize_or_scale(ks, k_sign, steps)

        x = latent["samples"]
        _, c, h, w = x.shape
        batch = torch.zeros([steps, c, h, w], dtype=x.dtype, layout=x.layout, device=x.device)
        sampler = NOISE_GENERATOR_CLASSES.get('fractal')(x=x, seed=seed)
        for step in range(steps):
            batch[step] = sampler(alpha=alphas[step].item(), k=ks[step].item(), scale=0.1)
        return ({"samples": batch}, )
class LatentNoiseList:
    """Emit a LIST of latents, each being the input latent plus one fresh draw
    of fractal noise (OUTPUT_IS_LIST, unlike the *_Batch nodes)."""

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "latent": ("LATENT",),
                "alpha":  ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.001}),
                "k_flip": ("BOOLEAN", {"default": False}),
                "steps":  ("INT", {"default": 0, "min": -10000, "max": 10000}),
                "seed":   ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}),
            },
            "optional": {
                "alphas": ("SIGMAS", ),
                "ks":     ("SIGMAS", ),
            }
        }

    RETURN_TYPES = ("LATENT",)
    OUTPUT_IS_LIST = (True,)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/noise"

    def main(self, seed, latent, alpha, k_flip, steps, alphas=None, ks=None):
        """Return a list of `steps` noised copies of the input latent.

        (The previous version also assigned latent_samples.shape to an unused
        local `size`; it has been removed.)
        """
        alphas = initialize_or_scale(alphas, alpha, steps)
        k_flip = -1 if k_flip else 1
        ks = initialize_or_scale(ks, k_flip, steps)

        latent_samples = latent["samples"]
        latents = []
        # steps == 0 means "derive the count from the alphas schedule".
        # NOTE(review): this resolution happens *after* initialize_or_scale is
        # called with steps == 0 — confirm initialize_or_scale tolerates that.
        steps = len(alphas) if steps == 0 else steps
        noise_sampler = NOISE_GENERATOR_CLASSES.get('fractal')(x=latent_samples, seed=seed)
        for i in range(steps):
            noise = noise_sampler(alpha=alphas[i].item(), k=ks[i].item(), scale=0.1)
            latents.append({"samples": latent_samples + noise})
        return (latents, )
class LatentBatch_channels:
    """Adjust the four latent channels of each batch item by a per-channel
    offset, multiply, or power operation."""

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "latent": ("LATENT",),
                "mode": (["offset", "multiply", "power"],),
                "luminosity":        ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.01}),
                "cyan_red":          ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.01}),
                "lime_purple":       ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.01}),
                "pattern_structure": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.01}),
            },
            "optional": {
                "luminositys":        ("SIGMAS", ),
                "cyan_reds":          ("SIGMAS", ),
                "lime_purples":       ("SIGMAS", ),
                "pattern_structures": ("SIGMAS", ),
            }
        }

    RETURN_TYPES = ("LATENT",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/latents"

    @staticmethod
    def latent_channels_multiply(x, luminosity=-0.1, cyan_red=0.0, lime_purple=0.0, pattern_structure=0.0):
        """Scale each of the four channels of `x` (c, h, w); returns (1, 4, h, w)."""
        scaled = [x[0:1] * luminosity, x[1:2] * cyan_red, x[2:3] * lime_purple, x[3:4] * pattern_structure]
        return torch.unsqueeze(torch.cat(scaled), 0)

    @staticmethod
    def latent_channels_offset(x, luminosity=-0.1, cyan_red=0.0, lime_purple=0.0, pattern_structure=0.0):
        """Add a constant to each of the four channels of `x`; returns (1, 4, h, w)."""
        shifted = [x[0:1] + luminosity, x[1:2] + cyan_red, x[2:3] + lime_purple, x[3:4] + pattern_structure]
        return torch.unsqueeze(torch.cat(shifted), 0)

    @staticmethod
    def latent_channels_power(x, luminosity=-0.1, cyan_red=0.0, lime_purple=0.0, pattern_structure=0.0):
        """Raise each of the four channels of `x` to a per-channel exponent; returns (1, 4, h, w)."""
        raised = [x[0:1] ** luminosity, x[1:2] ** cyan_red, x[2:3] ** lime_purple, x[3:4] ** pattern_structure]
        return torch.unsqueeze(torch.cat(raised), 0)

    def main(self, latent, mode,
             luminosity, cyan_red, lime_purple, pattern_structure,
             luminositys=None, cyan_reds=None, lime_purples=None, pattern_structures=None):
        """Apply the selected per-channel operation to every latent in the batch;
        per-item values may be scheduled via the optional SIGMAS inputs."""
        x = latent["samples"]
        b, c, h, w = x.shape
        out = torch.zeros([b, c, h, w], dtype=x.dtype, layout=x.layout, device=x.device)

        luminositys = initialize_or_scale(luminositys, luminosity, b)
        cyan_reds = initialize_or_scale(cyan_reds, cyan_red, b)
        lime_purples = initialize_or_scale(lime_purples, lime_purple, b)
        pattern_structures = initialize_or_scale(pattern_structures, pattern_structure, b)

        op = {"offset":   self.latent_channels_offset,
              "multiply": self.latent_channels_multiply,
              "power":    self.latent_channels_power}[mode]
        for i in range(b):
            out[i] = op(x[i], luminositys[i].item(), cyan_reds[i].item(), lime_purples[i].item(), pattern_structures[i].item())
        return ({"samples": out}, )
class LatentBatch_channels_16:
    """Per-channel offset/multiply/power adjustment for 16-channel latents.

    Refactor: the sixteen nearly-identical statements in each helper (and the
    sixteen initialize_or_scale calls in main) are collapsed into loops; the
    public interface and behavior are unchanged.
    """

    @classmethod
    def INPUT_TYPES(s):
        required = {
            "latent": ("LATENT",),
            "mode": (["offset", "multiply", "power"],),
        }
        optional = {}
        for i in range(1, 17):
            required[f"chan_{i}"] = ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.01})
            optional[f"chan_{i}s"] = ("SIGMAS", )
        return {"required": required, "optional": optional}

    RETURN_TYPES = ("LATENT",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/latents"

    @staticmethod
    def latent_channels_multiply(x, chan_1=0.0, chan_2=0.0, chan_3=0.0, chan_4=0.0, chan_5=0.0, chan_6=0.0, chan_7=0.0, chan_8=0.0, chan_9=0.0, chan_10=0.0, chan_11=0.0, chan_12=0.0, chan_13=0.0, chan_14=0.0, chan_15=0.0, chan_16=0.0):
        """Scale each of the 16 channels of `x` (c, h, w); returns (1, 16, h, w)."""
        factors = (chan_1, chan_2, chan_3, chan_4, chan_5, chan_6, chan_7, chan_8, chan_9, chan_10, chan_11, chan_12, chan_13, chan_14, chan_15, chan_16)
        return torch.unsqueeze(torch.cat([x[i:i + 1] * f for i, f in enumerate(factors)]), 0)

    @staticmethod
    def latent_channels_offset(x, chan_1=0.0, chan_2=0.0, chan_3=0.0, chan_4=0.0, chan_5=0.0, chan_6=0.0, chan_7=0.0, chan_8=0.0, chan_9=0.0, chan_10=0.0, chan_11=0.0, chan_12=0.0, chan_13=0.0, chan_14=0.0, chan_15=0.0, chan_16=0.0):
        """Add a constant to each of the 16 channels of `x`; returns (1, 16, h, w)."""
        offsets = (chan_1, chan_2, chan_3, chan_4, chan_5, chan_6, chan_7, chan_8, chan_9, chan_10, chan_11, chan_12, chan_13, chan_14, chan_15, chan_16)
        return torch.unsqueeze(torch.cat([x[i:i + 1] + o for i, o in enumerate(offsets)]), 0)

    @staticmethod
    def latent_channels_power(x, chan_1=0.0, chan_2=0.0, chan_3=0.0, chan_4=0.0, chan_5=0.0, chan_6=0.0, chan_7=0.0, chan_8=0.0, chan_9=0.0, chan_10=0.0, chan_11=0.0, chan_12=0.0, chan_13=0.0, chan_14=0.0, chan_15=0.0, chan_16=0.0):
        """Raise each of the 16 channels of `x` to its exponent; returns (1, 16, h, w)."""
        exponents = (chan_1, chan_2, chan_3, chan_4, chan_5, chan_6, chan_7, chan_8, chan_9, chan_10, chan_11, chan_12, chan_13, chan_14, chan_15, chan_16)
        return torch.unsqueeze(torch.cat([x[i:i + 1] ** e for i, e in enumerate(exponents)]), 0)

    def main(self, latent, mode,
             chan_1, chan_2, chan_3, chan_4, chan_5, chan_6, chan_7, chan_8, chan_9, chan_10, chan_11, chan_12, chan_13, chan_14, chan_15, chan_16,
             chan_1s=None, chan_2s=None, chan_3s=None, chan_4s=None, chan_5s=None, chan_6s=None, chan_7s=None, chan_8s=None, chan_9s=None, chan_10s=None, chan_11s=None, chan_12s=None, chan_13s=None, chan_14s=None, chan_15s=None, chan_16s=None):
        """Apply the selected per-channel operation to every latent in the batch;
        per-item values may be scheduled via the optional SIGMAS inputs."""
        x = latent["samples"]
        b, c, h, w = x.shape
        out = torch.zeros([b, c, h, w], dtype=x.dtype, layout=x.layout, device=x.device)

        values = (chan_1, chan_2, chan_3, chan_4, chan_5, chan_6, chan_7, chan_8, chan_9, chan_10, chan_11, chan_12, chan_13, chan_14, chan_15, chan_16)
        schedules = (chan_1s, chan_2s, chan_3s, chan_4s, chan_5s, chan_6s, chan_7s, chan_8s, chan_9s, chan_10s, chan_11s, chan_12s, chan_13s, chan_14s, chan_15s, chan_16s)
        per_channel = [initialize_or_scale(sched, val, b) for sched, val in zip(schedules, values)]

        op = {"offset":   self.latent_channels_offset,
              "multiply": self.latent_channels_multiply,
              "power":    self.latent_channels_power}[mode]
        for i in range(b):
            out[i] = op(x[i], *[sched[i].item() for sched in per_channel])
        return ({"samples": out}, )
class latent_normalize_channels:
    """Normalize, center, or standardize a latent — either over the whole
    tensor ("full") or independently per (batch, channel) slice ("channels").
    Channel mode writes back into the input tensor in place."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "latent":    ("LATENT", ),
                "mode":      (["full", "channels"],),
                "operation": (["normalize", "center", "standardize"],),
            },
        }

    RETURN_TYPES = ("LATENT",)
    RETURN_NAMES = ("passthrough",)
    CATEGORY = "RES4LYF/latents"
    FUNCTION = "main"

    def main(self, latent, mode, operation):
        """Return the latent after the requested statistical adjustment."""
        x = latent["samples"]
        b, c, h, w = x.shape
        if mode == "full":
            if operation == "normalize":
                x = (x - x.mean()) / x.std()
            elif operation == "center":
                x = x - x.mean()
            elif operation == "standardize":
                x = x / x.std()
        elif mode == "channels":
            # Per-slice statistics; note this mutates x in place.
            for bi in range(b):
                for ci in range(c):
                    chan = x[bi, ci]
                    if operation == "normalize":
                        x[bi, ci] = (chan - chan.mean()) / chan.std()
                    elif operation == "center":
                        x[bi, ci] = chan - chan.mean()
                    elif operation == "standardize":
                        x[bi, ci] = chan / chan.std()
        return ({"samples": x},)
def hard_light_blend(base_latent, blend_latent):
if base_latent.sum() == 0 and base_latent.std() == 0:
return base_latent
blend_latent = (blend_latent - blend_latent.min()) / (blend_latent.max() - blend_latent.min())
positive_mask = base_latent >= 0
negative_mask = base_latent < 0
positive_latent = base_latent * positive_mask.float()
negative_latent = base_latent * negative_mask.float()
positive_result = torch.where(blend_latent < 0.5,
2 * positive_latent * blend_latent,
1 - 2 * (1 - positive_latent) * (1 - blend_latent))
negative_result = torch.where(blend_latent < 0.5,
2 * negative_latent.abs() * blend_latent,
1 - 2 * (1 - negative_latent.abs()) * (1 - blend_latent))
negative_result = -negative_result
combined_result = positive_result * positive_mask.float() + negative_result * negative_mask.float()
#combined_result *= base_latent.max()
ks = combined_result
ks2 = torch.zeros_like(base_latent)
for n in range(base_latent.shape[1]):
ks2[0][n] = (ks[0][n]) / ks[0][n].std()
ks2[0][n] = (ks2[0][n] * base_latent[0][n].std())
combined_result = ks2
return combined_result
================================================
FILE: legacy/legacy_sampler_rk.py
================================================
import torch
import torch.nn.functional as F
from tqdm.auto import trange
import math
import copy
import gc
import comfy.model_patcher
from .noise_classes import NOISE_GENERATOR_CLASSES_SIMPLE, NOISE_GENERATOR_CLASSES
from .deis_coefficients import get_deis_coeff_list
from .latents import hard_light_blend
from .noise_sigmas_timesteps_scaling import get_res4lyf_step_with_model, get_res4lyf_half_step3
def get_epsilon(model, x, sigma, **extra_args):
    """Run the denoising model once and convert its x0 prediction to an
    epsilon (noise) prediction: eps = (x - x0) / sigma."""
    sigma_batch = sigma * x.new_ones([x.shape[0]])
    denoised = model(x, sigma_batch, **extra_args)
    return (x - denoised) / sigma_batch
def get_denoised(model, x, sigma, **extra_args):
    """Run the denoising model once and return its denoised (x0) prediction."""
    sigma_batch = sigma * x.new_ones([x.shape[0]])
    return model(x, sigma_batch, **extra_args)
# Remainder solution
def __phi(j, neg_h):
    """Remainder-series evaluation of phi_j(-h) for a tensor `neg_h`:
    phi_j(-h) = (exp(-h) - sum_{k<j} (-h)^k / k!) / (-h)^j."""
    partial = torch.zeros_like(neg_h)
    for k in range(j):
        partial = partial + neg_h**k / math.factorial(k)
    return (neg_h.exp() - partial) / neg_h**j
def calculate_gamma(c2, c3):
    """Gamma coefficient for a 3-stage exponential RK method from node
    positions c2 and c3: (3*c3^3 - 2*c3) / (c2*(2 - 3*c2))."""
    numerator = 3 * (c3 ** 3) - 2 * c3
    denominator = c2 * (2 - 3 * c2)
    return numerator / denominator
from typing import Optional
def _gamma(n: int,) -> int:
    """
    https://en.wikipedia.org/wiki/Gamma_function
    For every positive integer n, Γ(n) = (n-1)!.
    """
    return math.factorial(n - 1)


def _incomplete_gamma(s: int, x: float, gamma_s: Optional[int] = None) -> float:
    """
    https://en.wikipedia.org/wiki/Incomplete_gamma_function#Special_values
    For a positive integer s: Γ(s, x) = (s-1)! * e^(-x) * Σ_{k=0..s-1} x^k / k!.
    `gamma_s` may be supplied to avoid recomputing Γ(s).
    """
    if gamma_s is None:
        gamma_s = _gamma(s)
    series = sum(x**k / math.factorial(k) for k in range(s))
    return series * math.exp(-x) * gamma_s


# Exact analytic solution originally calculated by Clybius. https://github.com/Clybius/ComfyUI-Extra-Samplers/tree/main
def phi(j: int, neg_h: float, ):
    """
    Exact phi_j(-h) via the incomplete gamma function (Lemma 1,
    https://arxiv.org/abs/2308.02157):

        phi_j(-h) = e^(-h) * (-h)^(-j) * (1 - Γ(j, -h)/Γ(j))

    Requires j > 0. For j in {1,2,3} the closed-form phi_1/phi_2/phi_3
    expressions are cheaper alternatives.
    """
    assert j > 0
    gamma_ = _gamma(j)
    incomplete = _incomplete_gamma(j, neg_h, gamma_s=gamma_)
    return math.exp(neg_h) * neg_h**-j * (1 - incomplete / gamma_)
# Butcher tableaus for the fixed (non-exponential) Runge-Kutta methods.
# Each entry maps a method name to a pair (ab, ci):
#   ab — the stage-coefficient rows (A matrix), with the OUTPUT weight row b
#        appended as the final row;
#   ci — the stage nodes c (the sampler appends a trailing 1 at lookup time).
# Implicit families (Gauss-Legendre, Radau IIA, Lobatto IIIC) have full rows;
# explicit families are lower-triangular.
rk_coeff = {
    "gauss-legendre_5s": (  # 5-stage Gauss-Legendre (implicit, order 10)
        [
            [4563950663 / 32115191526,
             (310937500000000 / 2597974476091533 + 45156250000 * (739**0.5) / 8747388808389),
             (310937500000000 / 2597974476091533 - 45156250000 * (739**0.5) / 8747388808389),
             (5236016175 / 88357462711 + 709703235 * (739**0.5) / 353429850844),
             (5236016175 / 88357462711 - 709703235 * (739**0.5) / 353429850844)],
            [(4563950663 / 32115191526 - 38339103 * (739**0.5) / 6250000000),
             (310937500000000 / 2597974476091533 + 9557056475401 * (739**0.5) / 3498955523355600000),
             (310937500000000 / 2597974476091533 - 14074198220719489 * (739**0.5) / 3498955523355600000),
             (5236016175 / 88357462711 + 5601362553163918341 * (739**0.5) / 2208936567775000000000),
             (5236016175 / 88357462711 - 5040458465159165409 * (739**0.5) / 2208936567775000000000)],
            [(4563950663 / 32115191526 + 38339103 * (739**0.5) / 6250000000),
             (310937500000000 / 2597974476091533 + 14074198220719489 * (739**0.5) / 3498955523355600000),
             (310937500000000 / 2597974476091533 - 9557056475401 * (739**0.5) / 3498955523355600000),
             (5236016175 / 88357462711 + 5040458465159165409 * (739**0.5) / 2208936567775000000000),
             (5236016175 / 88357462711 - 5601362553163918341 * (739**0.5) / 2208936567775000000000)],
            [(4563950663 / 32115191526 - 38209 * (739**0.5) / 7938810),
             (310937500000000 / 2597974476091533 - 359369071093750 * (739**0.5) / 70145310854471391),
             (310937500000000 / 2597974476091533 - 323282178906250 * (739**0.5) / 70145310854471391),
             (5236016175 / 88357462711 - 470139 * (739**0.5) / 1413719403376),
             (5236016175 / 88357462711 - 44986764863 * (739**0.5) / 21205791050640)],
            [(4563950663 / 32115191526 + 38209 * (739**0.5) / 7938810),
             (310937500000000 / 2597974476091533 + 359369071093750 * (739**0.5) / 70145310854471391),
             (310937500000000 / 2597974476091533 + 323282178906250 * (739**0.5) / 70145310854471391),
             (5236016175 / 88357462711 + 44986764863 * (739**0.5) / 21205791050640),
             (5236016175 / 88357462711 + 470139 * (739**0.5) / 1413719403376)],
            [4563950663 / 16057595763,
             621875000000000 / 2597974476091533,
             621875000000000 / 2597974476091533,
             10472032350 / 88357462711,
             10472032350 / 88357462711]
        ],
        [
            1 / 2,
            1 / 2 - 99 * (739**0.5) / 10000,
            1 / 2 + 99 * (739**0.5) / 10000,
            1 / 2 - (739**0.5) / 60,
            1 / 2 + (739**0.5) / 60
        ]
    ),
    "gauss-legendre_4s": (  # 4-stage Gauss-Legendre (implicit)
        [
            [1/4, 1/4 - 15**0.5 / 6, 1/4 + 15**0.5 / 6, 1/4],
            [1/4 + 15**0.5 / 6, 1/4, 1/4 - 15**0.5 / 6, 1/4],
            [1/4, 1/4 + 15**0.5 / 6, 1/4, 1/4 - 15**0.5 / 6],
            [1/4 - 15**0.5 / 6, 1/4, 1/4 + 15**0.5 / 6, 1/4],
            [1/8, 3/8, 3/8, 1/8]
        ],
        [
            1/2 - 15**0.5 / 10,
            1/2 + 15**0.5 / 10,
            1/2 + 15**0.5 / 10,
            1/2 - 15**0.5 / 10
        ]
    ),
    "gauss-legendre_3s": (  # 3-stage Gauss-Legendre (implicit, order 6)
        [
            [5/36, 2/9 - 15**0.5 / 15, 5/36 - 15**0.5 / 30],
            [5/36 + 15**0.5 / 24, 2/9, 5/36 - 15**0.5 / 24],
            [5/36 + 15**0.5 / 30, 2/9 + 15**0.5 / 15, 5/36],
            [5/18, 4/9, 5/18]
        ],
        [1/2 - 15**0.5 / 10, 1/2, 1/2 + 15**0.5 / 10]
    ),
    "gauss-legendre_2s": (  # 2-stage Gauss-Legendre (implicit, order 4)
        [
            [1/4, 1/4 - 3**0.5 / 6],
            [1/4 + 3**0.5 / 6, 1/4],
            [1/2, 1/2],
        ],
        [1/2 - 3**0.5 / 6, 1/2 + 3**0.5 / 6]
    ),
    "radau_iia_3s": (  # 3-stage Radau IIA (implicit, stiffly accurate)
        [
            [11/45 - 7*6**0.5 / 360, 37/225 - 169*6**0.5 / 1800, -2/225 + 6**0.5 / 75],
            [37/225 + 169*6**0.5 / 1800, 11/45 + 7*6**0.5 / 360, -2/225 - 6**0.5 / 75],
            [4/9 - 6**0.5 / 36, 4/9 + 6**0.5 / 36, 1/9],
            [4/9 - 6**0.5 / 36, 4/9 + 6**0.5 / 36, 1/9],
        ],
        [2/5 - 6**0.5 / 10, 2/5 + 6**0.5 / 10, 1.]
    ),
    "radau_iia_2s": (  # 2-stage Radau IIA (implicit, order 3)
        [
            [5/12, -1/12],
            [3/4, 1/4],
            [3/4, 1/4],
        ],
        [1/3, 1]
    ),
    "lobatto_iiic_3s": (  # 3-stage Lobatto IIIC (implicit)
        [
            [1/6, -1/3, 1/6],
            [1/6, 5/12, -1/12],
            [1/6, 2/3, 1/6],
            [1/6, 2/3, 1/6],
        ],
        [0, 1/2, 1]
    ),
    "lobatto_iiic_2s": (  # 2-stage Lobatto IIIC (implicit)
        [
            [1/2, -1/2],
            [1/2, 1/2],
            [1/2, 1/2],
        ],
        [0, 1]
    ),
    "dormand-prince_13s": (  # 13-stage Dormand-Prince 8(7) (explicit)
        [
            [1/18, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [1/48, 1/16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [1/32, 0, 3/32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [5/16, 0, -75/64, 75/64, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [3/80, 0, 0, 3/16, 3/20, 0, 0, 0, 0, 0, 0, 0, 0],
            [29443841/614563906, 0, 0, 77736538/692538347, -28693883/1125000000, 23124283/1800000000, 0, 0, 0, 0, 0, 0, 0],
            [16016141/946692911, 0, 0, 61564180/158732637, 22789713/633445777, 545815736/2771057229, -180193667/1043307555, 0, 0, 0, 0, 0, 0],
            [39632708/573591083, 0, 0, -433636366/683701615, -421739975/2616292301, 100302831/723423059, 790204164/839813087, 800635310/3783071287, 0, 0, 0, 0, 0],
            [246121993/1340847787, 0, 0, -37695042795/15268766246, -309121744/1061227803, -12992083/490766935, 6005943493/2108947869, 393006217/1396673457, 123872331/1001029789, 0, 0, 0, 0],
            [-1028468189/846180014, 0, 0, 8478235783/508512852, 1311729495/1432422823, -10304129995/1701304382, -48777925059/3047939560, 15336726248/1032824649, -45442868181/3398467696, 3065993473/597172653, 0, 0, 0],
            [185892177/718116043, 0, 0, -3185094517/667107341, -477755414/1098053517, -703635378/230739211, 5731566787/1027545527, 5232866602/850066563, -4093664535/808688257, 3962137247/1805957418, 65686358/487910083, 0, 0],
            [403863854/491063109, 0, 0, -5068492393/434740067, -411421997/543043805, 652783627/914296604, 11173962825/925320556, -13158990841/6184727034, 3936647629/1978049680, -160528059/685178525, 248638103/1413531060, 0, 0],
            [14005451/335480064, 0, 0, 0, 0, -59238493/1068277825, 181606767/758867731, 561292985/797845732, -1041891430/1371343529, 760417239/1151165299, 118820643/751138087, -528747749/2220607170, 1/4]
        ],
        [0, 1/18, 1/12, 1/8, 5/16, 3/8, 59/400, 93/200, 5490023248 / 9719169821, 13/20, 1201146811 / 1299019798, 1, 1],
    ),
    "dormand-prince_6s": (  # classic DOPRI5 coefficients (explicit)
        [
            [1/5, 0, 0, 0, 0, 0, 0],
            [3/40, 9/40, 0, 0, 0, 0, 0],
            [44/45, -56/15, 32/9, 0, 0, 0, 0],
            [19372/6561, -25360/2187, 64448/6561, -212/729, 0, 0, 0],
            [9017/3168, -355/33, 46732/5247, 49/176, -5103/18656, 0],
            [35/384, 0, 500/1113, 125/192, -2187/6784, 11/84, 0],
        ],
        [0, 1/5, 3/10, 4/5, 8/9, 1],
    ),
    "dormand-prince_6s_alt": (  # NOTE(review): identical to dormand-prince_6s above
        [
            [1/5, 0, 0, 0, 0, 0, 0],
            [3/40, 9/40, 0, 0, 0, 0, 0],
            [44/45, -56/15, 32/9, 0, 0, 0, 0],
            [19372/6561, -25360/2187, 64448/6561, -212/729, 0, 0, 0],
            [9017/3168, -355/33, 46732/5247, 49/176, -5103/18656, 0],
            [35/384, 0, 500/1113, 125/192, -2187/6784, 11/84, 0],
        ],
        [0, 1/5, 3/10, 4/5, 8/9, 1],
    ),
    "dormand-prince_7s": (  # NOTE(review): also identical to dormand-prince_6s
        [
            [1/5, 0, 0, 0, 0, 0, 0],
            [3/40, 9/40, 0, 0, 0, 0, 0],
            [44/45, -56/15, 32/9, 0, 0, 0, 0],
            [19372/6561, -25360/2187, 64448/6561, -212/729, 0, 0, 0],
            [9017/3168, -355/33, 46732/5247, 49/176, -5103/18656, 0],
            [35/384, 0, 500/1113, 125/192, -2187/6784, 11/84, 0],
        ],
        [0, 1/5, 3/10, 4/5, 8/9, 1],
    ),
    "bogacki-shampine_7s": ( #5th order
        [
            [1/6, 0, 0, 0, 0, 0, 0],
            [2/27, 4/27, 0, 0, 0, 0, 0],
            [183/1372, -162/343, 1053/1372, 0, 0, 0, 0],
            [68/297, -4/11, 42/143, 1960/3861, 0, 0, 0],
            [597/22528, 81/352, 63099/585728, 58653/366080, 4617/20480, 0, 0],
            [174197/959244, -30942/79937, 8152137/19744439, 666106/1039181, -29421/29068, 482048/414219, 0],
            [587/8064, 0, 4440339/15491840, 24353/124800, 387/44800, 2152/5985, 7267/94080]
        ],
        [0, 1/6, 2/9, 3/7, 2/3, 3/4, 1]
    ),
    "rk4_4s": (  # classical RK4
        [
            [1/2, 0, 0, 0],
            [0, 1/2, 0, 0],
            [0, 0, 1, 0],
            [1/6, 1/3, 1/3, 1/6]
        ],
        [0, 1/2, 1/2, 1],
    ),
    "rk38_4s": (  # RK4 "3/8 rule" variant
        [
            [1/3, 0, 0, 0],
            [-1/3, 1, 0, 0],
            [1, -1, 1, 0],
            [1/8, 3/8, 3/8, 1/8]
        ],
        [0, 1/3, 2/3, 1],
    ),
    "ralston_4s": (  # Ralston's 4th-order (minimum truncation error)
        [
            [2/5, 0, 0, 0],
            [(-2889+1428 * 5**0.5)/1024, (3785-1620 * 5**0.5)/1024, 0, 0],
            [(-3365+2094 * 5**0.5)/6040, (-975-3046 * 5**0.5)/2552, (467040+203968*5**0.5)/240845, 0],
            [(263+24*5**0.5)/1812, (125-1000*5**0.5)/3828, (3426304+1661952*5**0.5)/5924787, (30-4*5**0.5)/123]
        ],
        [0, 2/5, (14-3 * 5**0.5)/16, 1],
    ),
    "heun_3s": (  # Heun's 3rd-order
        [
            [1/3, 0, 0],
            [0, 2/3, 0],
            [1/4, 0, 3/4]
        ],
        [0, 1/3, 2/3],
    ),
    "kutta_3s": (  # Kutta's 3rd-order
        [
            [1/2, 0, 0],
            [-1, 2, 0],
            [1/6, 2/3, 1/6]
        ],
        [0, 1/2, 1],
    ),
    "ralston_3s": (  # Ralston's 3rd-order
        [
            [1/2, 0, 0],
            [0, 3/4, 0],
            [2/9, 1/3, 4/9]
        ],
        [0, 1/2, 3/4],
    ),
    "houwen-wray_3s": (  # van der Houwen / Wray 3rd-order
        [
            [8/15, 0, 0],
            [1/4, 5/12, 0],
            [1/4, 0, 3/4]
        ],
        [0, 8/15, 2/3],
    ),
    "ssprk3_3s": (  # strong-stability-preserving RK3
        [
            [1, 0, 0],
            [1/4, 1/4, 0],
            [1/6, 1/6, 2/3]
        ],
        [0, 1, 1/2],
    ),
    "midpoint_2s": (  # explicit midpoint
        [
            [1/2, 0],
            [0, 1]
        ],
        [0, 1/2],
    ),
    "heun_2s": (  # Heun's method (explicit trapezoid)
        [
            [1, 0],
            [1/2, 1/2]
        ],
        [0, 1],
    ),
    "ralston_2s": (  # Ralston's 2nd-order
        [
            [2/3, 0],
            [1/4, 3/4]
        ],
        [0, 2/3],
    ),
    "buehler": (  # single-stage (Euler-like) fallback used for the final step
        [
            [1],
        ],
        [0],
    ),
}
def get_rk_methods(rk_type, h, c1=0.0, c2=0.5, c3=1.0, h_prev=None, h_prev2=None, stepcount=0, sigmas=None):
FSAL = False
multistep_stages = 0
if rk_type[:4] == "deis":
order = int(rk_type[-2])
if stepcount < order:
if order == 4:
rk_type = "res_3s"
order = 3
elif order == 3:
rk_type = "res_3s"
elif order == 2:
rk_type = "res_2s"
else:
rk_type = "deis"
multistep_stages = order-1
if rk_type[-2:] == "2m": #multistep method
if h_prev is not None:
multistep_stages = 1
c2 = -h_prev / h
rk_type = rk_type[:-2] + "2s"
else:
rk_type = rk_type[:-2] + "2s"
if rk_type[-2:] == "3m": #multistep method
if h_prev2 is not None:
multistep_stages = 2
c2 = -h_prev2 / h_prev
c3 = -h_prev / h
rk_type = rk_type[:-2] + "3s"
else:
rk_type = rk_type[:-2] + "3s"
if rk_type in rk_coeff:
ab, ci = copy.deepcopy(rk_coeff[rk_type])
ci = ci[:]
ci.append(1)
alpha_fn = lambda h: 1
t_fn = lambda sigma: sigma
sigma_fn = lambda t: t
h_fn = lambda sigma_down, sigma: sigma_down - sigma
model_call = get_denoised
EPS_PRED = False
else:
alpha_fn = lambda neg_h: torch.exp(neg_h)
t_fn = lambda sigma: sigma.log().neg()
sigma_fn = lambda t: t.neg().exp()
h_fn = lambda sigma_down, sigma: -torch.log(sigma_down/sigma)
model_call = get_denoised
EPS_PRED = False
match rk_type:
case "deis":
alpha_fn = lambda neg_h: torch.exp(neg_h)
t_fn = lambda sigma: sigma.log().neg()
sigma_fn = lambda t: t.neg().exp()
h_fn = lambda sigma_down, sigma: -torch.log(sigma_down/sigma)
model_call = get_epsilon
EPS_PRED = True
coeff_list = get_deis_coeff_list(sigmas, multistep_stages+1, deis_mode="rhoab")
coeff_list = [[elem / h for elem in inner_list] for inner_list in coeff_list]
if multistep_stages == 1:
b1, b2 = coeff_list[stepcount]
ab = [
[0, 0],
[b1, b2],
]
ci = [0, 0, 1]
if multistep_stages == 2:
b1, b2, b3 = coeff_list[stepcount]
ab = [
[0, 0, 0],
[0, 0, 0],
[b1, b2, b3],
]
ci = [0, 0, 0, 1]
if multistep_stages == 3:
b1, b2, b3, b4 = coeff_list[stepcount]
ab = [
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
[b1, b2, b3, b4],
]
ci = [0, 0, 0, 0, 1]
case "dormand-prince_6s":
FSAL = True
case "ddim":
b1 = phi(1, -h)
ab = [
[b1],
]
ci = [0, 1]
case "res_2s":
a2_1 = c2 * phi(1, -h*c2)
b1 = phi(1, -h) - phi(2, -h)/c2
b2 = phi(2, -h)/c2
a2_1 /= (1 - torch.exp(-h*c2)) / h
b1 /= phi(1, -h)
b2 /= phi(1, -h)
ab = [
[a2_1, 0],
[b1, b2],
]
ci = [0, c2, 1]
case "res_3s":
gamma = calculate_gamma(c2, c3)
a2_1 = c2 * phi(1, -h*c2)
a3_2 = gamma * c2 * phi(2, -h*c2) + (c3 ** 2 / c2) * phi(2, -h*c3) #phi_2_c3_h # a32 from k2 to k3
a3_1 = c3 * phi(1, -h*c3) - a3_2 # a31 from k1 to k3
b3 = (1 / (gamma * c2 + c3)) * phi(2, -h)
b2 = gamma * b3 #simplified version of: b2 = (gamma / (gamma * c2 + c3)) * phi_2_h
b1 = phi(1, -h) - b2 - b3
0
a3_2 /= (1 - torch.exp(-h*c3)) / h
a3_1 /= (1 - torch.exp(-h*c3)) / h
b1 /= phi(1, -h)
b2 /= phi(1, -h)
b3 /= phi(1, -h)
ab = [
[a2_1, 0, 0],
[a3_1, a3_2, 0],
[b1, b2, b3],
]
ci = [c1, c2, c3, 1]
#ci = [0, c2, c3, 1]
case "dpmpp_2s":
#c2 = 0.5
a2_1 = c2 * phi(1, -h*c2)
b1 = (1 - 1/(2*c2)) * phi(1, -h)
b2 = (1/(2*c2)) * phi(1, -h)
a2_1 /= (1 - torch.exp(-h*c2)) / h
b1 /= phi(1, -h)
b2 /= phi(1, -h)
ab = [
[a2_1, 0],
[b1, b2],
]
ci = [0, c2, 1]
case "dpmpp_sde_2s":
c2 = 1.0 #hardcoded to 1.0 to more closely emulate the configuration for k-diffusion's implementation
a2_1 = c2 * phi(1, -h*c2)
b1 = (1 - 1/(2*c2)) * phi(1, -h)
b2 = (1/(2*c2)) * phi(1, -h)
a2_1 /= (1 - torch.exp(-h*c2)) / h
b1 /= phi(1, -h)
b2 /= phi(1, -h)
ab = [
[a2_1, 0],
[b1, b2],
]
ci = [0, c2, 1]
case "dpmpp_3s":
a2_1 = c2 * phi(1, -h*c2)
a3_2 = (c3**2 / c2) * phi(2, -h*c3)
a3_1 = c3 * phi(1, -h*c3) - a3_2
b2 = 0
b3 = (1/c3) * phi(2, -h)
b1 = phi(1, -h) - b2 - b3
a2_1 /= (1 - torch.exp(-h*c2)) / h
a3_2 /= (1 - torch.exp(-h*c3)) / h
a3_1 /= (1 - torch.exp(-h*c3)) / h
b1 /= phi(1, -h)
b2 /= phi(1, -h)
b3 /= phi(1, -h)
ab = [
[a2_1, 0, 0],
[a3_1, a3_2, 0],
[b1, b2, b3],
]
ci = [0, c2, c3, 1]
case "rk_exp_5s":
c1, c2, c3, c4, c5 = 0., 0.5, 0.5, 1., 0.5
a2_1 = 0.5 * phi(1, -h * c2)
a3_1 = 0.5 * phi(1, -h * c3) - phi(2, -h * c3)
a3_2 = phi(2, -h * c3)
a4_1 = phi(1, -h * c4) - 2 * phi(2, -h * c4)
a4_2 = a4_3 = phi(2, -h * c4)
a5_2 = a5_3 = 0.5 * phi(2, -h * c5) - phi(3, -h * c4) + 0.25 * phi(2, -h * c4) - 0.5 * phi(3, -h * c5)
a5_4 = 0.25 * phi(2, -h * c5) - a5_2
a5_1 = 0.5 * phi(1, -h * c5) - 2 * a5_2 - a5_4
b1 = phi(1, -h) - 3 * phi(2, -h) + 4 * phi(3, -h)
b2 = b3 = 0
b4 = -phi(2, -h) + 4*phi(3, -h)
b5 = 4 * phi(2, -h) - 8 * phi(3, -h)
a2_1 /= (1 - torch.exp(-h*c2)) / h
a3_1 /= (1 - torch.exp(-h*c3)) / h
a3_2 /= (1 - torch.exp(-h*c3)) / h
a4_1 /= (1 - torch.exp(-h*c4)) / h
a4_2 /= (1 - torch.exp(-h*c4)) / h
a4_3 /= (1 - torch.exp(-h*c4)) / h
a5_1 /= (1 - torch.exp(-h*c5)) / h
a5_2 /= (1 - torch.exp(-h*c5)) / h
a5_3 /= (1 - torch.exp(-h*c5)) / h
a5_4 /= (1 - torch.exp(-h*c5)) / h
b1 /= phi(1, -h)
b2 /= phi(1, -h)
b3 /= phi(1, -h)
b4 /= phi(1, -h)
b5 /= phi(1, -h)
ab = [
[a2_1, 0, 0, 0, 0],
[a3_1, a3_2, 0, 0, 0],
[a4_1, a4_2, a4_3, 0, 0],
[a5_1, a5_2, a5_3, a5_4, 0],
[b1, b2, b3, b4, b5],
]
ci = [0., 0.5, 0.5, 1., 0.5, 1]
return ab, ci, multistep_stages, model_call, alpha_fn, t_fn, sigma_fn, h_fn, FSAL, EPS_PRED
def get_rk_methods_order(rk_type):
    """Return the number of stages (length of the tableau) for a given RK method name.

    A dummy step size of 1.0 is used to build the tableau; only len(ci) matters.
    """
    # The original hard-coded the dummy tensor onto 'cuda', which crashes on
    # CPU-only machines; the device has no effect on the returned order.
    ab, ci, multistep_stages, model_call, alpha_fn, t_fn, sigma_fn, h_fn, FSAL, EPS_PRED = get_rk_methods(rk_type, torch.tensor(1.0, dtype=torch.float64), c1=0.0, c2=0.5, c3=1.0)
    return len(ci)-1
def get_rk_methods_order_and_fn(rk_type, h=None, c1=None, c2=None, c3=None, h_prev=None, h_prev2=None, stepcount=0, sigmas=None):
    """Return (order, model_call, alpha_fn, t_fn, sigma_fn, h_fn, FSAL, EPS_PRED) for rk_type.

    When h is None a dummy step size of 1.0 with default nodes is used — the
    helper functions and order do not depend on the actual step size.
    """
    # Fixed: `h == None` -> `h is None`, and removed the hard-coded 'cuda'
    # device on the dummy tensor (crashes on CPU-only machines; the device
    # does not affect any returned value).
    if h is None:
        ab, ci, multistep_stages, model_call, alpha_fn, t_fn, sigma_fn, h_fn, FSAL, EPS_PRED = get_rk_methods(rk_type, torch.tensor(1.0, dtype=torch.float64), c1=0.0, c2=0.5, c3=1.0)
    else:
        ab, ci, multistep_stages, model_call, alpha_fn, t_fn, sigma_fn, h_fn, FSAL, EPS_PRED = get_rk_methods(rk_type, h, c1, c2, c3, h_prev, h_prev2, stepcount, sigmas)
    return len(ci)-1, model_call, alpha_fn, t_fn, sigma_fn, h_fn, FSAL, EPS_PRED
def get_rk_methods_coeff(rk_type, h, c1, c2, c3, h_prev=None, h_prev2=None, stepcount=0, sigmas=None):
    """Convenience wrapper around get_rk_methods() that keeps only the tableau outputs."""
    full = get_rk_methods(rk_type, h, c1, c2, c3, h_prev, h_prev2, stepcount, sigmas)
    ab, ci, multistep_stages = full[:3]
    EPS_PRED = full[-1]
    return ab, ci, multistep_stages, EPS_PRED
@torch.no_grad()
def legacy_sample_rk(model, x, sigmas, extra_args=None, callback=None, disable=None, noise_sampler=None, noise_sampler_type="brownian", noise_mode="hard", noise_seed=-1, rk_type="res_2m", implicit_sampler_name="default",
                     sigma_fn_formula="", t_fn_formula="",
                     eta=0.0, eta_var=0.0, s_noise=1., d_noise=1., alpha=-1.0, k=1.0, scale=0.1, c1=0.0, c2=0.5, c3=1.0, MULTISTEP=False, cfgpp=0.0, implicit_steps=0, reverse_weight=0.0, exp_mode=False,
                     latent_guide=None, latent_guide_inv=None, latent_guide_weight=0.0, latent_guide_weights=None, guide_mode="blend",
                     GARBAGE_COLLECT=False, mask=None, LGW_MASK_RESCALE_MIN=True, sigmas_override=None, t_is=None,
                     ):
    """Legacy Runge-Kutta sampler with optional SDE noise injection, implicit
    iterations, multistep stage reuse, CFG++ correction, and masked latent guides.

    Walks the `sigmas` schedule; each step rebuilds the RK tableau via
    get_rk_methods_coeff(), evaluates the model at each stage, combines stages
    into the next latent, and (depending on model type / noise_mode) re-adds
    scaled noise. Returns the final latent xi[0].

    NOTE(review): several branches rely on loop variables (`i`, `iteration`)
    leaking past their loops, and on in-place list mutation of xi/ki/ki_u —
    statement order is load-bearing; behavior preserved exactly as written.
    """
    extra_args = {} if extra_args is None else extra_args

    if sigmas_override is not None:
        sigmas = sigmas_override.clone()
    sigmas = sigmas.clone() * d_noise

    sigmin = model.inner_model.inner_model.model_sampling.sigma_min
    sigmax = model.inner_model.inner_model.model_sampling.sigma_max

    UNSAMPLE = False
    if sigmas[0] == 0.0:      #remove padding used to avoid need for model patch with noise inversion
        UNSAMPLE = True
        sigmas = sigmas[1:-1]

    # Prepare the latent-guide mask: broadcast the 2D mask to the latent's
    # channel count and spatial size when one is provided.
    if mask is None:
        mask = torch.ones_like(x)
        LGW_MASK_RESCALE_MIN = False
    else:
        mask = mask.unsqueeze(1)
        mask = mask.repeat(1, x.shape[1], 1, 1)
        mask = F.interpolate(mask, size=(x.shape[2], x.shape[3]), mode='bilinear', align_corners=False)
        mask = mask.to(x.dtype).to(x.device)

    # y0 / y0_inv are the guide targets (zeros when no guide supplied).
    y0, y0_inv = torch.zeros_like(x), torch.zeros_like(x)
    if latent_guide is not None:
        if sigmas[0] > sigmas[1]:
            # Sampling direction (descending sigmas): guide is a blend target.
            y0 = latent_guide = model.inner_model.inner_model.process_latent_in(latent_guide['samples']).clone().to(x.device)
        else:
            # Unsampling direction: start directly from the guide latent.
            x = model.inner_model.inner_model.process_latent_in(latent_guide['samples']).clone().to(x.device)

    if latent_guide_inv is not None:
        if sigmas[0] > sigmas[1]:
            y0_inv = latent_guide_inv = model.inner_model.inner_model.process_latent_in(latent_guide_inv['samples']).clone().to(x.device)
        elif UNSAMPLE and mask is not None:
            # Blend the inverse guide into the unmasked region before unsampling.
            x = mask * x + (1-mask) * model.inner_model.inner_model.process_latent_in(latent_guide_inv['samples']).clone().to(x.device)

    # uncond[0] is refreshed by the post-CFG hook each model call when cfgpp is active.
    uncond = [torch.full_like(x, 0.0)]
    if cfgpp != 0.0:
        def post_cfg_function(args):
            uncond[0] = args["uncond_denoised"]
            return args["denoised"]
        model_options = extra_args.get("model_options", {}).copy()
        extra_args["model_options"] = comfy.model_patcher.set_model_options_post_cfg_function(model_options, post_cfg_function, disable_cfg1_optimization=True)

    if noise_seed == -1:
        seed = torch.initial_seed() + 1
    else:
        seed = noise_seed

    if noise_sampler_type == "fractal":
        noise_sampler = NOISE_GENERATOR_CLASSES.get(noise_sampler_type)(x=x, seed=seed, sigma_min=sigmin, sigma_max=sigmax)
        noise_sampler.alpha = alpha
        noise_sampler.k = k
        noise_sampler.scale = scale
    else:
        noise_sampler = NOISE_GENERATOR_CLASSES_SIMPLE.get(noise_sampler_type)(x=x, seed=seed, sigma_min=sigmin, sigma_max=sigmax)

    # When unsampling toward noise, the guide targets become normalized noise draws.
    if UNSAMPLE and sigmas[0] < sigmas[1]: #sigma_next > sigma:
        y0 = noise_sampler(sigma=sigmax, sigma_next=sigmin)
        y0 = (y0 - y0.mean()) / y0.std()
        y0_inv = noise_sampler(sigma=sigmax, sigma_next=sigmin)
        y0_inv = (y0_inv - y0_inv.mean()) / y0_inv.std()

    order, model_call, alpha_fn, t_fn, sigma_fn, h_fn, FSAL, EPS_PRED = get_rk_methods_order_and_fn(rk_type)
    if exp_mode:
        # Force the exponential (t = -log sigma) parameterization regardless of method.
        model_call = get_denoised
        alpha_fn = lambda neg_h: torch.exp(neg_h)
        t_fn = lambda sigma: sigma.log().neg()
        sigma_fn = lambda t: t.neg().exp()

    # xi: stage latents; ki: stage model outputs; ki_u: matching uncond outputs.
    xi, ki, ki_u = [torch.zeros_like(x)]*(order+2), [torch.zeros_like(x)]*(order+1), [torch.zeros_like(x)]*(order+1)
    h, h_prev, h_prev2 = None, None, None

    xi[0] = x

    for _ in trange(len(sigmas)-1, disable=disable):
        sigma, sigma_next = sigmas[_], sigmas[_+1]

        if sigma_next == 0.0:
            # Final step to sigma=0: fall back to a single deterministic stage.
            rk_type = "buehler"
            eta, eta_var = 0, 0
            order, model_call, alpha_fn, t_fn, sigma_fn, h_fn, FSAL, EPS_PRED = get_rk_methods_order_and_fn(rk_type)

        #sigma_up, sigma, sigma_down, alpha_ratio = get_res4lyf_step_with_model(model, sigma, sigma_next, eta, eta_var, noise_mode, h_fn(sigma_next,sigma) )
        sigma_up, sigma, sigma_down, alpha_ratio = get_res4lyf_step_with_model(model, sigma, sigma_next, eta, noise_mode)

        t_down, t = t_fn(sigma_down), t_fn(sigma)
        h = h_fn(sigma_down, sigma)
        c2, c3 = get_res4lyf_half_step3(sigma, sigma_down, c2, c3, t_fn=t_fn, sigma_fn=sigma_fn, t_fn_formula=t_fn_formula, sigma_fn_formula=sigma_fn_formula)

        ab, ci, multistep_stages, EPS_PRED = get_rk_methods_coeff(rk_type, h, c1, c2, c3, h_prev, h_prev2, _, sigmas)
        order = len(ci)-1

        if exp_mode:
            # Rescale the tableau by phi_1 at each stage node.
            for i in range(order):
                for j in range(order):
                    ab[i][j] = ab[i][j] * phi(1, -h * ci[i+1])

        # Pre-step noise injection for non-CONST model sampling in "hard" mode.
        if isinstance(model.inner_model.inner_model.model_sampling, comfy.model_sampling.CONST) == False and noise_mode == "hard" and sigma_next > 0.0:
            noise = noise_sampler(sigma=sigmas[_], sigma_next=sigmas[_+1])
            noise = torch.nan_to_num((noise - noise.mean()) / noise.std(), 0.0)
            xi[0] = alpha_ratio * xi[0] + noise * s_noise * sigma_up

        xi_0 = xi[0] # needed for implicit sampling

        # First-stage model call (skipped when FSAL/multistep reuses a previous call).
        if (MULTISTEP == False and FSAL == False) or _ == 0:
            ki[0] = model_call(model, xi_0, sigma, **extra_args)
            if EPS_PRED and rk_type.startswith("deis"):
                # DEIS works on epsilon; convert and rescale for the eta-adjusted step.
                ki[0] = (xi_0 - ki[0]) / sigma
                ki[0] = ki[0] * (sigma_down-sigma)/(sigma_next-sigma)
            ki_u[0] = uncond[0]
            if cfgpp != 0.0:
                ki[0] = uncond[0] + cfgpp * (ki[0] - uncond[0])
                ki_u[0] = uncond[0]

        for iteration in range(implicit_steps+1):
            for i in range(multistep_stages, order):
                # After the first pass, re-derive the tableau from the implicit method.
                if implicit_steps > 0 and iteration > 0 and implicit_sampler_name != "default":
                    ab, ci, multistep_stages, EPS_PRED = get_rk_methods_coeff(implicit_sampler_name, h, c1, c2, c3, h_prev, h_prev2, _, sigmas)
                    order = len(ci)-1
                    # Grow the stage buffers if the implicit method has more stages.
                    if len(ki) < order + 1:
                        last_value_ki = ki[-1]
                        last_value_ki_u = ki_u[-1]
                        ki.extend( [last_value_ki] * ((order + 1) - len(ki)))
                        ki_u.extend([last_value_ki_u] * ((order + 1) - len(ki_u)))
                    if len(xi) < order + 2:
                        xi.extend([torch.zeros_like(xi[0])] * ((order + 2) - len(xi)))
                    ki[0] = model_call(model, xi_0, sigma, **extra_args)
                    ki_u[0] = uncond[0]

                sigma_mid = sigma_fn(t + h*ci[i+1])
                # Interpolation factor from sigma to sigma_down at this stage node.
                alpha_t_1 = alpha_t_1_inv = torch.exp(torch.log(sigma_down/sigma) * ci[i+1] )
                if sigma_next > sigma:
                    # Unsampling direction: interpolate in (sigmax - sigma) space instead.
                    alpha_t_1_inv = torch.nan_to_num( torch.exp(torch.log((sigmax - sigma_down)/(sigmax - sigma)) * ci[i+1]), 1.)

                # Per-step guide-weight masks (optionally rescaled so the minimum is the weight).
                if LGW_MASK_RESCALE_MIN:
                    lgw_mask = mask * (1 - latent_guide_weights[_]) + latent_guide_weights[_]
                    lgw_mask_inv = (1-mask) * (1 - latent_guide_weights[_]) + latent_guide_weights[_]
                else:
                    lgw_mask = mask * latent_guide_weights[_]
                    lgw_mask_inv = (1-mask) * latent_guide_weights[_]

                # Weighted combination of stage outputs (and guide targets) for stage i.
                ks, ks_u, ys, ys_inv = torch.zeros_like(x), torch.zeros_like(x), torch.zeros_like(x), torch.zeros_like(x)
                for j in range(order):
                    ks += ab[i][j] * ki[j]
                    ks_u += ab[i][j] * ki_u[j]
                    ys += ab[i][j] * y0
                    ys_inv += ab[i][j] * y0_inv

                if EPS_PRED and rk_type.startswith("deis"):
                    epsilon = (h * ks) / (sigma_down - sigma) #xi[(i+1)%order] = xi_0 + h*ks
                    ks = xi_0 - epsilon * sigma # denoised
                else:
                    # Normalize by the row sum (lobatto rows after iteration 0 are excluded).
                    if implicit_sampler_name.startswith("lobatto") == False:
                        ks /= sum(ab[i])
                    elif iteration == 0:
                        ks /= sum(ab[i])

                # Apply the latent guide in the selected mode.
                if UNSAMPLE == False and latent_guide is not None and latent_guide_weights[_] > 0.0:
                    if guide_mode == "hard_light":
                        lg = latent_guide * sum(ab[i])
                        if EPS_PRED:
                            lg = (alpha_fn(-h*ci[i+1]) * xi[0] - latent_guide) / (sigma_fn(t + h*ci[i]) + 1e-8)
                        hard_light_blend_1 = hard_light_blend(lg, ks)
                        ks = (1 - lgw_mask) * ks + lgw_mask * hard_light_blend_1
                    elif guide_mode == "mean_std":
                        # Match per-channel mean and std of ks to the guide.
                        ks2 = torch.zeros_like(x)
                        for n in range(latent_guide.shape[1]):
                            ks2[0][n] = (ks[0][n] - ks[0][n].mean()) / ks[0][n].std()
                            ks2[0][n] = (ks2[0][n] * latent_guide[0][n].std()) + latent_guide[0][n].mean()
                        ks = (1 - lgw_mask) * ks + lgw_mask * ks2
                    elif guide_mode == "mean":
                        # Match per-channel mean only; ks3 targets the inverse guide's mean.
                        ks2 = torch.zeros_like(x)
                        for n in range(latent_guide.shape[1]):
                            ks2[0][n] = (ks[0][n] - ks[0][n].mean())
                            ks2[0][n] = (ks2[0][n]) + latent_guide[0][n].mean()
                        ks3 = torch.zeros_like(x)
                        for n in range(latent_guide.shape[1]):
                            ks3[0][n] = (ks[0][n] - ks[0][n].mean())
                            ks3[0][n] = (ks3[0][n]) + latent_guide_inv[0][n].mean()
                        ks = (1 - lgw_mask) * ks + lgw_mask * ks2
                        ks = (1 - lgw_mask_inv) * ks + lgw_mask_inv * ks3
                    elif guide_mode == "std":
                        # Match per-channel std only.
                        ks2 = torch.zeros_like(x)
                        for n in range(latent_guide.shape[1]):
                            ks2[0][n] = (ks[0][n]) / ks[0][n].std()
                            ks2[0][n] = (ks2[0][n] * latent_guide[0][n].std())
                        ks = (1 - lgw_mask) * ks + lgw_mask * ks2
                    elif guide_mode == "blend":
                        ks = (1 - lgw_mask) * ks + lgw_mask * ys #+ (1-lgw_mask) * latent_guide_inv
                        ks = (1 - lgw_mask_inv) * ks + lgw_mask_inv * ys_inv
                    elif guide_mode == "inversion":
                        UNSAMPLE = True

                # Advance the stage latent; CFG++ shifts toward the uncond direction.
                cfgpp_term = cfgpp*h*(ks - ks_u)
                xi[(i+1)%order] = (1-UNSAMPLE * lgw_mask) * (alpha_t_1 * (xi_0 + cfgpp_term) + (1 - alpha_t_1) * ks ) \
                                + UNSAMPLE * lgw_mask * (alpha_t_1_inv * (xi_0 + cfgpp_term) + (1 - alpha_t_1_inv) * ys )
                if UNSAMPLE:
                    xi[(i+1)%order] = (1-lgw_mask_inv) * xi[(i+1)%order] + UNSAMPLE * lgw_mask_inv * (alpha_t_1_inv * (xi_0 + cfgpp_term) + (1 - alpha_t_1_inv) * ys_inv )

                # Evaluate the model for the next stage (index 0 wraps around and is skipped).
                if (i+1)%order > 0 and (i+1)%order > multistep_stages-1:
                    if GARBAGE_COLLECT: gc.collect(); torch.cuda.empty_cache()
                    ki[i+1] = model_call(model, xi[i+1], sigma_fn(t + h*ci[i+1]), **extra_args)
                    if EPS_PRED and rk_type.startswith("deis"):
                        ki[i+1] = (xi[i+1] - ki[i+1]) / sigma_fn(t + h*ci[i+1])
                        ki[i+1] = ki[i+1] * (sigma_down-sigma)/(sigma_next-sigma)
                    ki_u[i+1] = uncond[0]

            if FSAL and _ > 0:
                # First-same-as-last: reuse the final stage as the next first stage.
                ki [0] = ki[order-1]
                ki_u[0] = ki_u[order-1]
            if MULTISTEP and _ > 0:
                # Multistep: seed the first stage with the previous step's denoised estimate.
                ki [0] = denoised
                ki_u[0] = ki_u[order-1]
            # Shift stage history down for multistep methods.
            for ms in range(multistep_stages):
                ki [multistep_stages - ms] = ki [multistep_stages - ms - 1]
                ki_u[multistep_stages - ms] = ki_u[multistep_stages - ms - 1]

            if iteration < implicit_steps and implicit_sampler_name == "default":
                # Default implicit iteration: re-evaluate the first stage at sigma_down.
                ki [0] = model_call(model, xi[0], sigma_down, **extra_args)
                ki_u[0] = uncond[0]
            elif iteration == implicit_steps and implicit_sampler_name != "default" and implicit_steps > 0:
                # Final combination using the implicit tableau's output row (row i+1).
                ks, ks_u, ys, ys_inv = torch.zeros_like(x), torch.zeros_like(x), torch.zeros_like(x), torch.zeros_like(x)
                for j in range(order):
                    ks += ab[i+1][j] * ki[j]
                    ks_u += ab[i+1][j] * ki_u[j]
                    ys += ab[i+1][j] * y0
                    ys_inv += ab[i+1][j] * y0_inv
                ks /= sum(ab[i+1])
                cfgpp_term = cfgpp*h*(ks - ks_u) #GUIDES NOT FULLY IMPLEMENTED HERE WITH IMPLICIT FINAL STEP
                xi[(i+1)%order] = (1-UNSAMPLE * lgw_mask) * (alpha_t_1 * (xi_0 + cfgpp_term) + (1 - alpha_t_1) * ks ) \
                                + UNSAMPLE * lgw_mask * (alpha_t_1_inv * (xi_0 + cfgpp_term) + (1 - alpha_t_1_inv) * ys )
                if UNSAMPLE:
                    xi[(i+1)%order] = (1-lgw_mask_inv) * xi[(i+1)%order] + UNSAMPLE * lgw_mask_inv * (alpha_t_1_inv * (xi_0 + cfgpp_term) + (1 - alpha_t_1_inv) * ys_inv )

        # Recover the denoised estimate for the callback/preview.
        # NOTE: relies on `i`/`iteration` leaking from the loops above.
        if EPS_PRED == True and exp_mode == False and not rk_type.startswith("deis"):
            denoised = alpha_fn(-h*ci[i+1]) * xi[0] - sigma * ks
        elif EPS_PRED == True and rk_type.startswith("deis"):
            epsilon = (h * ks) / (sigma_down - sigma)
            denoised = xi_0 - epsilon * sigma # denoised
        elif iteration == implicit_steps and implicit_sampler_name != "default" and implicit_steps > 0:
            denoised = ks
        else:
            denoised = ks / sum(ab[i])

        """if iteration < implicit_steps and implicit_sampler_name != "default":
            for idx in range(len(ki)):
                ki[idx] = denoised"""

        if callback is not None:
            callback({'x': xi[0], 'i': _, 'sigma': sigma, 'sigma_next': sigma_next, 'denoised': denoised})

        # Post-step noise injection for CONST models or non-"hard" noise modes.
        if (isinstance(model.inner_model.inner_model.model_sampling, comfy.model_sampling.CONST) or noise_mode != "hard") and sigma_next > 0.0:
            noise = noise_sampler(sigma=sigma, sigma_next=sigma_next)
            noise = (noise - noise.mean()) / noise.std()

            if guide_mode == "noise_mean":
                # Re-center the injected noise toward the guide's per-channel means.
                noise2 = torch.zeros_like(x)
                for n in range(latent_guide.shape[1]):
                    noise2[0][n] = (noise[0][n] - noise[0][n].mean())
                    noise2[0][n] = (noise2[0][n]) + latent_guide[0][n].mean()
                noise = (1 - lgw_mask) * noise + lgw_mask * noise2

            xi[0] = alpha_ratio * xi[0] + noise * s_noise * sigma_up

        h_prev2 = h_prev
        h_prev = h

    return xi[0]
================================================
FILE: legacy/legacy_samplers.py
================================================
import torch
import torch.nn.functional as F
import comfy.samplers
import comfy.sample
import comfy.sampler_helpers
import comfy.model_sampling
import comfy.latent_formats
import comfy.sd
from comfy_extras.nodes_model_advanced import ModelSamplingSD3, ModelSamplingFlux, ModelSamplingAuraFlow, ModelSamplingStableCascade
import comfy.supported_models
import latent_preview
from .noise_classes import NOISE_GENERATOR_NAMES, NOISE_GENERATOR_NAMES_SIMPLE, NOISE_GENERATOR_CLASSES_SIMPLE, NOISE_GENERATOR_CLASSES
from .sigmas import get_sigmas
from .helper import get_res4lyf_scheduler_list
def initialize_or_scale(tensor, value, steps):
    """Return a constant schedule of `value` with length `steps` when no tensor
    is supplied; otherwise scale the given tensor by `value`."""
    if tensor is not None:
        return value * tensor
    return torch.full((steps,), value)
def move_to_same_device(*tensors):
    """Move every tensor onto the device of the first tensor; returns a tuple.

    An empty call returns the (empty) input tuple unchanged.
    """
    if len(tensors) == 0:
        return tensors
    target_device = tensors[0].device
    return tuple(t.to(target_device) for t in tensors)
# Explicit / multistep sampler names exposed by the legacy ClownsharKSampler node.
# These correspond to tableau keys in legacy_sampler_rk (rk_coeff and the
# exponential-method `match` cases, plus the *_2m/*_3m multistep variants).
RK_SAMPLER_NAMES = ["res_2m",
                    "res_3m",
                    "res_2s",
                    "res_3s",
                    "rk_exp_5s",
                    "deis_2m",
                    "deis_3m",
                    "deis_4m",
                    "ralston_2s",
                    "ralston_3s",
                    "ralston_4s",
                    "dpmpp_2m",
                    "dpmpp_3m",
                    "dpmpp_2s",
                    "dpmpp_sde_2s",
                    "dpmpp_3s",
                    "midpoint_2s",
                    "heun_2s",
                    "heun_3s",
                    "houwen-wray_3s",
                    "kutta_3s",
                    "ssprk3_3s",
                    "rk38_4s",
                    "rk4_4s",
                    "dormand-prince_6s",
                    "dormand-prince_13s",
                    "bogacki-shampine_7s",
                    "ddim",
                    "buehler",
                    ]

# Implicit sampler names (used for the implicit_sampler_name option).
IRK_SAMPLER_NAMES = [
                    "gauss-legendre_2s",
                    "gauss-legendre_3s",
                    "gauss-legendre_4s",
                    "gauss-legendre_5s",
                    "radau_iia_2s",
                    "radau_iia_3s",
                    "lobatto_iiic_2s",
                    "lobatto_iiic_3s",
                    "crouzeix_2s",
                    "crouzeix_3s",
                    "irk_exp_diag_2s",
                    "use_explicit",
                    ]
class Legacy_ClownsharKSampler:
    @classmethod
    def INPUT_TYPES(s):
        # ComfyUI node schema: declares the widgets and input sockets this
        # node exposes. Keys/defaults must stay stable for saved workflows.
        return {"required":
                    {"model": ("MODEL",),
                    #"add_noise": ("BOOLEAN", {"default": True}),
                    "noise_type_init": (NOISE_GENERATOR_NAMES_SIMPLE, {"default": "gaussian"}),
                    "noise_type_sde": (NOISE_GENERATOR_NAMES_SIMPLE, {"default": "brownian"}),
                    "noise_mode_sde": (["hard", "hard_var", "hard_sq", "soft", "softer", "exp"], {"default": 'hard', "tooltip": "How noise scales with the sigma schedule. Hard is the most aggressive, the others start strong and drop rapidly."}),
                    "eta": ("FLOAT", {"default": 0.25, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Calculated noise amount to be added, then removed, after each step."}),
                    "noise_seed": ("INT", {"default": 0, "min": -1, "max": 0xffffffffffffffff}),
                    #"sampler_mode": (['standard', 'unsample', 'resample'],),
                    "sampler_mode": (['standard', 'unsample', 'resample',],),
                    "sampler_name": (RK_SAMPLER_NAMES, {"default": "res_2m"}),
                    "implicit_sampler_name": (["default",
                                            "gauss-legendre_5s",
                                            "gauss-legendre_4s",
                                            "gauss-legendre_3s",
                                            "gauss-legendre_2s",
                                            "crouzeix_2s",
                                            "radau_iia_3s",
                                            "radau_iia_2s",
                                            "lobatto_iiic_3s",
                                            "lobatto_iiic_2s",
                                            ], {"default": "default"}),
                    "scheduler": (get_res4lyf_scheduler_list(), {"default": "beta57"},),
                    "steps": ("INT", {"default": 30, "min": 1, "max": 10000}),
                    "implicit_steps": ("INT", {"default": 0, "min": 0, "max": 10000}),
                    "denoise": ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step":0.01}),
                    "denoise_alt": ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step":0.01}),
                    "cfg": ("FLOAT", {"default": 5.0, "min": -100.0, "max": 100.0, "step":0.1, "round": False, }),
                    "shift": ("FLOAT", {"default": 3.0, "min": -1.0, "max": 100.0, "step":0.1, "round": False, }),
                    "base_shift": ("FLOAT", {"default": 0.85, "min": -1.0, "max": 100.0, "step":0.1, "round": False, }),
                    "truncate_conditioning": (['false', 'true'], {"default": "true"}),
                    },
                "optional":
                    {
                    "positive": ("CONDITIONING", ),
                    "negative": ("CONDITIONING", ),
                    "sigmas": ("SIGMAS", ),
                    "latent_image": ("LATENT", ),
                    "guides": ("GUIDES", ),
                    "options": ("OPTIONS", ),
                    }
                }
RETURN_TYPES = ("LATENT","LATENT", ) #"LATENT","LATENT")
RETURN_NAMES = ("output", "denoised",) # "output_fp64", "denoised_fp64")
FUNCTION = "main"
CATEGORY = "RES4LYF/legacy/samplers"
DEPRECATED = True
def main(self, model, cfg, truncate_conditioning, sampler_mode, scheduler, steps, denoise=1.0, denoise_alt=1.0,
         noise_type_init="gaussian", noise_type_sde="brownian", noise_mode_sde="hard", latent_image=None,
         positive=None, negative=None, sigmas=None, latent_noise=None, latent_noise_match=None,
         noise_stdev=1.0, noise_mean=0.0, noise_normalize=True, noise_is_latent=False,
         eta=0.25, eta_var=0.0, d_noise=1.0, s_noise=1.0, alpha_init=-1.0, k_init=1.0, alpha_sde=-1.0, k_sde=1.0, cfgpp=0.0, c1=0.0, c2=0.5, c3=1.0, multistep=False, noise_seed=-1, sampler_name="res_2m", implicit_sampler_name="default",
         exp_mode=False, t_fn_formula=None, sigma_fn_formula=None, implicit_steps=0,
         latent_guide=None, latent_guide_inv=None, latent_guide_weight=0.0, guide_mode="blend", latent_guide_weights=None, latent_guide_mask=None, rescale_floor=True, sigmas_override=None, unsampler_type="linear",
         shift=3.0, base_shift=0.85, guides=None, options=None,
         ):
    """Run the legacy "legacy_rk" Runge-Kutta sampler end to end.

    Prepares seeds, sigma schedule, initial noise, model shift patches, guide
    weights and (optionally truncated) conditioning, then calls
    comfy.sample.sample_custom with a "legacy_rk" SAMPLER.

    Returns:
        tuple: (out, out_denoised) latent dicts. Both carry a 'samples_fp64'
        copy alongside 'samples' cast back to the input latent's dtype.
    """
    default_dtype = torch.float64  # all latent math below is done in fp64
    max_steps = 10000

    # Seed handling: -1 derives a fresh seed from torch's global RNG;
    # otherwise the given seed is used and also seeds the global RNG.
    if noise_seed == -1:
        seed = torch.initial_seed() + 1
    else:
        seed = noise_seed
        torch.manual_seed(noise_seed)
    noise_seed_sde = seed + 1  # SDE noise gets its own derived seed

    # An 'options' dict (from an options node) overrides individual kwargs.
    if options is not None:
        noise_stdev = options.get('noise_init_stdev', noise_stdev)
        noise_mean = options.get('noise_init_mean', noise_mean)
        noise_type_init = options.get('noise_type_init', noise_type_init)
        noise_type_sde = options.get('noise_type_sde', noise_type_sde)
        noise_mode_sde = options.get('noise_mode_sde', noise_mode_sde)
        eta = options.get('eta', eta)
        s_noise = options.get('s_noise', s_noise)
        d_noise = options.get('d_noise', d_noise)
        alpha_init = options.get('alpha_init', alpha_init)
        k_init = options.get('k_init', k_init)
        alpha_sde = options.get('alpha_sde', alpha_sde)
        k_sde = options.get('k_sde', k_sde)
        noise_seed_sde = options.get('noise_seed_sde', noise_seed+1)
        c1 = options.get('c1', c1)
        c2 = options.get('c2', c2)
        c3 = options.get('c3', c3)
        t_fn_formula = options.get('t_fn_formula', t_fn_formula)
        sigma_fn_formula = options.get('sigma_fn_formula', sigma_fn_formula)
        #unsampler_type = options.get('unsampler_type', unsampler_type)

    # A GUIDES tuple (from the guides node) overrides all guide-related args.
    if guides is not None:
        guide_mode, rescale_floor, latent_guide_weight, latent_guide_weights, t_is, latent_guide, latent_guide_inv, latent_guide_mask, scheduler_, steps_, denoise_ = guides
        """if scheduler == "constant":
            latent_guide_weights = initialize_or_scale(latent_guide_weights, latent_guide_weight, max_steps).to(default_dtype)
            latent_guide_weights = F.pad(latent_guide_weights, (0, max_steps), value=0.0)"""
        # Non-constant guide schedule: build a per-step weight curve from the
        # guide node's own scheduler/steps/denoise, then scale and zero-pad it
        # out to max_steps so later steps index safely.
        if scheduler_ != "constant":
            latent_guide_weights = get_sigmas(model, scheduler_, steps_, denoise_).to(default_dtype)
        latent_guide_weights = initialize_or_scale(latent_guide_weights, latent_guide_weight, max_steps).to(default_dtype)
        latent_guide_weights = F.pad(latent_guide_weights, (0, max_steps), value=0.0)

    # Apply the model-sampling "shift" patch appropriate to the model family.
    # shift < 0 disables patching entirely.
    if shift >= 0:
        if isinstance(model.model.model_config, comfy.supported_models.SD3):
            model = ModelSamplingSD3().patch(model, shift)[0]
        elif isinstance(model.model.model_config, comfy.supported_models.AuraFlow):
            model = ModelSamplingAuraFlow().patch_aura(model, shift)[0]
        elif isinstance(model.model.model_config, comfy.supported_models.Stable_Cascade_C):
            model = ModelSamplingStableCascade().patch(model, shift)[0]
    # Flux additionally takes a base_shift and the latent's spatial size.
    if shift >= 0 and base_shift >= 0:
        if isinstance(model.model.model_config, comfy.supported_models.Flux) or isinstance(model.model.model_config, comfy.supported_models.FluxSchnell):
            model = ModelSamplingFlux().patch(model, shift, base_shift, latent_image['samples'].shape[3], latent_image['samples'].shape[2])[0]

    latent = latent_image
    latent_image_dtype = latent_image['samples'].dtype

    # Fall back to zeroed conditioning when none is connected (shapes match
    # T5/CLIP-style conditioning: (1, 154, 4096) + (1, 2048) pooled).
    if positive is None:
        positive = [[
            torch.zeros((1, 154, 4096)),
            {'pooled_output': torch.zeros((1, 2048))}
            ]]
    if negative is None:
        negative = [[
            torch.zeros((1, 154, 4096)),
            {'pooled_output': torch.zeros((1, 2048))}
            ]]

    # Negative denoise_alt doubles as a d_noise setting (magnitude is reused
    # for both); an options dict can still override d_noise afterwards.
    if denoise_alt < 0:
        d_noise = denoise_alt = -denoise_alt
    if options is not None:
        d_noise = options.get('d_noise', d_noise)

    # Sigma schedule: user-supplied sigmas win over the scheduler.
    if sigmas is not None:
        sigmas = sigmas.clone().to(default_dtype)
    else:
        sigmas = get_sigmas(model, scheduler, steps, denoise).to(default_dtype)
    sigmas *= denoise_alt

    # unsample: run the schedule in reverse (invert toward noise);
    # resample: bracket the schedule with zero sigmas on both ends.
    if sampler_mode.startswith("unsample"):
        null = torch.tensor([0.0], device=sigmas.device, dtype=sigmas.dtype)
        sigmas = torch.flip(sigmas, dims=[0])
        sigmas = torch.cat([sigmas, null])
    elif sampler_mode.startswith("resample"):
        null = torch.tensor([0.0], device=sigmas.device, dtype=sigmas.dtype)
        sigmas = torch.cat([null, sigmas])
        sigmas = torch.cat([sigmas, null])

    # "unsample_linear" etc. select the unsampler flavor via the suffix.
    if sampler_mode.startswith("unsample_"):
        unsampler_type = sampler_mode.split("_", 1)[1]
    elif sampler_mode.startswith("resample_"):
        unsampler_type = sampler_mode.split("_", 1)[1]
    else:
        unsampler_type = ""

    x = latent_image["samples"].clone().to(default_dtype)
    # Prefer a previously saved fp64 copy of the latent when it still matches
    # the (possibly lower-precision) 'samples' tensor, to avoid precision loss
    # across chained sampler nodes.
    if latent_image is not None:
        if "samples_fp64" in latent_image:
            if latent_image['samples'].shape == latent_image['samples_fp64'].shape:
                if torch.norm(latent_image['samples'] - latent_image['samples_fp64']) < 0.01:
                    x = latent_image["samples_fp64"].clone()

    if latent_noise is not None:
        latent_noise["samples"] = latent_noise["samples"].clone().to(default_dtype)
    if latent_noise_match is not None:
        latent_noise_match["samples"] = latent_noise_match["samples"].clone().to(default_dtype)

    # Optionally truncate conditioning to (1, 154, 4096) / (1, 2048) slices;
    # "true_and_zero_neg" additionally replaces the negative with zeros.
    if truncate_conditioning == "true" or truncate_conditioning == "true_and_zero_neg":
        if positive is not None:
            positive[0][0] = positive[0][0].clone().to(default_dtype)
            positive[0][1]["pooled_output"] = positive[0][1]["pooled_output"].clone().to(default_dtype)
        if negative is not None:
            negative[0][0] = negative[0][0].clone().to(default_dtype)
            negative[0][1]["pooled_output"] = negative[0][1]["pooled_output"].clone().to(default_dtype)
        c = []
        for t in positive:
            d = t[1].copy()
            pooled_output = d.get("pooled_output", None)
            if pooled_output is not None:
                d["pooled_output"] = d["pooled_output"][:, :2048]
            n = [t[0][:, :154, :4096], d]
            c.append(n)
        positive = c
        c = []
        for t in negative:
            d = t[1].copy()
            pooled_output = d.get("pooled_output", None)
            # NOTE(review): if pooled_output is missing, `n` is never assigned
            # before c.append(n) below — would raise NameError; confirm all
            # callers always carry a pooled_output.
            if pooled_output is not None:
                if truncate_conditioning == "true_and_zero_neg":
                    d["pooled_output"] = torch.zeros((1,2048), dtype=t[0].dtype, device=t[0].device)
                    n = [torch.zeros((1,154,4096), dtype=t[0].dtype, device=t[0].device), d]
                else:
                    d["pooled_output"] = d["pooled_output"][:, :2048]
                    n = [t[0][:, :154, :4096], d]
            c.append(n)
        negative = c

    sigmin = model.model.model_sampling.sigma_min
    sigmax = model.model.model_sampling.sigma_max

    # Initial noise: "none" -> zeros; otherwise generate with the selected
    # noise class, or take it verbatim from a supplied latent_noise.
    if noise_type_init == "none":
        noise = torch.zeros_like(x)
    elif latent_noise is None:
        noise_sampler_init = NOISE_GENERATOR_CLASSES_SIMPLE.get(noise_type_init)(x=x, seed=seed, sigma_min=sigmin, sigma_max=sigmax)
        if noise_type_init == "fractal":
            noise_sampler_init.alpha = alpha_init
            noise_sampler_init.k = k_init
            noise_sampler_init.scale = 0.1
        noise = noise_sampler_init(sigma=sigmax, sigma_next=sigmin)
    else:
        noise = latent_noise["samples"]

    if noise_is_latent: #add noise and latent together and normalize --> noise
        noise += x.cpu()
        noise.sub_(noise.mean()).div_(noise.std())

    # Normalize to zero mean / unit std (guarded against all-zero noise),
    # then apply the requested stdev and mean.
    if noise_normalize and noise.std() > 0:
        noise.sub_(noise.mean()).div_(noise.std())
    noise *= noise_stdev
    noise = (noise - noise.mean()) + noise_mean

    # Match per-channel means to a reference latent, if one was provided.
    if latent_noise_match:
        for i in range(latent_noise_match["samples"].shape[1]):
            noise[0][i] = (noise[0][i] - noise[0][i].mean())
            noise[0][i] = (noise[0][i]) + latent_noise_match["samples"][0][i].mean()

    noise_mask = latent["noise_mask"] if "noise_mask" in latent else None

    x0_output = {}
    callback = latent_preview.prepare_callback(model, sigmas.shape[-1] - 1, x0_output)

    disable_pbar = False

    # "none" SDE noise disables eta entirely (gaussian is a harmless stand-in);
    # "hard_var" routes eta through the variance-corrected path instead.
    if noise_type_sde == "none":
        eta_var = eta = 0.0
        noise_type_sde = "gaussian"
    if noise_mode_sde == "hard_var":
        eta_var = eta
        eta = 0.0

    # Negative CFG is reinterpreted as a CFG++ scale with plain CFG of 1.0.
    if cfg < 0:
        cfgpp = -cfg
        cfg = 1.0

    sampler = comfy.samplers.ksampler("legacy_rk", {"eta": eta, "eta_var": eta_var, "s_noise": s_noise, "d_noise": d_noise, "alpha": alpha_sde, "k": k_sde, "c1": c1, "c2": c2, "c3": c3, "cfgpp": cfgpp, "MULTISTEP": multistep,
                                                    "noise_sampler_type": noise_type_sde, "noise_mode": noise_mode_sde, "noise_seed": noise_seed_sde, "rk_type": sampler_name, "implicit_sampler_name": implicit_sampler_name,
                                                    "exp_mode": exp_mode, "t_fn_formula": t_fn_formula, "sigma_fn_formula": sigma_fn_formula, "implicit_steps": implicit_steps,
                                                    "latent_guide": latent_guide, "latent_guide_inv": latent_guide_inv, "mask": latent_guide_mask,
                                                    "latent_guide_weights": latent_guide_weights, "guide_mode": guide_mode, #"unsampler_type": unsampler_type,
                                                    "LGW_MASK_RESCALE_MIN": rescale_floor, "sigmas_override": sigmas_override})

    samples = comfy.sample.sample_custom(model, noise, cfg, sampler, sigmas, positive, negative, x.clone(),
                                         noise_mask=noise_mask, callback=callback, disable_pbar=disable_pbar, seed=noise_seed)

    out = latent.copy()
    out["samples"] = samples
    if "x0" in x0_output:
        out_denoised = latent.copy()
        out_denoised["samples"] = model.model.process_latent_out(x0_output["x0"].cpu())
    else:
        out_denoised = out

    # Keep an fp64 copy for downstream chained samplers, and cast 'samples'
    # back to the caller's original dtype.
    # NOTE(review): when out_denoised is out (no x0), the second pair of
    # assignments re-reads the already-downcast 'samples' — confirm intended.
    out["samples_fp64"] = out["samples"].clone()
    out["samples"] = out["samples"].to(latent_image_dtype)
    out_denoised["samples_fp64"] = out_denoised["samples"].clone()
    out_denoised["samples"] = out_denoised["samples"].to(latent_image_dtype)

    return ( out, out_denoised, )
class Legacy_SamplerRK:
    # Deprecated node: builds a "legacy_rk" SAMPLER object (Runge-Kutta family)
    # for use with a custom-sampler node. Superseded by the beta samplers.
    @classmethod
    def INPUT_TYPES(s):
        return {"required":
                    {#"momentum": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False}),
                    "eta": ("FLOAT", {"default": 0.25, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Calculated noise amount to be added, then removed, after each step."}),
                    "eta_var": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Calculate variance-corrected noise amount (overrides eta/noise_mode settings). Cannot be used at very low sigma values; reverts to eta/noise_mode for final steps."}),
                    "s_noise": ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Ratio of calculated noise amount actually added after each step. >1.0 will leave extra noise behind, <1.0 will remove more noise than it adds."}),
                    "d_noise": ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Ratio of calculated noise amount actually added after each step. >1.0 will leave extra noise behind, <1.0 will remove more noise than it adds."}),
                    "noise_mode": (["hard", "hard_sq", "soft", "softer", "exp"], {"default": 'hard', "tooltip": "How noise scales with the sigma schedule. Hard is the most aggressive, the others start strong and drop rapidly."}),
                    "noise_sampler_type": (NOISE_GENERATOR_NAMES, {"default": "brownian"}),
                    "alpha": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step":0.1, "round": False, "tooltip": "Fractal noise mode: <0 = extra high frequency noise, >0 = extra low frequency noise, 0 = white noise."}),
                    "k": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step":2.0, "round": False, "tooltip": "Fractal noise mode: all that matters is positive vs. negative. Effect unclear."}),
                    "noise_seed": ("INT", {"default": -1, "min": -1, "max": 0xffffffffffffffff, "tooltip": "Seed for the SDE noise that is added after each step if eta or eta_var are non-zero. If set to -1, it will use the increment the seed most recently used by the workflow."}),
                    "rk_type": (RK_SAMPLER_NAMES, {"default": "res_2m"}),
                    "exp_mode": ("BOOLEAN", {"default": False, "tooltip": "Convert linear RK methods to exponential form."}),
                    "multistep": ("BOOLEAN", {"default": False, "tooltip": "For samplers ending in S only. Reduces cost by one model call per step by reusing the previous step as the current predictor step."}),
                    "implicit_steps": ("INT", {"default": 0, "min": 0, "max": 100, "step":1, "tooltip": "Number of implicit Runge-Kutta refinement steps to run after each explicit step."}),
                    "cfgpp": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step":0.01, "round": False, "tooltip": "CFG++ scale. Use in place of, or with, CFG. Currently only working with RES, DPMPP, and DDIM samplers."}),
                    "latent_guide_weight": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False}),
                    #"guide_mode": (["hard_light", "mean_std", "mean", "std", "noise_mean", "blend", "inversion"], {"default": 'mean', "tooltip": "The mode used. noise_mean and inversion are currently for test purposes only."}),
                    "guide_mode": (["hard_light", "mean_std", "mean", "std", "blend",], {"default": 'mean', "tooltip": "The mode used. noise_mean and inversion are currently for test purposes only."}),
                    #"guide_mode": (["hard_light", "blend", "mean_std", "mean", "std"], {"default": 'mean', "tooltip": "The mode used."}),
                    "rescale_floor": ("BOOLEAN", {"default": True, "tooltip": "Latent_guide_weight(s) control the minimum value for the latent_guide_mask. If false, they control the maximum value."}),
                    },
                "optional":
                    {
                    "latent_guide": ("LATENT", ),
                    "latent_guide_inv": ("LATENT", ),
                    "latent_guide_mask": ("MASK", ),
                    "latent_guide_weights": ("SIGMAS", ),
                    "sigmas_override": ("SIGMAS", ),
                    }
               }
    RETURN_TYPES = ("SAMPLER",)
    CATEGORY = "RES4LYF/legacy/samplers"
    FUNCTION = "get_sampler"
    DEPRECATED = True

    def get_sampler(self, eta=0.25, eta_var=0.0, d_noise=1.0, s_noise=1.0, alpha=-1.0, k=1.0, cfgpp=0.0, multistep=False, noise_sampler_type="brownian", noise_mode="hard", noise_seed=-1, rk_type="dormand-prince",
                    exp_mode=False, t_fn_formula=None, sigma_fn_formula=None, implicit_steps=0,
                    latent_guide=None, latent_guide_inv=None, latent_guide_weight=0.0, guide_mode="hard_light", latent_guide_weights=None, latent_guide_mask=None, rescale_floor=True, sigmas_override=None,
                    ):
        # Package all options into the extra-options dict of a "legacy_rk"
        # ksampler. Returns a 1-tuple (SAMPLER,).
        sampler_name = "legacy_rk"

        # With no guide latents connected, force the guide weight to zero so
        # the weight curve below is all zeros.
        if latent_guide is None and latent_guide_inv is None:
            latent_guide_weight = 0.0

        # Build a per-step guide-weight curve, zero-padded so any step index
        # up to 10000 is safe to read.
        steps = 10000
        latent_guide_weights = initialize_or_scale(latent_guide_weights, latent_guide_weight, steps)
        latent_guide_weights = F.pad(latent_guide_weights, (0, 10000), value=0.0)

        sampler = comfy.samplers.ksampler(sampler_name, {"eta": eta, "eta_var": eta_var, "s_noise": s_noise, "d_noise": d_noise, "alpha": alpha, "k": k, "cfgpp": cfgpp, "MULTISTEP": multistep, "noise_sampler_type": noise_sampler_type, "noise_mode": noise_mode, "noise_seed": noise_seed, "rk_type": rk_type,
                                                         "exp_mode": exp_mode, "t_fn_formula": t_fn_formula, "sigma_fn_formula": sigma_fn_formula, "implicit_steps": implicit_steps,
                                                         "latent_guide": latent_guide, "latent_guide_inv": latent_guide_inv, "mask": latent_guide_mask, "latent_guide_weight": latent_guide_weight, "latent_guide_weights": latent_guide_weights, "guide_mode": guide_mode,
                                                         "LGW_MASK_RESCALE_MIN": rescale_floor, "sigmas_override": sigmas_override})
        return (sampler, )
class Legacy_ClownsharKSamplerGuides:
    # Deprecated node: bundles latent-guide settings into a GUIDES tuple that
    # the legacy ClownsharKSampler node unpacks.
    @classmethod
    def INPUT_TYPES(s):
        return {"required":
                    {"guide_mode": (["hard_light", "mean_std", "mean", "std", "blend"], {"default": 'blend', "tooltip": "The mode used."}),
                    "latent_guide_weight": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False}),
                    "scheduler": (["constant"] + get_res4lyf_scheduler_list(), {"default": "beta57"},),
                    "steps": ("INT", {"default": 30, "min": 1, "max": 10000}),
                    "denoise": ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False}),
                    "rescale_floor": ("BOOLEAN", {"default": False, "tooltip": "If true, latent_guide_weight(s) primarily affect the masked areas. If false, they control the unmasked areas."}),
                    },
                "optional":
                    {
                    "latent_guide": ("LATENT", ),
                    "latent_guide_inv": ("LATENT", ),
                    "latent_guide_mask": ("MASK", ),
                    "latent_guide_weights": ("SIGMAS", ),
                    }
               }
    RETURN_TYPES = ("GUIDES",)
    CATEGORY = "RES4LYF/legacy/samplers"
    FUNCTION = "get_sampler"
    DEPRECATED = True

    def get_sampler(self, model=None, scheduler="constant", steps=30, denoise=1.0, latent_guide=None, latent_guide_inv=None, latent_guide_weight=0.0, guide_mode="blend", latent_guide_weights=None, latent_guide_mask=None, rescale_floor=True, t_is=None,
                    ):
        """Package guide settings into the GUIDES tuple consumed by the legacy sampler.

        For the "constant" scheduler a flat, zero-padded weight curve is built
        here; for any other scheduler the raw inputs are passed through and the
        consuming node derives the curve from (scheduler, steps, denoise).

        Returns:
            tuple: 1-tuple containing the 11-element GUIDES tuple.
        """
        default_dtype = torch.float64
        max_steps = 10000

        if scheduler == "constant":
            # NOTE(review): passes None rather than latent_guide_weights, so a
            # user-supplied weights curve is ignored on the constant schedule.
            # Preserved as-is; confirm intent before changing.
            latent_guide_weights = initialize_or_scale(None, latent_guide_weight, steps).to(default_dtype)
            latent_guide_weights = F.pad(latent_guide_weights, (0, max_steps), value=0.0)

        # Fix: removed two dead assignments that cloned latent_guide /
        # latent_guide_inv into a local `x` which was never read (wasted a
        # full fp64 copy of each guide latent).

        guides = (guide_mode, rescale_floor, latent_guide_weight, latent_guide_weights, t_is, latent_guide, latent_guide_inv, latent_guide_mask, scheduler, steps, denoise)
        return (guides, )
class Legacy_SharkSampler:
    # Deprecated node: a full custom-sampling wrapper around
    # comfy.sample.sample_custom with explicit noise generation, optional
    # conditioning truncation, and unsample/resample sigma-schedule modes.
    @classmethod
    def INPUT_TYPES(s):
        return {"required":
                    {"model": ("MODEL",),
                    "add_noise": ("BOOLEAN", {"default": True}),
                    "noise_normalize": ("BOOLEAN", {"default": True}),
                    "noise_stdev": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step":0.01, "round": False, }),
                    "noise_mean": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step":0.01, "round": False, }),
                    "noise_is_latent": ("BOOLEAN", {"default": False}),
                    "noise_type": (NOISE_GENERATOR_NAMES, {"default": "gaussian"}),
                    "alpha": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step":0.1, "round": False, }),
                    "k": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step":2.0, "round": False, }),
                    "noise_seed": ("INT", {"default": 0, "min": -1, "max": 0xffffffffffffffff}),
                    "sampler_mode": (['standard', 'unsample', 'resample'],),
                    "scheduler": (get_res4lyf_scheduler_list(),),
                    "steps": ("INT", {"default": 30, "min": 1, "max": 10000}),
                    "denoise": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10000, "step":0.01}),
                    "cfg": ("FLOAT", {"default": 5.0, "min": 0.0, "max": 100.0, "step":0.5, "round": False, }),
                    "truncate_conditioning": (['false', 'true', 'true_and_zero_neg'], ),
                    "positive": ("CONDITIONING", ),
                    "negative": ("CONDITIONING", ),
                    "sampler": ("SAMPLER", ),
                    "latent_image": ("LATENT", ),
                    },
                "optional":
                    {
                    "sigmas": ("SIGMAS", ),
                    "latent_noise": ("LATENT", ),
                    "latent_noise_match": ("LATENT",),
                    }
               }

    RETURN_TYPES = ("LATENT","LATENT","LATENT","LATENT")
    RETURN_NAMES = ("output", "denoised", "output_fp64", "denoised_fp64")
    FUNCTION = "main"
    CATEGORY = "RES4LYF/legacy/samplers"
    DEPRECATED = True

    def main(self, model, add_noise, noise_stdev, noise_mean, noise_normalize, noise_is_latent, noise_type, noise_seed, cfg, truncate_conditioning, alpha, k, positive, negative, sampler,
             latent_image, sampler_mode, scheduler, steps, denoise, sigmas=None, latent_noise=None, latent_noise_match=None,):
        """Prepare sigmas, noise, and conditioning, then run sample_custom.

        Returns:
            tuple: (output, denoised, output_fp64, denoised_fp64) latent dicts;
            the first two are cast back to the input latent's dtype, the last
            two keep the fp64 working precision.
        """
        latent = latent_image
        latent_image_dtype = latent_image['samples'].dtype
        default_dtype = torch.float64  # all latent math below is done in fp64

        # Fall back to zeroed conditioning when none is connected (shapes match
        # T5/CLIP-style conditioning: (1, 154, 4096) + (1, 2048) pooled).
        if positive is None:
            positive = [[
                torch.zeros((1, 154, 4096)),
                {'pooled_output': torch.zeros((1, 2048))}
                ]]
        if negative is None:
            negative = [[
                torch.zeros((1, 154, 4096)),
                {'pooled_output': torch.zeros((1, 2048))}
                ]]

        # Negative denoise doubles as a d_noise setting on the sampler object.
        if denoise < 0:
            sampler.extra_options['d_noise'] = -denoise
            denoise = 1.0

        # Sigma schedule: user-supplied sigmas win over the scheduler.
        if sigmas is not None:
            sigmas = sigmas.clone().to(default_dtype)
        else:
            sigmas = get_sigmas(model, scheduler, steps, denoise).to(default_dtype)

        # unsample: run the schedule in reverse (invert toward noise);
        # resample: bracket the schedule with zero sigmas on both ends.
        if sampler_mode == "unsample":
            null = torch.tensor([0.0], device=sigmas.device, dtype=sigmas.dtype)
            sigmas = torch.flip(sigmas, dims=[0])
            sigmas = torch.cat([sigmas, null])
        elif sampler_mode == "resample":
            null = torch.tensor([0.0], device=sigmas.device, dtype=sigmas.dtype)
            sigmas = torch.cat([null, sigmas])
            sigmas = torch.cat([sigmas, null])

        if latent_image is not None:
            x = latent_image["samples"].clone().to(default_dtype)

        if latent_noise is not None:
            latent_noise["samples"] = latent_noise["samples"].clone().to(default_dtype)
        if latent_noise_match is not None:
            latent_noise_match["samples"] = latent_noise_match["samples"].clone().to(default_dtype)

        # Optionally truncate conditioning to (1, 154, 4096) / (1, 2048)
        # slices; "true_and_zero_neg" additionally zeroes the negative.
        if truncate_conditioning == "true" or truncate_conditioning == "true_and_zero_neg":
            if positive is not None:
                positive[0][0] = positive[0][0].clone().to(default_dtype)
                positive[0][1]["pooled_output"] = positive[0][1]["pooled_output"].clone().to(default_dtype)
            # Fix: the fp64 conversion of the negative conditioning used to run
            # once per loop iteration below; hoisted here to mirror the
            # positive path (identical result, done once).
            if negative is not None:
                negative[0][0] = negative[0][0].clone().to(default_dtype)
                negative[0][1]["pooled_output"] = negative[0][1]["pooled_output"].clone().to(default_dtype)
            c = []
            for t in positive:
                d = t[1].copy()
                pooled_output = d.get("pooled_output", None)
                if pooled_output is not None:
                    d["pooled_output"] = d["pooled_output"][:, :2048]
                n = [t[0][:, :154, :4096], d]
                c.append(n)
            positive = c
            c = []
            for t in negative:
                d = t[1].copy()
                pooled_output = d.get("pooled_output", None)
                if pooled_output is not None:
                    if truncate_conditioning == "true_and_zero_neg":
                        d["pooled_output"] = torch.zeros((1,2048), dtype=t[0].dtype, device=t[0].device)
                        n = [torch.zeros((1,154,4096), dtype=t[0].dtype, device=t[0].device), d]
                    else:
                        d["pooled_output"] = d["pooled_output"][:, :2048]
                        n = [t[0][:, :154, :4096], d]
                c.append(n)
            negative = c

        sigmin = model.model.model_sampling.sigma_min
        sigmax = model.model.model_sampling.sigma_max

        # Seed handling: -1 derives a fresh seed from torch's global RNG;
        # otherwise the given seed is used and also seeds the global RNG.
        if noise_seed == -1:
            seed = torch.initial_seed() + 1
        else:
            seed = noise_seed
            torch.manual_seed(noise_seed)

        noise_sampler = NOISE_GENERATOR_CLASSES.get(noise_type)(x=x, seed=seed, sigma_min=sigmin, sigma_max=sigmax)

        if noise_type == "fractal":
            noise_sampler.alpha = alpha
            noise_sampler.k = k
            noise_sampler.scale = 0.1

        if not add_noise:
            noise = torch.zeros_like(x)
        elif latent_noise is None:
            noise = noise_sampler(sigma=sigmax, sigma_next=sigmin)
        else:
            noise = latent_noise["samples"]

        if noise_is_latent: #add noise and latent together and normalize --> noise
            noise += x.cpu()
            noise.sub_(noise.mean()).div_(noise.std())

        # Fix: guard against a zero-std noise tensor (e.g. add_noise=False
        # yields all zeros) which previously divided by zero and produced NaNs.
        # Matches the guard used by the legacy ClownsharKSampler node.
        if noise_normalize and noise.std() > 0:
            noise.sub_(noise.mean()).div_(noise.std())
        noise *= noise_stdev
        noise = (noise - noise.mean()) + noise_mean

        # Match per-channel means to a reference latent, if one was provided.
        if latent_noise_match:
            for i in range(latent_noise_match["samples"].shape[1]):
                noise[0][i] = (noise[0][i] - noise[0][i].mean())
                noise[0][i] = (noise[0][i]) + latent_noise_match["samples"][0][i].mean()

        noise_mask = latent["noise_mask"] if "noise_mask" in latent else None

        x0_output = {}
        callback = latent_preview.prepare_callback(model, sigmas.shape[-1] - 1, x0_output)

        disable_pbar = False

        samples = comfy.sample.sample_custom(model, noise, cfg, sampler, sigmas, positive, negative, x,
                                             noise_mask=noise_mask, callback=callback, disable_pbar=disable_pbar, seed=noise_seed)

        out = latent.copy()
        out["samples"] = samples
        if "x0" in x0_output:
            out_denoised = latent.copy()
            out_denoised["samples"] = model.model.process_latent_out(x0_output["x0"].cpu())
        else:
            out_denoised = out

        # First two outputs are cast back to the input dtype; the fp64 dicts
        # are returned as-is for precision-preserving chaining.
        out_orig_dtype = out['samples'].clone().to(latent_image_dtype)
        out_denoised_orig_dtype = out_denoised['samples'].clone().to(latent_image_dtype)

        return ( {'samples': out_orig_dtype}, {'samples': out_denoised_orig_dtype}, out, out_denoised,)
================================================
FILE: legacy/models.py
================================================
# Code adapted from https://github.com/comfyanonymous/ComfyUI/
import comfy.samplers
import comfy.sample
import comfy.sampler_helpers
import comfy.utils
from comfy.cli_args import args
from comfy_extras.nodes_model_advanced import ModelSamplingSD3, ModelSamplingFlux, ModelSamplingAuraFlow, ModelSamplingStableCascade
import torch
import folder_paths
import os
import json
import math
import comfy.model_management
from .flux.model import ReFlux
from .flux.layers import SingleStreamBlock as ReSingleStreamBlock, DoubleStreamBlock as ReDoubleStreamBlock
from comfy.ldm.flux.model import Flux
from comfy.ldm.flux.layers import SingleStreamBlock, DoubleStreamBlock
from .helper import get_orthogonal, get_cosine_similarity
from ..res4lyf import RESplain
class ReFluxPatcher:
    """Toggle a loaded Flux model between the stock ComfyUI classes and the
    RES4LYF ReFlux variants by retagging __class__ on the transformer and all
    of its double/single stream blocks (each block also gets its index as
    ``idx``). Note: the input model is modified in place, not cloned."""

    @classmethod
    def INPUT_TYPES(s):
        return {"required": {
            "model": ("MODEL",),
            "enable": ("BOOLEAN", {"default": True}),
            }
            }

    RETURN_TYPES = ("MODEL",)
    RETURN_NAMES = ("model",)
    CATEGORY = "RES4LYF/model_patches"
    FUNCTION = "main"

    def main(self, model, enable=True):
        patched = model  # intentionally not cloned (in-place patch)
        diffusion = patched.model.diffusion_model

        if enable:
            diffusion.__class__ = ReFlux
            diffusion.threshold_inv = False
            double_cls, single_cls = ReDoubleStreamBlock, ReSingleStreamBlock
        else:
            diffusion.__class__ = Flux
            double_cls, single_cls = DoubleStreamBlock, SingleStreamBlock

        # Retag every stream block and record its position within its list.
        for idx, block in enumerate(diffusion.double_blocks):
            block.__class__ = double_cls
            block.idx = idx
        for idx, block in enumerate(diffusion.single_blocks):
            block.__class__ = single_cls
            block.idx = idx

        return (patched,)
import types
class FluxOrthoCFGPatcher:
    # Experimental patcher: monkey-patches Flux.forward so that, before each
    # forward pass, the positive T5/CLIP-L conditioning is made orthogonal to
    # the negative conditioning (for CFG experiments). CLIP-L pooled output
    # can optionally be zeroed entirely.
    @classmethod
    def INPUT_TYPES(s):
        return {"required": {
            "model": ("MODEL",),
            "enable": ("BOOLEAN", {"default": True}),
            "ortho_T5": ("BOOLEAN", {"default": True}),
            "ortho_clip_L": ("BOOLEAN", {"default": True}),
            "zero_clip_L": ("BOOLEAN", {"default": True}),
            }
            }

    RETURN_TYPES = ("MODEL",)
    RETURN_NAMES = ("model",)
    CATEGORY = "RES4LYF/model_patches"
    FUNCTION = "main"

    # Captured once at class-definition time so the patch can be undone.
    original_forward = Flux.forward

    @staticmethod
    def new_forward(self, x, timestep, context, y, guidance, control=None, transformer_options={}, **kwargs):
        # NOTE(review): fixed 500 passes with no early break — presumably an
        # iterative re-orthogonalization to drive cosine similarity toward
        # zero despite float error; confirm the iteration count is intended.
        # Assumes batch layout [positive, negative] for context/y — verify.
        for _ in range(500):
            if self.ortho_T5 and get_cosine_similarity(context[0], context[1]) != 0:
                context[0] = get_orthogonal(context[0], context[1])
            if self.ortho_clip_L and get_cosine_similarity(y[0], y[1]) != 0:
                y[0] = get_orthogonal(y[0].unsqueeze(0), y[1].unsqueeze(0)).squeeze(0)
        RESplain("postcossim1: ", get_cosine_similarity(context[0], context[1]))
        RESplain("postcossim2: ", get_cosine_similarity(y[0], y[1]))
        if self.zero_clip_L:
            y[0] = torch.zeros_like(y[0])
        return FluxOrthoCFGPatcher.original_forward(self, x, timestep, context, y, guidance, control, transformer_options, **kwargs)

    def main(self, model, enable=True, ortho_T5=True, ortho_clip_L=True, zero_clip_L=True):
        m = model.clone()
        if enable:
            # Flags are stored on the diffusion model instance; new_forward
            # reads them via its bound `self`.
            m.model.diffusion_model.ortho_T5 = ortho_T5
            m.model.diffusion_model.ortho_clip_L = ortho_clip_L
            m.model.diffusion_model.zero_clip_L = zero_clip_L
            # NOTE(review): assigning a bound method to the class attribute
            # patches ALL Flux instances, pinned to this one diffusion model.
            Flux.forward = types.MethodType(FluxOrthoCFGPatcher.new_forward, m.model.diffusion_model)
        else:
            Flux.forward = FluxOrthoCFGPatcher.original_forward
        return (m,)
class FluxGuidanceDisable:
    """Toggle the Flux distilled-guidance embedding on or off, optionally also
    zeroing the CLIP-L pooled vector by monkey-patching ``Flux.forward``."""

    @classmethod
    def INPUT_TYPES(s):
        return {"required": {
            "model": ("MODEL",),
            "disable": ("BOOLEAN", {"default": True}),
            "zero_clip_L": ("BOOLEAN", {"default": True}),
            }
            }

    RETURN_TYPES = ("MODEL",)
    RETURN_NAMES = ("model",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/model_patches"

    # Captured once at class-definition time so the patch can be undone.
    original_forward = Flux.forward

    @staticmethod
    def new_forward(self, x, timestep, context, y, guidance, control=None, transformer_options={}, **kwargs):
        # Replace the pooled CLIP-L vector with zeros before delegating.
        zeroed_y = torch.zeros_like(y)
        return FluxGuidanceDisable.original_forward(self, x, timestep, context, zeroed_y, guidance, control, transformer_options, **kwargs)

    def main(self, model, disable=True, zero_clip_L=True):
        m = model.clone()
        # guidance_embed is simply the inverse of the 'disable' toggle.
        m.model.diffusion_model.params.guidance_embed = not disable
        if zero_clip_L:
            Flux.forward = types.MethodType(FluxGuidanceDisable.new_forward, m.model.diffusion_model)
        return (m,)
def time_snr_shift_exponential(alpha, t):
    """Exponential SNR time shift (as used for Flux/SD3.5M-style schedules).

    Maps t in (0, 1] to exp(alpha) / (exp(alpha) + 1/t - 1); alpha == 0 is
    the identity mapping.

    Args:
        alpha (float): shift parameter ("mu"); larger values push the
            schedule toward higher noise.
        t (float): normalized timestep in (0, 1]; must be non-zero.

    Returns:
        float: shifted timestep in (0, 1].
    """
    # Fix: exp(alpha) was evaluated twice per call, and the denominator
    # carried a no-op `** 1.0`; hoisted/simplified with identical results.
    e_alpha = math.exp(alpha)
    return e_alpha / (e_alpha + (1 / t - 1))
def time_snr_shift_linear(alpha, t):
    """Linear SNR time shift: t -> alpha*t / (1 + (alpha - 1)*t).

    alpha == 1.0 is the identity mapping (as used for SD3.5L-style schedules).
    """
    # Identity shift: skip the arithmetic entirely.
    if alpha == 1.0:
        return t
    numerator = alpha * t
    denominator = 1 + (alpha - 1) * t
    return numerator / denominator
class ModelSamplingAdvanced:
    # this is used to set the "shift" using either exponential scaling (default for SD3.5M and Flux) or linear scaling (default for SD3.5L and SD3 2B beta)
    @classmethod
    def INPUT_TYPES(s):
        return {"required": {
            "model": ("MODEL",),
            "scaling": (["exponential", "linear"], {"default": 'exponential'}),
            "shift": ("FLOAT", {"default": 3.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False}),
            #"base_shift": ("FLOAT", {"default": 3.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False}),
            }
            }

    RETURN_TYPES = ("MODEL",)
    RETURN_NAMES = ("model",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/model_shift"

    # Both helpers read self.timestep_shift / self.multiplier, which main()
    # stores on the node instance before calling them.
    def sigma_exponential(self, timestep):
        return time_snr_shift_exponential(self.timestep_shift, timestep / self.multiplier)

    def sigma_linear(self, timestep):
        return time_snr_shift_linear(self.timestep_shift, timestep / self.multiplier)

    def main(self, model, scaling, shift):
        # Rebuilds the model_sampling object for the detected model family and
        # re-registers its sigma table using the chosen shift curve.
        m = model.clone()
        self.timestep_shift = shift
        self.multiplier = 1000
        timesteps = 1000
        sampling_base = None

        # Per-family timestep count, normalization multiplier, and base class.
        if isinstance(m.model.model_config, comfy.supported_models.Flux) or isinstance(m.model.model_config, comfy.supported_models.FluxSchnell):
            self.multiplier = 1
            timesteps = 10000
            sampling_base = comfy.model_sampling.ModelSamplingFlux
            sampling_type = comfy.model_sampling.CONST
        elif isinstance(m.model.model_config, comfy.supported_models.AuraFlow):
            self.multiplier = 1
            timesteps = 1000
            sampling_base = comfy.model_sampling.ModelSamplingDiscreteFlow
            sampling_type = comfy.model_sampling.CONST
        elif isinstance(m.model.model_config, comfy.supported_models.HunyuanVideo):
            self.multiplier = 1000
            timesteps = 1000
            sampling_base = comfy.model_sampling.ModelSamplingDiscreteFlow
            sampling_type = comfy.model_sampling.CONST
        elif isinstance(m.model.model_config, comfy.supported_models.CosmosT2V) or isinstance(m.model.model_config, comfy.supported_models.CosmosI2V):
            self.multiplier = 1
            timesteps = 1000
            sampling_base = comfy.model_sampling.ModelSamplingContinuousEDM
            sampling_type = comfy.model_sampling.CONST
        elif isinstance(m.model.model_config, comfy.supported_models.LTXV):
            self.multiplier = 1000
            timesteps = 1000
            sampling_base = comfy.model_sampling.ModelSamplingFlux
            sampling_type = comfy.model_sampling.CONST
        elif isinstance(m.model.model_config, comfy.supported_models.SD3):
            self.multiplier = 1000
            timesteps = 1000
            sampling_base = comfy.model_sampling.ModelSamplingDiscreteFlow
            sampling_type = comfy.model_sampling.CONST

        if sampling_base is None:
            raise ValueError("Model not supported by ModelSamplingAdvanced")

        # Local class deliberately combines the family base with the CONST
        # sampling type (its name shadows this node class, but only locally).
        class ModelSamplingAdvanced(sampling_base, sampling_type):
            pass

        m.object_patches['model_sampling'] = m.model.model_sampling = ModelSamplingAdvanced(m.model.model_config)
        # Bypass any property/descriptor logic: write shift/multiplier
        # straight into the instance __dict__.
        m.model.model_sampling.__dict__['shift'] = self.timestep_shift
        m.model.model_sampling.__dict__['multiplier'] = self.multiplier

        # Recompute the full sigma table under the requested shift curve.
        s_range = torch.arange(1, timesteps + 1, 1).to(torch.float64)
        if scaling == "exponential":
            ts = self.sigma_exponential((s_range / timesteps) * self.multiplier)
        elif scaling == "linear":
            ts = self.sigma_linear((s_range / timesteps) * self.multiplier)

        m.model.model_sampling.register_buffer('sigmas', ts)
        m.object_patches['model_sampling'].sigmas = m.model.model_sampling.sigmas

        return (m,)
class ModelSamplingAdvancedResolution:
    # this is used to set the "shift" using either exponential scaling (default for SD3.5M and Flux) or linear scaling (default for SD3.5L and SD3 2B beta)
    # Resolution-aware variant: the shift is interpolated between base_shift
    # and max_shift from the latent's spatial area before patching.
    @classmethod
    def INPUT_TYPES(s):
        return {"required": {
            "model": ("MODEL",),
            "scaling": (["exponential", "linear"], {"default": 'exponential'}),
            "max_shift": ("FLOAT", {"default": 1.35, "min": -100.0, "max": 100.0, "step":0.01, "round": False}),
            "base_shift": ("FLOAT", {"default": 0.85, "min": -100.0, "max": 100.0, "step":0.01, "round": False}),
            "latent_image": ("LATENT",),
            }
            }

    RETURN_TYPES = ("MODEL",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/model_shift"

    # Both helpers read self.timestep_shift / self.multiplier, which main()
    # stores on the node instance before calling them.
    def sigma_exponential(self, timestep):
        return time_snr_shift_exponential(self.timestep_shift, timestep / self.multiplier)

    def sigma_linear(self, timestep):
        return time_snr_shift_linear(self.timestep_shift, timestep / self.multiplier)

    def main(self, model, scaling, max_shift, base_shift, latent_image):
        """Patch model_sampling with a resolution-interpolated shift.

        Raises:
            ValueError: if the model family is not Flux/AuraFlow/SD3.
        """
        m = model.clone()

        # Linear interpolation of shift over latent area, anchored at
        # 256 tokens (base_shift) and 4096 tokens (max_shift).
        height, width = latent_image['samples'].shape[2:]
        x1 = 256
        x2 = 4096
        mm = (max_shift - base_shift) / (x2 - x1)
        b = base_shift - mm * x1
        shift = (width * height / (8 * 8 * 2 * 2)) * mm + b

        self.timestep_shift = shift
        self.multiplier = 1000
        timesteps = 1000
        # Fix: sampling_base/sampling_type were previously left undefined for
        # unsupported models, raising a confusing UnboundLocalError below.
        # Initialize and fail with an explicit ValueError instead, matching
        # the sibling ModelSamplingAdvanced node.
        sampling_base = None
        sampling_type = None

        # Per-family timestep count, normalization multiplier, and base class.
        if isinstance(m.model.model_config, comfy.supported_models.Flux) or isinstance(m.model.model_config, comfy.supported_models.FluxSchnell):
            self.multiplier = 1
            timesteps = 10000
            sampling_base = comfy.model_sampling.ModelSamplingFlux
            sampling_type = comfy.model_sampling.CONST
        elif isinstance(m.model.model_config, comfy.supported_models.AuraFlow):
            self.multiplier = 1
            timesteps = 1000
            sampling_base = comfy.model_sampling.ModelSamplingDiscreteFlow
            sampling_type = comfy.model_sampling.CONST
        elif isinstance(m.model.model_config, comfy.supported_models.SD3):
            self.multiplier = 1000
            timesteps = 1000
            sampling_base = comfy.model_sampling.ModelSamplingDiscreteFlow
            sampling_type = comfy.model_sampling.CONST

        if sampling_base is None:
            raise ValueError("Model not supported by ModelSamplingAdvancedResolution")

        # Local class deliberately combines the family base with the CONST
        # sampling type (its name shadows the sibling node class, but only
        # locally).
        class ModelSamplingAdvanced(sampling_base, sampling_type):
            pass

        m.object_patches['model_sampling'] = m.model.model_sampling = ModelSamplingAdvanced(m.model.model_config)
        # Bypass any property/descriptor logic: write shift/multiplier
        # straight into the instance __dict__.
        m.model.model_sampling.__dict__['shift'] = self.timestep_shift
        m.model.model_sampling.__dict__['multiplier'] = self.multiplier

        # Recompute the full sigma table under the requested shift curve.
        s_range = torch.arange(1, timesteps + 1, 1).to(torch.float64)
        if scaling == "exponential":
            ts = self.sigma_exponential((s_range / timesteps) * self.multiplier)
        elif scaling == "linear":
            ts = self.sigma_linear((s_range / timesteps) * self.multiplier)

        m.model.model_sampling.register_buffer('sigmas', ts)
        m.object_patches['model_sampling'].sigmas = m.model.model_sampling.sigmas

        return (m,)
class UNetSave:
    """Output node that writes only the MODEL weights to a .safetensors file."""

    def __init__(self):
        # Resolve ComfyUI's output directory once, at node construction.
        self.output_dir = folder_paths.get_output_directory()

    @classmethod
    def INPUT_TYPES(cls):
        required = {
            "model": ("MODEL",),
            "filename_prefix": ("STRING", {"default": "models/ComfyUI"}),
        }
        hidden = {"prompt": "PROMPT", "extra_pnginfo": "EXTRA_PNGINFO"}
        return {"required": required, "hidden": hidden}

    RETURN_TYPES = ()
    FUNCTION = "save"
    OUTPUT_NODE = True
    CATEGORY = "RES4LYF/model_merging"
    DESCRIPTION = "Save a .safetensors containing only the model data."

    def save(self, model, filename_prefix, prompt=None, extra_pnginfo=None):
        # Delegate to the module-level helper; clip/vae are intentionally None
        # so only the diffusion model is serialized.
        save_checkpoint(
            model,
            clip=None,
            vae=None,
            filename_prefix=filename_prefix,
            output_dir=self.output_dir,
            prompt=prompt,
            extra_pnginfo=extra_pnginfo,
        )
        return {}
def save_checkpoint(model, clip=None, vae=None, clip_vision=None, filename_prefix=None, output_dir=None, prompt=None, extra_pnginfo=None):
    """Assemble modelspec/prompt metadata and write a checkpoint under output_dir.

    model:           ModelPatcher whose inner model is saved.
    clip/vae/clip_vision: optional components to embed (all None -> UNet-only save).
    filename_prefix: prefix resolved by folder_paths into folder/stem/counter.
    prompt/extra_pnginfo: workflow info embedded as JSON metadata unless disabled.
    """
    full_output_folder, filename, counter, subfolder, filename_prefix = folder_paths.get_save_image_path(filename_prefix, output_dir)
    prompt_info = ""
    if prompt is not None:
        prompt_info = json.dumps(prompt)

    metadata = {}

    enable_modelspec = True
    if isinstance(model.model, comfy.model_base.SDXL):
        if isinstance(model.model, comfy.model_base.SDXL_instructpix2pix):
            metadata["modelspec.architecture"] = "stable-diffusion-xl-v1-edit"
        else:
            metadata["modelspec.architecture"] = "stable-diffusion-xl-v1-base"
    elif isinstance(model.model, comfy.model_base.SDXLRefiner):
        metadata["modelspec.architecture"] = "stable-diffusion-xl-v1-refiner"
    elif isinstance(model.model, comfy.model_base.SVD_img2vid):
        metadata["modelspec.architecture"] = "stable-video-diffusion-img2vid-v1"
    elif isinstance(model.model, comfy.model_base.SD3):
        metadata["modelspec.architecture"] = "stable-diffusion-v3-medium" #TODO: other SD3 variants
    else:
        enable_modelspec = False

    if enable_modelspec:
        metadata["modelspec.sai_model_spec"] = "1.0.0"
        metadata["modelspec.implementation"] = "sgm"
        metadata["modelspec.title"] = "{} {}".format(filename, counter)

    #TODO:
    # "stable-diffusion-v1", "stable-diffusion-v1-inpainting", "stable-diffusion-v2-512",
    # "stable-diffusion-v2-768-v", "stable-diffusion-v2-unclip-l", "stable-diffusion-v2-unclip-h",
    # "v2-inpainting"

    extra_keys = {}
    model_sampling = model.get_model_object("model_sampling")
    if isinstance(model_sampling, comfy.model_sampling.ModelSamplingContinuousEDM):
        if isinstance(model_sampling, comfy.model_sampling.V_PREDICTION):
            # EDM v-prediction checkpoints carry their sigma range as extra tensors
            extra_keys["edm_vpred.sigma_max"] = torch.tensor(model_sampling.sigma_max).float()
            extra_keys["edm_vpred.sigma_min"] = torch.tensor(model_sampling.sigma_min).float()

    if model.model.model_type == comfy.model_base.ModelType.EPS:
        metadata["modelspec.predict_key"] = "epsilon"
    elif model.model.model_type == comfy.model_base.ModelType.V_PREDICTION:
        metadata["modelspec.predict_key"] = "v"

    if not args.disable_metadata:
        metadata["prompt"] = prompt_info
        if extra_pnginfo is not None:
            for x in extra_pnginfo:
                metadata[x] = json.dumps(extra_pnginfo[x])

    # BUGFIX: the stem was the literal placeholder "(unknown)"; use the stem that
    # get_save_image_path derived from filename_prefix so saved files are named correctly.
    output_checkpoint = f"{filename}_{counter:05}_.safetensors"
    output_checkpoint = os.path.join(full_output_folder, output_checkpoint)

    sd_save_checkpoint(output_checkpoint, model, clip, vae, clip_vision, metadata=metadata, extra_keys=extra_keys)
def sd_save_checkpoint(output_path, model, clip=None, vae=None, clip_vision=None, metadata=None, extra_keys=None):
    """Materialize patched weights on GPU and write the combined state dict to output_path.

    clip/vae/clip_vision may be None to omit those components (UNet-only save).
    extra_keys: optional dict of additional raw tensors to store alongside the weights.
    """
    # BUGFIX: extra_keys previously defaulted to a shared mutable dict ({}).
    if extra_keys is None:
        extra_keys = {}

    clip_sd = None
    load_models = [model]
    if clip is not None:
        load_models.append(clip.load_model())
        clip_sd = clip.get_sd()

    # force_patch_weights bakes LoRA/patches into the weights before extracting the state dict
    comfy.model_management.load_models_gpu(load_models, force_patch_weights=True)
    clip_vision_sd = clip_vision.get_sd() if clip_vision is not None else None
    vae_sd = vae.get_sd() if vae is not None else None #THIS ALLOWS SAVING UNET ONLY
    sd = model.model.state_dict_for_saving(clip_sd, vae_sd, clip_vision_sd)

    for k in extra_keys:
        sd[k] = extra_keys[k]

    # safetensors requires contiguous tensors
    for k in sd:
        t = sd[k]
        if not t.is_contiguous():
            sd[k] = t.contiguous()

    comfy.utils.save_torch_file(sd, output_path, metadata=metadata)
class TorchCompileModelFluxAdvanced: #adapted from https://github.com/kijai/ComfyUI-KJNodes
    """Selectively torch.compile() chosen double/single transformer blocks of a Flux model."""
    def __init__(self):
        # guard so the same node instance does not re-compile on repeated executions
        self._compiled = False

    @classmethod
    def INPUT_TYPES(s):
        return {"required": {
            "model": ("MODEL",),
            "backend": (["inductor", "cudagraphs"],),
            "fullgraph": ("BOOLEAN", {"default": False, "tooltip": "Enable full graph mode"}),
            "mode": (["default", "max-autotune", "max-autotune-no-cudagraphs", "reduce-overhead"], {"default": "default"}),
            "double_blocks": ("STRING", {"default": "0-18", "multiline": True}),
            "single_blocks": ("STRING", {"default": "0-37", "multiline": True}),
            "dynamic": ("BOOLEAN", {"default": False, "tooltip": "Enable dynamic mode"}),
        }}
    RETURN_TYPES = ("MODEL",)
    FUNCTION = "patch"
    CATEGORY = "RES4LYF/model_patches"
    EXPERIMENTAL = True

    def parse_blocks(self, blocks_str):
        """Parse comma-separated indices and inclusive ranges: "0-2,5" -> [0, 1, 2, 5]."""
        blocks = []
        for part in blocks_str.split(','):
            part = part.strip()
            if '-' in part:
                start, end = map(int, part.split('-'))
                blocks.extend(range(start, end + 1))
            else:
                blocks.append(int(part))
        return blocks

    def patch(self, model, backend, mode, fullgraph, single_blocks, double_blocks, dynamic):
        """Clone the model and replace each selected block with a torch.compile()'d version."""
        single_block_list = self.parse_blocks(single_blocks)
        double_block_list = self.parse_blocks(double_blocks)

        m = model.clone()
        diffusion_model = m.get_model_object("diffusion_model")

        if not self._compiled:
            try:
                for i, block in enumerate(diffusion_model.double_blocks):
                    if i in double_block_list:
                        m.add_object_patch(f"diffusion_model.double_blocks.{i}", torch.compile(block, mode=mode, dynamic=dynamic, fullgraph=fullgraph, backend=backend))
                for i, block in enumerate(diffusion_model.single_blocks):
                    if i in single_block_list:
                        m.add_object_patch(f"diffusion_model.single_blocks.{i}", torch.compile(block, mode=mode, dynamic=dynamic, fullgraph=fullgraph, backend=backend))
                self._compiled = True
                compile_settings = {
                    "backend": backend,
                    "mode": mode,
                    "fullgraph": fullgraph,
                    "dynamic": dynamic,
                }
                setattr(m.model, "compile_settings", compile_settings)
            except Exception as e:
                # BUGFIX: the original bare `except: raise RuntimeError(...)` discarded
                # the underlying error; chain it so the real compile failure is visible.
                raise RuntimeError("Failed to compile model") from e

        return (m, )

    # rest of the layers that are not patched (left uncompiled intentionally):
    # final_layer, guidance_in, img_in, time_in, txt_in, vector_in
================================================
FILE: legacy/noise_classes.py
================================================
import torch
from torch import nn, Tensor, Generator, lerp
from torch.nn.functional import unfold
import torch.nn.functional as F
from typing import Callable, Tuple
from math import pi
from comfy.k_diffusion.sampling import BrownianTreeNoiseSampler
from torch.distributions import StudentT, Laplace
import numpy as np
import pywt
import functools
from ..res4lyf import RESplain
# Set this to "True" if you have installed OpenSimplex. Recommended to install without dependencies due to conflicting packages: pip3 install opensimplex --no-deps
OPENSIMPLEX_ENABLE = False
if OPENSIMPLEX_ENABLE:
from opensimplex import OpenSimplex
class PrecisionTool:
    """Decorator utility that casts tensor arguments to a configured precision.

    Tensors nested inside dict values are converted as well; all tensors are moved
    to the device of the first tensor found among the call's arguments.
    """
    def __init__(self, cast_type='fp64'):
        self.cast_type = cast_type

    def cast_tensor(self, func):
        """Wrap `func` so every tensor arg/kwarg is cast to self.cast_type before the call."""
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            if self.cast_type not in ('fp64', 'fp32', 'fp16'):
                # unrecognized precision: pass everything through untouched
                return func(*args, **kwargs)

            target_dtype = {
                'fp64': torch.float64,
                'fp32': torch.float32,
                'fp16': torch.float16,
            }[self.cast_type]

            # device of the first tensor encountered (positional args first, then kwargs)
            target_device = next(
                (a.device for a in args if torch.is_tensor(a)),
                next((v.device for v in kwargs.values() if torch.is_tensor(v)), None),
            )

            def convert(value):
                # recursively recast tensors found inside nested dictionaries
                if torch.is_tensor(value):
                    return value.to(target_dtype).to(target_device)
                if isinstance(value, dict):
                    return {k: convert(v) for k, v in value.items()}
                return value

            converted_args   = [convert(a) for a in args]
            converted_kwargs = {k: convert(v) for k, v in kwargs.items()}
            return func(*converted_args, **converted_kwargs)
        return wrapper

    def set_cast_type(self, new_value):
        """Set the working precision; anything unrecognized falls back to 'fp64'."""
        self.cast_type = new_value if new_value in ('fp64', 'fp32', 'fp16') else 'fp64'
# Module-level singleton used by the @precision_tool.cast_tensor decorators below.
precision_tool = PrecisionTool(cast_type='fp64')
def noise_generator_factory(cls, **fixed_params):
    """Return a constructor for `cls` with `fixed_params` pre-bound.

    Per-call keyword arguments take precedence over the pre-bound ones.
    """
    def create_instance(**kwargs):
        return cls(**{**fixed_params, **kwargs})
    return create_instance
def like(x):
    """Describe a tensor's allocation parameters (size/dtype/layout/device) as a kwargs dict."""
    return dict(size=x.shape, dtype=x.dtype, layout=x.layout, device=x.device)

def scale_to_range(x, scaled_min = -1.73, scaled_max = 1.73): #1.73 is roughly the square root of 3
    """Affinely rescale x so its minimum/maximum land exactly on [scaled_min, scaled_max]."""
    span = x.max() - x.min()
    return scaled_min + (x - x.min()) * (scaled_max - scaled_min) / span

def normalize(x):
    """Shift and scale x to zero mean and unit (sample) standard deviation."""
    centered = x - x.mean()
    return centered / x.std()
class NoiseGenerator:
    """Base class for all noise generators.

    Allocation parameters (size/dtype/layout/device) are taken from the template
    tensor `x` when given; each may be individually overridden via the explicit
    keyword arguments. sigma_min/sigma_max are only consumed by Brownian sampling.
    """
    def __init__(self, x=None, size=None, dtype=None, layout=None, device=None, seed=42, generator=None, sigma_min=None, sigma_max=None):
        self.seed = seed

        if x is not None:
            self.x      = x
            self.size   = x.shape
            self.dtype  = x.dtype
            self.layout = x.layout
            self.device = x.device
        else:
            # BUGFIX: dtype/layout/device are keyword-only parameters of torch.zeros;
            # the original positional call torch.zeros(size, dtype, layout, device)
            # raised a TypeError whenever no template tensor was supplied.
            self.x      = torch.zeros(size, dtype=dtype, layout=layout, device=device)
            self.size   = self.x.shape
            self.dtype  = self.x.dtype
            self.layout = self.x.layout
            self.device = self.x.device

        # allow overriding parameters imported from latent 'x' if specified
        if size is not None:
            self.size = size
        if dtype is not None:
            self.dtype = dtype
        if layout is not None:
            self.layout = layout
        if device is not None:
            self.device = device

        # tensor-valued sigma bounds are moved onto the requested device
        self.sigma_max = sigma_max.to(device) if isinstance(sigma_max, torch.Tensor) else sigma_max
        self.sigma_min = sigma_min.to(device) if isinstance(sigma_min, torch.Tensor) else sigma_min

        self.last_seed = seed

        if generator is None:
            self.generator = torch.Generator(device=self.device).manual_seed(seed)
        else:
            self.generator = generator

    def __call__(self):
        raise NotImplementedError("This method got clownsharked!")

    def update(self, **kwargs):
        """Assign every non-None kwarg onto self; returns the values that were set.

        Also advances last_seed once per call (Brownian trees are keyed by
        (sigma, sigma_next) instead, so they skip the bump).
        """
        if not isinstance(self, BrownianNoiseGenerator):
            self.last_seed += 1
        updated_values = []
        for attribute_name, value in kwargs.items():
            if value is not None:
                setattr(self, attribute_name, value)
                updated_values.append(getattr(self, attribute_name))
        return tuple(updated_values)
class BrownianNoiseGenerator(NoiseGenerator):
    # Wraps comfy's BrownianTreeNoiseSampler: the draw is determined by the
    # (sigma, sigma_next) interval plus the fixed seed, which is why
    # NoiseGenerator.update() deliberately skips the per-call seed bump for this class.
    def __call__(self, *, sigma=None, sigma_next=None, **kwargs):
        # cpu=True keeps the tree on the CPU when the latent lives there
        return BrownianTreeNoiseSampler(self.x, self.sigma_min, self.sigma_max, seed=self.seed, cpu = self.device.type=='cpu')(sigma, sigma_next)
class FractalNoiseGenerator(NoiseGenerator):
    """1/f^alpha ("colored"/fractal) noise: white noise reshaped in the frequency domain.

    alpha > 0 emphasizes low frequencies (pink/brown), alpha < 0 emphasizes high
    frequencies (blue/violet); alpha == 0 leaves the spectrum flat (gaussian).
    """
    def __init__(self, x=None, size=None, dtype=None, layout=None, device=None, seed=42, generator=None, sigma_min=None, sigma_max=None,
                alpha=0.0, k=1.0, scale=0.1):
        super().__init__(x, size, dtype, layout, device, seed, generator, sigma_min, sigma_max)
        self.update(alpha=alpha, k=k, scale=scale)

    def __call__(self, *, alpha=None, k=None, scale=None, **kwargs):
        self.update(alpha=alpha, k=k, scale=scale)

        if len(self.size) == 5:
            b, c, t, h, w = self.size
        else:
            b, c, h, w = self.size

        noise = torch.normal(mean=0.0, std=1.0, size=self.size, dtype=self.dtype, layout=self.layout, device=self.device, generator=self.generator)

        y_freq = torch.fft.fftfreq(h, 1/h, device=self.device)
        x_freq = torch.fft.fftfreq(w, 1/w, device=self.device)
        if len(self.size) == 5:
            t_freq = torch.fft.fftfreq(t, 1/t, device=self.device)
            # BUGFIX: the frequency grid must be laid out (t, h, w) to broadcast against
            # fftn of a (b, c, t, h, w) tensor; the original (h, w, t) ordering misaligned
            # the axes (and failed outright for non-cubic latents).
            freq = torch.sqrt(t_freq[:, None, None]**2 + y_freq[None, :, None]**2 + x_freq[None, None, :]**2).clamp(min=1e-10)
        else:
            freq = torch.sqrt(y_freq[:, None]**2 + x_freq[None, :]**2).clamp(min=1e-10)

        spectral_density = self.k / torch.pow(freq, self.alpha * self.scale)
        # zero the DC bin so the overall mean is untouched
        if len(self.size) == 5:
            spectral_density[0, 0, 0] = 0
        else:
            spectral_density[0, 0] = 0

        noise_fft = torch.fft.fftn(noise)
        modified_fft = noise_fft * spectral_density
        noise = torch.fft.ifftn(modified_fft).real

        return noise / torch.std(noise)
class SimplexNoiseGenerator(NoiseGenerator):
    # OpenSimplex-based noise; only available when OPENSIMPLEX_ENABLE is True
    # (the class is referenced in NOISE_GENERATOR_CLASSES only in that case).
    def __init__(self, x=None, size=None, dtype=None, layout=None, device=None, seed=42, generator=None, sigma_min=None, sigma_max=None,
                scale=0.01):
        super().__init__(x, size, dtype, layout, device, seed, generator, sigma_min, sigma_max)
        self.noise = OpenSimplex(seed=seed)
        self.scale = scale  # NOTE(review): stored but not used in __call__ below
    def __call__(self, *, scale=None, **kwargs):
        self.update(scale=scale)
        if len(self.size) == 5:
            b, c, t, h, w = self.size
        else:
            b, c, h, w = self.size
        # 3D noise sampled over (x=w, y=h, z=c); presumably yields a (c, h, w) array — TODO confirm
        noise_array = self.noise.noise3array(np.arange(w),np.arange(h),np.arange(c))
        # advance to a fresh simplex field for the next draw
        self.noise = OpenSimplex(seed=self.noise.get_seed()+1)
        noise_tensor = torch.from_numpy(noise_array).to(self.device)
        noise_tensor = torch.unsqueeze(noise_tensor, dim=0)  # add batch dim
        if len(self.size) == 5:
            # NOTE(review): for video latents only another leading dim is added; the
            # pattern is not varied across the time axis — confirm this is intended.
            noise_tensor = torch.unsqueeze(noise_tensor, dim=0)
        return noise_tensor / noise_tensor.std()
        #return normalize(scale_to_range(noise_tensor))
class HiresPyramidNoiseGenerator(NoiseGenerator):
    """Pyramid noise: a uniform base plus up to four gaussian octaves generated at
    progressively larger sizes and resampled back to the original resolution."""
    def __init__(self, x=None, size=None, dtype=None, layout=None, device=None, seed=42, generator=None, sigma_min=None, sigma_max=None,
                discount=0.7, mode='nearest-exact'):
        super().__init__(x, size, dtype, layout, device, seed, generator, sigma_min, sigma_max)
        self.update(discount=discount, mode=mode)

    def __call__(self, *, discount=None, mode=None, **kwargs):
        self.update(discount=discount, mode=mode)

        if len(self.size) == 5:
            b, c, t, h, w = self.size
            orig_h, orig_w, orig_t = h, w, t
            # BUGFIX: for 5D input nn.Upsample's `size` is ordered (D, H, W) == (t, h, w);
            # the original passed (h, w, t), transposing the target dimensions.
            u = nn.Upsample(size=(orig_t, orig_h, orig_w), mode=self.mode).to(self.device)
        else:
            b, c, h, w = self.size
            orig_h, orig_w = h, w
            orig_t = t = 1
            u = nn.Upsample(size=(orig_h, orig_w), mode=self.mode).to(self.device)

        # uniform base in [-1.73, 1.73] (~unit variance)
        noise = ((torch.rand(size=self.size, dtype=self.dtype, layout=self.layout, device=self.device, generator=self.generator) - 0.5) * 2 * 1.73)
        for i in range(4):
            r = torch.rand(1, device=self.device, generator=self.generator).item() * 2 + 2  # random growth factor in [2, 4)
            h, w = min(orig_h * 15, int(h * (r ** i))), min(orig_w * 15, int(w * (r ** i)))
            if len(self.size) == 5:
                t = min(orig_t * 15, int(t * (r ** i)))
                new_noise = torch.randn((b, c, t, h, w), dtype=self.dtype, layout=self.layout, device=self.device, generator=self.generator)
            else:
                new_noise = torch.randn((b, c, h, w), dtype=self.dtype, layout=self.layout, device=self.device, generator=self.generator)
            upsampled_noise = u(new_noise)
            noise += upsampled_noise * self.discount ** i
            if h >= orig_h * 15 or w >= orig_w * 15 or t >= orig_t * 15:
                break # if resolution is too high
        return noise / noise.std()
class PyramidNoiseGenerator(NoiseGenerator):
    """Pyramid noise built by summing five octaves generated at 2x, 4x, ... the base
    resolution, each downsampled back and attenuated by discount**i."""
    def __init__(self, x=None, size=None, dtype=None, layout=None, device=None, seed=42, generator=None, sigma_min=None, sigma_max=None,
                discount=0.8, mode='nearest-exact'):
        super().__init__(x, size, dtype, layout, device, seed, generator, sigma_min, sigma_max)
        self.update(discount=discount, mode=mode)

    def __call__(self, *, discount=None, mode=None, **kwargs):
        self.update(discount=discount, mode=mode)

        x = torch.zeros(self.size, dtype=self.dtype, layout=self.layout, device=self.device)
        if len(self.size) == 5:
            b, c, t, h, w = self.size
            orig_h, orig_w, orig_t = h, w, t
        else:
            b, c, h, w = self.size
            orig_h, orig_w = h, w

        r = 1
        for i in range(5):
            r *= 2  # octave i is generated at 2**(i+1) times the base resolution
            if len(self.size) == 5:
                scaledSize = (b, c, t * r, h * r, w * r)
                # BUGFIX: interpolate's target size for 5D input is ordered (t, h, w);
                # the original (orig_h, orig_w, orig_t) transposed the output dimensions.
                origSize = (orig_t, orig_h, orig_w)
            else:
                scaledSize = (b, c, h * r, w * r)
                origSize = (orig_h, orig_w)
            x += torch.nn.functional.interpolate(
                torch.normal(mean=0, std=0.5 ** i, size=scaledSize, dtype=self.dtype, layout=self.layout, device=self.device, generator=self.generator),
                size=origSize, mode=self.mode
            ) * self.discount ** i
        return x / x.std()
class InterpolatedPyramidNoiseGenerator(NoiseGenerator):
    # Pyramid noise variant: a uniform base plus up to four gaussian octaves generated
    # at randomly grown resolutions and interpolated back down; the sum is renormalized
    # by the RMS of the octave multipliers before the final std-normalization.
    def __init__(self, x=None, size=None, dtype=None, layout=None, device=None, seed=42, generator=None, sigma_min=None, sigma_max=None,
                discount=0.7, mode='nearest-exact'):
        super().__init__(x, size, dtype, layout, device, seed, generator, sigma_min, sigma_max)
        self.update(discount=discount, mode=mode)
    def __call__(self, *, discount=None, mode=None, **kwargs):
        self.update(discount=discount, mode=mode)
        if len(self.size) == 5:
            b, c, t, h, w = self.size
            orig_t, orig_h, orig_w = t, h, w
        else:
            b, c, h, w = self.size
            orig_h, orig_w = h, w
            t = orig_t = 1
        # uniform base in [-1.73, 1.73] (~unit variance)
        noise = ((torch.rand(size=self.size, dtype=self.dtype, layout=self.layout, device=self.device, generator=self.generator) - 0.5) * 2 * 1.73)
        multipliers = [1]
        for i in range(4):
            # random per-octave growth factor in [2, 4); resolutions capped at 15x base
            r = torch.rand(1, device=self.device, generator=self.generator).item() * 2 + 2
            h, w = min(orig_h * 15, int(h * (r ** i))), min(orig_w * 15, int(w * (r ** i)))
            if len(self.size) == 5:
                t = min(orig_t * 15, int(t * (r ** i)))
                new_noise = torch.randn((b, c, t, h, w), dtype=self.dtype, layout=self.layout, device=self.device, generator=self.generator)
                upsampled_noise = nn.functional.interpolate(new_noise, size=(orig_t, orig_h, orig_w), mode=self.mode)
            else:
                new_noise = torch.randn((b, c, h, w), dtype=self.dtype, layout=self.layout, device=self.device, generator=self.generator)
                upsampled_noise = nn.functional.interpolate(new_noise, size=(orig_h, orig_w), mode=self.mode)
            noise += upsampled_noise * self.discount ** i
            multipliers.append(self.discount ** i)
            if h >= orig_h * 15 or w >= orig_w * 15 or (len(self.size) == 5 and t >= orig_t * 15):
                break # if resolution is too high
        # divide by the root-sum-square of the octave weights to keep variance roughly unit
        noise = noise / sum([m ** 2 for m in multipliers]) ** 0.5
        return noise / noise.std()
class CascadeBPyramidNoiseGenerator(NoiseGenerator):
    """Pyramid noise in the Stable Cascade stage-B style: gaussian base plus coarse
    offset grids (halved per level), each upsampled to full size and weighted 0.75**i."""
    def __init__(self, x=None, size=None, dtype=None, layout=None, device=None, seed=42, generator=None, sigma_min=None, sigma_max=None,
                levels=10, mode='nearest', size_range=[1,16]):
        super().__init__(x, size, dtype, layout, device, seed, generator, sigma_min, sigma_max)
        self.update(epsilon=x, levels=levels, mode=mode, size_range=size_range)

    def __call__(self, *, levels=10, mode='nearest', size_range=[1,16], **kwargs):
        self.update(levels=levels, mode=mode)

        b, c, h, w = self.size
        epsilon = torch.randn(self.size, dtype=self.dtype, layout=self.layout, device=self.device, generator=self.generator)
        multipliers = [1]
        for i in range(1, levels):
            m = 0.75 ** i
            # BUGFIX: the width was computed from the height axis (size(-2) used twice),
            # breaking the aspect ratio of the offset grid for non-square latents.
            h, w = int(epsilon.size(-2) // (2 ** i)), int(epsilon.size(-1) // (2 ** i))
            # only add a level when at least one of its dimensions falls inside size_range
            if size_range is None or (size_range[0] <= h <= size_range[1] or size_range[0] <= w <= size_range[1]):
                offset = torch.randn(epsilon.size(0), epsilon.size(1), h, w, device=self.device, generator=self.generator)
                epsilon = epsilon + torch.nn.functional.interpolate(offset, size=epsilon.shape[-2:], mode=self.mode) * m
                multipliers.append(m)
            if h <= 1 or w <= 1:
                break
        epsilon = epsilon / sum([m ** 2 for m in multipliers]) ** 0.5 #divides the epsilon tensor by the square root of the sum of the squared multipliers.
        return epsilon
class UniformNoiseGenerator(NoiseGenerator):
    """Uniform noise centered on `mean` with half-width `scale`.

    The default scale of 1.73 (~sqrt(3)) yields roughly unit variance.
    """
    def __init__(self, x=None, size=None, dtype=None, layout=None, device=None, seed=42, generator=None, sigma_min=None, sigma_max=None,
                mean=0.0, scale=1.73):
        super().__init__(x, size, dtype, layout, device, seed, generator, sigma_min, sigma_max)
        self.update(mean=mean, scale=scale)

    def __call__(self, *, mean=None, scale=None, **kwargs):
        self.update(mean=mean, scale=scale)
        draw = torch.rand(self.size, dtype=self.dtype, layout=self.layout, device=self.device, generator=self.generator)
        centered = (draw - 0.5) * 2          # map [0, 1) onto [-1, 1)
        return self.scale * centered + self.mean
class GaussianNoiseGenerator(NoiseGenerator):
    """Standard gaussian noise, re-standardized per draw to exactly zero mean / unit std."""
    def __init__(self, x=None, size=None, dtype=None, layout=None, device=None, seed=42, generator=None, sigma_min=None, sigma_max=None,
                mean=0.0, std=1.0):
        super().__init__(x, size, dtype, layout, device, seed, generator, sigma_min, sigma_max)
        self.update(mean=mean, std=std)

    def __call__(self, *, mean=None, std=None, **kwargs):
        self.update(mean=mean, std=std)
        draw = torch.randn(self.size, dtype=self.dtype, layout=self.layout, device=self.device, generator=self.generator)
        return (draw - draw.mean()) / draw.std()
class GaussianBackwardsNoiseGenerator(NoiseGenerator):
    # Gaussian noise whose generator seed is decremented by one on every call
    # (the mirror image of the usual forward seed advance).
    def __init__(self, x=None, size=None, dtype=None, layout=None, device=None, seed=42, generator=None, sigma_min=None, sigma_max=None,
                mean=0.0, std=1.0):
        super().__init__(x, size, dtype, layout, device, seed, generator, sigma_min, sigma_max)
        self.update(mean=mean, std=std)
    def __call__(self, *, mean=None, std=None, **kwargs):
        self.update(mean=mean, std=std)
        RESplain("GaussianBackwards last seed:", self.generator.initial_seed())
        # re-seed one step *back*; manual_seed also resets the generator state
        self.generator.manual_seed(self.generator.initial_seed() - 1)
        noise = torch.randn(self.size, dtype=self.dtype, layout=self.layout, device=self.device, generator=self.generator)
        # re-standardize to exactly zero mean / unit std
        return (noise - noise.mean()) / noise.std()
class LaplacianNoiseGenerator(NoiseGenerator):
    # Mixture of attenuated gaussian noise (std 0.25) and Laplace-distributed noise,
    # normalized to unit std.
    def __init__(self, x=None, size=None, dtype=None, layout=None, device=None, seed=42, generator=None, sigma_min=None, sigma_max=None,
                loc=0, scale=1.0):
        super().__init__(x, size, dtype, layout, device, seed, generator, sigma_min, sigma_max)
        self.update(loc=loc, scale=scale)
    def __call__(self, *, loc=None, scale=None, **kwargs):
        self.update(loc=loc, scale=scale)
        # b, c, h, w = self.size
        # orig_h, orig_w = h, w
        noise = torch.randn(self.size, dtype=self.dtype, layout=self.layout, device=self.device, generator=self.generator) / 4.0
        # Laplace.rsample() takes no generator argument here, so the global RNG state
        # is saved, seeded from our generator, and restored afterwards.
        rng_state = torch.random.get_rng_state()
        torch.manual_seed(self.generator.initial_seed())
        laplacian_noise = Laplace(loc=self.loc, scale=self.scale).rsample(self.size).to(self.device)
        self.generator.manual_seed(self.generator.initial_seed() + 1)  # advance for next draw
        torch.random.set_rng_state(rng_state)
        noise += laplacian_noise
        return noise / noise.std()
class StudentTNoiseGenerator(NoiseGenerator):
    # Heavy-tailed Student-t noise: samples are clamped at the per-batch-entry 0.75
    # quantile of |noise|, square-root-compressed with sign preserved, then standardized.
    def __init__(self, x=None, size=None, dtype=None, layout=None, device=None, seed=42, generator=None, sigma_min=None, sigma_max=None,
                loc=0, scale=0.2, df=1):
        super().__init__(x, size, dtype, layout, device, seed, generator, sigma_min, sigma_max)
        self.update(loc=loc, scale=scale, df=df)
    def __call__(self, *, loc=None, scale=None, df=None, **kwargs):
        self.update(loc=loc, scale=scale, df=df)
        # b, c, h, w = self.size
        # orig_h, orig_w = h, w
        # StudentT.rsample() takes no generator argument here, so the global RNG state
        # is saved, seeded from our generator, and restored afterwards.
        rng_state = torch.random.get_rng_state()
        torch.manual_seed(self.generator.initial_seed())
        noise = StudentT(loc=self.loc, scale=self.scale, df=self.df).rsample(self.size)
        # clamp outliers at the 75th percentile of |noise| per batch entry
        s = torch.quantile(noise.flatten(start_dim=1).abs(), 0.75, dim=-1)
        if len(self.size) == 5:
            s = s.reshape(*s.shape, 1, 1, 1, 1)
        else:
            s = s.reshape(*s.shape, 1, 1, 1)
        noise = noise.clamp(-s, s)
        # sqrt-compress magnitudes while keeping the sign
        noise_latent = torch.copysign(torch.pow(torch.abs(noise), 0.5), noise).to(self.device)
        self.generator.manual_seed(self.generator.initial_seed() + 1)  # advance for next draw
        torch.random.set_rng_state(rng_state)
        return (noise_latent - noise_latent.mean()) / noise_latent.std()
class WaveletNoiseGenerator(NoiseGenerator):
    # Gaussian noise passed through a pywt decompose/reconstruct round trip.
    # NOTE(review): wavedecn followed immediately by waverecn with identical settings
    # is (near-)lossless, so the net effect beyond standardized gaussian noise is
    # unclear — confirm intent. Also assumes the tensor is on CPU, since pywt
    # operates on numpy-compatible arrays — TODO confirm for CUDA devices.
    def __init__(self, x=None, size=None, dtype=None, layout=None, device=None, seed=42, generator=None, sigma_min=None, sigma_max=None,
                wavelet='haar'):
        super().__init__(x, size, dtype, layout, device, seed, generator, sigma_min, sigma_max)
        self.update(wavelet=wavelet)
    def __call__(self, *, wavelet=None, **kwargs):
        self.update(wavelet=wavelet)
        # b, c, h, w = self.size
        # orig_h, orig_w = h, w
        # noise for spatial dimensions only
        coeffs = pywt.wavedecn(torch.randn(self.size, dtype=self.dtype, layout=self.layout, device=self.device, generator=self.generator).to(self.device), wavelet=self.wavelet, mode='periodization')
        noise = pywt.waverecn(coeffs, wavelet=self.wavelet, mode='periodization')
        noise_tensor = torch.tensor(noise, dtype=self.dtype, device=self.device)
        noise_tensor = (noise_tensor - noise_tensor.mean()) / noise_tensor.std()
        return noise_tensor
class PerlinNoiseGenerator(NoiseGenerator):
    """Classic 2D Perlin gradient noise blended with gaussian noise.

    For 5D (video) latents the 2D pattern is generated independently per time slice.
    """
    def __init__(self, x=None, size=None, dtype=None, layout=None, device=None, seed=42, generator=None, sigma_min=None, sigma_max=None,
                detail=0.0):
        super().__init__(x, size, dtype, layout, device, seed, generator, sigma_min, sigma_max)
        self.update(detail=detail)

    @staticmethod
    def get_positions(block_shape: Tuple[int, int]) -> Tensor:
        """Sample-point positions inside one grid cell, each component in [0, 1)."""
        bh, bw = block_shape
        positions = torch.stack(
            torch.meshgrid(
                [(torch.arange(b) + 0.5) / b for b in (bw, bh)],
                indexing="xy",
            ),
            -1,
        ).view(1, bh, bw, 1, 1, 2)
        return positions

    @staticmethod
    def unfold_grid(vectors: Tensor) -> Tensor:
        """Gather the four corner gradient vectors for every grid cell."""
        batch_size, _, gpy, gpx = vectors.shape
        return (
            unfold(vectors, (2, 2))
            .view(batch_size, 2, 4, -1)
            .permute(0, 2, 3, 1)
            .view(batch_size, 4, gpy - 1, gpx - 1, 2)
        )

    @staticmethod
    def smooth_step(t: Tensor) -> Tensor:
        # Hermite interpolation: 3t^2 - 2t^3
        return t * t * (3.0 - 2.0 * t)

    @staticmethod
    def perlin_noise_tensor(
        self,
        vectors: Tensor, positions: Tensor, step: Callable = None
    ) -> Tensor:
        """Interpolate corner-gradient dot products into one noise tile per batch entry.

        NOTE(review): declared @staticmethod yet takes `self` explicitly; call sites
        pass the instance manually (self.perlin_noise_tensor(self, ...)). Kept as-is
        for interface compatibility.
        """
        if step is None:
            step = self.smooth_step
        batch_size = vectors.shape[0]
        # grid height, grid width
        gh, gw = vectors.shape[2:4]
        # block height, block width
        bh, bw = positions.shape[1:3]
        for i in range(2):
            if positions.shape[i + 3] not in (1, vectors.shape[i + 2]):
                # BUGFIX: message previously had an unbalanced parenthesis
                raise Exception(
                    f"Blocks shapes do not match: vectors ({vectors.shape[1]}, {vectors.shape[2]}), positions ({gh}, {gw})"
                )
        if positions.shape[0] not in (1, batch_size):
            raise Exception(
                f"Batch sizes do not match: vectors ({vectors.shape[0]}), positions ({positions.shape[0]})"
            )
        vectors = vectors.view(batch_size, 4, 1, gh * gw, 2)
        positions = positions.view(positions.shape[0], bh * bw, -1, 2)
        step_x = step(positions[..., 0])
        step_y = step(positions[..., 1])
        # bilinear blend of the four corner dot products, smoothed by the step function
        row0 = lerp(
            (vectors[:, 0] * positions).sum(dim=-1),
            (vectors[:, 1] * (positions - positions.new_tensor((1, 0)))).sum(dim=-1),
            step_x,
        )
        row1 = lerp(
            (vectors[:, 2] * (positions - positions.new_tensor((0, 1)))).sum(dim=-1),
            (vectors[:, 3] * (positions - positions.new_tensor((1, 1)))).sum(dim=-1),
            step_x,
        )
        noise = lerp(row0, row1, step_y)
        return (
            noise.view(
                batch_size,
                bh,
                bw,
                gh,
                gw,
            )
            .permute(0, 3, 1, 4, 2)
            .reshape(batch_size, gh * bh, gw * bw)
        )

    def perlin_noise(
        self,
        grid_shape: Tuple[int, int],
        out_shape: Tuple[int, int],
        batch_size: int = 1,
        generator: Generator = None,
        *args,
        **kwargs,
    ) -> Tensor:
        """Generate a (gh*bh, gw*bw) Perlin noise tile; out_shape must be divisible by grid_shape."""
        gh, gw = grid_shape # grid height and width
        oh, ow = out_shape  # output height and width
        bh, bw = oh // gh, ow // gw # block height and width
        if oh != bh * gh:
            raise Exception(f"Output height {oh} must be divisible by grid height {gh}")
        if ow != bw * gw:
            # BUGFIX: was `ow != bw * gw != 0`, a chained comparison that silently
            # skipped this check whenever bw * gw evaluated to 0.
            raise Exception(f"Output width {ow} must be divisible by grid width {gw}")
        # random gradient angles on the (gh+1, gw+1) grid points
        angle = torch.empty(
            [batch_size] + [s + 1 for s in grid_shape], device=self.device, *args, **kwargs
        ).uniform_(to=2.0 * pi, generator=self.generator)
        # random vectors on grid points
        vectors = self.unfold_grid(torch.stack((torch.cos(angle), torch.sin(angle)), dim=1))
        # positions inside grid cells [0, 1)
        positions = self.get_positions((bh, bw)).to(vectors)
        return self.perlin_noise_tensor(self, vectors, positions).squeeze(0)

    def __call__(self, *, detail=None, **kwargs):
        self.update(detail=detail) #currently unused
        if len(self.size) == 5:
            b, c, t, h, w = self.size
            noise = torch.randn(self.size, dtype=self.dtype, layout=self.layout, device=self.device, generator=self.generator) / 2.0
            for tt in range(t):
                # two Perlin layers are accumulated per time slice
                for i in range(2):
                    perlin_slice = self.perlin_noise((h, w), (h, w), batch_size=c, generator=self.generator).to(self.device)
                    perlin_expanded = perlin_slice.unsqueeze(0).unsqueeze(2)
                    noise[:, :, tt:tt+1, :, :] += perlin_expanded
        else:
            b, c, h, w = self.size
            noise = torch.randn(self.size, dtype=self.dtype, layout=self.layout, device=self.device, generator=self.generator) / 2.0
            for i in range(2):
                noise += self.perlin_noise((h, w), (h, w), batch_size=c, generator=self.generator).to(self.device)
        return noise / noise.std()
from functools import partial  # NOTE(review): appears unused; factories below use noise_generator_factory

# Full registry of noise generators, keyed by the names exposed in node dropdowns.
NOISE_GENERATOR_CLASSES = {
    "fractal": FractalNoiseGenerator,
    "gaussian": GaussianNoiseGenerator,
    "gaussian_backwards": GaussianBackwardsNoiseGenerator,
    "uniform": UniformNoiseGenerator,
    "pyramid-cascade_B": CascadeBPyramidNoiseGenerator,
    "pyramid-interpolated": InterpolatedPyramidNoiseGenerator,
    "pyramid-bilinear": noise_generator_factory(PyramidNoiseGenerator, mode='bilinear'),
    "pyramid-bicubic": noise_generator_factory(PyramidNoiseGenerator, mode='bicubic'),
    "pyramid-nearest": noise_generator_factory(PyramidNoiseGenerator, mode='nearest'),
    "hires-pyramid-bilinear": noise_generator_factory(HiresPyramidNoiseGenerator, mode='bilinear'),
    "hires-pyramid-bicubic": noise_generator_factory(HiresPyramidNoiseGenerator, mode='bicubic'),
    "hires-pyramid-nearest": noise_generator_factory(HiresPyramidNoiseGenerator, mode='nearest'),
    "brownian": BrownianNoiseGenerator,
    "laplacian": LaplacianNoiseGenerator,
    "studentt": StudentTNoiseGenerator,
    "wavelet": WaveletNoiseGenerator,
    "perlin": PerlinNoiseGenerator,
}

# Simplified registry; "none" maps to plain gaussian, and the color names
# (brown/pink/white/blue/violet) are fractal noise with fixed spectral exponents.
NOISE_GENERATOR_CLASSES_SIMPLE = {
    "none": GaussianNoiseGenerator,
    "brownian": BrownianNoiseGenerator,
    "gaussian": GaussianNoiseGenerator,
    "gaussian_backwards": GaussianBackwardsNoiseGenerator,
    "laplacian": LaplacianNoiseGenerator,
    "perlin": PerlinNoiseGenerator,
    "studentt": StudentTNoiseGenerator,
    "uniform": UniformNoiseGenerator,
    "wavelet": WaveletNoiseGenerator,
    "brown": noise_generator_factory(FractalNoiseGenerator, alpha=2.0),
    "pink": noise_generator_factory(FractalNoiseGenerator, alpha=1.0),
    "white": noise_generator_factory(FractalNoiseGenerator, alpha=0.0),
    "blue": noise_generator_factory(FractalNoiseGenerator, alpha=-1.0),
    "violet": noise_generator_factory(FractalNoiseGenerator, alpha=-2.0),
    "hires-pyramid-bicubic": noise_generator_factory(HiresPyramidNoiseGenerator, mode='bicubic'),
    "hires-pyramid-bilinear": noise_generator_factory(HiresPyramidNoiseGenerator, mode='bilinear'),
    "hires-pyramid-nearest": noise_generator_factory(HiresPyramidNoiseGenerator, mode='nearest'),
    "pyramid-bicubic": noise_generator_factory(PyramidNoiseGenerator, mode='bicubic'),
    "pyramid-bilinear": noise_generator_factory(PyramidNoiseGenerator, mode='bilinear'),
    "pyramid-nearest": noise_generator_factory(PyramidNoiseGenerator, mode='nearest'),
    "pyramid-interpolated": InterpolatedPyramidNoiseGenerator,
    "pyramid-cascade_B": CascadeBPyramidNoiseGenerator,
}

# Simplex noise is only registered when the optional OpenSimplex package is enabled.
if OPENSIMPLEX_ENABLE:
    NOISE_GENERATOR_CLASSES.update({
        "simplex": SimplexNoiseGenerator,
    })

# Dropdown option tuples derived from the registries above.
NOISE_GENERATOR_NAMES = tuple(NOISE_GENERATOR_CLASSES.keys())
NOISE_GENERATOR_NAMES_SIMPLE = tuple(NOISE_GENERATOR_CLASSES_SIMPLE.keys())
@precision_tool.cast_tensor
def prepare_noise(latent_image, seed, noise_type, noise_inds=None, alpha=1.0, k=1.0): # adapted from comfy/sample.py: https://github.com/comfyanonymous/ComfyUI
    #optional arg skip can be used to skip and discard x number of noise generations for a given seed
    # Build a generator matching the latent's size/dtype/device; the sigma bounds are
    # fixed SD-style defaults consumed only by the brownian generator.
    noise_func = NOISE_GENERATOR_CLASSES.get(noise_type)(x=latent_image, seed=seed, sigma_min=0.0291675, sigma_max=14.614642)
    if noise_type == "fractal":
        noise_func.alpha = alpha
        noise_func.k = k
    # from here until return is very similar to comfy/sample.py
    if noise_inds is None:
        return noise_func(sigma=14.614642, sigma_next=0.0291675)

    # Batch-index path: generate sequential draws and keep only the requested indices,
    # so identical indices map to identical noise.
    unique_inds, inverse = np.unique(noise_inds, return_inverse=True)
    noises = []
    for i in range(unique_inds[-1]+1):
        # NOTE(review): the generators' __call__ signatures are keyword-only and do not
        # consume size/dtype/layout/device, so these per-draw overrides appear to be
        # ignored (the draw uses the latent's full size) — confirm intended.
        noise = noise_func(size = [1] + list(latent_image.size())[1:], dtype=latent_image.dtype, layout=latent_image.layout, device=latent_image.device)
        if i in unique_inds:
            noises.append(noise)
    noises = [noises[i] for i in inverse]
    noises = torch.cat(noises, axis=0)
    return noises
================================================
FILE: legacy/noise_sigmas_timesteps_scaling.py
================================================
import torch
#from..noise_classes import *
import comfy.model_patcher
from .helper import has_nested_attr
def get_alpha_ratio_from_sigma_up(sigma_up, sigma_next, eta, sigma_max=1.0):
    """Solve the VP-SDE step parameters for a requested injected-noise level.

    Returns (alpha_ratio, sigma_up, sigma_down). When sigma_up would exceed what a
    step down to sigma_next can absorb, it is clamped and a warning is printed.
    """
    if sigma_up >= sigma_next and sigma_next > 0:
        print("Maximum VPSDE noise level exceeded: falling back to hard noise mode.")
        # Values below are the theoretical max, but break with exponential integrator stepsize calcs:
        #sigma_up = sigma_next;  alpha_ratio = sigma_max - sigma_next;  sigma_down = 0 * sigma_next
        # Clamp instead: 0.9999 avoids sqrt of a negative number below.
        sigma_up = sigma_next * (0.9999 if eta >= 1 else eta)

    sigma_signal   = sigma_max - sigma_next
    sigma_residual = torch.sqrt(sigma_next**2 - sigma_up**2)

    alpha_ratio = sigma_signal + sigma_residual
    sigma_down  = sigma_residual / alpha_ratio
    return alpha_ratio, sigma_up, sigma_down
def get_alpha_ratio_from_sigma_down(sigma_down, sigma_next, eta, sigma_max=1.0):
    """Inverse of get_alpha_ratio_from_sigma_up: derive (alpha_ratio, sigma_up, sigma_down)
    from a requested post-step noise level sigma_down."""
    alpha_ratio = (1 - sigma_next) / (1 - sigma_down)
    sigma_up    = (sigma_next ** 2 - sigma_down ** 2 * alpha_ratio ** 2) ** 0.5

    # "clamp" noise level to max if max exceeded
    exceeded = sigma_up >= sigma_next
    if exceeded:
        alpha_ratio, sigma_up, sigma_down = get_alpha_ratio_from_sigma_up(sigma_up, sigma_next, eta, sigma_max)
    return alpha_ratio, sigma_up, sigma_down
def get_ancestral_step_RF_var(sigma, sigma_next, eta, sigma_max=1.0):
    """Variance-style ancestral step for rectified flow: sigma_up ~ eta * sqrt(|dt|).

    Returns (sigma_up, sigma_down, alpha_ratio) in the caller's dtype. All
    intermediate math runs in float64 — the subtractions below are numerically
    delicate and lose precision badly in float16/32.
    """
    out_dtype = sigma.dtype
    s = sigma.to(torch.float64)
    sn = sigma_next.to(torch.float64)
    dt_abs = (s - sn).abs() + 1e-10  # epsilon keeps sqrt well-defined on degenerate steps
    sigma_up = torch.sqrt(dt_abs).to(torch.float64) * eta
    root = torch.sqrt((sn**2 - sigma_up**2).to(torch.float64))
    sigma_down = root / ((1 - sn).to(torch.float64) + root.to(torch.float64))
    alpha_ratio = (1 - sn).to(torch.float64) / (1 - sigma_down).to(torch.float64)
    return sigma_up.to(out_dtype), sigma_down.to(out_dtype), alpha_ratio.to(out_dtype)
def get_ancestral_step_RF_lorentzian(sigma, sigma_next, eta, sigma_max=1.0):
    """Lorentzian-shaped noise schedule: seed sigma_up from 1/(sigma^2 + 1), then
    resolve the consistent (sigma_up, sigma_down, alpha_ratio) triple. Outputs
    are cast back to the caller's dtype."""
    out_dtype = sigma.dtype
    gate = 1 / ((sigma.to(torch.float64))**2 + 1)
    seed_up = eta * (1 - gate) ** 0.5
    alpha_ratio, sigma_up, sigma_down = get_alpha_ratio_from_sigma_up(seed_up, sigma_next, eta, sigma_max)
    return sigma_up.to(out_dtype), sigma_down.to(out_dtype), alpha_ratio.to(out_dtype)
def get_ancestral_step_EPS(sigma, sigma_next, eta=1.):
    """Classic EPS-model ancestral split.

    Returns (sigma_up, sigma_down, alpha_ratio); alpha_ratio is always 1 since
    EPS-style models never rescale the latent when noise is added.
    """
    alpha_ratio = torch.full_like(sigma, 1.0)
    # eta == 0 or final step (sigma_next == 0): fully deterministic, no noise added.
    if not eta or not sigma_next:
        return torch.full_like(sigma, 0.0), sigma_next, alpha_ratio
    variance_up = sigma_next ** 2 * (sigma ** 2 - sigma_next ** 2) / sigma ** 2
    sigma_up = min(sigma_next, eta * variance_up ** 0.5)
    sigma_down = (sigma_next ** 2 - sigma_up ** 2) ** 0.5
    return sigma_up, sigma_down, alpha_ratio
def get_ancestral_step_RF_sinusoidal(sigma_next, eta, sigma_max=1.0):
    """Sinusoidal noise schedule: sigma_up peaks mid-trajectory via
    sin^2(pi * sigma_next), then the consistent triple is resolved.
    Note this variant does not take the current sigma."""
    seed_up = eta * sigma_next * torch.sin(torch.pi * sigma_next) ** 2
    alpha_ratio, sigma_up, sigma_down = get_alpha_ratio_from_sigma_up(seed_up, sigma_next, eta, sigma_max)
    return sigma_up, sigma_down, alpha_ratio
def get_ancestral_step_RF_softer(sigma, sigma_next, eta, sigma_max=1.0):
    """EPS-style ancestral math adapted to rectified flow ("softer" schedule):
    shrink sigma_next by the EPS variance factor to get the target sigma_down,
    then resolve the consistent (sigma_up, alpha_ratio)."""
    shrink = torch.sqrt(1 - (eta**2 * (sigma**2 - sigma_next**2)) / sigma**2)
    alpha_ratio, sigma_up, sigma_down = get_alpha_ratio_from_sigma_down(sigma_next * shrink, sigma_next, eta, sigma_max)
    return sigma_up, sigma_down, alpha_ratio
def get_ancestral_step_RF_soft(sigma, sigma_next, eta, sigma_max=1.0):
    """Rectified-flow ancestral step with a "soft" schedule.

    sigma_down is pulled toward sigma_next * (sigma_next / sigma) in proportion
    to eta, then the consistent (sigma_up, alpha_ratio) pair is resolved. Also
    returns the mixing ratio (alpha_ratio) used to scale the latent when noise
    is added.
    """
    blend = (1 - eta) + eta * (sigma_next / sigma)
    alpha_ratio, sigma_up, sigma_down = get_alpha_ratio_from_sigma_down(blend * sigma_next, sigma_next, eta, sigma_max)
    return sigma_up, sigma_down, alpha_ratio
def get_ancestral_step_RF_soft_linear(sigma, sigma_next, eta, sigma_max=1.0):
    """Linear-in-eta soft schedule: target sigma_down = sigma_next + eta*(sigma_next - sigma).
    Degenerates to a deterministic step (no noise, alpha_ratio 1) when the
    target would go negative."""
    target_down = sigma_next + eta * (sigma_next - sigma)
    if target_down < 0:
        return torch.full_like(sigma, 0.), sigma_next, torch.full_like(sigma, 1.)
    alpha_ratio, sigma_up, sigma_down = get_alpha_ratio_from_sigma_down(target_down, sigma_next, eta, sigma_max)
    return sigma_up, sigma_down, alpha_ratio
def get_ancestral_step_RF_exp(sigma, sigma_next, eta, sigma_max=1.0):  # TODO: fix black image issue with linear RK
    """Exponential-integrator style noise: seed
    sigma_up = sigma_next * sqrt(1 - exp(-2*eta*h)) with h = -log(sigma_next/sigma),
    then resolve the consistent triple."""
    log_step = -torch.log(sigma_next/sigma)
    seed_up = sigma_next * (1 - (-2*eta*log_step).exp())**0.5
    alpha_ratio, sigma_up, sigma_down = get_alpha_ratio_from_sigma_up(seed_up, sigma_next, eta, sigma_max)
    return sigma_up, sigma_down, alpha_ratio
def get_ancestral_step_RF_sqrd(sigma, sigma_next, eta, sigma_max=1.0):
    """"hard_sq" schedule for rectified flow: inflate sigma to sigma*(1+eta) and
    take the excess quadrature noise as the sigma_up seed."""
    inflated = sigma * (1 + eta)
    seed_up = (inflated ** 2 - sigma ** 2) ** .5
    alpha_ratio, sigma_up, sigma_down = get_alpha_ratio_from_sigma_up(seed_up, sigma_next, eta, sigma_max)
    return sigma_up, sigma_down, alpha_ratio
def get_ancestral_step_RF_hard(sigma_next, eta, sigma_max=1.0):
    """"hard" schedule: request sigma_up = eta * sigma_next directly and let the
    solver derive sigma_down / alpha_ratio. Does not take the current sigma."""
    alpha_ratio, sigma_up, sigma_down = get_alpha_ratio_from_sigma_up(sigma_next * eta, sigma_next, eta, sigma_max)
    return sigma_up, sigma_down, alpha_ratio
def get_vpsde_step_RF(sigma, sigma_next, eta, sigma_max=1.0):
    """First-order VP-SDE-style ancestral step for rectified flow.

    Closed-form expansions in dt = sigma - sigma_next; returns
    (sigma_up, sigma_down, alpha_ratio).
    """
    dt = sigma - sigma_next
    sigma_up = eta * sigma * dt**0.5
    quarter = eta**2/4
    alpha_ratio = 1 - dt * quarter * (1 + sigma)
    sigma_down = sigma_next - (eta/4)*sigma*(1-sigma)*dt
    return sigma_up, sigma_down, alpha_ratio
def get_fuckery_step_RF(sigma, sigma_next, eta, sigma_max=1.0):
    """Experimental schedule: shrink sigma_down by (1 - eta) and put the whole
    remaining quadrature budget into sigma_up; no latent rescaling
    (alpha_ratio == 1)."""
    sigma_down = (1-eta) * sigma_next
    sigma_up = torch.sqrt(sigma_next**2 - sigma_down**2)
    return sigma_up, sigma_down, torch.ones_like(sigma_next)
def get_res4lyf_step_with_model(model, sigma, sigma_next, eta=0.0, noise_mode="hard"):
    """Dispatch to the ancestral-step calculator matching the model's sampling type.

    Returns (sigma_up, sigma, sigma_down, alpha_ratio). `sigma` is part of the
    return because the EPS "hard_sq" branch replaces it with sigma_hat. NaNs in
    any output are scrubbed with conservative fallbacks before returning.
    """
    # Defaults: fully deterministic step (no added noise, no rescale).
    su, sd, alpha_ratio = torch.zeros_like(sigma), sigma_next.clone(), torch.ones_like(sigma)
    # Locate the model_sampling object on either wrapper layout.
    # NOTE(review): if neither attribute path exists, model_sampling is left
    # unbound and the isinstance() below raises NameError — confirm callers
    # always pass a wrapped model exposing one of these paths.
    if has_nested_attr(model, "inner_model.inner_model.model_sampling"):
        model_sampling = model.inner_model.inner_model.model_sampling
    elif has_nested_attr(model, "model.model_sampling"):
        model_sampling = model.model.model_sampling
    if isinstance(model_sampling, comfy.model_sampling.CONST):
        # Rectified-flow (CONST) models: pick an RF-specific schedule.
        sigma_var = (-1 + torch.sqrt(1 + 4 * sigma)) / 2  # equivalently ((4*sigma+1)**0.5 - 1) / 2; feasibility threshold for variance-matched noise
        if noise_mode == "hard_var" and eta > 0.0 and sigma_next > sigma_var:
            su, sd, alpha_ratio = get_ancestral_step_RF_var(sigma, sigma_next, eta)
        else:
            if noise_mode == "soft":
                su, sd, alpha_ratio = get_ancestral_step_RF_soft(sigma, sigma_next, eta)
            elif noise_mode == "softer":
                su, sd, alpha_ratio = get_ancestral_step_RF_softer(sigma, sigma_next, eta)
            elif noise_mode == "hard_sq":
                su, sd, alpha_ratio = get_ancestral_step_RF_sqrd(sigma, sigma_next, eta)
            elif noise_mode == "sinusoidal":
                su, sd, alpha_ratio = get_ancestral_step_RF_sinusoidal(sigma_next, eta)
            elif noise_mode == "exp":
                su, sd, alpha_ratio = get_ancestral_step_RF_exp(sigma, sigma_next, eta)
            elif noise_mode == "soft-linear":
                su, sd, alpha_ratio = get_ancestral_step_RF_soft_linear(sigma, sigma_next, eta)
            elif noise_mode == "lorentzian":
                su, sd, alpha_ratio = get_ancestral_step_RF_lorentzian(sigma, sigma_next, eta)
            elif noise_mode == "vpsde":
                su, sd, alpha_ratio = get_vpsde_step_RF(sigma, sigma_next, eta)
            elif noise_mode == "fuckery":
                su, sd, alpha_ratio = get_fuckery_step_RF(sigma, sigma_next, eta)
            else: #elif noise_mode == "hard": #fall back to hard noise from hard_var
                su, sd, alpha_ratio = get_ancestral_step_RF_hard(sigma_next, eta)
    else:
        # EPS-style models: alpha_ratio is always 1 (no latent rescaling).
        alpha_ratio = torch.full_like(sigma, 1.0)
        if noise_mode == "hard_sq":
            sd = sigma_next
            sigma_hat = sigma * (1 + eta)
            su = (sigma_hat ** 2 - sigma ** 2) ** .5
            sigma = sigma_hat  # inflated sigma is returned to the caller
        elif noise_mode == "hard":
            su = eta * sigma_next
            sd = (sigma_next ** 2 - su ** 2) ** 0.5
        elif noise_mode == "exp":
            h = -torch.log(sigma_next/sigma)
            su = sigma_next * (1 - (-2*eta*h).exp())**0.5
            sd = (sigma_next ** 2 - su ** 2) ** 0.5
        else: #if noise_mode == "soft" or noise_mode == "softer":
            # NOTE(review): only su is set here; sd keeps its default sigma_next —
            # confirm this asymmetry is intentional for the soft EPS modes.
            su = min(sigma_next, eta * (sigma_next ** 2 * (sigma ** 2 - sigma_next ** 2) / sigma ** 2) ** 0.5)
            #su, sd, alpha_ratio = get_ancestral_step_EPS(sigma, sigma_next, eta)
    # Scrub NaNs (e.g. sqrt of a negative noise budget) with safe fallbacks.
    su = torch.nan_to_num(su, 0.0)
    sd = torch.nan_to_num(sd, float(sigma_next))
    alpha_ratio = torch.nan_to_num(alpha_ratio, 1.0)
    return su, sigma, sd, alpha_ratio
# Canonical list of noise-mode identifiers exposed in the UI; list order is
# display order. Commented-out entries are experimental/disabled.
NOISE_MODE_NAMES = ["none",
                    "hard_sq",
                    "hard",
                    #"hard_down",
                    "lorentzian",
                    "soft",
                    "soft-linear",
                    "softer",
                    "eps",  # NOTE(review): no dedicated "eps" branch in the RF dispatcher in this file — verify where it is handled
                    "sinusoidal",
                    "exp",
                    "vpsde",
                    #"fuckery",
                    "hard_var",
                    ]
def get_res4lyf_half_step3(sigma, sigma_next, c2=0.5, c3=1.0, t_fn=None, sigma_fn=None, t_fn_formula="", sigma_fn_formula="", ):
    # Remap the Runge-Kutta substep fractions c2/c3 defined in a user-supplied
    # time parameterization (t_fn_formula / sigma_fn_formula) back into the
    # solver's native t_fn domain. Returns the adjusted (c2, c3).
    # SECURITY NOTE(review): eval() executes arbitrary user-supplied formula
    # text; acceptable only because formulas come from the local node UI, never
    # from untrusted remote input.
    t_fn_x = eval(f"lambda sigma: {t_fn_formula}", {"torch": torch}) if t_fn_formula else t_fn
    sigma_fn_x = eval(f"lambda t: {sigma_fn_formula}", {"torch": torch}) if sigma_fn_formula else sigma_fn
    # Substep times/sigmas in the (possibly custom) parameterization.
    t_x, t_next_x = t_fn_x(sigma), t_fn_x(sigma_next)
    h_x = t_next_x - t_x
    s2 = t_x + h_x * c2
    s3 = t_x + h_x * c3
    sigma_2 = sigma_fn_x(s2)
    sigma_3 = sigma_fn_x(s3)
    # Re-express the custom-spaced sigmas as fractions of the native step h.
    # NOTE(review): t_fn (not t_fn_x) is used below on purpose, mapping back into
    # the solver's own time domain — confirm with callers before "fixing".
    h = t_fn(sigma_next) - t_fn(sigma)
    c2 = (t_fn(sigma_2) - t_fn(sigma)) / h
    c3 = (t_fn(sigma_3) - t_fn(sigma)) / h
    return c2, c3
================================================
FILE: legacy/phi_functions.py
================================================
import torch
import math
from typing import Optional
# Remainder solution
def _phi(j, neg_h):
remainder = torch.zeros_like(neg_h)
for k in range(j):
remainder += (neg_h)**k / math.factorial(k)
phi_j_h = ((neg_h).exp() - remainder) / (neg_h)**j
return phi_j_h
def calculate_gamma(c2, c3):
    """Coupling coefficient gamma for 3-stage RES solvers, computed from the
    node fractions c2 and c3."""
    numerator = 3*(c3**3) - 2*c3
    denominator = c2*(2 - 3*c2)
    return numerator / denominator
# Exact analytic solution originally calculated by Clybius. https://github.com/Clybius/ComfyUI-Extra-Samplers/tree/main
def _gamma(n: int,) -> int:
"""
https://en.wikipedia.org/wiki/Gamma_function
for every positive integer n,
Γ(n) = (n-1)!
"""
return math.factorial(n-1)
def _incomplete_gamma(s: int, x: float, gamma_s: Optional[int] = None) -> float:
"""
https://en.wikipedia.org/wiki/Incomplete_gamma_function#Special_values
if s is a positive integer,
Γ(s, x) = (s-1)!*∑{k=0..s-1}(x^k/k!)
"""
if gamma_s is None:
gamma_s = _gamma(s)
sum_: float = 0
# {k=0..s-1} inclusive
for k in range(s):
numerator: float = x**k
denom: int = math.factorial(k)
quotient: float = numerator/denom
sum_ += quotient
incomplete_gamma_: float = sum_ * math.exp(-x) * gamma_s
return incomplete_gamma_
def phi(j: int, neg_h: float, ):
    """Exact analytic phi_j(-h) (Lemma 1, https://arxiv.org/abs/2308.02157):

        phi_j(-h) = e^(-h) * (-h)^(-j) * (1 - Γ(j, -h)/Γ(j))

    valid for integer j > 0. For j in {1,2,3}, Kat's phi_1/phi_2/phi_3 perform
    fewer steps. Analytic solution originally calculated by Clybius:
    https://github.com/Clybius/ComfyUI-Extra-Samplers/tree/main
    """
    assert j > 0
    gamma_j = _gamma(j)
    tail_fraction = _incomplete_gamma(j, neg_h, gamma_s=gamma_j) / gamma_j
    return math.exp(neg_h) * neg_h**-j * (1 - tail_fraction)
class Phi:
    """Memoizing evaluator for the phi_j functions over a step h and node
    fractions c.

    Calling the instance as phi(j, i) evaluates phi_j(-h * c[i-1]); i < 0 (the
    default) uses c = 1, i.e. the full step. Results are cached per (j, i).
    """
    def __init__(self, h, c, analytic_solution=False):
        self.h = h
        self.c = c
        self.cache = {}
        # analytic: Clybius' closed form; otherwise the Taylor-remainder method.
        self.phi_f = phi if analytic_solution else _phi

    def __call__(self, j, i=-1):
        key = (j, i)
        if key not in self.cache:
            node = 1 if i < 0 else self.c[i - 1]
            if node == 0:
                value = 0
            elif j == 0:
                # phi_0(-h*c) is just exp(-h*c); no series needed.
                value = torch.exp(-self.h * node)
            else:
                value = self.phi_f(j, -self.h * node)
            self.cache[key] = value
        return self.cache[key]
================================================
FILE: legacy/rk_coefficients.py
================================================
import torch
import copy
import math
from .deis_coefficients import get_deis_coeff_list
from .phi_functions import *
from .helper import get_extra_options_kv
from itertools import permutations #, combinations
import random
# ---------------------------------------------------------------------------
# Sampler registries and Butcher tableaus.
# ---------------------------------------------------------------------------

# Explicit / exponential-integrator sampler identifiers exposed in the UI.
# Convention: "<family>_<N>s" = N-stage single-step method, "<family>_<N>m" =
# N-step multistep variant. Commented-out entries are disabled/experimental.
RK_SAMPLER_NAMES = ["none",
                    "res_2m",
                    "res_3m",
                    "res_2s",
                    "res_3s",
                    "res_3s_alt",
                    "res_3s_cox_matthews",
                    "res_3s_lie",
                    "res_3s_strehmel_weiner",
                    "res_4s_krogstad",
                    "res_4s_strehmel_weiner",
                    "res_4s_cox_matthews",
                    "res_4s_munthe-kaas",
                    "res_5s",
                    "res_6s",
                    "res_8s",
                    "res_10s",
                    "res_15s",
                    "res_16s",
                    "etdrk2_2s",
                    "etdrk3_a_3s",
                    "etdrk3_b_3s",
                    #"etdrk4_4s"
                    "deis_2m",
                    "deis_3m",
                    "deis_4m",
                    "ralston_2s",
                    "ralston_3s",
                    "ralston_4s",
                    "dpmpp_2m",
                    "dpmpp_3m",
                    "dpmpp_2s",
                    "dpmpp_sde_2s",
                    "dpmpp_3s",
                    "lawson4_4s",
                    "genlawson41_4s",
                    "modgenlawson41_4s",
                    "midpoint_2s",
                    "heun_2s",
                    "heun_3s",
                    "houwen-wray_3s",
                    "kutta_3s",
                    "ssprk3_3s",
                    "rk38_4s",
                    "rk4_4s",
                    "rk5_7s",
                    "rk6_7s",
                    "bogacki-shampine_4s",
                    "bogacki-shampine_7s",
                    "dormand-prince_6s",
                    "dormand-prince_13s",
                    "tsi_7s",
                    #"verner_robust_16s",
                    "ddim",
                    "buehler",
                    ]

# Implicit Runge-Kutta sampler identifiers (diagonally/fully implicit tableaus).
IRK_SAMPLER_NAMES = ["none",
                    "explicit_diagonal",
                    "explicit_full",
                    "irk_exp_diag_2s",
                    "gauss-legendre_2s",
                    "gauss-legendre_3s",
                    "gauss-legendre_4s",
                    "gauss-legendre_5s",
                    "radau_ia_2s",
                    "radau_ia_3s",
                    "radau_iia_2s",
                    "radau_iia_3s",
                    "lobatto_iiia_2s",
                    "lobatto_iiia_3s",
                    "lobatto_iiib_2s",
                    "lobatto_iiib_3s",
                    "lobatto_iiic_2s",
                    "lobatto_iiic_3s",
                    "lobatto_iiic_star_2s",
                    "lobatto_iiic_star_3s",
                    "lobatto_iiid_2s",
                    "lobatto_iiid_3s",
                    "kraaijevanger_spijker_2s",
                    "qin_zhang_2s",
                    "pareschi_russo_2s",
                    "pareschi_russo_alt_2s",
                    "crouzeix_2s",
                    "crouzeix_3s",
                    ]

# Diagonal coefficient of Crouzeix's 3-stage DIRK method.
alpha_crouzeix = (2/(3**0.5)) * math.cos(math.pi / 18)

# Butcher tableaus: rk_coeff[name] = (a, b, ci) where `a` is the stage matrix
# (rows may be ragged; they are zero-padded by the consumer), `b` the output
# weight row(s), and `ci` the node fractions.
rk_coeff = {
    "gauss-legendre_5s": (
        [
            [4563950663 / 32115191526,
            (310937500000000 / 2597974476091533 + 45156250000 * (739**0.5) / 8747388808389),
            (310937500000000 / 2597974476091533 - 45156250000 * (739**0.5) / 8747388808389),
            (5236016175 / 88357462711 + 709703235 * (739**0.5) / 353429850844),
            (5236016175 / 88357462711 - 709703235 * (739**0.5) / 353429850844)],
            [(4563950663 / 32115191526 - 38339103 * (739**0.5) / 6250000000),
            (310937500000000 / 2597974476091533 + 9557056475401 * (739**0.5) / 3498955523355600000),
            (310937500000000 / 2597974476091533 - 14074198220719489 * (739**0.5) / 3498955523355600000),
            (5236016175 / 88357462711 + 5601362553163918341 * (739**0.5) / 2208936567775000000000),
            (5236016175 / 88357462711 - 5040458465159165409 * (739**0.5) / 2208936567775000000000)],
            [(4563950663 / 32115191526 + 38339103 * (739**0.5) / 6250000000),
            (310937500000000 / 2597974476091533 + 14074198220719489 * (739**0.5) / 3498955523355600000),
            (310937500000000 / 2597974476091533 - 9557056475401 * (739**0.5) / 3498955523355600000),
            (5236016175 / 88357462711 + 5040458465159165409 * (739**0.5) / 2208936567775000000000),
            (5236016175 / 88357462711 - 5601362553163918341 * (739**0.5) / 2208936567775000000000)],
            [(4563950663 / 32115191526 - 38209 * (739**0.5) / 7938810),
            (310937500000000 / 2597974476091533 - 359369071093750 * (739**0.5) / 70145310854471391),
            (310937500000000 / 2597974476091533 - 323282178906250 * (739**0.5) / 70145310854471391),
            (5236016175 / 88357462711 - 470139 * (739**0.5) / 1413719403376),
            (5236016175 / 88357462711 - 44986764863 * (739**0.5) / 21205791050640)],
            [(4563950663 / 32115191526 + 38209 * (739**0.5) / 7938810),
            (310937500000000 / 2597974476091533 + 359369071093750 * (739**0.5) / 70145310854471391),
            (310937500000000 / 2597974476091533 + 323282178906250 * (739**0.5) / 70145310854471391),
            (5236016175 / 88357462711 + 44986764863 * (739**0.5) / 21205791050640),
            (5236016175 / 88357462711 + 470139 * (739**0.5) / 1413719403376)],
        ],
        [
            [4563950663 / 16057595763,
            621875000000000 / 2597974476091533,
            621875000000000 / 2597974476091533,
            10472032350 / 88357462711,
            10472032350 / 88357462711]
        ],
        [
            1 / 2,
            1 / 2 - 99 * (739**0.5) / 10000,
            1 / 2 + 99 * (739**0.5) / 10000,
            1 / 2 - (739**0.5) / 60,
            1 / 2 + (739**0.5) / 60
        ]
    ),
    "gauss-legendre_4s": (
        [
            [1/4, 1/4 - 15**0.5 / 6, 1/4 + 15**0.5 / 6, 1/4],
            [1/4 + 15**0.5 / 6, 1/4, 1/4 - 15**0.5 / 6, 1/4],
            [1/4, 1/4 + 15**0.5 / 6, 1/4, 1/4 - 15**0.5 / 6],
            [1/4 - 15**0.5 / 6, 1/4, 1/4 + 15**0.5 / 6, 1/4],
        ],
        [
            [1/8, 3/8, 3/8, 1/8]
        ],
        [
            1/2 - 15**0.5 / 10,
            1/2 + 15**0.5 / 10,
            1/2 + 15**0.5 / 10,
            1/2 - 15**0.5 / 10
        ]
    ),
    "gauss-legendre_3s": (
        [
            [5/36, 2/9 - 15**0.5 / 15, 5/36 - 15**0.5 / 30],
            [5/36 + 15**0.5 / 24, 2/9, 5/36 - 15**0.5 / 24],
            [5/36 + 15**0.5 / 30, 2/9 + 15**0.5 / 15, 5/36],
        ],
        [
            [5/18, 4/9, 5/18]
        ],
        [1/2 - 15**0.5 / 10, 1/2, 1/2 + 15**0.5 / 10]
    ),
    "gauss-legendre_2s": (
        [
            [1/4, 1/4 - 3**0.5 / 6],
            [1/4 + 3**0.5 / 6, 1/4],
        ],
        [
            [1/2, 1/2],
        ],
        [1/2 - 3**0.5 / 6, 1/2 + 3**0.5 / 6]
    ),
    # NOTE(review): a-matrix is empty and b is a placeholder — this entry is
    # incomplete; it is also not listed in IRK_SAMPLER_NAMES, so unreachable.
    "radau_iia_4s": (
        [
            [],
            [],
            [],
            [],
        ],
        [
            [1/4, 1/4, 1/4, 1/4],
        ],
        [(1/11)*(4-6**0.5), (1/11)*(4+6**0.5), 1/2, 1]
    ),
    "radau_iia_3s": (
        [
            [11/45 - 7*6**0.5 / 360, 37/225 - 169*6**0.5 / 1800, -2/225 + 6**0.5 / 75],
            [37/225 + 169*6**0.5 / 1800, 11/45 + 7*6**0.5 / 360, -2/225 - 6**0.5 / 75],
            [4/9 - 6**0.5 / 36, 4/9 + 6**0.5 / 36, 1/9],
        ],
        [
            [4/9 - 6**0.5 / 36, 4/9 + 6**0.5 / 36, 1/9],
        ],
        [2/5 - 6**0.5 / 10, 2/5 + 6**0.5 / 10, 1.]
    ),
    "radau_iia_2s": (
        [
            [5/12, -1/12],
            [3/4, 1/4],
        ],
        [
            [3/4, 1/4],
        ],
        [1/3, 1]
    ),
    "radau_ia_3s": (
        [
            [1/9, (-1-6**0.5)/18, (-1+6**0.5)/18],
            [1/9, 11/45 + 7*6**0.5/360, 11/45-43*6**0.5/360],
            [1/9, 11/45-43*6**0.5/360, 11/45 + 7*6**0.5/360],
        ],
        [
            [1/9, 4/9 + 6**0.5/36, 4/9 - 6**0.5/36],
        ],
        [0, 3/5-6**0.5/10, 3/5+6**0.5/10]
    ),
    "radau_ia_2s": (
        [
            [1/4, -1/4],
            [1/4, 5/12],
        ],
        [
            [1/4, 3/4],
        ],
        [0, 2/3]
    ),
    "lobatto_iiia_3s": (
        [
            [0, 0, 0],
            [5/24, 1/3, -1/24],
            [1/6, 2/3, 1/6],
        ],
        [
            [1/6, 2/3, 1/6],
        ],
        [0, 1/2, 1]
    ),
    "lobatto_iiia_2s": (
        [
            [0, 0],
            [1/2, 1/2],
        ],
        [
            [1/2, 1/2],
        ],
        [0, 1]
    ),
    "lobatto_iiib_3s": (
        [
            [1/6, -1/6, 0],
            [1/6, 1/3, 0],
            [1/6, 5/6, 0],
        ],
        [
            [1/6, 2/3, 1/6],
        ],
        [0, 1/2, 1]
    ),
    "lobatto_iiib_2s": (
        [
            [1/2, 0],
            [1/2, 0],
        ],
        [
            [1/2, 1/2],
        ],
        [0, 1]
    ),
    "lobatto_iiic_3s": (
        [
            [1/6, -1/3, 1/6],
            [1/6, 5/12, -1/12],
            [1/6, 2/3, 1/6],
        ],
        [
            [1/6, 2/3, 1/6],
        ],
        [0, 1/2, 1]
    ),
    "lobatto_iiic_2s": (
        [
            [1/2, -1/2],
            [1/2, 1/2],
        ],
        [
            [1/2, 1/2],
        ],
        [0, 1]
    ),
    "lobatto_iiic_star_3s": (
        [
            [0, 0, 0],
            [1/4, 1/4, 0],
            [0, 1, 0],
        ],
        [
            [1/6, 2/3, 1/6],
        ],
        [0, 1/2, 1]
    ),
    "lobatto_iiic_star_2s": (
        [
            [0, 0],
            [1, 0],
        ],
        [
            [1/2, 1/2],
        ],
        [0, 1]
    ),
    "lobatto_iiid_3s": (
        [
            [1/6, 0, -1/6],
            [1/12, 5/12, 0],
            [1/2, 1/3, 1/6],
        ],
        [
            [1/6, 2/3, 1/6],
        ],
        [0, 1/2, 1]
    ),
    "lobatto_iiid_2s": (
        [
            [1/2, 1/2],
            [-1/2, 1/2],
        ],
        [
            [1/2, 1/2],
        ],
        [0, 1]
    ),
    "kraaijevanger_spijker_2s": (
        [
            [1/2, 0],
            [-1/2, 2],
        ],
        [
            [-1/2, 3/2],
        ],
        [1/2, 3/2]
    ),
    "qin_zhang_2s": (
        [
            [1/4, 0],
            [1/2, 1/4],
        ],
        [
            [1/2, 1/2],
        ],
        [1/4, 3/4]
    ),
    "pareschi_russo_2s": (
        [
            [(1-2**0.5/2), 0],
            [1-2*(1-2**0.5/2), (1-2**0.5/2)],
        ],
        [
            [1/2, 1/2],
        ],
        [(1-2**0.5/2), 1-(1-2**0.5/2)]
    ),
    "pareschi_russo_alt_2s": (
        [
            [(1-2**0.5/2), 0],
            [1-(1-2**0.5/2), (1-2**0.5/2)],
        ],
        [
            [1-(1-2**0.5/2), (1-2**0.5/2)],
        ],
        [(1-2**0.5/2), 1]
    ),
    "crouzeix_3s": (
        [
            [(1+alpha_crouzeix)/2, 0, 0],
            [-alpha_crouzeix/2, (1+alpha_crouzeix)/2, 0],
            [1+alpha_crouzeix, -(1+2*alpha_crouzeix), (1+alpha_crouzeix)/2],
        ],
        [
            [1/(6*alpha_crouzeix**2), 1-(1/(3*alpha_crouzeix**2)), 1/(6*alpha_crouzeix**2)],
        ],
        [(1+alpha_crouzeix)/2, 1/2, (1-alpha_crouzeix)/2],
    ),
    "crouzeix_2s": (
        [
            [1/2 + 3**0.5 / 6, 0],
            [-(3**0.5 / 3), 1/2 + 3**0.5 / 6]
        ],
        [
            [1/2, 1/2],
        ],
        [1/2 + 3**0.5 / 6, 1/2 - 3**0.5 / 6],
    ),
    "verner_13s": ( #verner9. some values are missing, need to revise
        [
            [],
        ],
        [
            [],
        ],
        [
            0.03462,
            0.09702435063878045,
            0.14553652595817068,
            0.561,
            0.22900791159048503,
            0.544992088409515,
            0.645,
            0.48375,
            0.06757,
            0.25,
            0.6590650618730999,
            0.8206,
            0.9012,
        ]
    ),
    # NOTE(review): unlike other entries, `b` here is a flat list rather than a
    # nested [[...]] row — confirm the consumer handles this shape before
    # enabling "verner_robust_16s" (currently commented out in RK_SAMPLER_NAMES).
    "verner_robust_16s": (
        [
            [],
            [0.04],
            [-0.01988527319182291, 0.11637263332969652],
            [0.0361827600517026, 0, 0.10854828015510781],
            [2.272114264290177, 0, -8.526886447976398, 6.830772183686221],
            [0.050943855353893744, 0, 0, 0.1755865049809071, 0.007022961270757467],
            [0.1424783668683285, 0, 0, -0.3541799434668684, 0.07595315450295101, 0.6765157656337123],
            [0.07111111111111111, 0, 0, 0, 0, 0.3279909287605898, 0.24089796012829906],
            [0.07125, 0, 0, 0, 0, 0.32688424515752457, 0.11561575484247544, -0.03375],
            [0.0482267732246581, 0, 0, 0, 0, 0.039485599804954, 0.10588511619346581, -0.021520063204743093, -0.10453742601833482],
            [-0.026091134357549235, 0, 0, 0, 0, 0.03333333333333333, -0.1652504006638105, 0.03434664118368617, 0.1595758283215209, 0.21408573218281934],
            [-0.03628423396255658, 0, 0, 0, 0, -1.0961675974272087, 0.1826035504321331, 0.07082254444170683, -0.02313647018482431, 0.2711204726320933, 1.3081337494229808],
            [-0.5074635056416975, 0, 0, 0, 0, -6.631342198657237, -0.2527480100908801, -0.49526123800360955, 0.2932525545253887, 1.440108693768281, 6.237934498647056, 0.7270192054526988],
            [0.6130118256955932, 0, 0, 0, 0, 9.088803891640463, -0.40737881562934486, 1.7907333894903747, 0.714927166761755, -1.4385808578417227, -8.26332931206474, -1.537570570808865, 0.34538328275648716],
            [-1.2116979103438739, 0, 0, 0, 0, -19.055818715595954, 1.263060675389875, -6.913916969178458, -0.6764622665094981, 3.367860445026608, 18.00675164312591, 6.83882892679428, -1.0315164519219504, 0.4129106232130623],
            [2.1573890074940536, 0, 0, 0, 0, 23.807122198095804, 0.8862779249216555, 13.139130397598764, -2.604415709287715, -5.193859949783872, -20.412340711541507, -12.300856252505723, 1.5215530950085394],
        ],
        [
            0.014588852784055396, 0, 0, 0, 0, 0, 0, 0.0020241978878893325, 0.21780470845697167,
            0.12748953408543898, 0.2244617745463132, 0.1787254491259903, 0.07594344758096556,
            0.12948458791975614, 0.029477447612619417, 0
        ],
        [
            0, 0.04, 0.09648736013787361, 0.1447310402068104, 0.576, 0.2272326564618766,
            0.5407673435381234, 0.64, 0.48, 0.06754, 0.25, 0.6770920153543243, 0.8115,
            0.906, 1, 1
        ],
    ),
    "dormand-prince_13s": (
        [
            [],
            [1/18],
            [1/48, 1/16],
            [1/32, 0, 3/32],
            [5/16, 0, -75/64, 75/64],
            [3/80, 0, 0, 3/16, 3/20],
            [29443841/614563906, 0, 0, 77736538/692538347, -28693883/1125000000, 23124283/1800000000],
            [16016141/946692911, 0, 0, 61564180/158732637, 22789713/633445777, 545815736/2771057229, -180193667/1043307555],
            [39632708/573591083, 0, 0, -433636366/683701615, -421739975/2616292301, 100302831/723423059, 790204164/839813087, 800635310/3783071287],
            [246121993/1340847787, 0, 0, -37695042795/15268766246, -309121744/1061227803, -12992083/490766935, 6005943493/2108947869, 393006217/1396673457, 123872331/1001029789],
            [-1028468189/846180014, 0, 0, 8478235783/508512852, 1311729495/1432422823, -10304129995/1701304382, -48777925059/3047939560, 15336726248/1032824649, -45442868181/3398467696, 3065993473/597172653],
            [185892177/718116043, 0, 0, -3185094517/667107341, -477755414/1098053517, -703635378/230739211, 5731566787/1027545527, 5232866602/850066563, -4093664535/808688257, 3962137247/1805957418, 65686358/487910083],
            [403863854/491063109, 0, 0, -5068492393/434740067, -411421997/543043805, 652783627/914296604, 11173962825/925320556, -13158990841/6184727034, 3936647629/1978049680, -160528059/685178525, 248638103/1413531060],
        ],
        [
            [14005451/335480064, 0, 0, 0, 0, -59238493/1068277825, 181606767/758867731, 561292985/797845732, -1041891430/1371343529, 760417239/1151165299, 118820643/751138087, -528747749/2220607170, 1/4],
        ],
        [0, 1/18, 1/12, 1/8, 5/16, 3/8, 59/400, 93/200, 5490023248 / 9719169821, 13/20, 1201146811 / 1299019798, 1, 1],
    ),
    "dormand-prince_6s": (
        [
            [],
            [1/5],
            [3/40, 9/40],
            [44/45, -56/15, 32/9],
            [19372/6561, -25360/2187, 64448/6561, -212/729],
            [9017/3168, -355/33, 46732/5247, 49/176, -5103/18656],
        ],
        [
            [35/384, 0, 500/1113, 125/192, -2187/6784, 11/84],
        ],
        [0, 1/5, 3/10, 4/5, 8/9, 1],
    ),
    "bogacki-shampine_7s": ( #5th order
        [
            [],
            [1/6],
            [2/27, 4/27],
            [183/1372, -162/343, 1053/1372],
            [68/297, -4/11, 42/143, 1960/3861],
            [597/22528, 81/352, 63099/585728, 58653/366080, 4617/20480],
            [174197/959244, -30942/79937, 8152137/19744439, 666106/1039181, -29421/29068, 482048/414219],
        ],
        [
            [587/8064, 0, 4440339/15491840, 24353/124800, 387/44800, 2152/5985, 7267/94080],
        ],
        [0, 1/6, 2/9, 3/7, 2/3, 3/4, 1]
    ),
    "bogacki-shampine_4s": ( # NOTE(review): labeled "5th order" upstream, but the classic 4-stage Bogacki-Shampine pair is 3rd order — verify
        [
            [],
            [1/2],
            [0, 3/4],
            [2/9, 1/3, 4/9],
        ],
        [
            [2/9, 1/3, 4/9, 0],
        ],
        [0, 1/2, 3/4, 1]
    ),
    "tsi_7s": ( #5th order
        [
            [],
            [0.161],
            [-0.008480655492356989, 0.335480655492357],
            [2.8971530571054935, -6.359448489975075, 4.3622954328695815],
            [5.325864828439257, -11.748883564062828, 7.4955393428898365, -0.09249506636175525],
            [5.86145544294642, -12.92096931784711, 8.159367898576159, -0.071584973281401, -0.02826905039406838],
            [0.09646076681806523, 0.01, 0.4798896504144996, 1.379008574103742, -3.290069515436081, 2.324710524099774],
        ],
        [
            [0.09646076681806523, 0.01, 0.4798896504144996, 1.379008574103742, -3.290069515436081, 2.324710524099774, 0.0],
        ],
        [0.0, 0.161, 0.327, 0.9, 0.9800255409045097, 1.0, 1.0],
    ),
    "rk6_7s": ( # NOTE(review): labeled "5th order" upstream though named rk6 — verify order before relying on it
        [
            [],
            [1/3],
            [0, 2/3],
            [1/12, 1/3, -1/12],
            [-1/16, 9/8, -3/16, -3/8],
            [0, 9/8, -3/8, -3/4, 1/2],
            [9/44, -9/11, 63/44, 18/11, 0, -16/11],
        ],
        [
            [11/120, 0, 27/40, 27/40, -4/15, -4/15, 11/120],
        ],
        [0, 1/3, 2/3, 1/3, 1/2, 1/2, 1],
    ),
    "rk5_7s": ( #5th order
        [
            [],
            [1/5],
            [3/40, 9/40],
            [44/45, -56/15, 32/9],
            [19372/6561, -25360/2187, 64448/6561, 212/729], #flipped 212 sign
            [-9017/3168, -355/33, 46732/5247, 49/176, -5103/18656],
            [35/384, 0, 500/1113, 125/192, -2187/6784, 11/84],
        ],
        [
            [5179/57600, 0, 7571/16695, 393/640, -92097/339200, 187/2100, 1/40],
        ],
        [0, 1/5, 3/10, 4/5, 8/9, 1, 1],
    ),
    "ssprk_4s": ( #https://link.springer.com/article/10.1007/s41980-022-00731-x
        [
            [],
            [1/2],
            [1/2, 1/2],
            [1/6, 1/6, 1/6],
        ],
        [
            [1/6, 1/6, 1/6, 1/2],
        ],
        [0, 1/2, 1, 1/2],
    ),
    "rk4_4s": (
        [
            [],
            [1/2],
            [0, 1/2],
            [0, 0, 1],
        ],
        [
            [1/6, 1/3, 1/3, 1/6],
        ],
        [0, 1/2, 1/2, 1],
    ),
    "rk38_4s": (
        [
            [],
            [1/3],
            [-1/3, 1],
            [1, -1, 1],
        ],
        [
            [1/8, 3/8, 3/8, 1/8],
        ],
        [0, 1/3, 2/3, 1],
    ),
    "ralston_4s": (
        [
            [],
            [2/5],
            [(-2889+1428 * 5**0.5)/1024, (3785-1620 * 5**0.5)/1024],
            [(-3365+2094 * 5**0.5)/6040, (-975-3046 * 5**0.5)/2552, (467040+203968*5**0.5)/240845],
        ],
        [
            [(263+24*5**0.5)/1812, (125-1000*5**0.5)/3828, (3426304+1661952*5**0.5)/5924787, (30-4*5**0.5)/123],
        ],
        [0, 2/5, (14-3 * 5**0.5)/16, 1],
    ),
    "heun_3s": (
        [
            [],
            [1/3],
            [0, 2/3],
        ],
        [
            [1/4, 0, 3/4],
        ],
        [0, 1/3, 2/3],
    ),
    "kutta_3s": (
        [
            [],
            [1/2],
            [-1, 2],
        ],
        [
            [1/6, 2/3, 1/6],
        ],
        [0, 1/2, 1],
    ),
    "ralston_3s": (
        [
            [],
            [1/2],
            [0, 3/4],
        ],
        [
            [2/9, 1/3, 4/9],
        ],
        [0, 1/2, 3/4],
    ),
    "houwen-wray_3s": (
        [
            [],
            [8/15],
            [1/4, 5/12],
        ],
        [
            [1/4, 0, 3/4],
        ],
        [0, 8/15, 2/3],
    ),
    "ssprk3_3s": (
        [
            [],
            [1],
            [1/4, 1/4],
        ],
        [
            [1/6, 1/6, 2/3],
        ],
        [0, 1, 1/2],
    ),
    "midpoint_2s": (
        [
            [],
            [1/2],
        ],
        [
            [0, 1],
        ],
        [0, 1/2],
    ),
    "heun_2s": (
        [
            [],
            [1],
        ],
        [
            [1/2, 1/2],
        ],
        [0, 1],
    ),
    "ralston_2s": (
        [
            [],
            [2/3],
        ],
        [
            [1/4, 3/4],
        ],
        [0, 2/3],
    ),
    "buehler": (
        [
            [],
        ],
        [
            [1],
        ],
        [0],
    ),
}
def get_rk_methods(rk_type, h, c1=0.0, c2=0.5, c3=1.0, h_prev=None, h_prev2=None, step=0, sigmas=None, sigma=None, sigma_next=None, sigma_down=None, extra_options=None):
FSAL = False
multistep_stages = 0
if rk_type.startswith(("res", "dpmpp", "ddim" )):
h_no_eta = -torch.log(sigma_next/sigma)
h_prev_no_eta = -torch.log(sigmas[step] /sigmas[step-1]) if step >= 1 else None
h_prev2_no_eta = -torch.log(sigmas[step-1]/sigmas[step-2]) if step >= 2 else None
else:
h_no_eta = sigma_next - sigma
h_prev_no_eta = sigmas[step] - sigmas[step-1] if step >= 1 else None
h_prev2_no_eta = sigmas[step-1] - sigmas[step-2] if step >= 2 else None
if type(c1) == torch.Tensor:
c1 = c1.item()
if type(c2) == torch.Tensor:
c2 = c2.item()
if type(c3) == torch.Tensor:
c3 = c3.item()
if c1 == -1:
c1 = random.uniform(0, 1)
if c2 == -1:
c2 = random.uniform(0, 1)
if c3 == -1:
c3 = random.uniform(0, 1)
if rk_type[:4] == "deis":
order = int(rk_type[-2])
if step < order:
if order == 4:
rk_type = "res_3s"
order = 3
elif order == 3:
rk_type = "res_3s"
elif order == 2:
rk_type = "res_2s"
else:
rk_type = "deis"
multistep_stages = order-1
if rk_type[-2:] == "2m": #multistep method
rk_type = rk_type[:-2] + "2s"
if h_prev is not None:
multistep_stages = 1
c2 = (-h_prev / h).item()
#print("c2: ", c2, h_prev, h)
if rk_type[-2:] == "3m": #multistep method
rk_type = rk_type[:-2] + "3s"
if h_prev2 is not None:
multistep_stages = 2
#print("3m")
#c2 = (-h_prev2 / (h_prev + h)).item()
c2 = (-h_prev2 / h).item()
#c3 = (-h_prev / h).item()
c3 = (-(h_prev2 + h_prev) / h).item()
#print(c2, h_prev2, h_prev)
#print(c3, h_prev, h)
if rk_type in rk_coeff:
a, b, ci = copy.deepcopy(rk_coeff[rk_type])
a, b, ci = rk_coeff[rk_type]
a = [row + [0] * (len(ci) - len(row)) for row in a]
match rk_type:
case "deis":
coeff_list = get_deis_coeff_list(sigmas, multistep_stages+1, deis_mode="rhoab")
coeff_list = [[elem / h for elem in inner_list] for inner_list in coeff_list]
if multistep_stages == 1:
b1, b2 = coeff_list[step]
a = [
[0, 0],
[0, 0],
]
b = [
[b1, b2],
]
ci = [0, 0]
if multistep_stages == 2:
b1, b2, b3 = coeff_list[step]
a = [
[0, 0, 0],
[0, 0, 0],
[0, 0, 0],
]
b = [
[b1, b2, b3],
]
ci = [0, 0, 0]
if multistep_stages == 3:
b1, b2, b3, b4 = coeff_list[step]
a = [
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
]
b = [
[b1, b2, b3, b4],
]
ci = [0, 0, 0, 0]
if multistep_stages > 0:
for i in range(len(b[0])):
b[0][i] *= ((sigma_down - sigma) / (sigma_next - sigma))
case "dormand-prince_6s":
FSAL = True
case "ddim":
b1 = phi(1, -h)
a = [
[0],
]
b = [
[b1],
]
ci = [0]
case "res_2s":
c2 = float(get_extra_options_kv("c2", str(c2), extra_options))
ci = [0, c2]
φ = Phi(h, ci)
a2_1 = c2 * φ(1,2)
b2 = φ(2)/c2
b1 = φ(1) - b2
a = [
[0,0],
[a2_1, 0],
]
b = [
[b1, b2],
]
case "res_3s":
c2 = float(get_extra_options_kv("c2", str(c2), extra_options))
c3 = float(get_extra_options_kv("c3", str(c3), extra_options))
gamma = calculate_gamma(c2, c3)
a2_1 = c2 * phi(1, -h*c2)
a3_2 = gamma * c2 * phi(2, -h*c2) + (c3 ** 2 / c2) * phi(2, -h*c3) #phi_2_c3_h # a32 from k2 to k3
a3_1 = c3 * phi(1, -h*c3) - a3_2 # a31 from k1 to k3
b3 = (1 / (gamma * c2 + c3)) * phi(2, -h)
b2 = gamma * b3 #simplified version of: b2 = (gamma / (gamma * c2 + c3)) * phi_2_h
b1 = phi(1, -h) - b2 - b3
a = [
[0, 0, 0],
[a2_1, 0, 0],
[a3_1, a3_2, 0],
]
b = [
[b1, b2, b3],
]
ci = [c1, c2, c3]
case "res_3s_alt":
c2 = 1/3
c2 = float(get_extra_options_kv("c2", str(c2), extra_options))
c1,c2,c3 = 0, c2, 2/3
ci = [c1,c2,c3]
φ = Phi(h, ci)
a = [
[0, 0, 0],
[0, 0, 0],
[0, (4/(9*c2)) * φ(2,3), 0],
]
b = [
[0, 0, (1/c3)*φ(2)],
]
a, b = gen_first_col_exp(a,b,ci,φ)
case "res_3s_strehmel_weiner": # weak 4th order, Krogstad
c2 = 1/2
c2 = float(get_extra_options_kv("c2", str(c2), extra_options))
ci = [0,c2,1]
φ = Phi(h, ci)
a = [
[0, 0, 0],
[0, 0, 0],
[0, (1/c2) * φ(2,3), 0],
]
b = [
[0,
0,
φ(2)],
]
a, b = gen_first_col_exp(a,b,ci,φ)
case "res_3s_cox_matthews": # Cox & Matthews; known as ETD3RK
c1,c2,c3 = 0,1/2,1
ci = [0,c2,1]
φ = Phi(h, ci)
a = [
[0, 0, 0],
[0, 0, 0],
[0, (1/c2) * φ(1,3), 0], # paper said 2 * φ(1,3), but this is the same and more consistent with res_3s_strehmel_weiner
]
b = [
[0,
-8*φ(3) + 4*φ(2),
4*φ(3) - φ(2)],
]
a, b = gen_first_col_exp(a,b,ci,φ)
case "res_3s_lie": # Lie; known as ETD2CF3
c1,c2,c3 = 0, 1/3, 2/3
ci = [c1,c2,c3]
φ = Phi(h, ci)
a = [
[0, 0, 0],
[0, 0, 0],
[0, (4/3)*φ(2,3), 0], # paper said 2 * φ(1,3), but this is the same and more consistent with res_3s_strehmel_weiner
]
b = [
[0,
6*φ(2) - 18*φ(3),
(-3/2)*φ(2) + 9*φ(3)],
]
a, b = gen_first_col_exp(a,b,ci,φ)
case "res_4s_cox_matthews": # weak 4th order, Cox & Matthews; unresolved issue, see below
c1,c2,c3,c4 = 0, 1/2, 1/2, 1
ci = [c1,c2,c3,c4]
φ = Phi(h, ci)
a2_1 = c2 * φ(1,2)
a3_2 = c3 * φ(1,3)
a4_1 = (1/2) * φ(1,3) * (φ(0,3) - 1) # φ(0,3) == torch.exp(-h*c3)
a4_3 = φ(1,3)
b1 = φ(1) - 3*φ(2) + 4*φ(3)
b2 = 2*φ(2) - 4*φ(3)
b3 = 2*φ(2) - 4*φ(3)
b4 = 4*φ(3) - φ(2)
a = [
[0, 0,0,0],
[a2_1, 0,0,0],
[0, a3_2,0,0],
[a4_1, 0,a4_3,0],
]
b = [
[b1, b2, b3, b4],
]
case "res_4s_munthe-kaas": # unstable RKMK4t
c1,c2,c3,c4 = 0, 1/2, 1/2, 1
ci = [c1,c2,c3,c4]
φ = Phi(h, ci)
a = [
[0, 0, 0, 0],
[c2*φ(1,2), 0, 0, 0],
[(h/8)*φ(1,2), (1/2)*(1-h/4)*φ(1,2), 0, 0],
[0, 0, φ(1), 0],
]
b = [
[(1/6)*φ(1)*(1+h/2),
(1/3)*φ(1),
(1/3)*φ(1),
(1/6)*φ(1)*(1-h/2)],
]
case "res_4s_krogstad": # weak 4th order, Krogstad
c1,c2,c3,c4 = 0, 1/2, 1/2, 1
ci = [c1,c2,c3,c4]
φ = Phi(h, ci)
a = [
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, φ(2,3), 0, 0],
[0, 0, 2*φ(2,4), 0],
]
b = [
[0,
2*φ(2) - 4*φ(3),
2*φ(2) - 4*φ(3),
-φ(2) + 4*φ(3)],
]
#a = [row + [0] * (len(ci) - len(row)) for row in a]
a, b = gen_first_col_exp(a,b,ci,φ)
case "res_4s_strehmel_weiner": # weak 4th order, Strehmel & Weiner
c1,c2,c3,c4 = 0, 1/2, 1/2, 1
ci = [c1,c2,c3,c4]
φ = Phi(h, ci)
a = [
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, c3*φ(2,3), 0, 0],
[0, -2*φ(2,4), 4*φ(2,4), 0],
]
b = [
[0,
0,
4*φ(2) - 8*φ(3),
-φ(2) + 4*φ(3)],
]
a, b = gen_first_col_exp(a,b,ci,φ)
case "lawson4_4s":
c1,c2,c3,c4 = 0, 1/2, 1/2, 1
ci = [c1,c2,c3,c4]
φ = Phi(h, ci)
a2_1 = c2 * φ(0,2)
a3_2 = 1/2
a4_3 = φ(0,2)
b1 = (1/6) * φ(0)
b2 = (1/3) * φ(0,2)
b3 = (1/3) * φ(0,2)
b4 = 1/6
a = [
[0, 0, 0, 0],
[a2_1, 0, 0, 0],
[0, a3_2, 0, 0],
[0, 0, a4_3, 0],
]
b = [
[b1,b2,b3,b4],
]
case "genlawson41_4s": # GenLawson4 https://ora.ox.ac.uk/objects/uuid:cc001282-4285-4ca2-ad06-31787b540c61/files/m611df1a355ca243beb09824b70e5e774
c1,c2,c3,c4 = 0, 1/2, 1/2, 1
ci = [c1,c2,c3,c4]
φ = Phi(h, ci)
a3_2 = 1/2
a4_3 = φ(0,2)
b2 = (1/3) * φ(0,2)
b3 = (1/3) * φ(0,2)
b4 = 1/6
a = [
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, a3_2, 0, 0],
[0, 0, a4_3, 0],
]
b = [
[0,
b2,
b3,
b4,],
]
a, b = gen_first_col_exp(a,b,ci,φ)
case "modgenlawson41_4s": # GenLawson4 https://ora.ox.ac.uk/objects/uuid:cc001282-4285-4ca2-ad06-31787b540c61/files/m611df1a355ca243beb09824b70e5e774
c1,c2,c3,c4 = 0, 1/2, 1/2, 1
ci = [c1,c2,c3,c4]
φ = Phi(h, ci)
a3_2 = 1/2
a4_3 = φ(0,2)
b2 = (1/3) * φ(0,2)
b3 = (1/3) * φ(0,2)
b4 = φ(2) - (1/3)*φ(0,2)
a = [
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, a3_2, 0, 0],
[0, 0, a4_3, 0],
]
b = [
[0,
b2,
b3,
b4,],
]
a, b = gen_first_col_exp(a,b,ci,φ)
case "etdrk2_2s": # https://arxiv.org/pdf/2402.15142v1
c1,c2 = 0, 1
ci = [c1,c2]
φ = Phi(h, ci)
a = [
[0, 0],
[φ(1), 0],
]
b = [
[φ(1)-φ(2), φ(2)],
]
case "etdrk3_a_3s": # https://arxiv.org/pdf/2402.15142v1
c1,c2,c3 = 0, 1, 2/3
ci = [c1,c2,c3]
φ = Phi(h, ci)
a2_1 = c2*φ(1)
a3_2 = (4/9)*φ(2,3)
a3_1 = c3*φ(1,3) - a3_2
b2 = φ(2) - (1/2)*φ(1)
b3 = (3/4) * φ(1)
b1 = φ(1) - b2 - b3
a = [
[0, 0, 0],
[a2_1, 0, 0],
[a3_1, a3_2, 0 ]
]
b = [
[b1, b2, b3],
]
case "etdrk3_b_3s": # https://arxiv.org/pdf/2402.15142v1
c1,c2,c3 = 0, 4/9, 2/3
ci = [c1,c2,c3]
φ = Phi(h, ci)
a2_1 = c2*φ(1,2)
a3_2 = φ(2,3)
a3_1 = c3*φ(1,3) - a3_2
b2 = 0
b3 = (3/2) * φ(2)
b1 = φ(1) - b2 - b3
a = [
[0, 0, 0],
[a2_1, 0, 0],
[a3_1, a3_2, 0 ]
]
b = [
[b1, b2, b3],
]
case "dpmpp_2s":
c2 = float(get_extra_options_kv("c2", str(c2), extra_options))
a2_1 = c2 * phi(1, -h*c2)
b1 = (1 - 1/(2*c2)) * phi(1, -h)
b2 = (1/(2*c2)) * phi(1, -h)
a = [
[0, 0],
[a2_1, 0],
]
b = [
[b1, b2],
]
ci = [0, c2]
case "dpmpp_sde_2s":
c2 = 1.0 #hardcoded to 1.0 to more closely emulate the configuration for k-diffusion's implementation
a2_1 = c2 * phi(1, -h*c2)
b1 = (1 - 1/(2*c2)) * phi(1, -h)
b2 = (1/(2*c2)) * phi(1, -h)
a = [
[0, 0],
[a2_1, 0],
]
b = [
[b1, b2],
]
ci = [0, c2]
case "dpmpp_3s":
c2 = float(get_extra_options_kv("c2", str(c2), extra_options))
c3 = float(get_extra_options_kv("c3", str(c3), extra_options))
a2_1 = c2 * phi(1, -h*c2)
a3_2 = (c3**2 / c2) * phi(2, -h*c3)
a3_1 = c3 * phi(1, -h*c3) - a3_2
b2 = 0
b3 = (1/c3) * phi(2, -h)
b1 = phi(1, -h) - b2 - b3
a = [
[0, 0, 0],
[a2_1, 0, 0],
[a3_1, a3_2, 0],
]
b = [
[b1, b2, b3],
]
ci = [0, c2, c3]
case "res_5s": #4th order
c1, c2, c3, c4, c5 = 0, 1/2, 1/2, 1, 1/2
a2_1 = c2 * phi(1, -h * c2)
a3_2 = phi(2, -h * c3)
a3_1 = c3 * phi(1, -h * c3) - a3_2
#a3_1 = c3 * phi(1, -h * c3) - phi(2, -h * c3)
a4_2 = a4_3 = phi(2, -h * c4)
a4_1 = c4 * phi(1, -h * c4) - a4_2 - a4_3
#a4_1 = phi(1, -h * c4) - 2 * phi(2, -h * c4)
a5_2 = a5_3 = 0.5 * phi(2, -h * c5) - phi(3, -h * c4) + 0.25 * phi(2, -h * c4) - 0.5 * phi(3, -h * c5)
a5_4 = 0.25 * phi(2, -h * c5) - a5_2
a5_1 = c5 * phi(1, -h * c5) - a5_2 - a5_3 - a5_4
b2 = b3 = 0
b4 = -phi(2, -h) + 4*phi(3, -h)
b5 = 4 * phi(2, -h) - 8 * phi(3, -h)
#b1 = phi(1, -h) - 3 * phi(2, -h) + 4 * phi(3, -h)
b1 = phi(1,-h) - b2 - b3 - b4 - b5
a = [
[0, 0, 0, 0, 0],
[a2_1, 0, 0, 0, 0],
[a3_1, a3_2, 0, 0, 0],
[a4_1, a4_2, a4_3, 0, 0],
[a5_1, a5_2, a5_3, a5_4, 0],
]
b = [
[b1, b2, b3, b4, b5],
]
ci = [0., 0.5, 0.5, 1., 0.5]
case "res_6s": #4th order
c1, c2, c3, c4, c5, c6 = 0, 1/2, 1/2, 1/3, 1/3, 5/6
ci = [c1, c2, c3, c4, c5, c6]
φ = Phi(h, ci)
a2_1 = c2 * φ(1,2)
a3_1 = 0
a3_2 = (c3**2 / c2) * φ(2,3)
a4_1 = 0
a4_2 = (c4**2 / c2) * φ(2,4)
a4_3 = (c4**2 * φ(2,4) - a4_2 * c2) / c3
a5_1 = 0
a5_2 = 0 #zero
a5_3 = (-c4 * c5**2 * φ(2,5) + 2*c5**3 * φ(3,5)) / (c3 * (c3 - c4))
a5_4 = (-c3 * c5**2 * φ(2,5) + 2*c5**3 * φ(3,5)) / (c4 * (c4 - c3))
a6_1 = 0
a6_2 = 0 #zero
a6_3 = (-c4 * c6**2 * φ(2,6) + 2*c6**3 * φ(3,6)) / (c3 * (c3 - c4))
a6_4 = (-c3 * c6**2 * φ(2,6) + 2*c6**3 * φ(3,6)) / (c4 * (c4 - c3))
a6_5 = (c6**2 * φ(2,6) - a6_3*c3 - a6_4*c4) / c5
#a6_5_alt = (2*c6**3 * φ(3,6) - a6_3*c3**2 - a6_4*c4**2) / c5**2
b1 = 0
b2 = 0
b3 = 0
b4 = 0
b5 = (-c6*φ(2) + 2*φ(3)) / (c5 * (c5 - c6))
b6 = (-c5*φ(2) + 2*φ(3)) / (c6 * (c6 - c5))
a = [
[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0],
[0, a3_2, 0, 0, 0, 0],
[0, a4_2, a4_3, 0, 0, 0],
[0, a5_2, a5_3, a5_4, 0, 0],
[0, a6_2, a6_3, a6_4, a6_5, 0],
]
b = [
[0, b2, b3, b4, b5, b6],
]
for i in range(len(ci)):
a[i][0] = ci[i] * φ(1,i+1) - sum(a[i])
for i in range(len(b)):
b[i][0] = φ(1) - sum(b[i])
case "res_8s": #todo: add EKPRK5S8
c1, c2, c3, c4, c5, c6, c7, c8 = 0, 1/2, 1/2, 1/4, 1/2, 1/5, 2/3, 1
ci = [c1, c2, c3, c4, c5, c6, c7, c8]
φ = Phi(h, ci, analytic_solution=True)
a3_2 = (1/2) * φ(2,3)
a4_3 = (1/8) * φ(2,4)
a5_3 = (-1/2) * φ(2,5) + 2 * φ(3,5)
a5_4 = 2 * φ(2,5) - 4 * φ(3,5)
a6_4 = (8/25) * φ(2,6) - (32/125) * φ(3,6)
a6_5 = (2/25) * φ(2,6) - (1/2) * a6_4
a7_4 = (-125/162) * a6_4
a7_5 = (125/1944) * a6_4 - (16/27) * φ(2,7) + (320/81) * φ(3,7)
a7_6 = (3125/3888) * a6_4 + (100/27) * φ(2,7) - (800/81) * φ(3,7)
Φ = (5/32)*a6_4 - (1/28)*φ(2,6) + (36/175)*φ(2,7) - (48/25)*φ(3,7) + (6/175)*φ(4,6) + (192/35)*φ(4,7) + 6*φ(4,8)
a8_5 = (208/3)*φ(3,8) - (16/3) *φ(2,8) - 40*Φ
a8_6 = (-250/3)*φ(3,8) + (250/21)*φ(2,8) + (250/7)*Φ
a8_7 = -27*φ(3,8) + (27/14)*φ(2,8) + (135/7)*Φ
b6 = (125/14)*φ(2) - (625/14)*φ(3) + (1125/14)*φ(4)
b7 = (-27/14)*φ(2) + (162/7) *φ(3) - (405/7) *φ(4)
b8 = (1/2) *φ(2) - (13/2) *φ(3) + (45/2) *φ(4)
a2_1 = c2*φ(1,2)
a3_1 = c3*φ(1,3) - a3_2
a4_1 = c4*φ(1,4) - a4_3
a5_1 = c5*φ(1,5) - a5_3 - a5_4
a6_1 = c6*φ(1,6) - a6_4 - a6_5
a7_1 = c7*φ(1,7) - a7_4 - a7_5 - a7_6
a8_1 = c8*φ(1,8) - a8_5 - a8_6 - a8_7
b1 = φ(1) - b6 - b7 - b8
a = [
[0, 0, 0, 0, 0, 0, 0, 0],
[a2_1, 0, 0, 0, 0, 0, 0, 0],
[a3_1, a3_2, 0, 0, 0, 0, 0, 0],
[a4_1, 0, a4_3, 0, 0, 0, 0, 0],
[a5_1, 0, a5_3, a5_4, 0, 0, 0, 0],
[a6_1, 0, 0, a6_4, a6_5, 0, 0, 0],
[a7_1 , 0, 0, a7_4, a7_5, a7_6, 0, 0],
[a8_1 , 0, 0, 0, a8_5, a8_6, a8_7, 0],
]
b = [
[b1, 0, 0, 0, 0, b6, b7, b8],
]
a = [
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, a3_2, 0, 0, 0, 0, 0, 0],
[0, 0, a4_3, 0, 0, 0, 0, 0],
[0, 0, a5_3, a5_4, 0, 0, 0, 0],
[0, 0, 0, a6_4, a6_5, 0, 0, 0],
[0 , 0, 0, a7_4, a7_5, a7_6, 0, 0],
[0 , 0, 0, 0, a8_5, a8_6, a8_7, 0],
]
b = [
[0, 0, 0, 0, 0, b6, b7, b8],
]
for i in range(len(a)):
a[i][0] = ci[i] * φ(1,i+1) - sum(a[i])
for i in range(len(b)):
b[i][0] = φ(1) - sum(b[i])
case "res_10s":
c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 = 0, 1/2, 1/2, 1/3, 1/2, 1/3, 1/4, 3/10, 3/4, 1
ci = [c1, c2, c3, c4, c5, c6, c7, c8, c9, c10]
φ = Phi(h, ci, analytic_solution=True)
a3_2 = (c3**2 / c2) * φ(2,3)
a4_2 = (c4**2 / c2) * φ(2,4)
b8 = (c9*c10*φ(2) - 2*(c9+c10)*φ(3) + 6*φ(4)) / (c8 * (c8-c9) * (c8-c10))
b9 = (c8*c10*φ(2) - 2*(c8+c10)*φ(3) + 6*φ(4)) / (c9 * (c9-c8) * (c9-c10))
b10 = (c8*c9*φ(2) - 2*(c8+c9) *φ(3) + 6*φ(4)) / (c10 * (c10-c8) * (c10-c9))
a = [
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, a3_2, 0, 0, 0, 0, 0, 0, 0, 0],
[0, a4_2, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
]
b = [
[0, 0, 0, 0, 0, 0, 0, b8, b9, b10],
]
# a5_3, a5_4
# a6_3, a6_4
# a7_3, a7_4
for i in range(5, 8): # i=5,6,7 j,k ∈ {3, 4}, j != k
jk = [(3, 4), (4, 3)]
jk = list(permutations([3, 4], 2))
for j,k in jk:
a[i-1][j-1] = (-ci[i-1]**2 * ci[k-1] * φ(2,i) + 2*ci[i-1]**3 * φ(3,i)) / (ci[j-1] * (ci[j-1] - ci[k-1]))
for i in range(8, 11): # i=8,9,10 j,k,l ∈ {5, 6, 7}, j != k != l [ (5, 6, 7), (5, 7, 6), (6, 5, 7), (6, 7, 5), (7, 5, 6), (7, 6, 5)] 6 total coeff
jkl = list(permutations([5, 6, 7], 3))
for j,k,l in jkl:
a[i-1][j-1] = (ci[i-1]**2 * ci[k-1] * ci[l-1] * φ(2,i) - 2*ci[i-1]**3 * (ci[k-1] + ci[l-1]) * φ(3,i) + 6*ci[i-1]**4 * φ(4,i)) / (ci[j-1] * (ci[j-1] - ci[k-1]) * (ci[j-1] - ci[l-1]))
for i in range(len(a)):
a[i][0] = ci[i] * φ(1,i+1) - sum(a[i])
for i in range(len(b)):
b[i][0] = φ(1) - sum(b[i])
case "res_15s":
c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15 = 0, 1/2, 1/2, 1/3, 1/2, 1/5, 1/4, 18/25, 1/3, 3/10, 1/6, 90/103, 1/3, 3/10, 1/5
c1 = 0
c2 = c3 = c5 = 1/2
c4 = c9 = c13 = 1/3
c6 = c15 = 1/5
c7 = 1/4
c8 = 18/25
c10 = c14 = 3/10
c11 = 1/6
c12 = 90/103
c15 = 1/5
ci = [c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15]
φ = Phi(h, ci, analytic_solution=True)
a = [[0 for _ in range(15)] for _ in range(15)]
b = [[0 for _ in range(15)]]
for i in range(3, 5): # i=3,4 j=2
j=2
a[i-1][j-1] = (ci[i-1]**2 / ci[j-1]) * φ(j,i)
for i in range(5, 8): # i=5,6,7 j,k ∈ {3, 4}, j != k
jk = list(permutations([3, 4], 2))
for j,k in jk:
a[i-1][j-1] = (-ci[i-1]**2 * ci[k-1] * φ(2,i) + 2*ci[i-1]**3 * φ(3,i)) / prod_diff(ci[j-1], ci[k-1])
for i in range(8, 12): # i=8,9,10,11 j,k,l ∈ {5, 6, 7}, j != k != l [ (5, 6, 7), (5, 7, 6), (6, 5, 7), (6, 7, 5), (7, 5, 6), (7, 6, 5)] 6 total coeff
jkl = list(permutations([5, 6, 7], 3))
for j,k,l in jkl:
a[i-1][j-1] = (ci[i-1]**2 * ci[k-1] * ci[l-1] * φ(2,i) - 2*ci[i-1]**3 * (ci[k-1] + ci[l-1]) * φ(3,i) + 6*ci[i-1]**4 * φ(4,i)) / (ci[j-1] * (ci[j-1] - ci[k-1]) * (ci[j-1] - ci[l-1]))
for i in range(12,16): # i=12,13,14,15
jkld = list(permutations([8,9,10,11], 4))
for j,k,l,d in jkld:
numerator = -ci[i-1]**2 * ci[d-1]*ci[k-1]*ci[l-1] * φ(2,i) + 2*ci[i-1]**3 * (ci[d-1]*ci[k-1] + ci[d-1]*ci[l-1] + ci[k-1]*ci[l-1]) * φ(3,i) - 6*ci[i-1]**4 * (ci[d-1] + ci[k-1] + ci[l-1]) * φ(4,i) + 24*ci[i-1]**5 * φ(5,i)
a[i-1][j-1] = numerator / prod_diff(ci[j-1], ci[k-1], ci[l-1], ci[d-1])
"""ijkl = list(permutations([12,13,14,15], 4))
for i,j,k,l in ijkl:
#numerator = -ci[j-1]*ci[k-1]*ci[l-1]*φ(2) + 2*(ci[j-1]*ci[k-1] + ci[j-1]*ci[l-1] + ci[k-1]*ci[l-1])*φ(3) - 6*(ci[j-1] + ci[k-1] + ci[l-1])*φ(4) + 24*φ(5)
#b[0][i-1] = numerator / prod_diff(ci[i-1], ci[j-1], ci[k-1], ci[l-1])
for jjj in range (2, 6): # 2,3,4,5
b[0][i-1] += mu_numerator(jjj, ci[j-1], ci[i-1], ci[k-1], ci[l-1]) * φ(jjj)
b[0][i-1] /= prod_diff(ci[i-1], ci[j-1], ci[k-1], ci[l-1])"""
ijkl = list(permutations([12,13,14,15], 4))
for i,j,k,l in ijkl:
numerator = 0
for jjj in range(2, 6): # 2, 3, 4, 5
numerator += mu_numerator(jjj, ci[j-1], ci[i-1], ci[k-1], ci[l-1]) * φ(jjj)
#print(i,j,k,l)
b[0][i-1] = numerator / prod_diff(ci[i-1], ci[j-1], ci[k-1], ci[l-1])
ijkl = list(permutations([12, 13, 14, 15], 4))
selected_permutations = {}
sign = 1
for i in range(12, 16):
results = []
for j, k, l, d in ijkl:
if i != j and i != k and i != l and i != d:
numerator = 0
for jjj in range(2, 6): # 2, 3, 4, 5
numerator += mu_numerator(jjj, ci[j-1], ci[i-1], ci[k-1], ci[l-1]) * φ(jjj)
theta_value = numerator / prod_diff(ci[i-1], ci[j-1], ci[k-1], ci[l-1])
results.append((theta_value, (i, j, k, l, d)))
results.sort(key=lambda x: abs(x[0]))
for theta_value, permutation in results:
if sign == 1 and theta_value > 0:
selected_permutations[i] = (theta_value, permutation)
sign *= -1
break
elif sign == -1 and theta_value < 0:
selected_permutations[i] = (theta_value, permutation)
sign *= -1
break
for i in range(12, 16):
if i in selected_permutations:
theta_value, (i, j, k, l, d) = selected_permutations[i]
b[0][i-1] = theta_value
for i in selected_permutations:
theta_value, permutation = selected_permutations[i]
#print(f"i={i}")
#print(f" Selected Theta: {theta_value:.6f}, Permutation: {permutation}")
for i in range(len(a)):
a[i][0] = ci[i] * φ(1,i+1) - sum(a[i])
for i in range(len(b)):
b[i][0] = φ(1) - sum(b[i])
case "res_16s": # 6th order without weakened order conditions
c1 = 0
c2 = c3 = c5 = c8 = c12 = 1/2
c4 = c11 = c15 = 1/3
c6 = c9 = c13 = 1/5
c7 = c10 = c14 = 1/4
c16 = 1
ci = [c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16]
φ = Phi(h, ci, analytic_solution=True)
a3_2 = (1/2) * φ(2,3)
a = [[0 for _ in range(16)] for _ in range(16)]
b = [[0 for _ in range(16)]]
for i in range(3, 5): # i=3,4 j=2
j=2
a[i-1][j-1] = (ci[i-1]**2 / ci[j-1]) * φ(j,i)
for i in range(5, 8): # i=5,6,7 j,k ∈ {3, 4}, j != k
jk = list(permutations([3, 4], 2))
for j,k in jk:
a[i-1][j-1] = (-ci[i-1]**2 * ci[k-1] * φ(2,i) + 2*ci[i-1]**3 * φ(3,i)) / prod_diff(ci[j-1], ci[k-1])
for i in range(8, 12): # i=8,9,10,11 j,k,l ∈ {5, 6, 7}, j != k != l [ (5, 6, 7), (5, 7, 6), (6, 5, 7), (6, 7, 5), (7, 5, 6), (7, 6, 5)] 6 total coeff
jkl = list(permutations([5, 6, 7], 3))
for j,k,l in jkl:
a[i-1][j-1] = (ci[i-1]**2 * ci[k-1] * ci[l-1] * φ(2,i) - 2*ci[i-1]**3 * (ci[k-1] + ci[l-1]) * φ(3,i) + 6*ci[i-1]**4 * φ(4,i)) / (ci[j-1] * (ci[j-1] - ci[k-1]) * (ci[j-1] - ci[l-1]))
for i in range(12,17): # i=12,13,14,15,16
jkld = list(permutations([8,9,10,11], 4))
for j,k,l,d in jkld:
numerator = -ci[i-1]**2 * ci[d-1]*ci[k-1]*ci[l-1] * φ(2,i) + 2*ci[i-1]**3 * (ci[d-1]*ci[k-1] + ci[d-1]*ci[l-1] + ci[k-1]*ci[l-1]) * φ(3,i) - 6*ci[i-1]**4 * (ci[d-1] + ci[k-1] + ci[l-1]) * φ(4,i) + 24*ci[i-1]**5 * φ(5,i)
a[i-1][j-1] = numerator / prod_diff(ci[j-1], ci[k-1], ci[l-1], ci[d-1])
"""ijdkl = list(permutations([12,13,14,15,16], 5))
for i,j,d,k,l in ijdkl:
#numerator = -ci[j-1]*ci[k-1]*ci[l-1]*φ(2) + 2*(ci[j-1]*ci[k-1] + ci[j-1]*ci[l-1] + ci[k-1]*ci[l-1])*φ(3) - 6*(ci[j-1] + ci[k-1] + ci[l-1])*φ(4) + 24*φ(5)
b[0][i-1] = theta(2, ci[d-1], ci[i-1], ci[k-1], ci[j-1], ci[l-1]) * φ(2) + theta(3, ci[d-1], ci[i-1], ci[k-1], ci[j-1], ci[l-1])*φ(3) + theta(4, ci[d-1], ci[i-1], ci[k-1], ci[j-1], ci[l-1])*φ(4) + theta(5, ci[d-1], ci[i-1], ci[k-1], ci[j-1], ci[l-1])*φ(5) + theta(6, ci[d-1], ci[i-1], ci[k-1], ci[j-1], ci[l-1]) * φ(6)
#b[0][i-1] = numerator / prod_diff(ci[i-1], ci[j-1], ci[k-1], ci[l-1])"""
ijdkl = list(permutations([12,13,14,15,16], 5))
for i,j,d,k,l in ijdkl:
#numerator = -ci[j-1]*ci[k-1]*ci[l-1]*φ(2) + 2*(ci[j-1]*ci[k-1] + ci[j-1]*ci[l-1] + ci[k-1]*ci[l-1])*φ(3) - 6*(ci[j-1] + ci[k-1] + ci[l-1])*φ(4) + 24*φ(5)
#numerator = theta_numerator(2, ci[d-1], ci[i-1], ci[k-1], ci[j-1], ci[l-1]) * φ(2) + theta_numerator(3, ci[d-1], ci[i-1], ci[k-1], ci[j-1], ci[l-1])*φ(3) + theta_numerator(4, ci[d-1], ci[i-1], ci[k-1], ci[j-1], ci[l-1])*φ(4) + theta_numerator(5, ci[d-1], ci[i-1], ci[k-1], ci[j-1], ci[l-1])*φ(5) + theta_numerator(6, ci[d-1], ci[i-1], ci[k-1], ci[j-1], ci[l-1]) * φ(6)
#b[0][i-1] = numerator / (ci[i-1] *, ci[d-1], ci[j-1], ci[k-1], ci[l-1])
#b[0][i-1] = numerator / denominator(ci[i-1], ci[d-1], ci[j-1], ci[k-1], ci[l-1])
b[0][i-1] = theta(2, ci[d-1], ci[i-1], ci[k-1], ci[j-1], ci[l-1]) * φ(2) + theta(3, ci[d-1], ci[i-1], ci[k-1], ci[j-1], ci[l-1])*φ(3) + theta(4, ci[d-1], ci[i-1], ci[k-1], ci[j-1], ci[l-1])*φ(4) + theta(5, ci[d-1], ci[i-1], ci[k-1], ci[j-1], ci[l-1])*φ(5) + theta(6, ci[d-1], ci[i-1], ci[k-1], ci[j-1], ci[l-1]) * φ(6)
ijdkl = list(permutations([12,13,14,15,16], 5))
for i,j,d,k,l in ijdkl:
numerator = 0
for jjj in range(2, 7): # 2, 3, 4, 5, 6
numerator += theta_numerator(jjj, ci[d-1], ci[i-1], ci[k-1], ci[j-1], ci[l-1]) * φ(jjj)
#print(i,j,d,k,l)
b[0][i-1] = numerator / (ci[i-1] * (ci[i-1] - ci[k-1]) * (ci[i-1] - ci[j-1] * (ci[i-1] - ci[d-1]) * (ci[i-1] - ci[l-1])))
for i in range(len(a)):
a[i][0] = ci[i] * φ(1,i+1) - sum(a[i])
for i in range(len(b)):
b[i][0] = φ(1) - sum(b[i])
case "irk_exp_diag_2s":
c1 = 1/3
c2 = 2/3
c1 = float(get_extra_options_kv("c1", str(c1), extra_options))
c2 = float(get_extra_options_kv("c2", str(c2), extra_options))
lam = (1 - torch.exp(-c1 * h)) / h
a2_1 = ( torch.exp(c2*h) - torch.exp(c1*h)) / (h * torch.exp(2*c1*h))
b1 = (1 + c2*h + torch.exp(h) * (-1 + h - c2*h)) / ((c1-c2) * h**2 * torch.exp(c1*h))
b2 = -(1 + c1*h - torch.exp(h) * ( 1 - h + c1*h)) / ((c1-c2) * h**2 * torch.exp(c2*h))
a = [
[lam, 0],
[a2_1, lam],
]
b = [
[b1, b2],
]
ci = [c1, c2]
ci = ci[:]
if rk_type.startswith("lob") == False:
ci.append(1)
return a, b, ci, multistep_stages, FSAL
def gen_first_col_exp(a, b, c, φ):
    """Fill in the first column of exponential-RK tableaus `a` and `b` in place.

    The first entry of each row is chosen so that row i of `a` sums to
    c[i] * φ(1, i+1) and each row of `b` sums to φ(1) — the standard stiff
    first-column condition for exponential integrators. The mutated `a`
    and `b` are also returned for convenience.
    """
    for i, c_i in enumerate(c):
        a[i][0] = c_i * φ(1, i + 1) - sum(a[i])
    for row in b:
        row[0] = φ(1) - sum(row)
    return a, b
def rho(j, ci, ck, cl):
    """Weight on φ(j) (j in {2, 3, 4}) for three-node coefficient
    interpolation, divided by the node-difference product
    denominator(ci, ck, cl).

    Any j outside {2, 3, 4} raises UnboundLocalError (numerator never
    assigned), matching the original behavior.
    """
    if j == 2:
        numer = ck * cl
    elif j == 3:
        numer = -2 * (ck + cl)
    elif j == 4:
        numer = 6
    return numer / denominator(ci, ck, cl)
def mu(j, cd, ci, ck, cl):
    """Weight on φ(j) (j in {2..5}) for four-node coefficient interpolation,
    divided by the node-difference product denominator(ci, cd, ck, cl).

    Any j outside {2..5} raises UnboundLocalError, matching the original.
    See mu_numerator for the undivided form.
    """
    if j == 2:
        numer = -cd * ck * cl
    elif j == 3:
        numer = 2 * (cd * ck + cd * cl + ck * cl)
    elif j == 4:
        numer = -6 * (cd + ck + cl)
    elif j == 5:
        numer = 24
    return numer / denominator(ci, cd, ck, cl)
def mu_numerator(j, cd, ci, ck, cl):
    """Undivided φ(j)-weight numerator (j in {2..5}) for the four-node
    b-row interpolation; the caller performs the division by the
    node-difference product itself.

    `ci` is accepted but unused, keeping the same signature as `mu`.
    Any j outside {2..5} raises UnboundLocalError, matching the original.
    """
    if j == 2:
        numer = -cd * ck * cl
    elif j == 3:
        numer = 2 * (cd * ck + cd * cl + ck * cl)
    elif j == 4:
        numer = -6 * (cd + ck + cl)
    elif j == 5:
        numer = 24
    return numer
def theta_numerator(j, cd, ci, ck, cj, cl):
    """Undivided φ(j)-weight numerator (j in {2..6}) for the five-node
    b-row interpolation; division by the node-difference product is left
    to the caller.

    `ci` is accepted but unused, keeping the same signature as `theta`.
    Any j outside {2..6} raises UnboundLocalError, matching the original.
    """
    if j == 2:
        numer = -cj * cd * ck * cl
    elif j == 3:
        numer = 2 * (cj * ck * cd + cj * ck * cl + ck * cd * cl + cd * cl * cj)
    elif j == 4:
        numer = -6 * (cj * ck + cj * cd + cj * cl + ck * cd + ck * cl + cd * cl)
    elif j == 5:
        numer = 24 * (cj + ck + cl + cd)
    elif j == 6:
        numer = -120
    return numer
def theta(j, cd, ci, ck, cj, cl):
    """Weight on φ(j) (j in {2..6}) for the five-node b-row interpolation.

    Same numerator as theta_numerator, divided by the node-difference
    product ci*(ci-cj)*(ci-ck)*(ci-cl)*(ci-cd). Any j outside {2..6}
    raises UnboundLocalError, as before.

    Fix: the original ended with a second, unreachable
    `return numerator / denominator(ci, cj, ck, cl, cd)` line after the
    first return; that dead statement has been removed.
    """
    if j == 2:
        numerator = -cj * cd * ck * cl
    elif j == 3:
        numerator = 2 * (cj * ck * cd + cj * ck * cl + ck * cd * cl + cd * cl * cj)
    elif j == 4:
        numerator = -6 * (cj * ck + cj * cd + cj * cl + ck * cd + ck * cl + cd * cl)
    elif j == 5:
        numerator = 24 * (cj + ck + cl + cd)
    elif j == 6:
        numerator = -120
    return numerator / (ci * (ci - cj) * (ci - ck) * (ci - cl) * (ci - cd))
def prod_diff(cj, ck, cl=None, cd=None, cblah=None):
    """Return cj*(cj-ck), optionally times (cj-cl) and (cj-cd) when those
    nodes are supplied — the node-difference product used by the divided
    differences above.

    `cblah` is accepted but never used, preserving the original signature.
    """
    result = cj * (cj - ck)
    if cl is not None or cd is not None:
        result *= (cj - cl)
    if cd is not None:
        result *= (cj - cd)
    return result
def denominator(ci, *args):
    """Return ci multiplied by (ci - arg) for every extra node argument:
    ci * Π_k (ci - args[k])."""
    total = ci
    for other in args:
        total = total * (ci - other)
    return total
def check_condition_4_2(nodes):
    """Check an order condition on the four nodes c12..c15.

    Computes the alternating sum of the elementary symmetric polynomials
    e1..e4 of the nodes, weighted 1/5, 1/4, 1/3, 1/2 respectively, and
    returns True when it equals 1/6 to within 1e-6.
    """
    c12, c13, c14, c15 = nodes
    e1 = c12 + c13 + c14 + c15
    e2 = c12 * c13 + c12 * c14 + c12 * c15 + c13 * c14 + c13 * c15 + c14 * c15
    e3 = c12 * c13 * c14 + c12 * c13 * c15 + c12 * c14 * c15 + c13 * c14 * c15
    e4 = c12 * c13 * c14 * c15
    value = e1 / 5 - e2 / 4 + e3 / 3 - e4 / 2
    return abs(value - (1 / 6)) < 1e-6
================================================
FILE: legacy/rk_guide_func.py
================================================
import torch
import torch.nn.functional as F
from typing import Tuple
from einops import rearrange
from .sigmas import get_sigmas
from .latents import hard_light_blend, normalize_latent, initialize_or_scale
from .rk_method import RK_Method
from .helper import get_extra_options_kv, extra_options_flag, get_cosine_similarity, get_extra_options_list
import itertools
def normalize_inputs(x, y0, y0_inv, guide_mode, extra_options):
    """Optionally normalize the input latent and guides before sampling.

    When guide_mode is "epsilon_guide_mean_std_from_bkg", the foreground
    guide y0 is renormalized against the background guide y0_inv. The
    "input_norm" extra option then selects which statistics of x to reset
    (channelwise and/or mean), always targeting the std given by
    "input_std" (default 1.0). Returns the possibly-modified (x, y0, y0_inv).
    """
    if guide_mode == "epsilon_guide_mean_std_from_bkg":
        y0 = normalize_latent(y0, y0_inv)
    input_norm = get_extra_options_kv("input_norm", "", extra_options)
    input_std = float(get_extra_options_kv("input_std", "1.0", extra_options))
    # Each mode maps to the keyword arguments for normalize_latent; the
    # original used one `if` per mode, which are mutually exclusive.
    norm_modes = {
        "input_ch_mean_set_std_to": dict(set_std=input_std),
        "input_ch_set_std_to":      dict(set_std=input_std, mean=False),
        "input_mean_set_std_to":    dict(set_std=input_std, channelwise=False),
        "input_std_set_std_to":     dict(set_std=input_std, mean=False, channelwise=False),
    }
    if input_norm in norm_modes:
        x = normalize_latent(x, **norm_modes[input_norm])
    return x, y0, y0_inv
class LatentGuide:
def __init__(self, guides, x, model, sigmas, UNSAMPLE, LGW_MASK_RESCALE_MIN, extra_options, device='cuda', dtype=torch.float64, max_steps=10000):
    """Unpack the guide configuration and precompute per-step weighted masks.

    guides: optional 15-tuple (mode, weights/schedulers/masks/denoise cutoffs)
        — presumably produced by the guides node; verify against caller.
    x: latent tensor used only for shape/device/dtype of the derived tensors.
    sigmas: sigma schedule; sigmas[0] > sigmas[1] marks forward sampling.

    Fixes vs. original: `== None` comparisons replaced with `is None`
    (PEP 8; also avoids relying on tensor.__eq__ semantics when the guide
    weights arrive as tensors), and a dead `None, None` assignment that was
    immediately overwritten has been removed. No behavior change.
    """
    self.model = model
    self.sigma_min = model.inner_model.inner_model.model_sampling.sigma_min.to(dtype)
    self.sigma_max = model.inner_model.inner_model.model_sampling.sigma_max.to(dtype)
    self.sigmas = sigmas
    self.UNSAMPLE = UNSAMPLE
    self.SAMPLE = (sigmas[0] > sigmas[1])  # descending sigmas => forward sampling
    self.extra_options = extra_options
    self.y0 = torch.zeros_like(x)
    self.y0_inv = torch.zeros_like(x)
    self.guide_mode = ""
    self.mask = None
    self.mask_inv = None
    self.latent_guide = None
    self.latent_guide_inv = None
    self.lgw_masks = []
    self.lgw_masks_inv = []
    self.lgw, self.lgw_inv = [torch.full_like(sigmas, 0.) for _ in range(2)]
    self.guide_cossim_cutoff_, self.guide_bkg_cossim_cutoff_ = 1.0, 1.0
    latent_guide_weight, latent_guide_weight_inv = 0., 0.
    latent_guide_weights = torch.zeros_like(sigmas)
    latent_guide_weights_inv = torch.zeros_like(sigmas)
    if guides is not None:
        self.guide_mode, latent_guide_weight, latent_guide_weight_inv, latent_guide_weights, latent_guide_weights_inv, self.latent_guide, self.latent_guide_inv, latent_guide_mask, latent_guide_mask_inv, scheduler_, scheduler_inv_, steps_, steps_inv_, denoise_, denoise_inv_ = guides
        self.mask, self.mask_inv = latent_guide_mask, latent_guide_mask_inv
        self.guide_cossim_cutoff_, self.guide_bkg_cossim_cutoff_ = denoise_, denoise_inv_
        # Fall back to a scheduler-derived weight curve when no explicit
        # weights were provided in the guides tuple.
        if latent_guide_weights is None:
            latent_guide_weights = get_sigmas(model, scheduler_, steps_, 1.0).to(x.dtype)
        if latent_guide_weights_inv is None:
            latent_guide_weights_inv = get_sigmas(model, scheduler_inv_, steps_inv_, 1.0).to(x.dtype)
    latent_guide_weights = initialize_or_scale(latent_guide_weights, latent_guide_weight, max_steps).to(dtype)
    latent_guide_weights_inv = initialize_or_scale(latent_guide_weights_inv, latent_guide_weight_inv, max_steps).to(dtype)
    # Pad so per-step indexing never runs off the end of the weight curves.
    latent_guide_weights = F.pad(latent_guide_weights, (0, max_steps), value=0.0)
    latent_guide_weights_inv = F.pad(latent_guide_weights_inv, (0, max_steps), value=0.0)
    if latent_guide_weights is not None:
        self.lgw = latent_guide_weights.to(x.device)
    if latent_guide_weights_inv is not None:
        self.lgw_inv = latent_guide_weights_inv.to(x.device)
    self.mask, LGW_MASK_RESCALE_MIN = prepare_mask(x, self.mask, LGW_MASK_RESCALE_MIN)
    if self.mask_inv is not None:
        self.mask_inv, LGW_MASK_RESCALE_MIN = prepare_mask(x, self.mask_inv, LGW_MASK_RESCALE_MIN)
    elif not self.SAMPLE:
        # NOTE(review): background mask defaults to the complement of the
        # foreground mask only when unsampling — confirm this is intended.
        self.mask_inv = (1-self.mask)
    # Precompute one weighted (mask, inverse-mask) pair per sampling step.
    for step in range(len(self.sigmas)-1):
        lgw_mask, lgw_mask_inv = prepare_weighted_masks(self.mask, self.mask_inv, self.lgw[step], self.lgw_inv[step], self.latent_guide, self.latent_guide_inv, LGW_MASK_RESCALE_MIN)
        self.lgw_masks.append(lgw_mask)
        self.lgw_masks_inv.append(lgw_mask_inv)
def init_guides(self, x, noise_sampler, latent_guide=None, latent_guide_inv=None):
    """Resolve the guide latents y0 / y0_inv and possibly rewrite x.

    latent_guide / latent_guide_inv default to the instance's stored guides;
    dict guides are assumed to be ComfyUI-style {'samples': tensor} latents
    and are run through the model's process_latent_in. When sampling
    (self.SAMPLE) the guides become y0 / y0_inv; when unsampling they are
    masked into x; otherwise they overwrite x outright (see warning below).
    Returns the possibly-modified x.

    Fix: `type(...) == dict` replaced with `isinstance(...)` (idiomatic,
    and also accepts dict subclasses). No other behavior change.
    """
    self.y0, self.y0_inv = torch.zeros_like(x), torch.zeros_like(x)
    latent_guide = self.latent_guide if latent_guide is None else latent_guide
    latent_guide_inv = self.latent_guide_inv if latent_guide_inv is None else latent_guide_inv
    if latent_guide is not None:
        if isinstance(latent_guide, dict):
            latent_guide_samples = self.model.inner_model.inner_model.process_latent_in(latent_guide['samples']).clone().to(x.device)
        else:
            latent_guide_samples = latent_guide
        if self.SAMPLE:
            self.y0 = latent_guide_samples
        elif self.UNSAMPLE: # and self.mask is not None:
            x = (1-self.mask) * x + self.mask * latent_guide_samples
        else:
            x = latent_guide_samples
    if latent_guide_inv is not None:
        if isinstance(latent_guide_inv, dict):
            latent_guide_inv_samples = self.model.inner_model.inner_model.process_latent_in(latent_guide_inv['samples']).clone().to(x.device)
        else:
            latent_guide_inv_samples = latent_guide_inv
        if self.SAMPLE:
            self.y0_inv = latent_guide_inv_samples
        elif self.UNSAMPLE: # and self.mask is not None:
            x = (1-self.mask_inv) * x + self.mask_inv * latent_guide_inv_samples #fixed old approach, which was mask, (1-mask)
        else:
            x = latent_guide_inv_samples #THIS COULD LEAD TO WEIRD BEHAVIOR! OVERWRITING X WITH LG_INV AFTER SETTING TO LG above!
    if self.UNSAMPLE and not self.SAMPLE: #sigma_next > sigma:
        # Unsampling targets standardized noise rather than the guide latents.
        self.y0 = noise_sampler(sigma=self.sigma_max, sigma_next=self.sigma_min)
        self.y0 = (self.y0 - self.y0.mean()) / self.y0.std()
        self.y0_inv = noise_sampler(sigma=self.sigma_max, sigma_next=self.sigma_min)
        self.y0_inv = (self.y0_inv - self.y0_inv.mean()) / self.y0_inv.std()
    x, self.y0, self.y0_inv = normalize_inputs(x, self.y0, self.y0_inv, self.guide_mode, self.extra_options)
    return x
def process_guides_substep(self, x_0, x_, eps_, data_, row, step, sigma, sigma_next, sigma_down, s_, unsample_resample_scale, rk, rk_type, extra_options, frame_weights_grp=None):
y0 = self.y0
if self.y0.shape[0] > 1:
y0 = self.y0[min(step, self.y0.shape[0]-1)].unsqueeze(0)
y0_inv = self.y0_inv
lgw_mask = self.lgw_masks[step].clone()
lgw_mask_inv = self.lgw_masks_inv[step].clone() if self.lgw_masks_inv is not None else None
lgw = self.lgw[step]
lgw_inv = self.lgw_inv[step]
latent_guide = self.latent_guide
latent_guide_inv = self.latent_guide_inv
guide_mode = self.guide_mode
UNSAMPLE = self.UNSAMPLE
if x_0.dim() == 5 and frame_weights_grp is not None:
apply_frame_weights(lgw_mask, frame_weights_grp[0])
apply_frame_weights(lgw_mask_inv, frame_weights_grp[1])
if self.guide_mode:
data_norm = data_[row] - data_[row].mean(dim=(-2,-1), keepdim=True)
y0_norm = y0 - y0.mean(dim=(-2,-1), keepdim=True)
y0_inv_norm = y0_inv - y0_inv.mean(dim=(-2,-1), keepdim=True)
y0_cossim = get_cosine_similarity(data_norm*lgw_mask, y0_norm *lgw_mask)
y0_cossim_inv = get_cosine_similarity(data_norm*lgw_mask_inv, y0_inv_norm*lgw_mask_inv)
if y0_cossim < self.guide_cossim_cutoff_ or y0_cossim_inv < self.guide_bkg_cossim_cutoff_:
lgw_mask_cossim, lgw_mask_cossim_inv = lgw_mask, lgw_mask_inv
if y0_cossim >= self.guide_cossim_cutoff_:
lgw_mask_cossim = torch.zeros_like(lgw_mask)
if y0_cossim_inv >= self.guide_bkg_cossim_cutoff_:
lgw_mask_cossim_inv = torch.zeros_like(lgw_mask_inv)
lgw_mask = lgw_mask_cossim
lgw_mask_inv = lgw_mask_cossim_inv
else:
return eps_, x_
else:
return eps_, x_
if self.UNSAMPLE and RK_Method.is_exponential(rk_type):
if not (extra_options_flag("disable_power_unsample", extra_options) or extra_options_flag("disable_power_resample", extra_options)):
extra_options += "\npower_unsample\npower_resample\n"
if not extra_options_flag("disable_lgw_scaling_substep_ch_mean_std", extra_options):
extra_options += "\nsubstep_eps_ch_mean_std\n"
s_in = x_0.new_ones([x_0.shape[0]])
eps_orig = eps_.clone()
if extra_options_flag("dynamic_guides_mean_std", extra_options):
y_shift, y_inv_shift = normalize_latent([y0, y0_inv], [data_, data_])
y0 = y_shift
if extra_options_flag("dynamic_guides_inv", extra_options):
y0_inv = y_inv_shift
if extra_options_flag("dynamic_guides_mean", extra_options):
y_shift, y_inv_shift = normalize_latent([y0, y0_inv], [data_, data_], std=False)
y0 = y_shift
if extra_options_flag("dynamic_guides_inv", extra_options):
y0_inv = y_inv_shift
if "data" == guide_mode:
y0_tmp = y0.clone()
if latent_guide_inv is not None:
y0_tmp = (1-lgw_mask) * data_[row] + lgw_mask * y0
y0_tmp = (1-lgw_mask_inv) * y0_tmp + lgw_mask_inv * y0_inv
x_[row+1] = y0_tmp + eps_[row]
if guide_mode == "data_projection":
d_lerp = data_[row] + lgw_mask * (y0-data_[row]) + lgw_mask_inv * (y0_inv-data_[row])
d_collinear_d_lerp = get_collinear(data_[row], d_lerp)
d_lerp_ortho_d = get_orthogonal(d_lerp, data_[row])
data_[row] = d_collinear_d_lerp + d_lerp_ortho_d
x_[row+1] = data_[row] + eps_[row] * sigma
elif "epsilon" in guide_mode:
if sigma > sigma_next:
tol_value = float(get_extra_options_kv("tol", "-1.0", extra_options))
if tol_value >= 0 and (lgw > 0 or lgw_inv > 0):
for b, c in itertools.product(range(x_0.shape[0]), range(x_0.shape[1])):
current_diff = torch.norm(data_[row][b][c] - y0 [b][c])
current_diff_inv = torch.norm(data_[row][b][c] - y0_inv[b][c])
lgw_scaled = torch.nan_to_num(1-(tol_value/current_diff), 0)
lgw_scaled_inv = torch.nan_to_num(1-(tol_value/current_diff_inv), 0)
lgw_tmp = min(lgw , lgw_scaled)
lgw_tmp_inv = min(lgw_inv, lgw_scaled_inv)
lgw_mask_clamp = torch.clamp(lgw_mask, max=lgw_tmp)
lgw_mask_clamp_inv = torch.clamp(lgw_mask_inv, max=lgw_tmp_inv)
eps_row, eps_row_inv = get_guide_epsilon_substep(x_0, x_, y0, y0_inv, s_, row, rk_type, b, c)
eps_[row][b][c] = eps_[row][b][c] + lgw_mask_clamp[b][c] * (eps_row - eps_[row][b][c]) + lgw_mask_clamp_inv[b][c] * (eps_row_inv - eps_[row][b][c])
elif guide_mode == "epsilon_projection":
eps_row, eps_row_inv = get_guide_epsilon_substep(x_0, x_, y0, y0_inv, s_, row, rk_type)
if extra_options_flag("eps_proj_v2", extra_options):
eps_row_lerp_fg = eps_[row] + lgw_mask * (eps_row-eps_[row])
eps_row_lerp_bg = eps_[row] + lgw_mask_inv * (eps_row_inv-eps_[row])
eps_collinear_eps_lerp_fg = get_collinear(eps_[row], eps_row_lerp_fg)
eps_lerp_ortho_eps_fg = get_orthogonal(eps_row_lerp_fg, eps_[row])
eps_collinear_eps_lerp_bg = get_collinear(eps_[row], eps_row_lerp_bg)
eps_lerp_ortho_eps_bg = get_orthogonal(eps_row_lerp_bg, eps_[row])
eps_[row] = eps_[row] + lgw_mask * (eps_collinear_eps_lerp_fg + eps_lerp_ortho_eps_fg - eps_[row]) + lgw_mask_inv * (eps_collinear_eps_lerp_bg + eps_lerp_ortho_eps_bg - eps_[row])
elif extra_options_flag("eps_proj_v3", extra_options):
eps_collinear_eps_lerp_fg = get_collinear(eps_[row], eps_row)
eps_lerp_ortho_eps_fg = get_orthogonal(eps_row, eps_[row])
eps_collinear_eps_lerp_bg = get_collinear(eps_[row], eps_row_inv)
eps_lerp_ortho_eps_bg = get_orthogonal(eps_row_inv, eps_[row])
eps_[row] = eps_[row] + lgw_mask * (eps_collinear_eps_lerp_fg + eps_lerp_ortho_eps_fg - eps_[row]) + lgw_mask_inv * (eps_collinear_eps_lerp_bg + eps_lerp_ortho_eps_bg - eps_[row])
elif extra_options_flag("eps_proj_v5", extra_options):
eps2g_collin = get_collinear(eps_[row], eps_row)
g2eps_ortho = get_orthogonal(eps_row, eps_[row])
g2eps_collin = get_collinear(eps_row, eps_[row])
eps2g_ortho = get_orthogonal(eps_[row], eps_row)
eps2i_collin = get_collinear(eps_[row], eps_row_inv)
i2eps_ortho = get_orthogonal(eps_row_inv, eps_[row])
i2eps_collin = get_collinear(eps_row_inv, eps_[row])
eps2i_ortho = get_orthogonal(eps_[row], eps_row_inv)
#eps_[row] = (eps2g_collin+g2eps_ortho) + (g2eps_collin+eps2g_ortho) + (eps2i_collin+i2eps_ortho) + (i2eps_collin+eps2i_ortho)
#eps_[row] = eps_[row] + lgw_mask * (eps2g_collin+g2eps_ortho) + (1-lgw_mask) * (g2eps_collin+eps2g_ortho) + lgw_mask_inv * (eps2i_collin+i2eps_ortho) + (1-lgw_mask_inv) * (i2eps_collin+eps2i_ortho)
eps_[row] = lgw_mask * (eps2g_collin+g2eps_ortho) - lgw_mask * (g2eps_collin+eps2g_ortho) + lgw_mask_inv * (eps2i_collin+i2eps_ortho) - lgw_mask_inv * (i2eps_collin+eps2i_ortho)
#eps_[row] = eps_[row] + lgw_mask * (eps_collinear_eps_lerp_fg + eps_lerp_ortho_eps_fg - eps_[row]) + lgw_mask_inv * (eps_collinear_eps_lerp_bg + eps_lerp_ortho_eps_bg - eps_[row])
elif extra_options_flag("eps_proj_v4a", extra_options):
eps_row_lerp = eps_[row] + lgw_mask * (eps_row-eps_[row]) + lgw_mask_inv * (eps_row_inv-eps_[row])
eps_collinear_eps_lerp = get_collinear(eps_[row], eps_row_lerp)
eps_lerp_ortho_eps = get_orthogonal(eps_row_lerp, eps_[row])
eps_[row] = (1 - torch.clamp(lgw_mask + lgw_mask_inv, max=1.0)) * eps_[row] + torch.clamp((lgw_mask + lgw_mask_inv), max=1.0) * (eps_collinear_eps_lerp + eps_lerp_ortho_eps)
elif extra_options_flag("eps_proj_v4b", extra_options):
eps_row_lerp = eps_[row] + lgw_mask * (eps_row-eps_[row]) + lgw_mask_inv * (eps_row_inv-eps_[row])
eps_collinear_eps_lerp = get_collinear(eps_[row], eps_row_lerp)
eps_lerp_ortho_eps = get_orthogonal(eps_row_lerp, eps_[row])
eps_[row] = (1 - (lgw_mask + lgw_mask_inv)/2) * eps_[row] + ((lgw_mask + lgw_mask_inv)/2) * (eps_collinear_eps_lerp + eps_lerp_ortho_eps)
elif extra_options_flag("eps_proj_v4c", extra_options):
eps_row_lerp = eps_[row] + lgw_mask * (eps_row-eps_[row]) + lgw_mask_inv * (eps_row_inv-eps_[row])
eps_collinear_eps_lerp = get_collinear(eps_[row], eps_row_lerp)
eps_lerp_ortho_eps = get_orthogonal(eps_row_lerp, eps_[row])
lgw_mask_sum = (lgw_mask + lgw_mask_inv)
eps_[row] = (1 - (lgw_mask + lgw_mask_inv)/2) * eps_[row] + ((lgw_mask + lgw_mask_inv)/2) * (eps_collinear_eps_lerp + eps_lerp_ortho_eps)
elif extra_options_flag("eps_proj_v4e", extra_options):
eps_row_lerp = eps_[row] + lgw_mask * (eps_row-eps_[row]) + lgw_mask_inv * (eps_row_inv-eps_[row])
eps_collinear_eps_lerp = get_collinear(eps_[row], eps_row_lerp)
eps_lerp_ortho_eps = get_orthogonal(eps_row_lerp, eps_[row])
eps_sum = eps_collinear_eps_lerp + eps_lerp_ortho_eps
eps_[row] = eps_[row] + self.mask * (eps_sum - eps_[row]) + self.mask_inv * (eps_sum - eps_[row])
elif extra_options_flag("eps_proj_self1", extra_options):
eps_row_lerp = eps_[row] + self.mask * (eps_row-eps_[row]) + self.mask_inv * (eps_row_inv-eps_[row])
eps_collinear_eps_lerp = get_collinear(eps_[row], eps_[row])
eps_lerp_ortho_eps = get_orthogonal(eps_[row], eps_[row])
eps_[row] = eps_collinear_eps_lerp + eps_lerp_ortho_eps
elif extra_options_flag("eps_proj_v4z", extra_options):
eps_row_lerp = eps_[row] + self.mask * (eps_row-eps_[row]) + self.mask_inv * (eps_row_inv-eps_[row])
eps_collinear_eps_lerp = get_collinear(eps_[row], eps_row_lerp)
eps_lerp_ortho_eps = get_orthogonal(eps_row_lerp, eps_[row])
peak = max(lgw, lgw_inv)
lgw_mask_sum = (lgw_mask + lgw_mask_inv)
eps_sum = eps_collinear_eps_lerp + eps_lerp_ortho_eps
#NOT FINISHED!!!
#eps_[row] = eps_[row] + lgw_mask * (eps_sum - eps_[row]) + lgw_mask_inv * (eps_sum - eps_[row])
elif extra_options_flag("eps_proj_v5", extra_options):
eps_row_lerp = eps_[row] + lgw_mask * (eps_row-eps_[row]) + lgw_mask_inv * (eps_row_inv-eps_[row])
eps_collinear_eps_lerp = get_collinear(eps_[row], eps_row_lerp)
eps_lerp_ortho_eps = get_orthogonal(eps_row_lerp, eps_[row])
eps_[row] = ((lgw_mask + lgw_mask_inv)==0) * eps_[row] + ((lgw_mask + lgw_mask_inv)>0) * (eps_collinear_eps_lerp + eps_lerp_ortho_eps)
elif extra_options_flag("eps_proj_v6", extra_options):
eps_row_lerp = eps_[row] + lgw_mask * (eps_row-eps_[row]) + lgw_mask_inv * (eps_row_inv-eps_[row])
eps_collinear_eps_lerp = get_collinear(eps_[row], eps_row_lerp)
eps_lerp_ortho_eps = get_orthogonal(eps_row_lerp, eps_[row])
eps_[row] = ((lgw_mask * lgw_mask_inv)==0) * eps_[row] + ((lgw_mask * lgw_mask_inv)>0) * (eps_collinear_eps_lerp + eps_lerp_ortho_eps)
elif extra_options_flag("eps_proj_old_default", extra_options):
eps_row_lerp = eps_[row] + lgw_mask * (eps_row-eps_[row]) + lgw_mask_inv * (eps_row_inv-eps_[row])
#eps_row_lerp = eps_[row] + lgw_mask * (eps_row-eps_[row]) + (1-lgw_mask) * (eps_row_inv-eps_[row])
eps_collinear_eps_lerp = get_collinear(eps_[row], eps_row_lerp)
eps_lerp_ortho_eps = get_orthogonal(eps_row_lerp, eps_[row])
eps_[row] = eps_collinear_eps_lerp + eps_lerp_ortho_eps
else: #elif extra_options_flag("eps_proj_v4d", extra_options):
#if row > 0:
#lgw_mask_factor = float(get_extra_options_kv("substep_lgw_mask_factor", "1.0", extra_options))
#lgw_mask_inv_factor = float(get_extra_options_kv("substep_lgw_mask_inv_factor", "1.0", extra_options))
lgw_mask_factor = 1
if extra_options_flag("substep_eps_proj_scaling", extra_options):
lgw_mask_factor = 1/(row+1)
if extra_options_flag("substep_eps_proj_factors", extra_options):
#value_str = get_extra_options_list("substep_eps_proj_factors", "", extra_options)
#float_list = [float(item.strip()) for item in value_str.split(',') if item.strip()]
float_list = get_extra_options_list("substep_eps_proj_factors", "", extra_options, ret_type=float)
lgw_mask_factor = float_list[row]
eps_row_lerp = eps_[row] + self.mask * (eps_row-eps_[row]) + (1-self.mask) * (eps_row_inv-eps_[row])
eps_collinear_eps_lerp = get_collinear(eps_[row], eps_row_lerp)
eps_lerp_ortho_eps = get_orthogonal(eps_row_lerp, eps_[row])
eps_sum = eps_collinear_eps_lerp + eps_lerp_ortho_eps
eps_[row] = eps_[row] + lgw_mask_factor*lgw_mask * (eps_sum - eps_[row]) + lgw_mask_factor*lgw_mask_inv * (eps_sum - eps_[row])
elif extra_options_flag("disable_lgw_scaling", extra_options):
eps_row, eps_row_inv = get_guide_epsilon_substep(x_0, x_, y0, y0_inv, s_, row, rk_type)
eps_[row] = eps_[row] + lgw_mask * (eps_row - eps_[row]) + lgw_mask_inv * (eps_row_inv - eps_[row])
elif (lgw > 0 or lgw_inv > 0): # default old channelwise epsilon
avg, avg_inv = 0, 0
for b, c in itertools.product(range(x_0.shape[0]), range(x_0.shape[1])):
avg += torch.norm(data_[row][b][c] - y0 [b][c])
avg_inv += torch.norm(data_[row][b][c] - y0_inv[b][c])
avg /= x_0.shape[1]
avg_inv /= x_0.shape[1]
for b, c in itertools.product(range(x_0.shape[0]), range(x_0.shape[1])):
ratio = torch.nan_to_num(torch.norm(data_[row][b][c] - y0 [b][c]) / avg, 0)
ratio_inv = torch.nan_to_num(torch.norm(data_[row][b][c] - y0_inv[b][c]) / avg_inv, 0)
eps_row, eps_row_inv = get_guide_epsilon_substep(x_0, x_, y0, y0_inv, s_, row, rk_type, b, c)
eps_[row][b][c] = eps_[row][b][c] + ratio * lgw_mask[b][c] * (eps_row - eps_[row][b][c]) + ratio_inv * lgw_mask_inv[b][c] * (eps_row_inv - eps_[row][b][c])
temporal_smoothing = float(get_extra_options_kv("temporal_smoothing", "0.0", extra_options))
if temporal_smoothing > 0:
eps_[row] = apply_temporal_smoothing(eps_[row], temporal_smoothing)
elif (UNSAMPLE or guide_mode in {"resample", "unsample"}) and (lgw > 0 or lgw_inv > 0):
cvf = rk.get_epsilon(x_0, x_[row+1], y0, sigma, s_[row], sigma_down, unsample_resample_scale, extra_options)
if UNSAMPLE and sigma > sigma_next and latent_guide_inv is not None:
cvf_inv = rk.get_epsilon(x_0, x_[row+1], y0_inv, sigma, s_[row], sigma_down, unsample_resample_scale, extra_options)
else:
cvf_inv = torch.zeros_like(cvf)
tol_value = float(get_extra_options_kv("tol", "-1.0", extra_options))
if tol_value >= 0:
for b, c in itertools.product(range(x_0.shape[0]), range(x_0.shape[1])):
current_diff = torch.norm(data_[row][b][c] - y0 [b][c])
current_diff_inv = torch.norm(data_[row][b][c] - y0_inv[b][c])
lgw_scaled = torch.nan_to_num(1-(tol_value/current_diff), 0)
lgw_scaled_inv = torch.nan_to_num(1-(tol_value/current_diff_inv), 0)
lgw_tmp = min(lgw , lgw_scaled)
lgw_tmp_inv = min(lgw_inv, lgw_scaled_inv)
lgw_mask_clamp = torch.clamp(lgw_mask, max=lgw_tmp)
lgw_mask_clamp_inv = torch.clamp(lgw_mask_inv, max=lgw_tmp_inv)
eps_[row][b][c] = eps_[row][b][c] + lgw_mask_clamp[b][c] * (cvf[b][c] - eps_[row][b][c]) + lgw_mask_clamp_inv[b][c] * (cvf_inv[b][c] - eps_[row][b][c])
elif extra_options_flag("disable_lgw_scaling", extra_options):
eps_[row] = eps_[row] + lgw_mask * (cvf - eps_[row]) + lgw_mask_inv * (cvf_inv - eps_[row])
else:
avg, avg_inv = 0, 0
for b, c in itertools.product(range(x_0.shape[0]), range(x_0.shape[1])):
avg += torch.norm(lgw_mask [b][c] * data_[row][b][c] - lgw_mask [b][c] * y0 [b][c])
avg_inv += torch.norm(lgw_mask_inv[b][c] * data_[row][b][c] - lgw_mask_inv[b][c] * y0_inv[b][c])
avg /= x_0.shape[1]
avg_inv /= x_0.shape[1]
for b, c in itertools.product(range(x_0.shape[0]), range(x_0.shape[1])):
ratio = torch.nan_to_num(torch.norm(lgw_mask [b][c] * data_[row][b][c] - lgw_mask [b][c] * y0 [b][c]) / avg, 0)
ratio_inv = torch.nan_to_num(torch.norm(lgw_mask_inv[b][c] * data_[row][b][c] - lgw_mask_inv[b][c] * y0_inv[b][c]) / avg_inv, 0)
eps_[row][b][c] = eps_[row][b][c] + ratio * lgw_mask[b][c] * (cvf[b][c] - eps_[row][b][c]) + ratio_inv * lgw_mask_inv[b][c] * (cvf_inv[b][c] - eps_[row][b][c])
if extra_options_flag("substep_eps_ch_mean_std", extra_options):
eps_[row] = normalize_latent(eps_[row], eps_orig[row])
if extra_options_flag("substep_eps_ch_mean", extra_options):
eps_[row] = normalize_latent(eps_[row], eps_orig[row], std=False)
if extra_options_flag("substep_eps_ch_std", extra_options):
eps_[row] = normalize_latent(eps_[row], eps_orig[row], mean=False)
if extra_options_flag("substep_eps_mean_std", extra_options):
eps_[row] = normalize_latent(eps_[row], eps_orig[row], channelwise=False)
if extra_options_flag("substep_eps_mean", extra_options):
eps_[row] = normalize_latent(eps_[row], eps_orig[row], std=False, channelwise=False)
if extra_options_flag("substep_eps_std", extra_options):
eps_[row] = normalize_latent(eps_[row], eps_orig[row], mean=False, channelwise=False)
return eps_, x_
    @torch.no_grad
    def process_guides_poststep(self, x, denoised, eps, step, extra_options):
        """Apply guide corrections AFTER a full sampler step, then rebuild x = denoised + eps.

        Args:
            x: latent after the step (returned adjusted, or unchanged).
            denoised: the model's denoised prediction for this step.
            eps: residual such that x = denoised + eps.
            step: step index; selects the per-step guide weights and masks.
            extra_options: option string parsed via extra_options_flag / get_extra_options_kv.

        Returns:
            The (possibly) guide-adjusted latent x.
        """
        x_orig = x.clone()
        mean_weight = float(get_extra_options_kv("mean_weight", "0.01", extra_options))
        y0 = self.y0
        if self.y0.shape[0] > 1:
            # Per-step guide stack: take the frame for this step, clamped to the last one.
            y0 = self.y0[min(step, self.y0.shape[0]-1)].unsqueeze(0)
        y0_inv = self.y0_inv
        lgw_mask = self.lgw_masks[step].clone()
        lgw_mask_inv = self.lgw_masks_inv[step].clone() if self.lgw_masks_inv is not None else None
        mask = self.mask #needed for bitwise mask below
        # lgw / lgw_inv are fetched for parity with the substep path; not used below.
        lgw = self.lgw[step]
        lgw_inv = self.lgw_inv[step]
        latent_guide = self.latent_guide
        latent_guide_inv = self.latent_guide_inv
        guide_mode = self.guide_mode
        UNSAMPLE = self.UNSAMPLE
        if self.guide_mode:
            # Cosine-similarity cutoff: compare the mean-removed, mask-weighted denoised
            # prediction against each guide; once a guide is "similar enough", its mask
            # is zeroed for this step so it stops pulling.
            data_norm = denoised - denoised.mean(dim=(-2,-1), keepdim=True)
            y0_norm = y0 - y0.mean(dim=(-2,-1), keepdim=True)
            y0_inv_norm = y0_inv - y0_inv.mean(dim=(-2,-1), keepdim=True)
            y0_cossim = get_cosine_similarity(data_norm*lgw_mask, y0_norm *lgw_mask)
            y0_cossim_inv = get_cosine_similarity(data_norm*lgw_mask_inv, y0_inv_norm*lgw_mask_inv)
            if y0_cossim < self.guide_cossim_cutoff_ or y0_cossim_inv < self.guide_bkg_cossim_cutoff_:
                lgw_mask_cossim, lgw_mask_cossim_inv = lgw_mask, lgw_mask_inv
                if y0_cossim >= self.guide_cossim_cutoff_:
                    lgw_mask_cossim = torch.zeros_like(lgw_mask)
                if y0_cossim_inv >= self.guide_bkg_cossim_cutoff_:
                    lgw_mask_cossim_inv = torch.zeros_like(lgw_mask_inv)
                lgw_mask = lgw_mask_cossim
                lgw_mask_inv = lgw_mask_cossim_inv
            else:
                # Both guides already satisfied: nothing to do this step.
                return x
        if guide_mode in {"epsilon_dynamic_mean_std", "epsilon_dynamic_mean", "epsilon_dynamic_std", "epsilon_dynamic_mean_from_bkg"}:
            # Channelwise statistics transfer between the mask==1 (fg) and mask==0 (bg)
            # regions of the denoised prediction.
            # NOTE(review): "epsilon_dynamic_std" is accepted here but no branch below
            # assigns d_shift/denoised_shifted for it — looks like it would raise; confirm.
            denoised_masked = denoised * ((mask==1)*mask)
            denoised_masked_inv = denoised * ((mask==0)*(1-mask))
            d_shift, d_shift_inv = torch.zeros_like(x), torch.zeros_like(x)
            for b, c in itertools.product(range(x.shape[0]), range(x.shape[1])):
                denoised_mask = denoised[b][c][mask[b][c] == 1]
                denoised_mask_inv = denoised[b][c][mask[b][c] == 0]
                if guide_mode == "epsilon_dynamic_mean_std":
                    # Standardize the fg region, then rescale to the bg region's stats.
                    d_shift[b][c] = (denoised_masked[b][c] - denoised_mask.mean()) / denoised_mask.std()
                    d_shift[b][c] = (d_shift[b][c] * denoised_mask_inv.std()) + denoised_mask_inv.mean()
                elif guide_mode == "epsilon_dynamic_mean":
                    d_shift[b][c] = denoised_masked[b][c] - denoised_mask.mean() + denoised_mask_inv.mean()
                    d_shift_inv[b][c] = denoised_masked_inv[b][c] - denoised_mask_inv.mean() + denoised_mask.mean()
                elif guide_mode == "epsilon_dynamic_mean_from_bkg":
                    d_shift[b][c] = denoised_masked[b][c] - denoised_mask.mean() + denoised_mask_inv.mean()
            if guide_mode in {"epsilon_dynamic_mean_std", "epsilon_dynamic_mean_from_bkg"}:
                denoised_shifted = denoised + mean_weight * lgw_mask * (d_shift - denoised_masked)
            elif guide_mode == "epsilon_dynamic_mean":
                denoised_shifted = denoised + mean_weight * lgw_mask * (d_shift - denoised_masked) + mean_weight * lgw_mask_inv * (d_shift_inv - denoised_masked_inv)
            x = denoised_shifted + eps
        if UNSAMPLE == False and (latent_guide is not None or latent_guide_inv is not None) and guide_mode in ("hard_light", "blend", "blend_projection", "mean_std", "mean", "mean_tiled", "std"):
            # Blend-style guides: compute shifted targets d_shift / d_shift_inv, then
            # lerp the denoised prediction toward them under the weight masks.
            if guide_mode == "hard_light":
                d_shift, d_shift_inv = hard_light_blend(y0, denoised), hard_light_blend(y0_inv, denoised)
            elif guide_mode == "blend":
                d_shift, d_shift_inv = y0, y0_inv
            elif guide_mode == "blend_projection":
                #d_shift = get_collinear(denoised, y0)
                #d_shift_inv = get_collinear(denoised, y0_inv)
                # Project the masked lerp target into (parallel + orthogonal) components
                # relative to the denoised prediction and recombine; early-returns.
                d_lerp = denoised + lgw_mask * (y0-denoised) + lgw_mask_inv * (y0_inv-denoised)
                d_collinear_d_lerp = get_collinear(denoised, d_lerp)
                d_lerp_ortho_d = get_orthogonal(d_lerp, denoised)
                denoised_shifted = d_collinear_d_lerp + d_lerp_ortho_d
                x = denoised_shifted + eps
                return x
            elif guide_mode == "mean_std":
                d_shift, d_shift_inv = normalize_latent([denoised, denoised], [y0, y0_inv])
            elif guide_mode == "mean":
                d_shift, d_shift_inv = normalize_latent([denoised, denoised], [y0, y0_inv], std=False)
            elif guide_mode == "std":
                d_shift, d_shift_inv = normalize_latent([denoised, denoised], [y0, y0_inv], mean=False)
            elif guide_mode == "mean_tiled":
                # Mean-match per tile instead of globally; tile count per axis from options.
                mean_tile_size = int(get_extra_options_kv("mean_tile", "8", extra_options))
                y0_tiled = rearrange(y0, "b c (h t1) (w t2) -> (t1 t2) b c h w", t1=mean_tile_size, t2=mean_tile_size)
                y0_inv_tiled = rearrange(y0_inv, "b c (h t1) (w t2) -> (t1 t2) b c h w", t1=mean_tile_size, t2=mean_tile_size)
                denoised_tiled = rearrange(denoised, "b c (h t1) (w t2) -> (t1 t2) b c h w", t1=mean_tile_size, t2=mean_tile_size)
                d_shift_tiled, d_shift_inv_tiled = torch.zeros_like(y0_tiled), torch.zeros_like(y0_tiled)
                for i in range(y0_tiled.shape[0]):
                    d_shift_tiled[i], d_shift_inv_tiled[i] = normalize_latent([denoised_tiled[i], denoised_tiled[i]], [y0_tiled[i], y0_inv_tiled[i]], std=False)
                d_shift = rearrange(d_shift_tiled, "(t1 t2) b c h w -> b c (h t1) (w t2)", t1=mean_tile_size, t2=mean_tile_size)
                d_shift_inv = rearrange(d_shift_inv_tiled, "(t1 t2) b c h w -> b c (h t1) (w t2)", t1=mean_tile_size, t2=mean_tile_size)
            if guide_mode in ("hard_light", "blend", "mean_std", "mean", "mean_tiled", "std"):
                if latent_guide_inv is None:
                    denoised_shifted = denoised + lgw_mask * (d_shift - denoised)
                else:
                    denoised_shifted = denoised + lgw_mask * (d_shift - denoised) + lgw_mask_inv * (d_shift_inv - denoised)
                # Optional re-normalization of the shifted prediction back to the
                # original denoised statistics (channelwise and/or global).
                if extra_options_flag("poststep_denoised_ch_mean_std", extra_options):
                    denoised_shifted = normalize_latent(denoised_shifted, denoised)
                if extra_options_flag("poststep_denoised_ch_mean", extra_options):
                    denoised_shifted = normalize_latent(denoised_shifted, denoised, std=False)
                if extra_options_flag("poststep_denoised_ch_std", extra_options):
                    denoised_shifted = normalize_latent(denoised_shifted, denoised, mean=False)
                if extra_options_flag("poststep_denoised_mean_std", extra_options):
                    denoised_shifted = normalize_latent(denoised_shifted, denoised, channelwise=False)
                if extra_options_flag("poststep_denoised_mean", extra_options):
                    denoised_shifted = normalize_latent(denoised_shifted, denoised, std=False, channelwise=False)
                if extra_options_flag("poststep_denoised_std", extra_options):
                    denoised_shifted = normalize_latent(denoised_shifted, denoised, mean=False, channelwise=False)
                x = denoised_shifted + eps
        # Optional re-normalization of x itself against the pre-guide x.
        if extra_options_flag("poststep_x_ch_mean_std", extra_options):
            x = normalize_latent(x, x_orig)
        if extra_options_flag("poststep_x_ch_mean", extra_options):
            x = normalize_latent(x, x_orig, std=False)
        if extra_options_flag("poststep_x_ch_std", extra_options):
            x = normalize_latent(x, x_orig, mean=False)
        if extra_options_flag("poststep_x_mean_std", extra_options):
            x = normalize_latent(x, x_orig, channelwise=False)
        if extra_options_flag("poststep_x_mean", extra_options):
            x = normalize_latent(x, x_orig, std=False, channelwise=False)
        if extra_options_flag("poststep_x_std", extra_options):
            x = normalize_latent(x, x_orig, mean=False, channelwise=False)
        return x
def apply_frame_weights(mask, frame_weights):
    """Scale each frame (dim 2) of `mask` in place by its per-frame weight.

    No-op when `frame_weights` is None. Mutates `mask`; returns None.
    """
    if frame_weights is None:
        return
    num_frames = mask.shape[2]
    for frame_idx in range(num_frames):
        mask[..., frame_idx:frame_idx+1, :, :] *= frame_weights[frame_idx]
def prepare_mask(x, mask, LGW_MASK_RESCALE_MIN) -> Tuple[torch.Tensor, bool]:
    """Resize a (b, h, w) mask to x's spatial size and broadcast it to x's shape.

    A missing mask becomes all-ones and disables min-rescaling. Returns the
    expanded mask (dtype/device of x) and the rescale flag.
    """
    if mask is None:
        return torch.ones_like(x), False
    resized = F.interpolate(
        mask.unsqueeze(1),
        size=(x.shape[-2], x.shape[-1]),
        mode='bilinear',
        align_corners=False,
    )
    # Insert singleton dims (e.g. frames for video latents) until ranks match.
    while resized.dim() < x.dim():
        resized = resized.unsqueeze(2)
    # Tile every non-batch, non-spatial dim up to x's size.
    tile = [1]
    for dim in range(1, x.dim() - 2):
        tile.append(x.shape[dim])
    tile += [1, 1]
    return resized.repeat(*tile).to(x.dtype).to(x.device), LGW_MASK_RESCALE_MIN
def prepare_weighted_masks(mask, mask_inv, lgw_, lgw_inv_, latent_guide, latent_guide_inv, LGW_MASK_RESCALE_MIN):
    """Build the foreground/background guide-weight masks for one step.

    With LGW_MASK_RESCALE_MIN, the mask is affinely rescaled so the weight floor
    is lgw_ (resp. lgw_inv_ for the inverted mask). Otherwise weights are simple
    products, zeroed when the corresponding guide latent is absent.
    """
    if LGW_MASK_RESCALE_MIN:
        fg = mask * (1 - lgw_) + lgw_
        bg = (1 - mask) * (1 - lgw_inv_) + lgw_inv_
        return fg, bg
    fg = mask * lgw_ if latent_guide is not None else torch.zeros_like(mask)
    if latent_guide_inv is None:
        bg = torch.zeros_like(mask)
    elif mask_inv is not None:
        # Explicit inverse mask caps the background weight.
        bg = torch.minimum(1 - mask_inv, (1 - mask) * lgw_inv_)
    else:
        bg = (1 - mask) * lgw_inv_
    return fg, bg
def apply_temporal_smoothing(tensor, temporal_smoothing):
    """Blur a 5D (b, c, f, h, w) tensor along its frame axis.

    A fixed 5-tap kernel [0.1, 0.2, 0.4, 0.2, 0.1] is blended with identity by
    `temporal_smoothing` and renormalized. Returns the input unchanged when
    smoothing is disabled or the tensor is not 5D. Frame edges are zero-padded,
    so boundary frames are attenuated.
    """
    if temporal_smoothing <= 0 or tensor.dim() != 5:
        return tensor
    taps = torch.tensor(
        [0.1, 0.2, 0.4, 0.2, 0.1],
        device=tensor.device, dtype=tensor.dtype,
    ) * temporal_smoothing
    taps[2] += 1 - temporal_smoothing  # identity share on the center tap
    taps = taps / taps.sum()
    b, c, f, h, w = tensor.shape
    # Fold every (b, c, h, w) position into the batch dim for a 1D conv over frames.
    rows = tensor.permute(0, 1, 3, 4, 2).reshape(-1, f)
    smoothed = F.conv1d(
        rows.unsqueeze(1),
        taps.view(1, 1, -1),
        padding=2,
    ).squeeze(1)
    return smoothed.view(b, c, h, w, f).permute(0, 1, 4, 2, 3)
def get_guide_epsilon_substep(x_0, x_, y0, y0_inv, s_, row, rk_type, b=None, c=None):
    """Epsilon estimates pulling the current substep state toward the foreground
    guide y0 and the background guide y0_inv.

    Optionally restricts to batch index `b` and channel index `c`.
    """
    if b is None:
        index = ()
    elif c is None:
        index = (b,)
    else:
        index = (b, c)
    if RK_Method.is_exponential(rk_type):
        # Exponential integrators: epsilon is the plain displacement toward the guide.
        eps_row = y0[index] - x_0[index]
        eps_row_inv = y0_inv[index] - x_0[index]
    else:
        s_in = x_0.new_ones([x_0.shape[0]])
        eps_row = (x_[row+1][index] - y0[index]) / (s_[row] * s_in)
        eps_row_inv = (x_[row+1][index] - y0_inv[index]) / (s_[row] * s_in)
    return eps_row, eps_row_inv
def get_guide_epsilon(x_0, x_, y0, sigma, rk_type, b=None, c=None):
    """Epsilon estimate pulling x toward guide y0 at noise level `sigma`.

    Optionally restricts to batch index `b` and channel index `c`.
    """
    if b is None:
        index = ()
    elif c is None:
        index = (b,)
    else:
        index = (b, c)
    if RK_Method.is_exponential(rk_type):
        return y0[index] - x_0[index]
    s_in = x_0.new_ones([x_0.shape[0]])
    return (x_[index] - y0[index]) / (sigma * s_in)
@torch.no_grad
def noise_cossim_guide_tiled(x_list, guide, cossim_mode="forward", tile_size=2, step=0):
    """Per-tile candidate selection against a guide latent.

    Each latent in `x_list` is split into tile_size x tile_size tiles; for every
    tile position, the candidate whose tile has the desired cosine similarity to
    the guide's tile is kept, and the winners are stitched back together.
    An unrecognized `cossim_mode` string is parsed as a float target similarity.
    """
    def to_tiles(t):
        return rearrange(t, "b c (h t1) (w t2) -> b (t1 t2) c h w", t1=tile_size, t2=tile_size)

    guide_tiled = to_tiles(guide)
    x_tiled_stack = torch.stack([to_tiles(x)[0] for x in x_list])              # [n_x, n_tiles, c, h, w]
    guide_flat = guide_tiled[0].view(guide_tiled.shape[1], -1).unsqueeze(0)    # [1, n_tiles, c*h*w]
    x_flat = x_tiled_stack.view(x_tiled_stack.size(0), x_tiled_stack.size(1), -1)
    cossim = F.cosine_similarity(x_flat, guide_flat, dim=-1)                   # [n_x, n_tiles]

    def pick_max():
        return cossim.argmax(dim=0)

    def pick_min():
        return cossim.argmin(dim=0)

    def pick_ortho():
        return torch.abs(cossim).argmin(dim=0)

    even_step = (step % 2 == 0)
    if cossim_mode == "forward":
        indices = pick_max()
    elif cossim_mode == "reverse":
        indices = pick_min()
    elif cossim_mode == "orthogonal":
        indices = pick_ortho()
    elif cossim_mode == "forward_reverse":
        indices = pick_max() if even_step else pick_min()
    elif cossim_mode == "reverse_forward":
        indices = pick_min() if even_step else pick_max()
    elif cossim_mode == "orthogonal_reverse":
        indices = pick_ortho() if even_step else pick_min()
    elif cossim_mode == "reverse_orthogonal":
        indices = pick_min() if even_step else pick_ortho()
    else:
        # Numeric mode: pick the candidate closest to the target similarity.
        target_value = float(cossim_mode)
        indices = torch.abs(cossim - target_value).argmin(dim=0)

    chosen = x_tiled_stack[indices, torch.arange(indices.size(0))]             # [n_tiles, c, h, w]
    chosen = chosen.unsqueeze(0)                                               # restore batch dim
    return rearrange(chosen, "b (t1 t2) c h w -> b c (h t1) (w t2)", t1=tile_size, t2=tile_size)
@torch.no_grad
def noise_cossim_eps_tiled(x_list, eps, noise_list, cossim_mode="forward", tile_size=2, step=0):
    """Per-tile noise selection against an epsilon tensor.

    Each candidate's NOISE (not the latent itself) is compared tile-by-tile to
    `eps`; for every tile position the candidate latent whose noise tile best
    matches the requested cosine-similarity rule is kept, and the winning tiles
    are stitched back into one latent. Batch size 1 is assumed (index [0]).

    Args:
        x_list: candidate latents, aligned 1:1 with noise_list.
        eps: epsilon/residual tensor used as the similarity reference.
        noise_list: noise tensors (one per candidate).
        cossim_mode: selection rule; unknown strings are parsed as a float
            target similarity (closest match wins, may raise ValueError).
        tile_size: tiles per spatial axis in the rearrange pattern.
        step: step index; alternating modes switch on its parity.
    """
    eps_tiled = rearrange(eps, "b c (h t1) (w t2) -> b (t1 t2) c h w", t1=tile_size, t2=tile_size)
    x_tiled_list = [
        rearrange(x, "b c (h t1) (w t2) -> b (t1 t2) c h w", t1=tile_size, t2=tile_size)
        for x in x_list
    ]
    noise_tiled_list = [
        rearrange(noise, "b c (h t1) (w t2) -> b (t1 t2) c h w", t1=tile_size, t2=tile_size)
        for noise in noise_list
    ]
    noise_tiled_stack = torch.stack([noise_tiled[0] for noise_tiled in noise_tiled_list])  # [n_x, n_tiles, c, h, w]
    eps_expanded = eps_tiled[0].view(eps_tiled.shape[1], -1).unsqueeze(0)                  # [1, n_tiles, c*h*w]
    noise_flat = noise_tiled_stack.view(noise_tiled_stack.size(0), noise_tiled_stack.size(1), -1)  # [n_x, n_tiles, c*h*w]
    cossim_tmp_all = F.cosine_similarity(noise_flat, eps_expanded, dim=-1)                 # [n_x, n_tiles]
    # Selection rules: forward = most similar, reverse = least similar,
    # orthogonal = |cossim| closest to zero; *_pos/*_neg restrict the sign;
    # two-word modes alternate between the two rules on even/odd steps.
    if cossim_mode == "forward":
        indices = cossim_tmp_all.argmax(dim=0)
    elif cossim_mode == "reverse":
        indices = cossim_tmp_all.argmin(dim=0)
    elif cossim_mode == "orthogonal":
        indices = torch.abs(cossim_tmp_all).argmin(dim=0)
    elif cossim_mode == "orthogonal_pos":
        # Smallest strictly-positive similarity per tile.
        positive_mask = cossim_tmp_all > 0
        positive_tmp = torch.where(positive_mask, cossim_tmp_all, torch.full_like(cossim_tmp_all, float('inf')))
        indices = positive_tmp.argmin(dim=0)
    elif cossim_mode == "orthogonal_neg":
        # Largest strictly-negative similarity per tile.
        negative_mask = cossim_tmp_all < 0
        negative_tmp = torch.where(negative_mask, cossim_tmp_all, torch.full_like(cossim_tmp_all, float('-inf')))
        indices = negative_tmp.argmax(dim=0)
    elif cossim_mode == "orthogonal_posneg":
        if step % 2 == 0:
            positive_mask = cossim_tmp_all > 0
            positive_tmp = torch.where(positive_mask, cossim_tmp_all, torch.full_like(cossim_tmp_all, float('inf')))
            indices = positive_tmp.argmin(dim=0)
        else:
            negative_mask = cossim_tmp_all < 0
            negative_tmp = torch.where(negative_mask, cossim_tmp_all, torch.full_like(cossim_tmp_all, float('-inf')))
            indices = negative_tmp.argmax(dim=0)
    elif cossim_mode == "orthogonal_negpos":
        if step % 2 == 1:
            positive_mask = cossim_tmp_all > 0
            positive_tmp = torch.where(positive_mask, cossim_tmp_all, torch.full_like(cossim_tmp_all, float('inf')))
            indices = positive_tmp.argmin(dim=0)
        else:
            negative_mask = cossim_tmp_all < 0
            negative_tmp = torch.where(negative_mask, cossim_tmp_all, torch.full_like(cossim_tmp_all, float('-inf')))
            indices = negative_tmp.argmax(dim=0)
    elif cossim_mode == "forward_reverse":
        if step % 2 == 0:
            indices = cossim_tmp_all.argmax(dim=0)
        else:
            indices = cossim_tmp_all.argmin(dim=0)
    elif cossim_mode == "reverse_forward":
        if step % 2 == 1:
            indices = cossim_tmp_all.argmax(dim=0)
        else:
            indices = cossim_tmp_all.argmin(dim=0)
    elif cossim_mode == "orthogonal_reverse":
        if step % 2 == 0:
            indices = torch.abs(cossim_tmp_all).argmin(dim=0)
        else:
            indices = cossim_tmp_all.argmin(dim=0)
    elif cossim_mode == "reverse_orthogonal":
        if step % 2 == 1:
            indices = torch.abs(cossim_tmp_all).argmin(dim=0)
        else:
            indices = cossim_tmp_all.argmin(dim=0)
    else:
        # Numeric mode: pick the candidate closest to the target similarity.
        target_value = float(cossim_mode)
        indices = torch.abs(cossim_tmp_all - target_value).argmin(dim=0)
    #else:
    # raise ValueError(f"Unknown cossim_mode: {cossim_mode}")
    x_tiled_stack = torch.stack([x_tiled[0] for x_tiled in x_tiled_list])  # [n_x, n_tiles, c, h, w]
    x_tiled_out = x_tiled_stack[indices, torch.arange(indices.size(0))]    # [n_tiles, c, h, w]
    x_tiled_out = x_tiled_out.unsqueeze(0)                                 # restore batch dim
    x_detiled = rearrange(x_tiled_out, "b (t1 t2) c h w -> b c (h t1) (w t2)", t1=tile_size, t2=tile_size)
    return x_detiled
@torch.no_grad
def noise_cossim_guide_eps_tiled(x_0, x_list, y0, noise_list, cossim_mode="forward", tile_size=2, step=0, sigma=None, rk_type=None):
    """Per-tile noise selection against a guide epsilon (x - y0).

    For each candidate, the similarity reference is that candidate's own
    displacement from the guide (x - y0), tiled; the candidate whose noise tile
    best matches the requested rule wins each tile position. Intermediate
    stacks are deleted eagerly to limit peak memory. Batch size 1 is assumed.

    Args:
        x_0: unused here; kept for signature parity with related helpers.
        x_list: candidate latents, aligned 1:1 with noise_list.
        y0: guide latent subtracted from each candidate.
        noise_list: noise tensors (one per candidate).
        cossim_mode: selection rule; unknown strings are parsed as a float
            target similarity (closest match wins, may raise ValueError).
        tile_size: tiles per spatial axis in the rearrange pattern.
        step: step index; alternating modes switch on its parity.
        sigma, rk_type: unused here; kept for signature parity.
    """
    x_tiled_stack = torch.stack([
        rearrange(x, "b c (h t1) (w t2) -> b (t1 t2) c h w", t1=tile_size, t2=tile_size)[0]
        for x in x_list
    ]) # [n_x, n_tiles, c, h, w]
    eps_guide_stack = torch.stack([
        rearrange(x - y0, "b c (h t1) (w t2) -> b (t1 t2) c h w", t1=tile_size, t2=tile_size)[0]
        for x in x_list
    ]) # [n_x, n_tiles, c, h, w]
    del x_list
    noise_tiled_stack = torch.stack([
        rearrange(noise, "b c (h t1) (w t2) -> b (t1 t2) c h w", t1=tile_size, t2=tile_size)[0]
        for noise in noise_list
    ]) # [n_x, n_tiles, c, h, w]
    del noise_list
    noise_flat = noise_tiled_stack.view(noise_tiled_stack.size(0), noise_tiled_stack.size(1), -1)  # [n_x, n_tiles, c*h*w]
    eps_guide_flat = eps_guide_stack.view(eps_guide_stack.size(0), eps_guide_stack.size(1), -1)    # [n_x, n_tiles, c*h*w]
    cossim_tmp_all = F.cosine_similarity(noise_flat, eps_guide_flat, dim=-1)                       # [n_x, n_tiles]
    del noise_tiled_stack, noise_flat, eps_guide_stack, eps_guide_flat
    # Selection rules: forward = most similar, reverse = least similar,
    # orthogonal = |cossim| closest to zero; *_pos/*_neg restrict the sign;
    # two-word modes alternate between the two rules on even/odd steps.
    if cossim_mode == "forward":
        indices = cossim_tmp_all.argmax(dim=0)
    elif cossim_mode == "reverse":
        indices = cossim_tmp_all.argmin(dim=0)
    elif cossim_mode == "orthogonal":
        indices = torch.abs(cossim_tmp_all).argmin(dim=0)
    elif cossim_mode == "orthogonal_pos":
        # Smallest strictly-positive similarity per tile.
        positive_mask = cossim_tmp_all > 0
        positive_tmp = torch.where(positive_mask, cossim_tmp_all, torch.full_like(cossim_tmp_all, float('inf')))
        indices = positive_tmp.argmin(dim=0)
    elif cossim_mode == "orthogonal_neg":
        # Largest strictly-negative similarity per tile.
        negative_mask = cossim_tmp_all < 0
        negative_tmp = torch.where(negative_mask, cossim_tmp_all, torch.full_like(cossim_tmp_all, float('-inf')))
        indices = negative_tmp.argmax(dim=0)
    elif cossim_mode == "orthogonal_posneg":
        if step % 2 == 0:
            positive_mask = cossim_tmp_all > 0
            positive_tmp = torch.where(positive_mask, cossim_tmp_all, torch.full_like(cossim_tmp_all, float('inf')))
            indices = positive_tmp.argmin(dim=0)
        else:
            negative_mask = cossim_tmp_all < 0
            negative_tmp = torch.where(negative_mask, cossim_tmp_all, torch.full_like(cossim_tmp_all, float('-inf')))
            indices = negative_tmp.argmax(dim=0)
    elif cossim_mode == "orthogonal_negpos":
        if step % 2 == 1:
            positive_mask = cossim_tmp_all > 0
            positive_tmp = torch.where(positive_mask, cossim_tmp_all, torch.full_like(cossim_tmp_all, float('inf')))
            indices = positive_tmp.argmin(dim=0)
        else:
            negative_mask = cossim_tmp_all < 0
            negative_tmp = torch.where(negative_mask, cossim_tmp_all, torch.full_like(cossim_tmp_all, float('-inf')))
            indices = negative_tmp.argmax(dim=0)
    elif cossim_mode == "forward_reverse":
        if step % 2 == 0:
            indices = cossim_tmp_all.argmax(dim=0)
        else:
            indices = cossim_tmp_all.argmin(dim=0)
    elif cossim_mode == "reverse_forward":
        if step % 2 == 1:
            indices = cossim_tmp_all.argmax(dim=0)
        else:
            indices = cossim_tmp_all.argmin(dim=0)
    elif cossim_mode == "orthogonal_reverse":
        if step % 2 == 0:
            indices = torch.abs(cossim_tmp_all).argmin(dim=0)
        else:
            indices = cossim_tmp_all.argmin(dim=0)
    elif cossim_mode == "reverse_orthogonal":
        if step % 2 == 1:
            indices = torch.abs(cossim_tmp_all).argmin(dim=0)
        else:
            indices = cossim_tmp_all.argmin(dim=0)
    else:
        # Numeric mode: pick the candidate closest to the target similarity.
        target_value = float(cossim_mode)
        indices = torch.abs(cossim_tmp_all - target_value).argmin(dim=0)
    x_tiled_out = x_tiled_stack[indices, torch.arange(indices.size(0))]  # [n_tiles, c, h, w]
    del x_tiled_stack
    x_tiled_out = x_tiled_out.unsqueeze(0)
    x_detiled = rearrange(x_tiled_out, "b (t1 t2) c h w -> b c (h t1) (w t2)", t1=tile_size, t2=tile_size)
    return x_detiled
def get_collinear(x, y):
    """Component of x parallel to y (flattened per batch element), shaped like x."""
    x_flat = x.view(x.size(0), -1)
    y_hat = y.view(y.size(0), -1).clone()
    y_hat = y_hat / y_hat.norm(dim=-1, keepdim=True)
    coeff = torch.sum(x_flat * y_hat, dim=-1, keepdim=True)
    return (coeff * y_hat).view_as(x)
def get_orthogonal(x, y):
    """Component of x orthogonal to y (flattened per batch element), shaped like x."""
    x_flat = x.view(x.size(0), -1)
    y_hat = y.view(y.size(0), -1).clone()
    y_hat = y_hat / y_hat.norm(dim=-1, keepdim=True)
    parallel = torch.sum(x_flat * y_hat, dim=-1, keepdim=True) * y_hat
    return (x_flat - parallel).view_as(x)
def get_orthogonal_noise_from_channelwise(*refs, max_iter=500, max_score=1e-15):
    """Iteratively Gram-Schmidt a noise tensor against reference tensors until it
    is (nearly) orthogonal to each of them, channelwise and as a whole.

    Args:
        *refs: the noise tensor first, followed by reference tensors of the
            same shape.
        max_iter: maximum orthogonalization passes.
        max_score: stop once every |cosine similarity| falls below this.

    Returns:
        The orthogonalized copy of the noise tensor.

    Raises:
        ValueError: if the noise tensor is not 4D (b, c, h, w) or
            5D (b, c, t, h, w). (Previously this surfaced as a NameError on
            an undefined channel count.)
    """
    noise, *refs = refs
    if noise.dim() not in (4, 5):
        raise ValueError(f"Expected a 4D or 5D noise tensor, got {noise.dim()}D.")
    ch = noise.shape[1]  # channel dim is shape[1] for both 4D and 5D layouts
    noise_tmp = noise.clone()
    for _ in range(max_iter):
        noise_tmp = gram_schmidt_channels_optimized(noise_tmp, *refs)
        cossim_scores = []
        for ref in refs:
            # Track both per-channel and whole-tensor residual similarity.
            for c in range(ch):
                cossim_scores.append(get_cosine_similarity(noise_tmp[0][c], ref[0][c]).abs())
            cossim_scores.append(get_cosine_similarity(noise_tmp[0], ref[0]).abs())
        if max(cossim_scores) < max_score:
            break
    return noise_tmp
def gram_schmidt_channels_optimized(A, *refs):
    """One channelwise Gram-Schmidt pass: subtract from each channel of A its
    projection onto the same channel of every reference tensor.

    Unlike the previous version, the input A is NOT mutated (the old code
    subtracted through a `view` of A, silently modifying the caller's tensor);
    a new tensor is returned instead.

    Args:
        A: 4D (b, c, h, w) or 5D (b, c, t, h, w) tensor.
        *refs: reference tensors with the same shape as A.

    Returns:
        A new tensor shaped like A with the reference components removed.

    Raises:
        ValueError: if A is not 4D or 5D.
    """
    if A.dim() not in (4, 5):
        raise ValueError(f"Expected a 4D or 5D tensor, got {A.dim()}D.")
    b, c = A.shape[0], A.shape[1]
    # Clone so the in-place subtraction below cannot write back into A.
    A_flat = A.reshape(b, c, -1).clone()
    for ref in refs:
        ref_flat = ref.reshape(b, c, -1).clone()
        ref_flat /= ref_flat.norm(dim=-1, keepdim=True)
        proj_coeff = torch.sum(A_flat * ref_flat, dim=-1, keepdim=True)
        A_flat -= proj_coeff * ref_flat
    return A_flat.view_as(A)
class NoiseStepHandlerOSDE:
    """Maps a NOISE_COSSIM_SOURCE name to the tensors a noise draw must be
    orthogonalized against, and runs the orthogonalization on demand.

    The recipe table captures the tensors passed at construction time; slot 0
    of each recipe is a placeholder filled with the per-call noise tensor.
    """
    def __init__(self, x, eps=None, data=None, x_init=None, guide=None, guide_bkg=None):
        self.noise = None
        self.x = x
        self.eps = eps
        self.data = data
        self.x_init = x_init
        self.guide = guide
        self.guide_bkg = guide_bkg
        self.eps_list = None
        # Recipe table: source name -> tensors to orthogonalize against.
        recipes = {
            "eps_orthogonal": (self.eps,),
            "eps_data_orthogonal": (self.eps, self.data),
            "data_orthogonal": (self.data,),
            "xinit_orthogonal": (self.x_init,),
            "x_orthogonal": (self.x,),
            "x_data_orthogonal": (self.x, self.data),
            "x_eps_orthogonal": (self.x, self.eps),
            "x_eps_data_orthogonal": (self.x, self.eps, self.data),
            "x_eps_data_xinit_orthogonal": (self.x, self.eps, self.data, self.x_init),
            "x_eps_guide_orthogonal": (self.x, self.eps, self.guide),
            "x_eps_guide_bkg_orthogonal": (self.x, self.eps, self.guide_bkg),
            "noise_orthogonal": (self.x_init,),
            "guide_orthogonal": (self.guide,),
            "guide_bkg_orthogonal": (self.guide_bkg,),
        }
        # Prepend the noise placeholder (None until get_ortho_noise fills it in).
        self.noise_cossim_map = {name: [self.noise, *tensors] for name, tensors in recipes.items()}
    def check_cossim_source(self, source):
        """True when `source` names a known orthogonalization recipe."""
        return source in self.noise_cossim_map
    def get_ortho_noise(self, noise, prev_noises=None, max_iter=100, max_score=1e-7, NOISE_COSSIM_SOURCE="eps_orthogonal"):
        """Orthogonalize `noise` against the tensors registered for the source."""
        if NOISE_COSSIM_SOURCE not in self.noise_cossim_map:
            raise ValueError(f"Invalid NOISE_COSSIM_SOURCE: {NOISE_COSSIM_SOURCE}")
        params = self.noise_cossim_map[NOISE_COSSIM_SOURCE]
        params[0] = noise
        return get_orthogonal_noise_from_channelwise(*params, max_iter=max_iter, max_score=max_score)
def handle_tiled_etc_noise_steps(x_0, x, x_prenoise, x_init, eps, denoised, y0, y0_inv, step,
                rk_type, rk, sigma_up, sigma, sigma_next, alpha_ratio, s_noise, noise_mode, SDE_NOISE_EXTERNAL, sde_noise_t,
                NOISE_COSSIM_SOURCE, NOISE_COSSIM_MODE, noise_cossim_tile_size, noise_cossim_iterations,
                extra_options):
    """Draw `noise_cossim_iterations` candidate noised latents and keep the one whose
    cosine similarity to a chosen reference best satisfies NOISE_COSSIM_MODE:

        forward    -> maximize similarity
        reverse    -> minimize similarity
        orthogonal -> |similarity| closest to zero

    The reference is selected by NOISE_COSSIM_SOURCE (eps, data, guides, x, ...);
    "*_tiled" sources delegate the final selection to the noise_cossim_*_tiled
    helpers instead of the scalar comparison loop.

    Fix vs. legacy: the "guide_bkg_tiled" branch now passes step=step, matching
    the parallel "guide_tiled" branch.

    Returns the selected latent x.
    """
    x_tmp, cossim_tmp, noise_tmp_list = [], [], []

    # Past the configured end step, switch to the "takeover" source/mode/tile/iterations.
    if step > int(get_extra_options_kv("noise_cossim_end_step", "10000", extra_options)):
        NOISE_COSSIM_SOURCE     = get_extra_options_kv("noise_cossim_takeover_source", "eps", extra_options)
        NOISE_COSSIM_MODE       = get_extra_options_kv("noise_cossim_takeover_mode", "forward", extra_options)
        noise_cossim_tile_size  = int(get_extra_options_kv("noise_cossim_takeover_tile", str(noise_cossim_tile_size), extra_options))
        noise_cossim_iterations = int(get_extra_options_kv("noise_cossim_takeover_iterations", str(noise_cossim_iterations), extra_options))

    for i in range(noise_cossim_iterations):
        # y0, lgw, sigma_down are currently unused by add_noise_post
        x_tmp.append(rk.add_noise_post(x, sigma_up, sigma, sigma_next, alpha_ratio, s_noise, noise_mode, SDE_NOISE_EXTERNAL, sde_noise_t))
        noise_tmp = x_tmp[i] - x  # recover the effective injected noise
        if extra_options_flag("noise_noise_zscore_norm", extra_options):
            noise_tmp = (noise_tmp - noise_tmp.mean()) / noise_tmp.std()
        if extra_options_flag("noise_eps_zscore_norm", extra_options):
            eps = (eps - eps.mean()) / eps.std()
        if NOISE_COSSIM_SOURCE in ("eps_tiled", "guide_epsilon_tiled", "guide_bkg_epsilon_tiled", "iig_tiled"):
            noise_tmp_list.append(noise_tmp)
        # NOTE: the source strings are mutually exclusive, so the if/if/elif chain
        # below behaves like a single dispatch.
        if NOISE_COSSIM_SOURCE == "eps":
            cossim_tmp.append(get_cosine_similarity(eps, noise_tmp))
        if NOISE_COSSIM_SOURCE == "eps_ch":
            # per-channel cosine similarity, summed over channels (batch item 0 only)
            cossim_total = torch.zeros_like(eps[0][0][0][0])
            for ch in range(eps.shape[1]):
                cossim_total += get_cosine_similarity(eps[0][ch], noise_tmp[0][ch])
            cossim_tmp.append(cossim_total)
        elif NOISE_COSSIM_SOURCE == "data":
            cossim_tmp.append(get_cosine_similarity(denoised, noise_tmp))
        elif NOISE_COSSIM_SOURCE == "latent":
            cossim_tmp.append(get_cosine_similarity(x_prenoise, noise_tmp))
        elif NOISE_COSSIM_SOURCE == "x_prenoise":
            cossim_tmp.append(get_cosine_similarity(x_prenoise, x_tmp[i]))
        elif NOISE_COSSIM_SOURCE == "x":
            cossim_tmp.append(get_cosine_similarity(x, x_tmp[i]))
        elif NOISE_COSSIM_SOURCE == "x_data":
            cossim_tmp.append(get_cosine_similarity(denoised, x_tmp[i]))
        elif NOISE_COSSIM_SOURCE == "x_init_vs_noise":
            cossim_tmp.append(get_cosine_similarity(x_init, noise_tmp))
        elif NOISE_COSSIM_SOURCE == "mom":
            cossim_tmp.append(get_cosine_similarity(denoised, x + sigma_next*noise_tmp))
        elif NOISE_COSSIM_SOURCE == "guide":
            cossim_tmp.append(get_cosine_similarity(y0, x_tmp[i]))
        elif NOISE_COSSIM_SOURCE == "guide_bkg":
            cossim_tmp.append(get_cosine_similarity(y0_inv, x_tmp[i]))

    # Before the start step, just take the first candidate unconditionally.
    if step < int(get_extra_options_kv("noise_cossim_start_step", "0", extra_options)):
        x = x_tmp[0]
    elif (NOISE_COSSIM_SOURCE == "eps_tiled"):
        x = noise_cossim_eps_tiled(x_tmp, eps, noise_tmp_list, cossim_mode=NOISE_COSSIM_MODE, tile_size=noise_cossim_tile_size, step=step)
    elif (NOISE_COSSIM_SOURCE == "guide_epsilon_tiled"):
        x = noise_cossim_guide_eps_tiled(x_0, x_tmp, y0, noise_tmp_list, cossim_mode=NOISE_COSSIM_MODE, tile_size=noise_cossim_tile_size, step=step, sigma=sigma, rk_type=rk_type)
    elif (NOISE_COSSIM_SOURCE == "guide_bkg_epsilon_tiled"):
        x = noise_cossim_guide_eps_tiled(x_0, x_tmp, y0_inv, noise_tmp_list, cossim_mode=NOISE_COSSIM_MODE, tile_size=noise_cossim_tile_size, step=step, sigma=sigma, rk_type=rk_type)
    elif (NOISE_COSSIM_SOURCE == "guide_tiled"):
        x = noise_cossim_guide_tiled(x_tmp, y0, cossim_mode=NOISE_COSSIM_MODE, tile_size=noise_cossim_tile_size, step=step)
    elif (NOISE_COSSIM_SOURCE == "guide_bkg_tiled"):
        # fixed: pass step=step like the guide_tiled branch above
        x = noise_cossim_guide_tiled(x_tmp, y0_inv, cossim_mode=NOISE_COSSIM_MODE, tile_size=noise_cossim_tile_size, step=step)
    else:
        # Scalar selection over the collected similarities.
        # NOTE(review): with an unrecognized source and a recognized mode,
        # cossim_tmp is empty and max()/min() would raise — preserved from legacy.
        for i in range(len(x_tmp)):
            if (NOISE_COSSIM_MODE == "forward") and (cossim_tmp[i] == max(cossim_tmp)):
                x = x_tmp[i]
                break
            elif (NOISE_COSSIM_MODE == "reverse") and (cossim_tmp[i] == min(cossim_tmp)):
                x = x_tmp[i]
                break
            elif (NOISE_COSSIM_MODE == "orthogonal") and (abs(cossim_tmp[i]) == min(abs(val) for val in cossim_tmp)):
                x = x_tmp[i]
                break
            elif (NOISE_COSSIM_MODE != "forward") and (NOISE_COSSIM_MODE != "reverse") and (NOISE_COSSIM_MODE != "orthogonal"):
                x = x_tmp[0]
                break
    return x
================================================
FILE: legacy/rk_method.py
================================================
import torch
import re
import torch.nn.functional as F
import torchvision.transforms as T
from .noise_classes import *
import comfy.model_patcher
import comfy.supported_models
import itertools
from .rk_coefficients import *
from .phi_functions import *
class RK_Method:
    """Base class for Runge-Kutta samplers (legacy implementation).

    Holds the Butcher tableau (a, b, c), noise sampler, per-step state
    (h_prev/h_prev2 for multistep methods), and channelwise-CFG bookkeeping.
    Concrete time parameterizations are provided by the subclasses
    RK_Method_Exponential and RK_Method_Linear via the create() factory.
    """
    def __init__(self, model, name="", method="explicit", dynamic_method=False, device='cuda', dtype=torch.float64):
        self.model = model
        # comfy wraps the model twice; model_sampling carries sigma_min/sigma_max
        self.model_sampling = model.inner_model.inner_model.model_sampling
        self.device = device
        self.dtype = dtype
        self.method = method
        self.dynamic_method = dynamic_method
        self.stages = 0
        self.name = name
        self.ab = None           # unused here; presumably a combined tableau — TODO confirm
        self.a = None            # RK matrix (rows x cols, broadcast-shaped in set_coeff)
        self.b = None            # output weights
        self.c = None            # nodes
        self.denoised = None
        self.uncond = None       # [tensor] captured by the post-CFG hook
        self.rows = 0
        self.cols = 0
        self.y0 = None
        self.y0_inv = None
        self.sigma_min = model.inner_model.inner_model.model_sampling.sigma_min.to(dtype)
        self.sigma_max = model.inner_model.inner_model.model_sampling.sigma_max.to(dtype)
        self.noise_sampler = None
        self.h_prev = None       # previous step size (multistep methods)
        self.h_prev2 = None      # step size two steps back
        self.multistep_stages = 0
        self.cfg_cw = 1.0        # channelwise CFG weight; 1.0 disables the rescale
    @staticmethod
    def is_exponential(rk_type):
        """True when rk_type names an exponential-integrator family (res/dpmpp/ddim/lawson)."""
        #if rk_type.startswith(("res", "dpmpp", "ddim", "irk_exp_diag_2s" )):
        if rk_type.startswith(("res", "dpmpp", "ddim", "lawson", "genlawson")):
            return True
        else:
            return False
    @staticmethod
    def create(model, rk_type, device='cuda', dtype=torch.float64, name="", method="explicit"):
        """Factory: pick the exponential or linear subclass for rk_type."""
        if RK_Method.is_exponential(rk_type):
            return RK_Method_Exponential(model, name, method, device, dtype)
        else:
            return RK_Method_Linear(model, name, method, device, dtype)
    def __call__(self):
        # Subclasses implement __call__(x_0, x, sigma, h, **extra_args).
        raise NotImplementedError("This method got clownsharked!")
    def model_epsilon(self, x, sigma, **extra_args):
        """Run the model and return (eps, denoised) with eps = (x - denoised) / sigma."""
        s_in = x.new_ones([x.shape[0]])
        denoised = self.model(x, sigma * s_in, **extra_args)
        denoised = self.calc_cfg_channelwise(denoised)
        #return x0 ###################################THIS WORKS ONLY WITH THE MODEL SAMPLING PATCH
        eps = (x - denoised) / (sigma * s_in).view(x.shape[0], 1, 1, 1)
        return eps, denoised
    def model_denoised(self, x, sigma, **extra_args):
        """Run the model and return only the (channelwise-CFG-adjusted) denoised prediction."""
        s_in = x.new_ones([x.shape[0]])
        denoised = self.model(x, sigma * s_in, **extra_args)
        denoised = self.calc_cfg_channelwise(denoised)
        return denoised
    def init_noise_sampler(self, x, noise_seed, noise_sampler_type, alpha, k=1., scale=0.1):
        """Instantiate the noise sampler; noise_seed == -1 derives a seed from torch's RNG.

        "fractal" samplers take extra shaping parameters (alpha, k, scale).
        """
        seed = torch.initial_seed()+1 if noise_seed == -1 else noise_seed
        if noise_sampler_type == "fractal":
            self.noise_sampler = NOISE_GENERATOR_CLASSES.get(noise_sampler_type)(x=x, seed=seed, sigma_min=self.sigma_min, sigma_max=self.sigma_max)
            self.noise_sampler.alpha = alpha
            self.noise_sampler.k = k
            self.noise_sampler.scale = scale
        else:
            self.noise_sampler = NOISE_GENERATOR_CLASSES_SIMPLE.get(noise_sampler_type)(x=x, seed=seed, sigma_min=self.sigma_min, sigma_max=self.sigma_max)
    def add_noise_pre(self, x, sigma_up, sigma, sigma_next, alpha_ratio, s_noise, noise_mode, SDE_NOISE_EXTERNAL=False, sde_noise_t=None):
        """Noise injection before the step: only for non-CONST (non-RF) models in "hard" mode."""
        if isinstance(self.model_sampling, comfy.model_sampling.CONST) == False and noise_mode == "hard":
            return self.add_noise(x, sigma_up, sigma, sigma_next, alpha_ratio, s_noise, SDE_NOISE_EXTERNAL, sde_noise_t)
        else:
            return x
    def add_noise_post(self, x, sigma_up, sigma, sigma_next, alpha_ratio, s_noise, noise_mode, SDE_NOISE_EXTERNAL=False, sde_noise_t=None):
        """Noise injection after the step: the complement of add_noise_pre (CONST models, or non-"hard" modes)."""
        if isinstance(self.model_sampling, comfy.model_sampling.CONST) == True or (isinstance(self.model_sampling, comfy.model_sampling.CONST) == False and noise_mode != "hard"):
            return self.add_noise(x, sigma_up, sigma, sigma_next, alpha_ratio, s_noise, SDE_NOISE_EXTERNAL, sde_noise_t)
        else:
            return x
    def add_noise(self, x, sigma_up, sigma, sigma_next, alpha_ratio, s_noise, SDE_NOISE_EXTERNAL, sde_noise_t):
        """Blend z-scored sampler noise (optionally mixed with external SDE noise) into x.

        Returns x unchanged on the final step (sigma_next == 0).
        """
        if sigma_next > 0.0:
            noise = self.noise_sampler(sigma=sigma, sigma_next=sigma_next)
            # z-score the noise; nan_to_num guards a degenerate all-constant sample
            noise = torch.nan_to_num((noise - noise.mean()) / noise.std(), 0.0)
            if SDE_NOISE_EXTERNAL:
                noise = (1-s_noise) * noise + s_noise * sde_noise_t
            return alpha_ratio * x + noise * sigma_up * s_noise
        else:
            return x
    def set_coeff(self, rk_type, h, c1=0.0, c2=0.5, c3=1.0, stepcount=0, sigmas=None, sigma=None, sigma_down=None, extra_options=None):
        """Load the Butcher tableau for rk_type and reshape a/b for broadcasting.

        NOTE(review): the sigma argument is immediately overwritten from
        sigmas[stepcount] — the parameter is effectively ignored.
        """
        if rk_type == "default":
            return
        sigma = sigmas[stepcount]
        sigma_next = sigmas[stepcount+1]
        a, b, ci, multistep_stages, FSAL = get_rk_methods(rk_type, h, c1, c2, c3, self.h_prev, self.h_prev2, stepcount, sigmas, sigma, sigma_next, sigma_down, extra_options)
        self.multistep_stages = multistep_stages
        self.a = torch.tensor(a, dtype=h.dtype, device=h.device)
        self.a = self.a.view(*self.a.shape, 1, 1, 1, 1, 1)
        self.b = torch.tensor(b, dtype=h.dtype, device=h.device)
        self.b = self.b.view(*self.b.shape, 1, 1, 1, 1, 1)
        self.c = torch.tensor(ci, dtype=h.dtype, device=h.device)
        self.rows = self.a.shape[0]
        self.cols = self.a.shape[1]
    def a_k_sum(self, k, row):
        """Weighted sum of stage derivatives k with tableau row a[row].

        k may be a single stage tensor (4D), a stack of stages (5D), or a stack
        of 5D stages (6D) — assumes video latents in the 6D case; TODO confirm.
        """
        if len(k.shape) == 4:
            a_coeff = self.a[row].squeeze(-1)
            ks = k * a_coeff.sum(dim=0)
        elif len(k.shape) == 5:
            a_coeff = self.a[row].squeeze(-1)
            ks = (k[0:self.cols] * a_coeff).sum(dim=0)
        elif len(k.shape) == 6:
            a_coeff = self.a[row]
            ks = (k[0:self.cols] * a_coeff).sum(dim=0)
        else:
            raise ValueError(f"Unexpected k shape: {k.shape}")
        return ks
    def b_k_sum(self, k, row):
        """Weighted sum of stage derivatives k with output-weight row b[row] (see a_k_sum)."""
        if len(k.shape) == 4:
            b_coeff = self.b[row].squeeze(-1)
            ks = k * b_coeff.sum(dim=0)
        elif len(k.shape) == 5:
            b_coeff = self.b[row].squeeze(-1)
            ks = (k[0:self.cols] * b_coeff).sum(dim=0)
        elif len(k.shape) == 6:
            b_coeff = self.b[row]
            ks = (k[0:self.cols] * b_coeff).sum(dim=0)
        else:
            raise ValueError(f"Unexpected k shape: {k.shape}")
        return ks
    def init_cfg_channelwise(self, x, cfg_cw=1.0, **extra_args):
        """Install a post-CFG hook that captures the unconditional prediction into self.uncond.

        Only active when cfg_cw != 1.0; returns the (possibly modified) extra_args.
        """
        self.uncond = [torch.full_like(x, 0.0)]
        self.cfg_cw = cfg_cw
        if cfg_cw != 1.0:
            def post_cfg_function(args):
                self.uncond[0] = args["uncond_denoised"]
                return args["denoised"]
            model_options = extra_args.get("model_options", {}).copy()
            extra_args["model_options"] = comfy.model_patcher.set_model_options_post_cfg_function(model_options, post_cfg_function, disable_cfg1_optimization=True)
        return extra_args
    def calc_cfg_channelwise(self, denoised):
        """Rescale guidance per channel by the norm of (cond - uncond) relative to the mean.

        NOTE(review): avg sums norms over every (batch, channel) pair but divides
        only by the channel count, and denoised_new is recomputed whole-tensor on
        every loop pass, so only the ratio from the final (b, c) iteration takes
        effect. Preserved as-is from legacy; verify before relying on batch > 1.
        """
        if self.cfg_cw != 1.0:
            avg = 0
            for b, c in itertools.product(range(denoised.shape[0]), range(denoised.shape[1])):
                avg += torch.norm(denoised[b][c] - self.uncond[0][b][c])
            avg /= denoised.shape[1]
            for b, c in itertools.product(range(denoised.shape[0]), range(denoised.shape[1])):
                ratio = torch.nan_to_num(torch.norm(denoised[b][c] - self.uncond[0][b][c]) / avg, 0)
                denoised_new = self.uncond[0] + ratio * self.cfg_cw * (denoised - self.uncond[0])
            return denoised_new
        else:
            return denoised
class RK_Method_Exponential(RK_Method):
    """RK method in exponential (log-sigma) time.

    t = -log(sigma), h = -log(sigma_down / sigma); the stage derivative
    ("epsilon") is the data residual denoised - x_0 rather than the
    noise-scaled residual used by RK_Method_Linear.
    """
    def __init__(self, model, name="", method="explicit", device='cuda', dtype=torch.float64):
        super().__init__(model, name, method, device, dtype)
        self.exponential = True
        self.eps_pred = True
    @staticmethod
    def alpha_fn(neg_h):
        """Decay factor exp(-h) for the exponential integrator."""
        return torch.exp(neg_h)
    @staticmethod
    def sigma_fn(t):
        """Inverse time map: sigma = exp(-t)."""
        return t.neg().exp()
    @staticmethod
    def t_fn(sigma):
        """Time map: t = -log(sigma)."""
        return sigma.log().neg()
    @staticmethod
    def h_fn(sigma_down, sigma):
        """Step size in exponential time: -log(sigma_down / sigma)."""
        return -torch.log(sigma_down/sigma)
    def __call__(self, x_0, x, sigma, h, **extra_args):
        """Evaluate the model at (x, sigma).

        Returns (epsilon, denoised) with epsilon = denoised - x_0, and rolls
        the step-size history (h_prev2 <- h_prev <- h) when h is given.
        """
        denoised = self.model_denoised(x, sigma, **extra_args)
        epsilon = denoised - x_0
        if h is not None:
            self.h_prev2 = self.h_prev
            self.h_prev = h
        return epsilon, denoised
    def data_to_vel(self, x, data, sigma):
        """Velocity from a data prediction (exponential form): data - x."""
        return data - x
    def get_epsilon(self, x_0, x, y, sigma, sigma_cur, sigma_down=None, unsample_resample_scale=None, extra_options=None):
        """Epsilon toward guide y, with unsampling (sigma_down > sigma) support.

        Fixes vs. legacy:
          * sigma_down=None used to raise TypeError at the `sigma_down > sigma`
            comparison; it is now guarded.
          * extra_options=None used to fall off the end and implicitly return
            None; it now takes the standard (division) branch.
        """
        UNSAMPLING = sigma_down is not None and sigma_down > sigma
        if UNSAMPLING:
            # mirror the current sigma for the reverse (unsampling) direction
            sigma_cur = self.sigma_max - sigma_cur.clone()
        sigma_cur = unsample_resample_scale if unsample_resample_scale is not None else sigma_cur
        POWER = extra_options is not None and (
            re.search(r"\bpower_unsample\b", extra_options) or re.search(r"\bpower_resample\b", extra_options))
        if POWER:
            # "power" variants scale by sigma_cur instead of dividing by it
            if sigma_down is None:
                return y - x_0
            return (x_0 - y) * sigma_cur if UNSAMPLING else (y - x_0) * sigma_cur
        if sigma_down is None:
            return (y - x_0) / sigma_cur
        return (x_0 - y) / sigma_cur if UNSAMPLING else (y - x_0) / sigma_cur
class RK_Method_Linear(RK_Method):
    """RK method in linear (sigma) time: t = sigma, h = sigma_down - sigma;
    the stage derivative is the noise-scaled residual (x - denoised) / sigma.
    """
    def __init__(self, model, name="", method="explicit", device='cuda', dtype=torch.float64):
        super().__init__(model, name, method, device, dtype)
        self.exponential = False   # fixed: was misspelled `expanential`, so the
                                   # flag set by RK_Method_Exponential never existed here
        self.expanential = False   # kept for backward compatibility with any legacy reader
        self.eps_pred = True
    @staticmethod
    def alpha_fn(neg_h):
        """No decay in linear time: identity factor."""
        return torch.ones_like(neg_h)
    @staticmethod
    def sigma_fn(t):
        """Identity time map."""
        return t
    @staticmethod
    def t_fn(sigma):
        """Identity time map."""
        return sigma
    @staticmethod
    def h_fn(sigma_down, sigma):
        """Step size in sigma space."""
        return sigma_down - sigma
    def __call__(self, x_0, x, sigma, h, **extra_args):
        """Evaluate the model at (x, sigma).

        Returns (epsilon, denoised) with epsilon = (x - denoised) / sigma, and
        rolls the step-size history (h_prev2 <- h_prev <- h) when h is given.
        """
        epsilon, denoised = self.model_epsilon(x, sigma, **extra_args)
        if h is not None:
            self.h_prev2 = self.h_prev
            self.h_prev = h
        return epsilon, denoised
    def data_to_vel(self, x, data, sigma):
        """Velocity from a data prediction (linear form): (data - x) / sigma."""
        return (data - x) / sigma
    def get_epsilon(self, x_0, x, y, sigma, sigma_cur, sigma_down=None, unsample_resample_scale=None, extra_options=None):
        """Epsilon toward guide y, with unsampling (sigma_down > sigma) support.

        Fix vs. legacy: sigma_down=None used to raise TypeError at the
        `sigma_down > sigma` comparison; it is now guarded.
        """
        UNSAMPLING = sigma_down is not None and sigma_down > sigma
        if UNSAMPLING:
            # mirror the current sigma for the reverse (unsampling) direction
            sigma_cur = self.sigma_max - sigma_cur.clone()
        sigma_cur = unsample_resample_scale if unsample_resample_scale is not None else sigma_cur
        if sigma_down is None:
            return (x - y) / sigma_cur
        return (y - x) / sigma_cur if UNSAMPLING else (x - y) / sigma_cur
================================================
FILE: legacy/rk_sampler.py
================================================
import torch
import torch.nn.functional as F
from tqdm.auto import trange
from .noise_classes import *
from .noise_sigmas_timesteps_scaling import get_res4lyf_step_with_model, get_res4lyf_half_step3
from .rk_method import RK_Method
from .rk_guide_func import *
from .latents import normalize_latent, initialize_or_scale, latent_normalize_channels
from .helper import get_extra_options_kv, extra_options_flag, get_cosine_similarity, is_RF_model
from .sigmas import get_sigmas
PRINT_DEBUG=False
def prepare_sigmas(model, sigmas):
    """Strip the zero-padding guard sigma and mirror the schedule onto the model.

    A leading 0.0 sigma is padding used to stop comfy from noising the latent
    (for unsampling, etc.); when present, the first and last entries are dropped
    and UNSAMPLE is reported as True. If the model exposes a `sigmas` attribute,
    the (possibly trimmed) schedule is written back to it.

    Returns:
        (sigmas, UNSAMPLE)
    """
    UNSAMPLE = bool(sigmas[0] == 0.0)
    if UNSAMPLE:
        sigmas = sigmas[1:-1]
    if hasattr(model, "sigmas"):
        model.sigmas = sigmas
    return sigmas, UNSAMPLE
def prepare_step_to_sigma_zero(rk, irk, rk_type, irk_type, model, x, extra_options, alpha, k, noise_sampler_type, cfg_cw=1.0, **extra_args):
    """Swap in a dedicated final-step sampler for the last step (sigma_next == 0).

    Picks a substitute rk_type by family: *_2s/*_3s -> ralston of matching stage
    count, multistep (*_2m/*_3m/*_4m) -> deis_2m, ddim -> buehler; both may be
    overridden via the rk_type_final_step / irk_type_final_step extra options.
    The implicit sampler is only replaced when its tableau has a node c >= 1.
    eta is zeroed so no noise is added on the final step.

    Returns (rk, irk, rk_type_final_step, irk_type_final_step, eta, eta_var, extra_args).
    """
    rk_type_final_step = f"ralston_{rk_type[-2:]}" if rk_type[-2:] in {"2s", "3s"} else "ralston_3s"
    rk_type_final_step = f"deis_2m" if rk_type[-2:] in {"2m", "3m", "4m"} else rk_type_final_step
    rk_type_final_step = f"buehler" if rk_type in {"ddim"} else rk_type_final_step
    rk_type_final_step = get_extra_options_kv("rk_type_final_step", rk_type_final_step, extra_options)
    rk = RK_Method.create(model, rk_type_final_step, x.device)
    rk.init_noise_sampler(x, torch.initial_seed() + 1, noise_sampler_type, alpha=alpha, k=k)
    extra_args = rk.init_cfg_channelwise(x, cfg_cw, **extra_args)
    # Only rebuild the implicit sampler when a node reaches/exceeds 1 (i.e. it
    # would evaluate at or past sigma_next == 0).
    if any(element >= 1 for element in irk.c):
        irk_type_final_step = f"gauss-legendre_{rk_type[-2:]}" if rk_type[-2:] in {"2s", "3s", "4s", "5s"} else "gauss-legendre_2s"
        irk_type_final_step = f"deis_2m" if rk_type[-2:] in {"2m", "3m", "4m"} else irk_type_final_step
        irk_type_final_step = get_extra_options_kv("irk_type_final_step", irk_type_final_step, extra_options)
        irk = RK_Method.create(model, irk_type_final_step, x.device)
        irk.init_noise_sampler(x, torch.initial_seed() + 100, noise_sampler_type, alpha=alpha, k=k)
        extra_args = irk.init_cfg_channelwise(x, cfg_cw, **extra_args)
    else:
        irk_type_final_step = irk_type
    # Disable SDE noise on the final step.
    eta, eta_var = 0, 0
    return rk, irk, rk_type_final_step, irk_type_final_step, eta, eta_var, extra_args
@torch.no_grad()
def sample_rk(model, x, sigmas, extra_args=None, callback=None, disable=None, noise_sampler_type="gaussian", noise_mode="hard", noise_seed=-1, rk_type="res_2m", implicit_sampler_name="explicit_full",
sigma_fn_formula="", t_fn_formula="",
eta=0.0, eta_var=0.0, s_noise=1., d_noise=1., alpha=-1.0, k=1.0, scale=0.1, c1=0.0, c2=0.5, c3=1.0, implicit_steps=0, reverse_weight=0.0,
latent_guide=None, latent_guide_inv=None, latent_guide_weight=0.0, latent_guide_weight_inv=0.0, latent_guide_weights=None, latent_guide_weights_inv=None, guide_mode="",
GARBAGE_COLLECT=False, mask=None, mask_inv=None, LGW_MASK_RESCALE_MIN=True, sigmas_override=None, unsample_resample_scales=None,regional_conditioning_weights=None, sde_noise=[],
extra_options="",
etas=None, s_noises=None, momentums=None, guides=None, cfgpp=0.0, cfg_cw = 1.0,regional_conditioning_floors=None, frame_weights_grp=None, eta_substep=0.0, noise_mode_sde_substep="hard", guide_cossim_cutoff_=1.0, guide_bkg_cossim_cutoff_=1.0,
):
extra_args = {} if extra_args is None else extra_args
noise_cossim_iterations = int(get_extra_options_kv("noise_cossim_iterations", "1", extra_options))
noise_substep_cossim_iterations = int(get_extra_options_kv("noise_substep_cossim_iterations", "1", extra_options))
NOISE_COSSIM_MODE = get_extra_options_kv("noise_cossim_mode", "orthogonal", extra_options)
NOISE_COSSIM_SOURCE = get_extra_options_kv("noise_cossim_source", "x_eps_data_xinit_orthogonal", extra_options)
NOISE_SUBSTEP_COSSIM_MODE = get_extra_options_kv("noise_substep_cossim_mode", "orthogonal", extra_options)
NOISE_SUBSTEP_COSSIM_SOURCE = get_extra_options_kv("noise_substep_cossim_source", "x_eps_data_xinit_orthogonal", extra_options)
SUBSTEP_SKIP_LAST = get_extra_options_kv("substep_skip_last", "false", extra_options) == "true"
noise_cossim_tile_size = int(get_extra_options_kv("noise_cossim_tile", "2", extra_options))
noise_substep_cossim_tile_size = int(get_extra_options_kv("noise_substep_cossim_tile", "2", extra_options))
substep_eta = float(get_extra_options_kv("substep_eta", str(eta_substep), extra_options))
substep_noise_scaling = float(get_extra_options_kv("substep_noise_scaling", "0.0", extra_options))
substep_noise_mode = get_extra_options_kv("substep_noise_mode", noise_mode_sde_substep, extra_options)
substep_eta_start_step = int(get_extra_options_kv("substep_noise_start_step", "-1", extra_options))
substep_eta_final_step = int(get_extra_options_kv("substep_noise_final_step", "-1", extra_options))
noise_substep_cossim_max_iter = int(get_extra_options_kv("noise_substep_cossim_max_iter", "5", extra_options))
noise_cossim_max_iter = int(get_extra_options_kv("noise_cossim_max_iter", "5", extra_options))
noise_substep_cossim_max_score = float(get_extra_options_kv("noise_substep_cossim_max_score", "1e-7", extra_options))
noise_cossim_max_score = float(get_extra_options_kv("noise_cossim_max_score", "1e-7", extra_options))
c1 = c1_ = float(get_extra_options_kv("c1", str(c1), extra_options))
c2 = c2_ = float(get_extra_options_kv("c2", str(c2), extra_options))
c3 = c3_ = float(get_extra_options_kv("c3", str(c3), extra_options))
guide_skip_steps = int(get_extra_options_kv("guide_skip_steps", 0, extra_options))
cfg_cw = float(get_extra_options_kv("cfg_cw", str(cfg_cw), extra_options))
MODEL_SAMPLING = model.inner_model.inner_model.model_sampling
s_in, s_one = x.new_ones([x.shape[0]]), x.new_ones([1])
default_dtype = getattr(torch, get_extra_options_kv("default_dtype", "float64", extra_options), torch.float64)
max_steps=10000
if sigmas_override is not None:
sigmas = sigmas_override.clone()
sigmas = sigmas.clone() * d_noise
sigmas, UNSAMPLE = prepare_sigmas(model, sigmas)
SDE_NOISE_EXTERNAL = False
if sde_noise is not None:
if len(sde_noise) > 0 and sigmas[1] > sigmas[2]:
SDE_NOISE_EXTERNAL = True
sigma_up_total = torch.zeros_like(sigmas[0])
for i in range(len(sde_noise)-1):
sigma_up_total += sigmas[i+1]
eta = eta / sigma_up_total
irk_type = implicit_sampler_name
if implicit_sampler_name in ("explicit_full", "explicit_diagonal", "none"):
irk_type = rk_type
rk_type = "buehler" if implicit_steps > 0 and implicit_sampler_name == "explicit_full" else rk_type
rk_type = get_extra_options_kv("rk_type", rk_type, extra_options)
print("rk_type: ", rk_type)
rk = RK_Method.create(model, rk_type, x.device)
irk = RK_Method.create(model, irk_type, x.device)
extra_args = irk.init_cfg_channelwise(x, cfg_cw, **extra_args)
extra_args = rk.init_cfg_channelwise(x, cfg_cw, **extra_args)
rk. init_noise_sampler(x, noise_seed, noise_sampler_type, alpha=alpha, k=k)
irk.init_noise_sampler(x, noise_seed+100, noise_sampler_type, alpha=alpha, k=k)
frame_weights, frame_weights_inv = None, None
if frame_weights_grp is not None and frame_weights_grp[0] is not None:
frame_weights = initialize_or_scale(frame_weights_grp[0], 1.0, max_steps).to(default_dtype)
frame_weights = F.pad(frame_weights, (0, max_steps), value=0.0)
if frame_weights_grp is not None and frame_weights_grp[1] is not None:
frame_weights_inv = initialize_or_scale(frame_weights_grp[1], 1.0, max_steps).to(default_dtype)
frame_weights_inv = F.pad(frame_weights_inv, (0, max_steps), value=0.0)
frame_weights_grp = (frame_weights, frame_weights_inv)
LG = LatentGuide(guides, x, model, sigmas, UNSAMPLE, LGW_MASK_RESCALE_MIN, extra_options)
x = LG.init_guides(x, rk.noise_sampler)
y0, y0_inv = LG.y0, LG.y0_inv
lgw, lgw_inv = LG.lgw, LG.lgw_inv
guide_mode = LG.guide_mode
denoised, denoised_prev, eps, eps_prev = [torch.zeros_like(x) for _ in range(4)]
prev_noises = []
x_init = x.clone()
for step in trange(len(sigmas)-1, disable=disable):
sigma, sigma_next = sigmas[step], sigmas[step+1]
unsample_resample_scale = float(unsample_resample_scales[step]) if unsample_resample_scales is not None else None
if regional_conditioning_weights is not None:
extra_args['model_options']['transformer_options']['regional_conditioning_weight'] = regional_conditioning_weights[step]
extra_args['model_options']['transformer_options']['regional_conditioning_floor'] = regional_conditioning_floors [step]
else:
extra_args['model_options']['transformer_options']['regional_conditioning_weight'] = 0.0
extra_args['model_options']['transformer_options']['regional_conditioning_floor'] = 0.0
eta = eta_var = etas[step] if etas is not None else eta
s_noise = s_noises[step] if s_noises is not None else s_noise
if sigma_next == 0:
rk, irk, rk_type, irk_type, eta, eta_var, extra_args = prepare_step_to_sigma_zero(rk, irk, rk_type, irk_type, model, x, extra_options, alpha, k, noise_sampler_type, cfg_cw=cfg_cw, **extra_args)
sigma_up, sigma, sigma_down, alpha_ratio = get_res4lyf_step_with_model(model, sigma, sigma_next, eta, noise_mode)
h = rk.h_fn(sigma_down, sigma)
h_irk = irk.h_fn(sigma_down, sigma)
c2, c3 = get_res4lyf_half_step3(sigma, sigma_down, c2_, c3_, t_fn=rk.t_fn, sigma_fn=rk.sigma_fn, t_fn_formula=t_fn_formula, sigma_fn_formula=sigma_fn_formula)
rk. set_coeff(rk_type, h, c1, c2, c3, step, sigmas, sigma, sigma_down, extra_options)
irk.set_coeff(irk_type, h_irk, c1, c2, c3, step, sigmas, sigma, sigma_down, extra_options)
s_ = [( rk.sigma_fn( rk.t_fn(sigma) + h*c_)) * s_one for c_ in rk.c]
s_irk_rk = [( rk.sigma_fn( rk.t_fn(sigma) + h*c_)) * s_one for c_ in irk.c]
s_irk = [( irk.sigma_fn(irk.t_fn(sigma) + h_irk*c_)) * s_one for c_ in irk.c]
if step == 0 or step == guide_skip_steps:
x_, data_, data_u, eps_ = (torch.zeros(max(rk.rows, irk.rows) + 2, *x.shape, dtype=x.dtype, device=x.device) for step in range(4))
sde_noise_t = None
if SDE_NOISE_EXTERNAL:
if step >= len(sde_noise):
SDE_NOISE_EXTERNAL=False
else:
sde_noise_t = sde_noise[step]
x_prenoise = x.clone()
x_[0] = x
if sigma_up > 0:
x_[0] = rk.add_noise_pre(x, sigma_up, sigma, sigma_next, alpha_ratio, s_noise, noise_mode, SDE_NOISE_EXTERNAL, sde_noise_t) #y0, lgw, sigma_down are currently unused
x_0 = x_[0].clone()
for ms in range(rk.multistep_stages):
if RK_Method.is_exponential(rk_type):
eps_ [rk.multistep_stages - ms] = -(x_0 - data_ [rk.multistep_stages - ms])
else:
eps_ [rk.multistep_stages - ms] = (x_0 - data_ [rk.multistep_stages - ms]) / sigma
if implicit_steps == 0 or implicit_sampler_name == "explicit_diagonal":
for row in range(rk.rows - rk.multistep_stages):
for exim_iter in range(implicit_steps+1):
sub_sigma_up, sub_sigma, sub_sigma_next, sub_sigma_down, sub_alpha_ratio = 0, s_[row], s_[row+1], s_[row+1], 1
if (substep_eta_final_step < 0 and step == len(sigmas)-1+substep_eta_final_step) or (substep_eta_final_step > 0 and step > substep_eta_final_step):
sub_sigma_up, sub_sigma, sub_sigma_down, sub_alpha_ratio = 0, s_[row], s_[row+1], 1
edsef=1
if extra_options_flag("explicit_diagonal_eta_substep_factors", extra_options):
#value_str = get_extra_options_list("explicit_diagonal_eta_substep_factors", "", extra_options)
#float_list = [float(item.strip()) for item in value_str.split(',') if item.strip()]
float_list = get_extra_options_list("explicit_diagonal_eta_substep_factors", "", extra_options, ret_type=float)
edsef = float_list[exim_iter]
nsef = 1
if extra_options_flag("noise_eta_substep_factors", extra_options):
#value_str = get_extra_options_list("noise_eta_substep_factors", "", extra_options)
#nsef_list = [float(item.strip()) for item in value_str.split(',') if item.strip()]
nsef_list = get_extra_options_list("noise_eta_substep_factors", "", extra_options, ret_type=float)
nsef = nsef_list[row]
if exim_iter > 0 and rk_type.endswith("m") and step >= int(rk_type[-2]):
sub_sigma_up, sub_sigma, sub_sigma_down, sub_alpha_ratio = get_res4lyf_step_with_model(model, sigma, sigma_next, substep_eta*edsef*nsef, substep_noise_mode)
sub_sigma_next = sigma_next
if (row > 0 and not extra_options_flag("disable_rough_noise", extra_options)): # and s_[row-1] >= s_[row]:
sub_sigma_up, sub_sigma, sub_sigma_down, sub_alpha_ratio = get_res4lyf_step_with_model(model, s_[row-1], s_[row], substep_eta*edsef*nsef, substep_noise_mode)
sub_sigma_next = s_[row]
if row > 0 and substep_eta*edsef*nsef > 0 and row < rk.rows and ((SUBSTEP_SKIP_LAST == False) or (row < rk.rows - rk.multistep_stages - 1)) and (sub_sigma_down > 0) and sigma_next > 0:
substep_noise_scaling_ratio = s_[row+1]/sub_sigma_down
eps_[row-1] *= 1 + substep_noise_scaling*(substep_noise_scaling_ratio-1)
h_new = h.clone()
if (rk_type.endswith("m") and step >= int(rk_type[-2]) and sub_sigma_up > 0) or (row > 0 and sub_sigma_up > 0):
if extra_options_flag("substep_eta_c_row_plus_one", extra_options):
h_new = (rk.h_fn(sub_sigma_down, sigma) / rk.c[row+1])[0]
else:
if exim_iter > 0 and rk_type.endswith("m") and step >= int(rk_type[-2]):
c_val = -rk.h_prev/h
h_new = (rk.h_fn(sub_sigma_down, sigma)) / c_val
else:
h_new = (rk.h_fn(sub_sigma_down, sigma) / rk.c[row])[0] #used to be rk.c[row+1]
s_new_ = [( rk.sigma_fn( rk.t_fn(sigma) + h_new*c_)) * s_one for c_ in rk.c]
"""print("step, row: ", step, row)
print("h, h_new: ", h.item(), h_new.item())
print("s_: ", s_)
print("s_new_: ", s_new_)
print("sub_sigma_up, sub_sigma, sub_sigma_next, sub_sigma_down, sub_alpha_ratio: ", sub_sigma_up.item(), sub_sigma.item(), sub_sigma_next.item(), sub_sigma_down.item(), sub_alpha_ratio.item())"""
# UPDATE
#print("UPDATE: step,row,h_new: ", step, row, h_new.item())
x_[row+1] = x_0 + h_new * rk.a_k_sum(eps_, row)
if row > 0:
if PRINT_DEBUG:
print("A: step,row,h,h_new: \n", step, row, round(float(h.item()),3), round(float(h_new.item()),3))
#print("step, row, exim_iter: ", step, row, exim_iter)
# NOISE ADD
if is_RF_model(model) == True or (is_RF_model(model) == False and noise_mode != "hard"):
if (exim_iter < implicit_steps and sub_sigma_up > 0) or ((row > 0) and (sub_sigma_up > 0) and ((SUBSTEP_SKIP_LAST == False) or (row < rk.rows - rk.multistep_stages - 1))):
if PRINT_DEBUG:
print("A: sub_sigma_up, sub_sigma, sub_sigma_next, sub_sigma_down, sub_alpha_ratio: \n", round(float(sub_sigma_up),3), round(float(sub_sigma),3), round(float(sub_sigma_next),3), round(float(sub_sigma_down),3), round(float(sub_alpha_ratio),3))
data_tmp = denoised_prev if data_[row-1].sum() == 0 else data_[row-1]
eps_tmp = eps_prev if eps_[row-1].sum() == 0 else eps_ [row-1]
Osde = NoiseStepHandlerOSDE(x_[row+1], eps_tmp, data_tmp, x_init, y0, y0_inv)
if Osde.check_cossim_source(NOISE_SUBSTEP_COSSIM_SOURCE):
noise = rk.noise_sampler(sigma=sub_sigma, sigma_next=sub_sigma_next)
noise_osde = Osde.get_ortho_noise(noise, prev_noises, max_iter=noise_substep_cossim_max_iter, max_score=noise_substep_cossim_max_score, NOISE_COSSIM_SOURCE=NOISE_SUBSTEP_COSSIM_SOURCE)
x_[row+1] = sub_alpha_ratio * x_[row+1] + sub_sigma_up * noise_osde * s_noise
elif extra_options_flag("noise_substep_cossim", extra_options):
x_[row+1] = handle_tiled_etc_noise_steps(x_0, x_[row+1], x_prenoise, x_init, eps_tmp, data_tmp, y0, y0_inv, row, rk_type, rk, sub_sigma_up, s_[row-1], s_[row], sub_alpha_ratio, s_noise, substep_noise_mode, SDE_NOISE_EXTERNAL, sde_noise_t,
NOISE_SUBSTEP_COSSIM_SOURCE, NOISE_SUBSTEP_COSSIM_MODE, noise_substep_cossim_tile_size, noise_substep_cossim_iterations, extra_options)
else:
x_[row+1] = rk.add_noise_post(x_[row+1], sub_sigma_up, sub_sigma, sub_sigma_next, sub_alpha_ratio, s_noise, substep_noise_mode, SDE_NOISE_EXTERNAL, sde_noise_t)
# MODEL CALL
if step < guide_skip_steps:
eps_row, eps_row_inv = get_guide_epsilon_substep(x_0, x_, y0, y0_inv, s_, row, rk_type)
eps_[row] = LG.mask * eps_row + (1-LG.mask) * eps_row_inv
else:
if implicit_steps == 0 or row > 0 or (row == 0 and not extra_options_flag("explicit_diagonal_implicit_predictor", extra_options)):
eps_[row], data_[row] = rk(x_0, x_[row+1], s_[row], h, **extra_args)
#print("exim: ", step, row, exim_iter)
else:
if extra_options_flag("explicit_diagonal_implicit_predictor_disable_noise", extra_options):
sub_sigma_up, sub_sigma_down, sub_alpha_ratio = sub_sigma_up*0, sub_sigma_next, sub_alpha_ratio/sub_alpha_ratio
eps_[row], data_[row] = rk(x_0, x_[row+1], s_[row], h, **extra_args)
eps_, x_ = LG.process_guides_substep(x_0, x_, eps_, data_, row, step, sigma, sigma_next, sigma_down, s_, unsample_resample_scale, rk, rk_type, extra_options, frame_weights_grp)
h_mini = rk.h_fn(sub_sigma_down, sub_sigma)
x_[row+1] = x_0 + h_mini * eps_[row]
Osde = NoiseStepHandlerOSDE(x_[row+1], eps_[row], data_[row], x_init, y0, y0_inv)
if Osde.check_cossim_source(NOISE_SUBSTEP_COSSIM_SOURCE):
noise = rk.noise_sampler(sigma=sub_sigma, sigma_next=sub_sigma_next)
noise_osde = Osde.get_ortho_noise(noise, prev_noises, max_iter=noise_substep_cossim_max_iter, max_score=noise_substep_cossim_max_score, NOISE_COSSIM_SOURCE=NOISE_SUBSTEP_COSSIM_SOURCE)
x_[row+1] = sub_alpha_ratio * x_[row+1] + sub_sigma_up * noise_osde * s_noise
else:
x_[row+1] = rk.add_noise_post(x_[row+1], sub_sigma_up, sub_sigma, sub_sigma_next, sub_alpha_ratio, s_noise, substep_noise_mode, SDE_NOISE_EXTERNAL, sde_noise_t)
for inner_exim_iter in range(implicit_steps): # implicit buehler update to find Yn+1
#print("inner_exim: ", step, row, inner_exim_iter)
eps_[row], data_[row] = rk(x_0, x_[row+1], s_[row+1], h, **extra_args)
eps_, x_ = LG.process_guides_substep(x_0, x_, eps_, data_, row, step, sigma, sigma_next, sigma_down, s_, unsample_resample_scale, rk, rk_type, extra_options, frame_weights_grp)
x_[row+1] = x_0 + h_mini * eps_[row]
Osde = NoiseStepHandlerOSDE(x_[row+1], eps_[row], data_[row], x_init, y0, y0_inv)
if Osde.check_cossim_source(NOISE_SUBSTEP_COSSIM_SOURCE):
noise = rk.noise_sampler(sigma=sub_sigma, sigma_next=sub_sigma_next)
noise_osde = Osde.get_ortho_noise(noise, prev_noises, max_iter=noise_substep_cossim_max_iter, max_score=noise_substep_cossim_max_score, NOISE_COSSIM_SOURCE=NOISE_SUBSTEP_COSSIM_SOURCE)
x_[row+1] = sub_alpha_ratio * x_[row+1] + sub_sigma_up * noise_osde * s_noise
else:
x_[row+1] = rk.add_noise_post(x_[row+1], sub_sigma_up, sub_sigma, sub_sigma_next, sub_alpha_ratio, s_noise, substep_noise_mode, SDE_NOISE_EXTERNAL, sde_noise_t)
if extra_options_flag("rk_linear_straight", extra_options):
eps_[row] = (x_0 - data_[row]) / sigma
if sub_sigma_up > 0 and not RK_Method.is_exponential(rk_type):
eps_[row] = (x_0 - data_[row]) / sigma
# GUIDES
eps_row_tmp, x_row_tmp = eps_[row].clone(), x_[row+1].clone()
eps_, x_ = LG.process_guides_substep(x_0, x_, eps_, data_, row, step, sigma, sigma_next, sigma_down, s_, unsample_resample_scale, rk, rk_type, extra_options, frame_weights_grp)
if extra_options_flag("explicit_diagonal_eps_proj_factors", extra_options):
#value_str = get_extra_options_list("explicit_diagonal_eps_proj_factors", "", extra_options)
#float_list = [float(item.strip()) for item in value_str.split(',') if item.strip()]
value_str = get_extra_options_list("explicit_diagonal_eps_proj_factors", "", extra_options, ret_type=float)
eps_[row] = (float_list[exim_iter]) * eps_[row] + (1-float_list[exim_iter]) * eps_row_tmp
x_[row+1] = (float_list[exim_iter]) * x_[row+1] + (1-float_list[exim_iter]) * x_row_tmp
if row > 0 and exim_iter <= implicit_steps and implicit_steps > 0:
eps_[row-1] = eps_[row]
if implicit_steps > 0 and row == 0:
break
if PRINT_DEBUG:
print("B: step,h,h_new: \n", step, round(float(h.item()),3), round(float(h_new.item()),3))
print("B: sub_sigma_up, sub_sigma, sub_sigma_next, sub_sigma_down, sub_alpha_ratio: \n", round(float(sub_sigma_up),3), round(float(sub_sigma),3), round(float(sub_sigma_next),3), round(float(sub_sigma_down),3), round(float(sub_alpha_ratio),3))
x = x_0 + h * rk.b_k_sum(eps_, 0)
denoised = x_0 + ((sigma / (sigma - sigma_down)) * h) * rk.b_k_sum(eps_, 0)
eps = x - denoised
x = LG.process_guides_poststep(x, denoised, eps, step, extra_options)
# DIAGONALLY IMPLICIT
elif implicit_sampler_name=="explicit_diagonal_alt" or any(irk_type.startswith(prefix) for prefix in {"crouzeix", "irk_exp_diag", "pareschi_russo", "kraaijevanger_spijker", "qin_zhang",}):
s_irk = [torch.full_like(s_irk[0], sigma.item())] + s_irk
for row in range(irk.rows - irk.multistep_stages):
sub_sigma_up, sub_sigma, sub_sigma_next, sub_sigma_down, sub_alpha_ratio = 0.0, s_irk[row], s_irk[row+1], s_irk[row+1], 1.0
if irk.c[row] > 0:
sub_sigma_up, sub_sigma, sub_sigma_down, sub_alpha_ratio = get_res4lyf_step_with_model(model, s_irk[row], s_irk[row+1], substep_eta, substep_noise_mode)
if not extra_options_flag("diagonal_implicit_skip_initial", extra_options):
# MODEL CALL
eps_[row], data_[row] = irk(x_0, x_[row], s_irk[row], h_irk, **extra_args)
# GUIDES
eps_, x_ = LG.process_guides_substep(x_0, x_, eps_, data_, row, step, sigma, sigma_next, sigma_down, s_irk, unsample_resample_scale, irk, irk_type, extra_options, frame_weights_grp)
for diag_iter in range(implicit_steps):
h_new_irk = h.clone()
if irk.c[row] > 0:
h_new_irk = (irk.h_fn(sub_sigma_down, sigma) / irk.c[row])[0]
# UPDATE
x_[row+1] = x_0 + h_new_irk * irk.a_k_sum(eps_, row)
# NOISE ADD
if is_RF_model(model) == True or (is_RF_model(model) == False and noise_mode != "hard"):
if (row > 0) and (sub_sigma_up > 0) and ((SUBSTEP_SKIP_LAST == False) or (row < irk.rows - irk.multistep_stages - 1)):
data_tmp = denoised_prev if data_[row-1].sum() == 0 else data_[row-1]
eps_tmp = eps_prev if eps_[row-1].sum() == 0 else eps_ [row-1]
Osde = NoiseStepHandlerOSDE(x_[row+1], eps_tmp, data_tmp, x_init, y0, y0_inv)
if Osde.check_cossim_source(NOISE_SUBSTEP_COSSIM_SOURCE):
noise = irk.noise_sampler(sigma=sub_sigma, sigma_next=sub_sigma_next)
noise_osde = Osde.get_ortho_noise(noise, prev_noises, max_iter=noise_substep_cossim_max_iter, max_score=noise_substep_cossim_max_score, NOISE_COSSIM_SOURCE=NOISE_SUBSTEP_COSSIM_SOURCE)
x_[row+1] = sub_alpha_ratio * x_[row+1] + sub_sigma_up * noise_osde * s_noise
elif extra_options_flag("noise_substep_cossim", extra_options):
x_[row+1] = handle_tiled_etc_noise_steps(x_0, x_[row+1], x_prenoise, x_init, eps_tmp, data_tmp, y0, y0_inv, row,
irk_type, irk, sub_sigma_up, s_irk[row-1], s_irk[row], sub_alpha_ratio, s_noise, substep_noise_mode, SDE_NOISE_EXTERNAL, sde_noise_t,
NOISE_SUBSTEP_COSSIM_SOURCE, NOISE_SUBSTEP_COSSIM_MODE, noise_substep_cossim_tile_size, noise_substep_cossim_iterations,
extra_options)
else:
x_[row+1] = irk.add_noise_post(x_[row+1], sub_sigma_up, sub_sigma, sub_sigma_next, sub_alpha_ratio, s_noise, substep_noise_mode, SDE_NOISE_EXTERNAL, sde_noise_t)
# MODEL CALL
eps_[row], data_[row] = irk(x_0, x_[row+1], s_irk[row+1], h_irk, **extra_args)
if sub_sigma_up > 0 and not RK_Method.is_exponential(irk_type):
eps_[row] = (x_0 - data_[row]) / sigma
# GUIDES
eps_, x_ = LG.process_guides_substep(x_0, x_, eps_, data_, row, step, sigma, sigma_next, sigma_down, s_irk, unsample_resample_scale, irk, irk_type, extra_options, frame_weights_grp)
x = x_0 + h_irk * irk.b_k_sum(eps_, 0)
denoised = x_0 + (sigma / (sigma - sigma_down)) * h_irk * irk.b_k_sum(eps_, 0)
eps = x - denoised
x = LG.process_guides_poststep(x, denoised, eps, step, extra_options)
# FULLY IMPLICIT
else:
s2 = s_irk_rk[:]
s2.append(sigma.unsqueeze(dim=0))
s_all = torch.sort(torch.stack(s2, dim=0).squeeze(dim=1).unique(), descending=True)[0]
sigmas_and = torch.cat( (sigmas[0:step], s_all), dim=0)
data_[0].zero_()
eps_ [0].zero_()
eps_list = []
if extra_options_flag("fast_implicit_guess", extra_options):
if denoised.sum() == 0:
if extra_options_flag("fast_implicit_guess_use_guide", extra_options):
data_s = y0
eps_s = x_0 - data_s
else:
eps_s, data_s = rk(x_0, x_0, sigma, h, **extra_args)
else:
eps_s, data_s = eps, denoised
for i in range(len(s_all)-1):
eps_list.append(eps_s * s_all[i]/sigma)
if torch.allclose(s_all[-1], sigma_down, atol=1e-8):
eps_list.append(eps_s * sigma_down/sigma)
else:
# EXPLICIT GUESS
x_mid = x
for i in range(len(s_all)-1):
x_mid, eps_, data_ = get_explicit_rk_step(rk, rk_type, x_mid, LG, step, s_all[i], s_all[i+1], eta, eta_var, s_noise, noise_mode, c2, c3, step+i, sigmas_and, x_, eps_, data_, unsample_resample_scale, extra_options, frame_weights_grp,
x_init, x_prenoise, NOISE_COSSIM_SOURCE, NOISE_COSSIM_MODE, noise_cossim_max_iter, noise_cossim_max_score, noise_cossim_tile_size, noise_cossim_iterations,SDE_NOISE_EXTERNAL,sde_noise_t,MODEL_SAMPLING,
**extra_args)
eps_list.append(eps_[0])
data_[0].zero_()
eps_ [0].zero_()
if torch.allclose(s_all[-1], sigma_down, atol=1e-8):
eps_down, data_down = rk(x_0, x_mid, sigma_down, h, **extra_args) #should h_irk = h? going to change it for now.
eps_list.append(eps_down)
s_all = [s for s in s_all if s in s_irk_rk]
eps_list = [eps_list[s_all.index(s)].clone() for s in s_irk_rk]
eps2_ = torch.stack(eps_list, dim=0)
# FULLY IMPLICIT LOOP
for implicit_iter in range(implicit_steps):
for row in range(irk.rows):
x_[row+1] = x_0 + h_irk * irk.a_k_sum(eps2_, row)
eps2_[row], data_[row] = irk(x_0, x_[row+1], s_irk[row], h_irk, **extra_args)
if not extra_options_flag("implicit_loop_skip_guide", extra_options):
eps2_, x_ = LG.process_guides_substep(x_0, x_, eps2_, data_, row, step, sigma, sigma_next, sigma_down, s_irk, unsample_resample_scale, irk, irk_type, extra_options, frame_weights_grp)
x = x_0 + h_irk * irk.b_k_sum(eps2_, 0)
denoised = x_0 + (sigma / (sigma - sigma_down)) * h_irk * irk.b_k_sum(eps2_, 0)
eps = x - denoised
x = LG.process_guides_poststep(x, denoised, eps, step, extra_options)
preview_callback(x, eps, denoised, x_, eps_, data_, step, sigma, sigma_next, callback, extra_options)
sde_noise_t = None
if SDE_NOISE_EXTERNAL:
if step >= len(sde_noise):
SDE_NOISE_EXTERNAL=False
else:
sde_noise_t = sde_noise[step]
if is_RF_model(model) == True or (is_RF_model(model) == False and noise_mode != "hard"):
if sigma_up > 0:
#print("NOISE_FULL: sigma_up, sigma, sigma_next, sigma_down, alpha_ratio: ", sigma_up.item(), sigma.item(), sigma_next.item(), sigma_down.item(), alpha_ratio.item())
if implicit_steps==0:
rk_or_irk = rk
rk_or_irk_type = rk_type
else:
rk_or_irk = irk
rk_or_irk_type = irk_type
Osde = NoiseStepHandlerOSDE(x, eps, denoised, x_init, y0, y0_inv)
if Osde.check_cossim_source(NOISE_COSSIM_SOURCE):
noise = rk_or_irk.noise_sampler(sigma=sigma, sigma_next=sigma_next)
noise_osde = Osde.get_ortho_noise(noise, prev_noises, max_iter=noise_cossim_max_iter, max_score=noise_cossim_max_score, NOISE_COSSIM_SOURCE=NOISE_COSSIM_SOURCE)
x = alpha_ratio * x + sigma_up * noise_osde * s_noise
elif extra_options_flag("noise_cossim", extra_options):
x = handle_tiled_etc_noise_steps(x_0, x, x_prenoise, x_init, eps, denoised, y0, y0_inv, step,
rk_or_irk_type, rk_or_irk, sigma_up, sigma, sigma_next, alpha_ratio, s_noise, noise_mode, SDE_NOISE_EXTERNAL, sde_noise_t,
NOISE_COSSIM_SOURCE, NOISE_COSSIM_MODE, noise_cossim_tile_size, noise_cossim_iterations,
extra_options)
else:
x = rk_or_irk.add_noise_post(x, sigma_up, sigma, sigma_next, alpha_ratio, s_noise, noise_mode, SDE_NOISE_EXTERNAL, sde_noise_t)
if PRINT_DEBUG:
print("Data vs. y0 cossim score: ", get_cosine_similarity(data_[0], y0).item())
for ms in range(rk.multistep_stages):
if RK_Method.is_exponential(rk_type):
eps_[rk.multistep_stages - ms] = data_[rk.multistep_stages - ms - 1] - x
else:
eps_[rk.multistep_stages - ms] = (x - data_[rk.multistep_stages - ms - 1]) / sigma
#eps_ [rk.multistep_stages - ms] = eps_ [rk.multistep_stages - ms - 1]
data_[rk.multistep_stages - ms] = data_[rk.multistep_stages - ms - 1]
eps_ [0] = torch.zeros_like(eps_ [0])
data_[0] = torch.zeros_like(data_[0])
denoised_prev = denoised
eps_prev = eps
preview_callback(x, eps, denoised, x_, eps_, data_, step, sigma, sigma_next, callback, extra_options, FINAL_STEP=True)
return x
def get_explicit_rk_step(rk, rk_type, x, LG, step, sigma, sigma_next, eta, eta_var, s_noise, noise_mode, c2, c3, stepcount, sigmas, x_, eps_, data_, unsample_resample_scale, extra_options, frame_weights_grp,
                         x_init, x_prenoise, NOISE_COSSIM_SOURCE, NOISE_COSSIM_MODE, noise_cossim_max_iter, noise_cossim_max_score, noise_cossim_tile_size, noise_cossim_iterations,SDE_NOISE_EXTERNAL,sde_noise_t,MODEL_SAMPLING,
                         **extra_args):
    """Run one explicit Runge-Kutta step from sigma to sigma_next.

    Used by sample_rk's fully-implicit path to build an explicit guess for the
    stage values: splits the step via eta into (sigma_up, sigma_down), runs
    each explicit RK stage with a model call, applies latent guides per
    substep and post-step, then optionally re-adds noise.

    Returns:
        (x, eps_, data_): the updated latent plus the per-stage eps/data
        buffers (mutated in place and also returned).
    """
    extra_args = {} if extra_args is None else extra_args
    s_in = x.new_ones([x.shape[0]])
    # eta for this implicit-substep can be overridden via extra_options.
    eta = float(get_extra_options_kv("implicit_substep_eta", eta, extra_options))
    # NOTE: `sigma` is rebound here to the value returned for this step.
    sigma_up, sigma, sigma_down, alpha_ratio = get_res4lyf_step_with_model(rk.model, sigma, sigma_next, eta, noise_mode)
    h = rk.h_fn(sigma_down, sigma)
    c2, c3 = get_res4lyf_half_step3(sigma, sigma_down, c2, c3, t_fn=rk.t_fn, sigma_fn=rk.sigma_fn)
    rk.set_coeff(rk_type, h, c2=c2, c3=c3, stepcount=stepcount, sigmas=sigmas, sigma_down=sigma_down, extra_options=extra_options)
    # Stage sigmas derived from the RK abscissae rk.c.
    s_ = [(sigma + h * c_) * s_in for c_ in rk.c]
    x_[0] = rk.add_noise_pre(x, sigma_up, sigma, sigma_next, alpha_ratio, s_noise, noise_mode)
    x_0 = x_[0].clone()
    # Rebuild eps history for multistep methods from stored denoised data,
    # using the exponential or standard eps definition as appropriate.
    for ms in range(rk.multistep_stages):
        if RK_Method.is_exponential(rk_type):
            eps_ [rk.multistep_stages - ms] = data_ [rk.multistep_stages - ms] - x_0
        else:
            eps_ [rk.multistep_stages - ms] = (x_0 - data_ [rk.multistep_stages - ms]) / sigma
    # Explicit stage loop: predict the stage value, call the model, then
    # apply substep guides.
    for row in range(rk.rows - rk.multistep_stages):
        x_[row+1] = x_0 + h * rk.a_k_sum(eps_, row)
        eps_[row], data_[row] = rk(x_0, x_[row+1], s_[row], h, **extra_args)
        eps_, x_ = LG.process_guides_substep(x_0, x_, eps_, data_, row, step, sigma, sigma_next, sigma_down, s_, unsample_resample_scale, rk, rk_type, extra_options, frame_weights_grp)
    # Combine stages with the b-row weights; `denoised` rescales h by
    # sigma / (sigma - sigma_down).
    x = x_0 + h * rk.b_k_sum(eps_, 0)
    denoised = x_0 + (sigma / (sigma - sigma_down)) * h * rk.b_k_sum(eps_, 0)
    eps = x - denoised
    y0 = LG.y0
    # Per-step guide schedule: pick this step's guide frame (clamped to last).
    if LG.y0.shape[0] > 1:
        y0 = LG.y0[min(step, LG.y0.shape[0]-1)].unsqueeze(0)
    x = LG.process_guides_poststep(x, denoised, eps, step, extra_options)
    #x = rk.add_noise_post(x, sigma_up, sigma, sigma_next, alpha_ratio, s_noise, noise_mode)
    # Re-add noise after the step for rectified-flow models, or for non-"hard"
    # noise modes on other models.
    if is_RF_model(rk.model) == True or (is_RF_model(rk.model) == False and noise_mode != "hard"):
        if sigma_up > 0:
            Osde = NoiseStepHandlerOSDE(x, eps, denoised, x_init, y0, LG.y0_inv)
            if Osde.check_cossim_source(NOISE_COSSIM_SOURCE):
                # Orthogonalize the fresh noise against the chosen cossim source.
                noise = rk.noise_sampler(sigma=sigma, sigma_next=sigma_next)
                noise_osde = Osde.get_ortho_noise(noise, [], max_iter=noise_cossim_max_iter, max_score=noise_cossim_max_score, NOISE_COSSIM_SOURCE=NOISE_COSSIM_SOURCE)
                x = alpha_ratio * x + sigma_up * noise_osde * s_noise
            elif extra_options_flag("noise_cossim", extra_options):
                x = handle_tiled_etc_noise_steps(x_0, x, x_prenoise, x_init, eps, denoised, y0, LG.y0_inv, step,
                                                 rk_type, rk, sigma_up, sigma, sigma_next, alpha_ratio, s_noise, noise_mode, SDE_NOISE_EXTERNAL, sde_noise_t,
                                                 NOISE_COSSIM_SOURCE, NOISE_COSSIM_MODE, noise_cossim_tile_size, noise_cossim_iterations,
                                                 extra_options)
            else:
                x = rk.add_noise_post(x, sigma_up, sigma, sigma_next, alpha_ratio, s_noise, noise_mode, SDE_NOISE_EXTERNAL, sde_noise_t)
    # Shift the multistep history buffers forward by one stage.
    for ms in range(rk.multistep_stages): # NEEDS ADJUSTING?
        eps_ [rk.multistep_stages - ms] = eps_ [rk.multistep_stages - ms - 1]
        data_[rk.multistep_stages - ms] = data_[rk.multistep_stages - ms - 1]
    return x, eps_, data_
def preview_callback(x, eps, denoised, x_, eps_, data_, step, sigma, sigma_next, callback, extra_options, FINAL_STEP=False):
    """Push a preview latent to the UI callback.

    Which tensor is previewed is chosen from extra_options flags; the final
    step always previews `denoised`, and the default is data_[0].
    """
    def _row(key):
        # Substep-preview flags carry an optional row index (default 0).
        return int(get_extra_options_kv(key, "0", extra_options))

    if FINAL_STEP:
        preview = denoised
    elif extra_options_flag("eps_substep_preview", extra_options):
        preview = eps_[_row("eps_substep_preview")]
    elif extra_options_flag("denoised_substep_preview", extra_options):
        preview = data_[_row("denoised_substep_preview")]
    elif extra_options_flag("x_substep_preview", extra_options):
        preview = x_[_row("x_substep_preview")]
    elif extra_options_flag("eps_preview", extra_options):
        preview = eps
    elif extra_options_flag("denoised_preview", extra_options):
        preview = denoised
    elif extra_options_flag("x_preview", extra_options):
        preview = x
    else:
        preview = data_[0]

    if callback is not None:
        callback({'x': x, 'i': step, 'sigma': sigma, 'sigma_next': sigma_next, 'denoised': preview.to(torch.float32)})
    return
def _rk_sampler_preset(model, x, sigmas, extra_args, callback, disable, rk_type, eta, eta_substep=None):
    # Shared driver for the preset samplers below: gaussian SDE noise,
    # "hard" noise mode, random seed.  eta_substep is only forwarded when set.
    kwargs = {
        "noise_sampler_type": "gaussian",
        "noise_mode": "hard",
        "noise_seed": -1,
        "rk_type": rk_type,
        "eta": eta,
    }
    if eta_substep is not None:
        kwargs["eta_substep"] = eta_substep
    return sample_rk(model, x, sigmas, extra_args, callback, disable, **kwargs)

def sample_res_2m(model, x, sigmas, extra_args=None, callback=None, disable=None):
    """res_2m preset, ODE mode (eta=0)."""
    return _rk_sampler_preset(model, x, sigmas, extra_args, callback, disable, "res_2m", 0.0)

def sample_res_2s(model, x, sigmas, extra_args=None, callback=None, disable=None):
    """res_2s preset, ODE mode (eta=0)."""
    return _rk_sampler_preset(model, x, sigmas, extra_args, callback, disable, "res_2s", 0.0)

def sample_res_3s(model, x, sigmas, extra_args=None, callback=None, disable=None):
    """res_3s preset, ODE mode (eta=0)."""
    return _rk_sampler_preset(model, x, sigmas, extra_args, callback, disable, "res_3s", 0.0)

def sample_res_5s(model, x, sigmas, extra_args=None, callback=None, disable=None):
    """res_5s preset, ODE mode (eta=0)."""
    return _rk_sampler_preset(model, x, sigmas, extra_args, callback, disable, "res_5s", 0.0)

def sample_res_6s(model, x, sigmas, extra_args=None, callback=None, disable=None):
    """res_6s preset, ODE mode (eta=0)."""
    return _rk_sampler_preset(model, x, sigmas, extra_args, callback, disable, "res_6s", 0.0)

def sample_res_2m_sde(model, x, sigmas, extra_args=None, callback=None, disable=None):
    """res_2m preset, SDE mode (eta=0.5 for steps and substeps)."""
    return _rk_sampler_preset(model, x, sigmas, extra_args, callback, disable, "res_2m", 0.5, eta_substep=0.5)

def sample_res_2s_sde(model, x, sigmas, extra_args=None, callback=None, disable=None):
    """res_2s preset, SDE mode (eta=0.5 for steps and substeps)."""
    return _rk_sampler_preset(model, x, sigmas, extra_args, callback, disable, "res_2s", 0.5, eta_substep=0.5)

def sample_res_3s_sde(model, x, sigmas, extra_args=None, callback=None, disable=None):
    """res_3s preset, SDE mode (eta=0.5 for steps and substeps)."""
    return _rk_sampler_preset(model, x, sigmas, extra_args, callback, disable, "res_3s", 0.5, eta_substep=0.5)

def sample_res_5s_sde(model, x, sigmas, extra_args=None, callback=None, disable=None):
    """res_5s preset, SDE mode (eta=0.5 for steps and substeps)."""
    return _rk_sampler_preset(model, x, sigmas, extra_args, callback, disable, "res_5s", 0.5, eta_substep=0.5)

def sample_res_6s_sde(model, x, sigmas, extra_args=None, callback=None, disable=None):
    """res_6s preset, SDE mode (eta=0.5 for steps and substeps)."""
    return _rk_sampler_preset(model, x, sigmas, extra_args, callback, disable, "res_6s", 0.5, eta_substep=0.5)

def sample_deis_2m(model, x, sigmas, extra_args=None, callback=None, disable=None):
    """deis_2m preset, ODE mode (eta=0)."""
    return _rk_sampler_preset(model, x, sigmas, extra_args, callback, disable, "deis_2m", 0.0)

def sample_deis_3m(model, x, sigmas, extra_args=None, callback=None, disable=None):
    """deis_3m preset, ODE mode (eta=0)."""
    return _rk_sampler_preset(model, x, sigmas, extra_args, callback, disable, "deis_3m", 0.0)

def sample_deis_4m(model, x, sigmas, extra_args=None, callback=None, disable=None):
    """deis_4m preset, ODE mode (eta=0)."""
    return _rk_sampler_preset(model, x, sigmas, extra_args, callback, disable, "deis_4m", 0.0)

def sample_deis_2m_sde(model, x, sigmas, extra_args=None, callback=None, disable=None):
    """deis_2m preset, SDE mode (eta=0.5 for steps and substeps)."""
    return _rk_sampler_preset(model, x, sigmas, extra_args, callback, disable, "deis_2m", 0.5, eta_substep=0.5)

def sample_deis_3m_sde(model, x, sigmas, extra_args=None, callback=None, disable=None):
    """deis_3m preset, SDE mode (eta=0.5 for steps and substeps)."""
    return _rk_sampler_preset(model, x, sigmas, extra_args, callback, disable, "deis_3m", 0.5, eta_substep=0.5)

def sample_deis_4m_sde(model, x, sigmas, extra_args=None, callback=None, disable=None):
    """deis_4m preset, SDE mode (eta=0.5 for steps and substeps)."""
    return _rk_sampler_preset(model, x, sigmas, extra_args, callback, disable, "deis_4m", 0.5, eta_substep=0.5)
================================================
FILE: legacy/samplers.py
================================================
from .noise_classes import prepare_noise, NOISE_GENERATOR_CLASSES_SIMPLE, NOISE_GENERATOR_NAMES_SIMPLE, NOISE_GENERATOR_NAMES
from .sigmas import get_sigmas
from .constants import MAX_STEPS
import comfy.samplers
import comfy.sample
import comfy.sampler_helpers
import comfy.model_sampling
import comfy.latent_formats
import comfy.sd
import comfy.supported_models
import latent_preview
import torch
import torch.nn.functional as F
import math
import copy
from .helper import get_extra_options_kv, extra_options_flag, get_res4lyf_scheduler_list
from .latents import initialize_or_scale
from .noise_classes import prepare_noise, NOISE_GENERATOR_CLASSES_SIMPLE, NOISE_GENERATOR_NAMES_SIMPLE, NOISE_GENERATOR_NAMES
from .sigmas import get_sigmas
from .rk_sampler import sample_rk
from .rk_coefficients import RK_SAMPLER_NAMES, IRK_SAMPLER_NAMES
from .rk_guide_func import get_orthogonal
from .noise_sigmas_timesteps_scaling import NOISE_MODE_NAMES
def move_to_same_device(*tensors):
    """Return the given tensors as a tuple, all moved to the device of the
    first tensor.  Calling with no arguments returns the empty tuple.
    """
    if len(tensors) == 0:
        return tensors
    target_device = tensors[0].device
    moved = [t.to(target_device) for t in tensors]
    return tuple(moved)
#SCHEDULER_NAMES = comfy.samplers.SCHEDULER_NAMES + ["beta57"]
class ClownSamplerAdvanced:
    """Legacy ComfyUI node: builds a custom "rk" SAMPLER with full control
    over SDE noise types/modes, step and substep eta, implicit refinement,
    and optional guides/automation.  Deprecated (see DEPRECATED flag).
    """
    @classmethod
    def INPUT_TYPES(s):
        # ComfyUI node interface declaration.
        return {"required":
                    {
                    "noise_type_sde": (NOISE_GENERATOR_NAMES_SIMPLE, {"default": "gaussian"}),
                    "noise_type_sde_substep": (NOISE_GENERATOR_NAMES_SIMPLE, {"default": "gaussian"}),
                    "noise_mode_sde": (NOISE_MODE_NAMES, {"default": 'hard', "tooltip": "How noise scales with the sigma schedule. Hard is the most aggressive, the others start strong and drop rapidly."}),
                    "noise_mode_sde_substep": (NOISE_MODE_NAMES, {"default": 'hard', "tooltip": "How noise scales with the sigma schedule. Hard is the most aggressive, the others start strong and drop rapidly."}),
                    "eta": ("FLOAT", {"default": 0.5, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Calculated noise amount to be added, then removed, after each step."}),
                    "eta_substep": ("FLOAT", {"default": 0.5, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Calculated noise amount to be added, then removed, after each step."}),
                    "s_noise": ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step":0.01, "tooltip": "Adds extra SDE noise. Values around 1.03-1.07 can lead to a moderate boost in detail and paint textures."}),
                    "d_noise": ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step":0.01, "tooltip": "Downscales the sigma schedule. Values around 0.98-0.95 can lead to a large boost in detail and paint textures."}),
                    "noise_seed_sde": ("INT", {"default": -1, "min": -1, "max": 0xffffffffffffffff}),
                    "sampler_name": (RK_SAMPLER_NAMES, {"default": "res_2m"}),
                    "implicit_sampler_name": (IRK_SAMPLER_NAMES, {"default": "explicit_diagonal"}),
                    "implicit_steps": ("INT", {"default": 0, "min": 0, "max": 10000}),
                    },
                "optional":
                    {
                    "guides": ("GUIDES", ),
                    "options": ("OPTIONS", ),
                    "automation": ("AUTOMATION", ),
                    "extra_options": ("STRING", {"default": "", "multiline": True}),
                    }
                }

    RETURN_TYPES = ("SAMPLER",)
    RETURN_NAMES = ("sampler", )
    FUNCTION = "main"
    CATEGORY = "RES4LYF/legacy/samplers"
    DEPRECATED = True

    def main(self,
             noise_type_sde="gaussian", noise_type_sde_substep="gaussian", noise_mode_sde="hard",
             eta=0.25, eta_var=0.0, d_noise=1.0, s_noise=1.0, alpha_sde=-1.0, k_sde=1.0, cfgpp=0.0, c1=0.0, c2=0.5, c3=1.0, noise_seed_sde=-1, sampler_name="res_2m", implicit_sampler_name="gauss-legendre_2s",
             t_fn_formula=None, sigma_fn_formula=None, implicit_steps=0,
             latent_guide=None, latent_guide_inv=None, guide_mode="", latent_guide_weights=None, latent_guide_weights_inv=None, latent_guide_mask=None, latent_guide_mask_inv=None, rescale_floor=True, sigmas_override=None,
             guides=None, options=None, sde_noise=None,sde_noise_steps=1,
             extra_options="", automation=None, etas=None, s_noises=None,unsample_resample_scales=None, regional_conditioning_weights=None,frame_weights_grp=None, eta_substep=0.5, noise_mode_sde_substep="hard",
             ):
        """Assemble and return the "rk" ksampler with all collected options.

        Returns:
            (sampler,): 1-tuple containing the comfy SAMPLER object.
        """
        # "none" implicit sampler: disable implicit iterations, keep a valid
        # fallback name for the sampler machinery.
        if implicit_sampler_name == "none":
            implicit_steps = 0
            implicit_sampler_name = "gauss-legendre_2s"
        # "none" noise mode: zero out eta/eta_var and fall back to "hard".
        if noise_mode_sde == "none":
            eta, eta_var = 0.0, 0.0
            noise_mode_sde = "hard"
        # Dtype for the eta/s_noise schedules; defaults to float64, can be
        # overridden via extra_options "default_dtype".
        default_dtype = getattr(torch, get_extra_options_kv("default_dtype", "float64", extra_options), torch.float64)
        unsample_resample_scales_override = unsample_resample_scales
        # An OPTIONS input overrides the corresponding widget values.
        if options is not None:
            noise_type_sde = options.get('noise_type_sde', noise_type_sde)
            noise_mode_sde = options.get('noise_mode_sde', noise_mode_sde)
            eta = options.get('eta', eta)
            s_noise = options.get('s_noise', s_noise)
            d_noise = options.get('d_noise', d_noise)
            alpha_sde = options.get('alpha_sde', alpha_sde)
            k_sde = options.get('k_sde', k_sde)
            c1 = options.get('c1', c1)
            c2 = options.get('c2', c2)
            c3 = options.get('c3', c3)
            t_fn_formula = options.get('t_fn_formula', t_fn_formula)
            sigma_fn_formula = options.get('sigma_fn_formula', sigma_fn_formula)
            frame_weights_grp = options.get('frame_weights_grp', frame_weights_grp)
            sde_noise = options.get('sde_noise', sde_noise)
            sde_noise_steps = options.get('sde_noise_steps', sde_noise_steps)
        #noise_seed_sde = torch.initial_seed()+1 if noise_seed_sde < 0 else noise_seed_sde
        rescale_floor = extra_options_flag("rescale_floor", extra_options)
        # An AUTOMATION input replaces the per-step schedules.
        if automation is not None:
            etas = automation['etas'] if 'etas' in automation else None
            s_noises = automation['s_noises'] if 's_noises' in automation else None
            unsample_resample_scales = automation['unsample_resample_scales'] if 'unsample_resample_scales' in automation else None
            frame_weights_grp = automation['frame_weights_grp'] if 'frame_weights_grp' in automation else None
        # Broadcast scalar eta/s_noise to MAX_STEPS-long schedules, then
        # zero-pad so later indexing past the schedule yields 0.
        etas = initialize_or_scale(etas, eta, MAX_STEPS).to(default_dtype)
        etas = F.pad(etas, (0, MAX_STEPS), value=0.0)
        s_noises = initialize_or_scale(s_noises, s_noise, MAX_STEPS).to(default_dtype)
        s_noises = F.pad(s_noises, (0, MAX_STEPS), value=0.0)
        if sde_noise is None:
            sde_noise = []
        else:
            # Deep-copy so the caller's noise latents are not mutated.
            sde_noise = copy.deepcopy(sde_noise)
            for i in range(len(sde_noise)):
                sde_noise[i] = sde_noise[i]  # no-op assignment; kept as-is
                # Standardize each channel to zero mean / unit std.
                for j in range(sde_noise[i].shape[1]):
                    sde_noise[i][0][j] = ((sde_noise[i][0][j] - sde_noise[i][0][j].mean()) / sde_noise[i][0][j].std())
        if unsample_resample_scales_override is not None:
            unsample_resample_scales = unsample_resample_scales_override
        # NOTE(review): noise_type_sde_substep is accepted above but not
        # forwarded into the sampler options below — confirm intended.
        sampler = comfy.samplers.ksampler("rk", {"eta": eta, "eta_var": eta_var, "s_noise": s_noise, "d_noise": d_noise, "alpha": alpha_sde, "k": k_sde, "c1": c1, "c2": c2, "c3": c3, "cfgpp": cfgpp,
                                                 "noise_sampler_type": noise_type_sde, "noise_mode": noise_mode_sde, "noise_seed": noise_seed_sde, "rk_type": sampler_name, "implicit_sampler_name": implicit_sampler_name,
                                                 "t_fn_formula": t_fn_formula, "sigma_fn_formula": sigma_fn_formula, "implicit_steps": implicit_steps,
                                                 "latent_guide": latent_guide, "latent_guide_inv": latent_guide_inv, "mask": latent_guide_mask, "mask_inv": latent_guide_mask_inv,
                                                 "latent_guide_weights": latent_guide_weights, "latent_guide_weights_inv": latent_guide_weights_inv, "guide_mode": guide_mode,
                                                 "LGW_MASK_RESCALE_MIN": rescale_floor, "sigmas_override": sigmas_override, "sde_noise": sde_noise,
                                                 "extra_options": extra_options,
                                                 "etas": etas, "s_noises": s_noises, "unsample_resample_scales": unsample_resample_scales, "regional_conditioning_weights": regional_conditioning_weights,
                                                 "guides": guides, "frame_weights_grp": frame_weights_grp, "eta_substep": eta_substep, "noise_mode_sde_substep": noise_mode_sde_substep,
                                                 })
        return (sampler, )
class ClownSampler:
    """Legacy ComfyUI node: simplified front-end for ClownSamplerAdvanced.
    The substep noise type/mode/eta are tied to the step-level widgets.
    Deprecated (see DEPRECATED flag).
    """
    @classmethod
    def INPUT_TYPES(s):
        # ComfyUI node interface declaration (no separate substep widgets).
        return {"required":
                    {
                    "noise_type_sde": (NOISE_GENERATOR_NAMES_SIMPLE, {"default": "gaussian"}),
                    "noise_mode_sde": (NOISE_MODE_NAMES, {"default": 'hard', "tooltip": "How noise scales with the sigma schedule. Hard is the most aggressive, the others start strong and drop rapidly."}),
                    "eta": ("FLOAT", {"default": 0.5, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Calculated noise amount to be added, then removed, after each step."}),
                    "s_noise": ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step":0.01}),
                    "d_noise": ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step":0.01}),
                    "noise_seed_sde": ("INT", {"default": -1, "min": -1, "max": 0xffffffffffffffff}),
                    "sampler_name": (RK_SAMPLER_NAMES, {"default": "res_2m"}),
                    "implicit_sampler_name": (IRK_SAMPLER_NAMES, {"default": "explicit_diagonal"}),
                    "implicit_steps": ("INT", {"default": 0, "min": 0, "max": 10000}),
                    },
                "optional":
                    {
                    "guides": ("GUIDES", ),
                    "options": ("OPTIONS", ),
                    "automation": ("AUTOMATION", ),
                    "extra_options": ("STRING", {"default": "", "multiline": True}),
                    }
                }

    RETURN_TYPES = ("SAMPLER",)
    RETURN_NAMES = ("sampler", )
    FUNCTION = "main"
    CATEGORY = "RES4LYF/legacy/samplers"
    DEPRECATED = True

    def main(self,
             noise_type_sde="gaussian", noise_type_sde_substep="gaussian", noise_mode_sde="hard",
             eta=0.25, eta_var=0.0, d_noise=1.0, s_noise=1.0, alpha_sde=-1.0, k_sde=1.0, cfgpp=0.0, c1=0.0, c2=0.5, c3=1.0, noise_seed_sde=-1, sampler_name="res_2m", implicit_sampler_name="gauss-legendre_2s",
             t_fn_formula=None, sigma_fn_formula=None, implicit_steps=0,
             latent_guide=None, latent_guide_inv=None, guide_mode="", latent_guide_weights=None, latent_guide_weights_inv=None, latent_guide_mask=None, latent_guide_mask_inv=None, rescale_floor=True, sigmas_override=None,
             guides=None, options=None, sde_noise=None,sde_noise_steps=1,
             extra_options="", automation=None, etas=None, s_noises=None,unsample_resample_scales=None, regional_conditioning_weights=None,frame_weights_grp=None,eta_substep=0.0, noise_mode_sde_substep="hard",
             ):
        """Delegate to ClownSamplerAdvanced with substep settings mirrored
        from the step-level settings.

        Returns the (sampler,) 1-tuple produced by ClownSamplerAdvanced.main.
        """
        # Tie substep noise parameters to the step-level widgets (this node
        # does not expose separate substep controls).
        eta_substep = eta
        noise_mode_sde_substep = noise_mode_sde
        noise_type_sde_substep = noise_type_sde
        sampler = ClownSamplerAdvanced().main(
            noise_type_sde=noise_type_sde, noise_type_sde_substep=noise_type_sde_substep, noise_mode_sde=noise_mode_sde,
            eta=eta, eta_var=eta_var, d_noise=d_noise, s_noise=s_noise, alpha_sde=alpha_sde, k_sde=k_sde, cfgpp=cfgpp, c1=c1, c2=c2, c3=c3, noise_seed_sde=noise_seed_sde, sampler_name=sampler_name, implicit_sampler_name=implicit_sampler_name,
            t_fn_formula=t_fn_formula, sigma_fn_formula=sigma_fn_formula, implicit_steps=implicit_steps,
            latent_guide=latent_guide, latent_guide_inv=latent_guide_inv, guide_mode=guide_mode, latent_guide_weights=latent_guide_weights, latent_guide_weights_inv=latent_guide_weights_inv, latent_guide_mask=latent_guide_mask, latent_guide_mask_inv=latent_guide_mask_inv, rescale_floor=rescale_floor, sigmas_override=sigmas_override,
            guides=guides, options=options, sde_noise=sde_noise,sde_noise_steps=sde_noise_steps,
            extra_options=extra_options, automation=automation, etas=etas, s_noises=s_noises,unsample_resample_scales=unsample_resample_scales, regional_conditioning_weights=regional_conditioning_weights,frame_weights_grp=frame_weights_grp, eta_substep=eta_substep, noise_mode_sde_substep=noise_mode_sde_substep,
            )
        # Advanced.main already returns a 1-tuple, so pass it through as-is.
        return sampler
def process_sampler_name(selected_value):
    """Split a (possibly prefixed) sampler menu entry into the explicit
    sampler name and the implicit sampler name.

    Entries under "fully_implicit/..." or "diag_implicit/..." select an
    implicit method (with "buehler" as the explicit base); anything else is
    treated as an explicit sampler with no implicit refinement.
    """
    base_name = selected_value.split("/")[-1]
    if selected_value.startswith("fully_implicit") or selected_value.startswith("diag_implicit"):
        return "buehler", base_name
    return base_name, "use_explicit"
def copy_cond(positive):
    """Duplicate a conditioning list, cloning tensor values while sharing
    everything else by reference (avoids copying large attached objects
    such as controlnets)."""
    duplicated = []
    for embedding, cond in positive:
        cloned_cond = {
            key: (val.clone() if isinstance(val, torch.Tensor) else val)
            for key, val in cond.items()
        }
        duplicated.append([embedding.clone(), cloned_cond])
    return duplicated
class SharkSamplerAlpha:
@classmethod
def INPUT_TYPES(s):
return {"required":
{"model": ("MODEL",),
"noise_type_init": (NOISE_GENERATOR_NAMES_SIMPLE, {"default": "gaussian"}),
"noise_stdev": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step":0.01, "round": False, }),
"noise_seed": ("INT", {"default": 0, "min": -1, "max": 0xffffffffffffffff}),
"sampler_mode": (['standard', 'unsample', 'resample'],),
"scheduler": (get_res4lyf_scheduler_list(), {"default": "beta57"},),
"steps": ("INT", {"default": 30, "min": 1, "max": 10000}),
"denoise": ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step":0.01}),
"denoise_alt": ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step":0.01}),
"cfg": ("FLOAT", {"default": 3.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Negative values use channelwise CFG." }),
},
"optional":
{
"positive": ("CONDITIONING", ),
"negative": ("CONDITIONING", ),
"sampler": ("SAMPLER", ),
"sigmas": ("SIGMAS", ),
"latent_image": ("LATENT", ),
"options": ("OPTIONS", ),
"extra_options": ("STRING", {"default": "", "multiline": True}),
}
}
RETURN_TYPES = ("LATENT","LATENT", "LATENT",)
RETURN_NAMES = ("output", "denoised","sde_noise",)
FUNCTION = "main"
CATEGORY = "RES4LYF/legacy/samplers"
DEPRECATED = True
def main(self, model, cfg, scheduler, steps, sampler_mode="standard", denoise=1.0, denoise_alt=1.0,
         noise_type_init="gaussian", latent_image=None,
         positive=None, negative=None, sampler=None, sigmas=None, latent_noise=None, latent_noise_match=None,
         noise_stdev=1.0, noise_mean=0.0, noise_normalize=True,
         d_noise=1.0, alpha_init=-1.0, k_init=1.0, cfgpp=0.0, noise_seed=-1,
         options=None, sde_noise=None, sde_noise_steps=1,
         extra_options="",
         ):
    """Run custom sampling over a (possibly batched) latent.

    Prepares initial noise and conditioning, forwards node settings into the
    sampler's extra_options, then calls comfy.sample.sample_custom once per
    batch item (and once per sde_noise_steps iteration, collecting SDE noise
    along the way when unsampling).

    Returns:
        (output, denoised, sde_noise): two latent dicts (with fp64 shadow
        copies under 'samples_fp64') and the list of collected SDE noise.
    """
    # blame comfy here -- extra per-run state is smuggled through the latent dict.
    raw_x = latent_image['raw_x'] if 'raw_x' in latent_image else None
    last_seed = latent_image['last_seed'] if 'last_seed' in latent_image else None

    # Copy conditioning so the caller's lists are not mutated below.
    pos_cond = copy_cond(positive)
    neg_cond = copy_cond(negative)

    if sampler is None:
        raise ValueError("sampler is required")
    else:
        sampler = copy.deepcopy(sampler)

    # Computation dtype for sigmas/latents; overridable via the extra_options text.
    default_dtype = getattr(torch, get_extra_options_kv("default_dtype", "float64", extra_options), torch.float64)

    model = model.clone()

    # Regional conditioning: regenerate conditionings/masks for this latent's
    # shape and attach them to the cloned model as patches.
    if pos_cond[0][1] is not None:
        if "regional_conditioning_weights" in pos_cond[0][1]:
            sampler.extra_options['regional_conditioning_weights'] = pos_cond[0][1]['regional_conditioning_weights']
            sampler.extra_options['regional_conditioning_floors'] = pos_cond[0][1]['regional_conditioning_floors']
            regional_generate_conditionings_and_masks_fn = pos_cond[0][1]['regional_generate_conditionings_and_masks_fn']
            regional_conditioning, regional_mask = regional_generate_conditionings_and_masks_fn(latent_image['samples'])
            regional_conditioning = copy.deepcopy(regional_conditioning)
            regional_mask = copy.deepcopy(regional_mask)
            model.set_model_patch(regional_conditioning, 'regional_conditioning_positive')
            model.set_model_patch(regional_mask, 'regional_conditioning_mask')

    # Forward node-level settings into the sampler's extra_options.
    if "noise_seed" in sampler.extra_options:
        if sampler.extra_options['noise_seed'] == -1 and noise_seed != -1:
            # Offset by one so the SDE noise stream differs from the init noise.
            sampler.extra_options['noise_seed'] = noise_seed + 1
            #print("Shark: setting clown noise seed to: ", sampler.extra_options['noise_seed'])

    if "sampler_mode" in sampler.extra_options:
        sampler.extra_options['sampler_mode'] = sampler_mode

    if "extra_options" in sampler.extra_options:
        # Merge this node's extra_options string with the sampler's own.
        extra_options += " "
        extra_options += sampler.extra_options['extra_options']
        sampler.extra_options['extra_options'] = extra_options

    batch_size = int(get_extra_options_kv("batch_size", "1", extra_options))
    if batch_size > 1:
        latent_image['samples'] = latent_image['samples'].repeat(batch_size, 1, 1, 1)

    latent_image_batch = {"samples": latent_image['samples']}
    out_samples, out_samples_fp64, out_denoised_samples, out_denoised_samples_fp64 = [], [], [], []
    for batch_num in range(latent_image_batch['samples'].shape[0]):
        # Sample each batch item independently with its own seed.
        latent_unbatch = copy.deepcopy(latent_image)
        latent_unbatch['samples'] = latent_image_batch['samples'][batch_num].clone().unsqueeze(0)

        if noise_seed == -1:
            seed = torch.initial_seed() + 1 + batch_num
        else:
            seed = noise_seed + batch_num

        torch.manual_seed(seed)
        torch.cuda.manual_seed(seed)
        #torch.cuda.manual_seed_all(seed)

        # An OPTIONS input overrides the node widget values.
        if options is not None:
            noise_stdev = options.get('noise_init_stdev', noise_stdev)
            noise_mean = options.get('noise_init_mean', noise_mean)
            noise_type_init = options.get('noise_type_init', noise_type_init)
            d_noise = options.get('d_noise', d_noise)
            alpha_init = options.get('alpha_init', alpha_init)
            k_init = options.get('k_init', k_init)
            sde_noise = options.get('sde_noise', sde_noise)
            sde_noise_steps = options.get('sde_noise_steps', sde_noise_steps)

        latent_image_dtype = latent_unbatch['samples'].dtype

        # Default (zero) conditioning shapes differ between Flux-style models
        # (256 tokens / 768-dim pooled) and everything else (154 / 2048).
        if isinstance(model.model.model_config, comfy.supported_models.Flux) or isinstance(model.model.model_config, comfy.supported_models.FluxSchnell):
            if pos_cond is None:
                pos_cond = [[
                    torch.zeros((1, 256, 4096)),
                    {'pooled_output': torch.zeros((1, 768))}
                ]]

            if extra_options_flag("uncond_ortho_flux", extra_options):
                if neg_cond is None:
                    print("uncond_ortho_flux: using random negative conditioning...")
                    neg_cond = [[
                        torch.randn((1, 256, 4096)),
                        {'pooled_output': torch.randn((1, 768))}
                    ]]
                #neg_cond[0][0] = get_orthogonal(neg_cond[0][0].to(torch.bfloat16), pos_cond[0][0].to(torch.bfloat16))
                #neg_cond[0][1]['pooled_output'] = get_orthogonal(neg_cond[0][1]['pooled_output'].to(torch.bfloat16), pos_cond[0][1]['pooled_output'].to(torch.bfloat16))
                # Project the negative conditioning orthogonal to the positive.
                neg_cond[0][0] = get_orthogonal(neg_cond[0][0], pos_cond[0][0])
                neg_cond[0][1]['pooled_output'] = get_orthogonal(neg_cond[0][1]['pooled_output'], pos_cond[0][1]['pooled_output'])

            if neg_cond is None:
                neg_cond = [[
                    torch.zeros((1, 256, 4096)),
                    {'pooled_output': torch.zeros((1, 768))}
                ]]
        else:
            if pos_cond is None:
                pos_cond = [[
                    torch.zeros((1, 154, 4096)),
                    {'pooled_output': torch.zeros((1, 2048))}
                ]]

            if extra_options_flag("uncond_ortho_sd35", extra_options):
                if neg_cond is None:
                    neg_cond = [[
                        torch.randn((1, 154, 4096)),
                        {'pooled_output': torch.randn((1, 2048))}
                    ]]
                neg_cond[0][0] = get_orthogonal(neg_cond[0][0], pos_cond[0][0])
                neg_cond[0][1]['pooled_output'] = get_orthogonal(neg_cond[0][1]['pooled_output'], pos_cond[0][1]['pooled_output'])

            if neg_cond is None:
                neg_cond = [[
                    torch.zeros((1, 154, 4096)),
                    {'pooled_output': torch.zeros((1, 2048))}
                ]]

        # Optional zeroing of parts of the conditioning via extra_options flags.
        if extra_options_flag("zero_uncond_t5", extra_options):
            neg_cond[0][0] = torch.zeros_like(neg_cond[0][0])

        if extra_options_flag("zero_uncond_pooled_output", extra_options):
            neg_cond[0][1]['pooled_output'] = torch.zeros_like(neg_cond[0][1]['pooled_output'])

        if extra_options_flag("zero_pooled_output", extra_options):
            pos_cond[0][1]['pooled_output'] = torch.zeros_like(pos_cond[0][1]['pooled_output'])
            neg_cond[0][1]['pooled_output'] = torch.zeros_like(neg_cond[0][1]['pooled_output'])

        # A negative denoise_alt flips into a positive d_noise/denoise_alt pair.
        if denoise_alt < 0:
            d_noise = denoise_alt = -denoise_alt
        if options is not None:
            d_noise = options.get('d_noise', d_noise)

        if sigmas is not None:
            sigmas = sigmas.clone().to(default_dtype)
        else:
            sigmas = get_sigmas(model, scheduler, steps, denoise).to(default_dtype)
        sigmas *= denoise_alt

        if sampler_mode.startswith("unsample"):
            # Reverse the schedule and terminate at sigma=0.
            null = torch.tensor([0.0], device=sigmas.device, dtype=sigmas.dtype)
            sigmas = torch.flip(sigmas, dims=[0])
            sigmas = torch.cat([sigmas, null])
        elif sampler_mode.startswith("resample"):
            # Bracket the schedule with sigma=0 at both ends.
            null = torch.tensor([0.0], device=sigmas.device, dtype=sigmas.dtype)
            sigmas = torch.cat([null, sigmas])
            sigmas = torch.cat([sigmas, null])

        x = latent_unbatch["samples"].clone().to(default_dtype)

        # Prefer the fp64 shadow copy when it still closely matches the latent.
        if latent_unbatch is not None:
            if "samples_fp64" in latent_unbatch:
                if latent_unbatch['samples'].shape == latent_unbatch['samples_fp64'].shape:
                    if torch.norm(latent_unbatch['samples'] - latent_unbatch['samples_fp64']) < 0.01:
                        x = latent_unbatch["samples_fp64"].clone()

        if latent_noise is not None:
            latent_noise_samples = latent_noise["samples"].clone().to(default_dtype)
        if latent_noise_match is not None:
            latent_noise_match_samples = latent_noise_match["samples"].clone().to(default_dtype)

        truncate_conditioning = extra_options_flag("truncate_conditioning", extra_options)
        # NOTE(review): extra_options_flag() looks like a boolean flag check, so
        # these comparisons against "true"/"true_and_zero_neg" strings may be
        # dead code -- confirm against the helper's definition.
        if truncate_conditioning == "true" or truncate_conditioning == "true_and_zero_neg":
            if pos_cond is not None:
                pos_cond[0][0] = pos_cond[0][0].clone().to(default_dtype)
                pos_cond[0][1]["pooled_output"] = pos_cond[0][1]["pooled_output"].clone().to(default_dtype)
            if neg_cond is not None:
                neg_cond[0][0] = neg_cond[0][0].clone().to(default_dtype)
                neg_cond[0][1]["pooled_output"] = neg_cond[0][1]["pooled_output"].clone().to(default_dtype)
            c = []
            # NOTE(review): this pos_cond loop has no effect -- d and
            # pooled_output are immediately overwritten by the neg_cond loop.
            for t in pos_cond:
                d = t[1].copy()
                pooled_output = d.get("pooled_output", None)
            for t in neg_cond:
                d = t[1].copy()
                pooled_output = d.get("pooled_output", None)
                if pooled_output is not None:
                    if truncate_conditioning == "true_and_zero_neg":
                        d["pooled_output"] = torch.zeros((1,2048), dtype=t[0].dtype, device=t[0].device)
                        n = [torch.zeros((1,154,4096), dtype=t[0].dtype, device=t[0].device), d]
                    else:
                        # Truncate to SD3.5-style shapes.
                        d["pooled_output"] = d["pooled_output"][:, :2048]
                        n = [t[0][:, :154, :4096], d]
                    c.append(n)
            neg_cond = c

        sigmin = model.model.model_sampling.sigma_min
        sigmax = model.model.model_sampling.sigma_max

        # When unsampling without supplied SDE noise, collect it across iterations.
        if sde_noise is None and sampler_mode.startswith("unsample"):
            total_steps = len(sigmas)+1
            sde_noise = []
        else:
            total_steps = 1

        for total_steps_iter in range(sde_noise_steps):
            # Build the initial noise for this iteration.
            if noise_type_init == "none":
                noise = torch.zeros_like(x)
            elif latent_noise is None:
                print("Initial latent noise seed: ", seed)
                noise_sampler_init = NOISE_GENERATOR_CLASSES_SIMPLE.get(noise_type_init)(x=x, seed=seed, sigma_min=sigmin, sigma_max=sigmax)
                if noise_type_init == "fractal":
                    noise_sampler_init.alpha = alpha_init
                    noise_sampler_init.k = k_init
                    noise_sampler_init.scale = 0.1
                noise = noise_sampler_init(sigma=sigmax, sigma_next=sigmin)
            else:
                noise = latent_noise_samples

            # Normalize per-channel, then apply stdev/mean controls.
            if noise_normalize and noise.std() > 0:
                noise = (noise - noise.mean(dim=(-2, -1), keepdim=True)) / noise.std(dim=(-2, -1), keepdim=True)
            #noise.sub_(noise.mean()).div_(noise.std())
            noise *= noise_stdev
            noise = (noise - noise.mean()) + noise_mean

            # Match per-channel means to the reference latent, if provided.
            if latent_noise_match is not None:
                for i in range(latent_noise_match_samples.shape[1]):
                    noise[0][i] = (noise[0][i] - noise[0][i].mean())
                    noise[0][i] = (noise[0][i]) + latent_noise_match_samples[0][i].mean()

            noise_mask = latent_unbatch["noise_mask"] if "noise_mask" in latent_unbatch else None

            x0_output = {}

            # Negative cfg is a sentinel: store its magnitude as cfg_cw and sample with cfg=1.
            if cfg < 0:
                sampler.extra_options['cfg_cw'] = -cfg
                cfg = 1.0
            else:
                sampler.extra_options.pop("cfg_cw", None)

            if sde_noise is None:
                sde_noise = []
            else:
                # Standardize each channel of the supplied SDE noise.
                sde_noise = copy.deepcopy(sde_noise)
                for i in range(len(sde_noise)):
                    sde_noise[i] = sde_noise[i]
                    for j in range(sde_noise[i].shape[1]):
                        sde_noise[i][0][j] = ((sde_noise[i][0][j] - sde_noise[i][0][j].mean()) / sde_noise[i][0][j].std())

            callback = latent_preview.prepare_callback(model, sigmas.shape[-1] - 1, x0_output)
            disable_pbar = not comfy.utils.PROGRESS_BAR_ENABLED

            # Hand carried-over state to the diffusion model for this run.
            model.model.diffusion_model.raw_x = raw_x
            model.model.diffusion_model.last_seed = last_seed

            samples = comfy.sample.sample_custom(model, noise, cfg, sampler, sigmas, pos_cond, neg_cond, x.clone(), noise_mask=noise_mask, callback=callback, disable_pbar=disable_pbar, seed=noise_seed)

            out = latent_unbatch.copy()
            out["samples"] = samples
            if "x0" in x0_output:
                out_denoised = latent_unbatch.copy()
                out_denoised["samples"] = model.model.process_latent_out(x0_output["x0"].cpu())
            else:
                out_denoised = out

            # Keep an fp64 shadow copy alongside the caller-dtype samples.
            out["samples_fp64"] = out["samples"].clone()
            out["samples"] = out["samples"].to(latent_image_dtype)
            out_denoised["samples_fp64"] = out_denoised["samples"].clone()
            out_denoised["samples"] = out_denoised["samples"].to(latent_image_dtype)

            out_samples.append(out["samples"])
            out_samples_fp64.append(out["samples_fp64"])
            out_denoised_samples.append(out_denoised["samples"])
            out_denoised_samples_fp64.append(out_denoised["samples_fp64"])

            seed += 1
            torch.manual_seed(seed)

            if total_steps_iter > 1:
                sde_noise.append(out["samples_fp64"])

    # Re-batch the per-item results.
    out_samples = [tensor.squeeze(0) for tensor in out_samples]
    out_samples_fp64 = [tensor.squeeze(0) for tensor in out_samples_fp64]
    out_denoised_samples = [tensor.squeeze(0) for tensor in out_denoised_samples]
    out_denoised_samples_fp64 = [tensor.squeeze(0) for tensor in out_denoised_samples_fp64]

    out['samples'] = torch.stack(out_samples, dim=0)
    out['samples_fp64'] = torch.stack(out_samples_fp64, dim=0)
    out_denoised['samples'] = torch.stack(out_denoised_samples, dim=0)
    out_denoised['samples_fp64'] = torch.stack(out_denoised_samples_fp64, dim=0)

    # Recover carried-over state from the diffusion model, then detach it so it
    # does not leak into unrelated runs.
    out['raw_x'] = None
    if hasattr(model.model.diffusion_model, "raw_x"):
        if model.model.diffusion_model.raw_x is not None:
            out['raw_x'] = model.model.diffusion_model.raw_x.clone()
        del model.model.diffusion_model.raw_x

    out['last_seed'] = None
    if hasattr(model.model.diffusion_model, "last_seed"):
        if model.model.diffusion_model.last_seed is not None:
            out['last_seed'] = model.model.diffusion_model.last_seed
        del model.model.diffusion_model.last_seed

    return ( out, out_denoised, sde_noise,)
class ClownsharKSampler:
    """Legacy combined node: builds a ClownSamplerAdvanced (the per-step SDE
    sampler) and hands it to SharkSamplerAlpha, which performs the full run
    (noise/conditioning/schedule setup and the sampling loop).
    """
    @classmethod
    def INPUT_TYPES(s):
        return {"required":
                    {"model": ("MODEL",),
                    "noise_type_init": (NOISE_GENERATOR_NAMES_SIMPLE, {"default": "gaussian"}),
                    "noise_type_sde": (NOISE_GENERATOR_NAMES_SIMPLE, {"default": "gaussian"}),
                    "noise_mode_sde": (NOISE_MODE_NAMES, {"default": 'hard', "tooltip": "How noise scales with the sigma schedule. Hard is the most aggressive, the others start strong and drop rapidly."}),
                    "eta": ("FLOAT", {"default": 0.5, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Calculated noise amount to be added, then removed, after each step."}),
                    "noise_seed": ("INT", {"default": 0, "min": -1, "max": 0xffffffffffffffff}),
                    "sampler_mode": (['standard', 'unsample', 'resample'],),
                    "sampler_name": (RK_SAMPLER_NAMES, {"default": "res_2m"}),
                    "implicit_sampler_name": (IRK_SAMPLER_NAMES, {"default": "explicit_diagonal"}),
                    "scheduler": (get_res4lyf_scheduler_list(), {"default": "beta57"},),
                    "steps": ("INT", {"default": 30, "min": 1, "max": 10000}),
                    "implicit_steps": ("INT", {"default": 0, "min": 0, "max": 10000}),
                    "denoise": ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step":0.01}),
                    "denoise_alt": ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step":0.01}),
                    "cfg": ("FLOAT", {"default": 3.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, }),
                    "extra_options": ("STRING", {"default": "", "multiline": True}),
                    },
                "optional":
                    {
                    "positive": ("CONDITIONING", ),
                    "negative": ("CONDITIONING", ),
                    "sigmas": ("SIGMAS", ),
                    "latent_image": ("LATENT", ),
                    "guides": ("GUIDES", ),
                    "options": ("OPTIONS", ),
                    "automation": ("AUTOMATION", ),
                    }
                }

    RETURN_TYPES = ("LATENT","LATENT", "LATENT",)
    RETURN_NAMES = ("output", "denoised","sde_noise",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/legacy/samplers"
    DEPRECATED = True

    def main(self, model, cfg, sampler_mode, scheduler, steps, denoise=1.0, denoise_alt=1.0,
             noise_type_init="gaussian", noise_type_sde="brownian", noise_mode_sde="hard", latent_image=None,
             positive=None, negative=None, sigmas=None, latent_noise=None, latent_noise_match=None,
             noise_stdev=1.0, noise_mean=0.0, noise_normalize=True, noise_is_latent=False,
             eta=0.25, eta_var=0.0, d_noise=1.0, s_noise=1.0, alpha_init=-1.0, k_init=1.0, alpha_sde=-1.0, k_sde=1.0, cfgpp=0.0, c1=0.0, c2=0.5, c3=1.0, noise_seed=-1, sampler_name="res_2m", implicit_sampler_name="default",
             t_fn_formula=None, sigma_fn_formula=None, implicit_steps=0,
             latent_guide=None, latent_guide_inv=None, guide_mode="blend", latent_guide_weights=None, latent_guide_weights_inv=None, latent_guide_mask=None, latent_guide_mask_inv=None, rescale_floor=True, sigmas_override=None,
             shift=3.0, base_shift=0.85, guides=None, options=None, sde_noise=None, sde_noise_steps=1, shift_scaling="exponential",
             extra_options="", automation=None, etas=None, s_noises=None, unsample_resample_scales=None, regional_conditioning_weights=None, frame_weights_grp=None,
             ):
        """Assemble the Clown SDE sampler, then delegate the full run to SharkSamplerAlpha."""
        # The SDE noise stream gets its own seed, offset from the init-noise seed
        # (or left at the -1 "pick automatically" sentinel).
        noise_seed_sde = noise_seed + 1 if noise_seed >= 0 else -1

        # In this legacy node the substep settings simply mirror the per-step
        # eta / noise mode / noise type.
        clown = ClownSamplerAdvanced().main(
            noise_type_sde=noise_type_sde, noise_type_sde_substep=noise_type_sde, noise_mode_sde=noise_mode_sde,
            eta=eta, eta_var=eta_var, d_noise=d_noise, s_noise=s_noise, alpha_sde=alpha_sde, k_sde=k_sde, cfgpp=cfgpp, c1=c1, c2=c2, c3=c3, noise_seed_sde=noise_seed_sde, sampler_name=sampler_name, implicit_sampler_name=implicit_sampler_name,
            t_fn_formula=t_fn_formula, sigma_fn_formula=sigma_fn_formula, implicit_steps=implicit_steps,
            latent_guide=latent_guide, latent_guide_inv=latent_guide_inv, guide_mode=guide_mode, latent_guide_weights=latent_guide_weights, latent_guide_weights_inv=latent_guide_weights_inv, latent_guide_mask=latent_guide_mask, latent_guide_mask_inv=latent_guide_mask_inv, rescale_floor=rescale_floor, sigmas_override=sigmas_override,
            guides=guides, options=options, sde_noise=sde_noise, sde_noise_steps=sde_noise_steps,
            extra_options=extra_options, automation=automation, etas=etas, s_noises=s_noises, unsample_resample_scales=unsample_resample_scales, regional_conditioning_weights=regional_conditioning_weights, frame_weights_grp=frame_weights_grp, eta_substep=eta, noise_mode_sde_substep=noise_mode_sde,
        )

        return SharkSamplerAlpha().main(
            model=model, cfg=cfg, sampler_mode=sampler_mode, scheduler=scheduler, steps=steps,
            denoise=denoise, denoise_alt=denoise_alt, noise_type_init=noise_type_init,
            latent_image=latent_image, positive=positive, negative=negative, sampler=clown[0],
            sigmas=sigmas, latent_noise=latent_noise, latent_noise_match=latent_noise_match,
            noise_stdev=noise_stdev, noise_mean=noise_mean, noise_normalize=noise_normalize,
            d_noise=d_noise, alpha_init=alpha_init, k_init=k_init, cfgpp=cfgpp, noise_seed=noise_seed,
            options=options, sde_noise=sde_noise, sde_noise_steps=sde_noise_steps,
            extra_options=extra_options)
class UltraSharkSampler:
    # for use with https://github.com/ClownsharkBatwing/UltraCascade
    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "model": ("MODEL",),
                "add_noise": ("BOOLEAN", {"default": True}),
                "normalize_noise": ("BOOLEAN", {"default": False}),
                "noise_type": (NOISE_GENERATOR_NAMES, ),
                "alpha": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step":0.1, "round": 0.01}),
                "k": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step":2.0, "round": 0.01}),
                "noise_seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}),
                "cfg": ("FLOAT", {"default": 6.0, "min": 0.0, "max": 100.0, "step":0.5, "round": 0.01}),
                "positive": ("CONDITIONING", ),
                "negative": ("CONDITIONING", ),
                "sampler": ("SAMPLER", ),
                "sigmas": ("SIGMAS", ),
                "latent_image": ("LATENT", ),
                "guide_type": (['residual', 'weighted'], ),
                "guide_weight": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": 0.01}),
            },
            "optional": {
                #"latent_noise": ("LATENT", ),
                "guide": ("LATENT",),
                "guide_weights": ("SIGMAS",),
                #"style": ("CONDITIONING", ),
                #"img_style": ("CONDITIONING", ),
            }
        }

    # NOTE(review): three outputs are declared here, but main() returns only
    # (out, out_denoised) -- the "latent_batch" output appears to be missing.
    RETURN_TYPES = ("LATENT","LATENT","LATENT")
    RETURN_NAMES = ("output", "denoised_output", "latent_batch")
    FUNCTION = "main"
    CATEGORY = "RES4LYF/legacy/samplers/UltraCascade"
    DESCRIPTION = "For use with Stable Cascade and UltraCascade."
    DEPRECATED = True

    def main(self, model, add_noise, normalize_noise, noise_type, noise_seed, cfg, alpha, k, positive, negative, sampler,
             sigmas, guide_type, guide_weight, latent_image, latent_noise=None, guide=None, guide_weights=None, style=None, img_style=None):
        """Sample a Stable Cascade / UltraCascade model, wiring an optional
        low-res guide into the relevant stage before running sample_custom."""
        if model.model.model_config.unet_config.get('stable_cascade_stage') == 'up':
            # UltraCascade "up" stage: prefer the ultracascade replace-patch if
            # one is installed; otherwise configure the diffusion model directly.
            model = model.clone()
            x_lr = guide['samples'] if guide is not None else None
            guide_weights = initialize_or_scale(guide_weights, guide_weight, 10000)#("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step":0.01}),
            #model.model.diffusion_model.set_guide_weights(guide_weights=guide_weights)
            #model.model.diffusion_model.set_guide_type(guide_type=guide_type)
            #model.model.diffusion_model.set_x_lr(x_lr=x_lr)
            patch = model.model_options.get("transformer_options", {}).get("patches_replace", {}).get("ultracascade", {}).get("main")
            if patch is not None:
                patch.update(x_lr=x_lr, guide_weights=guide_weights, guide_type=guide_type)
            else:
                model.model.diffusion_model.set_sigmas_schedule(sigmas_schedule=sigmas)
                model.model.diffusion_model.set_sigmas_prev(sigmas_prev=sigmas[:1])
                model.model.diffusion_model.set_guide_weights(guide_weights=guide_weights)
                model.model.diffusion_model.set_guide_type(guide_type=guide_type)
                model.model.diffusion_model.set_x_lr(x_lr=x_lr)

        elif model.model.model_config.unet_config['stable_cascade_stage'] == 'b':
            # Stage B: inject the stage-C prior into the positive conditioning
            # and build a zeroed negative conditioning.
            c_pos, c_neg = [], []
            for t in positive:
                d_pos = t[1].copy()
                d_neg = t[1].copy()
                d_pos['stable_cascade_prior'] = guide['samples']
                pooled_output = d_neg.get("pooled_output", None)
                if pooled_output is not None:
                    d_neg["pooled_output"] = torch.zeros_like(pooled_output)
                c_pos.append([t[0], d_pos])
                c_neg.append([torch.zeros_like(t[0]), d_neg])
            positive = c_pos
            negative = c_neg

        if style is not None:
            model.set_model_patch(style, 'style_cond')
        if img_style is not None:
            model.set_model_patch(img_style,'img_style_cond')
        # 1, 768     clip_style[0][0][1]['unclip_conditioning'][0]['clip_vision_output'].image_embeds.shape
        # 1, 1280    clip_style[0][0][1]['pooled_output'].shape
        # 1, 77, 1280  clip_style[0][0][0].shape

        latent = latent_image
        latent_image = latent["samples"]
        torch.manual_seed(noise_seed)

        # Initial noise: zeros / generated / taken from the latent_noise input.
        if not add_noise:
            noise = torch.zeros(latent_image.size(), dtype=latent_image.dtype, layout=latent_image.layout, device="cpu")
        elif latent_noise is None:
            batch_inds = latent["batch_index"] if "batch_index" in latent else None
            noise = prepare_noise(latent_image, noise_seed, noise_type, batch_inds, alpha, k)
        else:
            noise = latent_noise["samples"]#.to(torch.float64)

        if normalize_noise and noise.std() > 0:
            noise = (noise - noise.mean(dim=(-2, -1), keepdim=True)) / noise.std(dim=(-2, -1), keepdim=True)

        noise_mask = None
        if "noise_mask" in latent:
            noise_mask = latent["noise_mask"]

        x0_output = {}
        callback = latent_preview.prepare_callback(model, sigmas.shape[-1] - 1, x0_output)
        disable_pbar = False

        samples = comfy.sample.sample_custom(model, noise, cfg, sampler, sigmas, positive, negative, latent_image,
                                             noise_mask=noise_mask, callback=callback, disable_pbar=disable_pbar,
                                             seed=noise_seed)

        out = latent.copy()
        out["samples"] = samples
        if "x0" in x0_output:
            out_denoised = latent.copy()
            out_denoised["samples"] = model.model.process_latent_out(x0_output["x0"].cpu())
        else:
            out_denoised = out
        # NOTE(review): only two values returned despite three declared outputs.
        return (out, out_denoised)
================================================
FILE: legacy/samplers_extensions.py
================================================
from .noise_classes import NOISE_GENERATOR_CLASSES, NOISE_GENERATOR_CLASSES_SIMPLE, NOISE_GENERATOR_NAMES, NOISE_GENERATOR_NAMES_SIMPLE
import comfy.sample
import comfy.sampler_helpers
import comfy.model_sampling
import comfy.latent_formats
import comfy.sd
import comfy.supported_models
from .conditioning import FluxRegionalPrompt, FluxRegionalConditioning
from .models import ReFluxPatcher
import torch
import torch.nn.functional as F
import copy
from .helper import initialize_or_scale, get_res4lyf_scheduler_list
def move_to_same_device(*tensors):
    """Return the given tensors, each moved onto the first tensor's device.

    Called with no arguments, returns the empty tuple unchanged.
    """
    if not tensors:
        return tensors
    target = tensors[0].device
    return tuple(t.to(target) for t in tensors)
class SamplerOptions_TimestepScaling:
    # for patching the t_fn and sigma_fn (sigma <-> timestep) formulas to allow
    # picking Runge-Kutta Ci values ("midpoints") with different scaling.
    @classmethod
    def INPUT_TYPES(s):
        required = {
            "sampler": ("SAMPLER", ),
            "t_fn_formula": ("STRING", {"default": "1/((sigma).exp()+1)", "multiline": True}),
            "sigma_fn_formula": ("STRING", {"default": "((1-t)/t).log()", "multiline": True}),
        }
        return {"required": required, "optional": {}}

    RETURN_TYPES = ("SAMPLER",)
    RETURN_NAMES = ("sampler",)
    FUNCTION = "set_sampler_extra_options"
    CATEGORY = "RES4LYF/legacy/sampler_extensions"
    DESCRIPTION = "Patches ClownSampler's t_fn and sigma_fn (sigma <-> timestep) formulas to allow picking Runge-Kutta Ci values (midpoints) with different scaling."
    DEPRECATED = True

    def set_sampler_extra_options(self, sampler, t_fn_formula=None, sigma_fn_formula=None, ):
        """Return a deep copy of `sampler` with both formula strings stored in
        its extra_options; the input sampler is left untouched."""
        patched = copy.deepcopy(sampler)
        patched.extra_options['t_fn_formula'] = t_fn_formula
        patched.extra_options['sigma_fn_formula'] = sigma_fn_formula
        return (patched, )
class SamplerOptions_GarbageCollection:
    @classmethod
    def INPUT_TYPES(s):
        required = {
            "sampler": ("SAMPLER", ),
            "garbage_collection": ("BOOLEAN", {"default": True}),
        }
        return {"required": required, "optional": {}}

    RETURN_TYPES = ("SAMPLER",)
    RETURN_NAMES = ("sampler",)
    FUNCTION = "set_sampler_extra_options"
    CATEGORY = "RES4LYF/legacy/sampler_extensions"
    DESCRIPTION = "Patches ClownSampler to use garbage collection after every step. This can help with OOM issues during inference for large models like Flux. The tradeoff is slower sampling."
    DEPRECATED = True

    def set_sampler_extra_options(self, sampler, garbage_collection):
        """Return a deep copy of `sampler` with the GARBAGE_COLLECT flag set in
        its extra_options; the input sampler is left untouched."""
        patched = copy.deepcopy(sampler)
        patched.extra_options['GARBAGE_COLLECT'] = garbage_collection
        return (patched, )
# Guide-mode option list shared by the guide/inpaint nodes defined below.
GUIDE_MODE_NAMES = ["unsample",
                    "resample",
                    "epsilon",
                    "epsilon_projection",
                    "epsilon_dynamic_mean",
                    "epsilon_dynamic_mean_std",
                    "epsilon_dynamic_mean_from_bkg",
                    "epsilon_guide_mean_std_from_bkg",
                    "hard_light",
                    "blend",
                    "blend_projection",
                    "mean_std",
                    "mean",
                    "mean_tiled",
                    "std",
                    "data",
                    #"data_projection",
                    "none",
]
class ClownInpaint: ##################################################################################################################################
    """Inpainting setup node: packs a GUIDES tuple from the latent, builds a
    two-zone regional conditioning (inpaint region + background) and patches
    the model with ReFlux.

    Fixes vs. original: `== None` replaced with `is None` (identity test, per
    PEP 8), and the duplicated nested `if positive_bkg is None:` collapsed to
    a single check.
    """
    @classmethod
    def INPUT_TYPES(s):
        return {"required":
                    {#"guide_mode": (GUIDE_MODE_NAMES, {"default": 'epsilon', "tooltip": "Recommended: epsilon or mean/mean_std with sampler_mode = standard, and unsample/resample with sampler_mode = unsample/resample. Epsilon_dynamic_mean, etc. are only used with two latent inputs and a mask. Blend/hard_light/mean/mean_std etc. require low strengths, start with 0.01-0.02."}),
                    "guide_weight": ("FLOAT", {"default": 0.10, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide."}),
                    "guide_weight_bkg": ("FLOAT", {"default": 1.00, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide_bkg."}),
                    "guide_weight_scheduler": (["constant"] + get_res4lyf_scheduler_list(), {"default": "beta57"},),
                    "guide_weight_scheduler_bkg": (["constant"] + get_res4lyf_scheduler_list(), {"default": "constant"},),
                    "guide_end_step": ("INT", {"default": 15, "min": 1, "max": 10000}),
                    "guide_bkg_end_step": ("INT", {"default": 10000, "min": 1, "max": 10000}),
                    },
                "optional":
                    {
                    "model": ("MODEL", ),
                    "positive_inpaint": ("CONDITIONING", ),
                    "positive_bkg": ("CONDITIONING", ),
                    "negative": ("CONDITIONING", ),
                    "latent_image": ("LATENT", ),
                    "mask": ("MASK", ),
                    "guide_weights": ("SIGMAS", ),
                    "guide_weights_bkg": ("SIGMAS", ),
                    }
                }

    RETURN_TYPES = ("MODEL","CONDITIONING","CONDITIONING","LATENT","GUIDES",)
    RETURN_NAMES = ("model","positive" ,"negative" ,"latent","guides",)
    CATEGORY = "RES4LYF/legacy/sampler_extensions"
    FUNCTION = "main"
    DEPRECATED = True

    def main(self, guide_weight_scheduler="constant", guide_weight_scheduler_bkg="constant", guide_end_step=10000, guide_bkg_end_step=30, guide_weight_scale=1.0, guide_weight_bkg_scale=1.0, guide=None, guide_bkg=None, guide_weight=1.0, guide_weight_bkg=1.0,
             guide_mode="epsilon", guide_weights=None, guide_weights_bkg=None, guide_mask_bkg=None,
             model=None, positive_inpaint=None, positive_bkg=None, negative=None, latent_image=None, mask=None,
             ):
        """Build (model, positive, negative, latent, guides) for inpainting runs."""
        default_dtype = torch.float64

        # The latent itself is the guide; its clone is the background guide.
        guide = latent_image
        guide_bkg = {'samples': latent_image['samples'].clone()}

        max_steps = 10000

        denoise, denoise_bkg = guide_weight_scale, guide_weight_bkg_scale

        # NOTE(review): guide_bkg is always assigned a dict above, so this
        # fallback cannot trigger as written -- kept for parity with the
        # other guide nodes. (`is None` replaces the original `== None`.)
        if guide_mode.startswith("epsilon_") and not guide_mode.startswith("epsilon_projection") and guide_bkg is None:
            print("Warning: need two latent inputs for guide_mode=",guide_mode," to work. Falling back to epsilon.")
            guide_mode = "epsilon"

        # Constant scheduler: flat weight vector, zero-padded out to max_steps.
        if guide_weight_scheduler == "constant":
            guide_weights = initialize_or_scale(None, guide_weight, guide_end_step).to(default_dtype)
            guide_weights = F.pad(guide_weights, (0, max_steps), value=0.0)

        if guide_weight_scheduler_bkg == "constant":
            guide_weights_bkg = initialize_or_scale(None, guide_weight_bkg, guide_bkg_end_step).to(default_dtype)
            guide_weights_bkg = F.pad(guide_weights_bkg, (0, max_steps), value=0.0)

        guides = (guide_mode, guide_weight, guide_weight_bkg, guide_weights, guide_weights_bkg, guide, guide_bkg, mask, guide_mask_bkg,
                  guide_weight_scheduler, guide_weight_scheduler_bkg, guide_end_step, guide_bkg_end_step, denoise, denoise_bkg)

        # The sampler starts from a zero latent; the real image travels via guides.
        latent = {'samples': torch.zeros_like(latent_image['samples'])}

        if (positive_inpaint is None) and (positive_bkg is None):
            positive = None
        else:
            # Default background prompt: zero Flux-shaped conditioning.
            # (Original had this None-check nested twice; collapsed to one.)
            if positive_bkg is None:
                positive_bkg = [[
                    torch.zeros((1, 256, 4096)),
                    {'pooled_output': torch.zeros((1, 768))}
                ]]
            # Region 1: inpaint prompt under `mask`; region 2: background under
            # the inverse mask returned by the first call.
            cond_regional, mask_inv = FluxRegionalPrompt().main(cond=positive_inpaint, mask=mask)
            cond_regional, _ = FluxRegionalPrompt().main(cond=positive_bkg, cond_regional=cond_regional, mask=mask_inv)

            positive, = FluxRegionalConditioning().main(conditioning_regional=cond_regional, self_attn_floor=0.0)
            model, = ReFluxPatcher().main(model, enable=True)

        return (model, positive, negative, latent, guides, )
class ClownInpaintSimple: ##################################################################################################################################
    """Simplified inpainting setup node: same pipeline as ClownInpaint but with
    fewer inputs and self_attn_floor=1.0 on the regional conditioning.

    Fixes vs. original: `== None` replaced with `is None` (identity test, per
    PEP 8), and the duplicated nested `if positive_bkg is None:` collapsed to
    a single check.
    """
    @classmethod
    def INPUT_TYPES(s):
        return {"required":
                    {#"guide_mode": (GUIDE_MODE_NAMES, {"default": 'epsilon', "tooltip": "Recommended: epsilon or mean/mean_std with sampler_mode = standard, and unsample/resample with sampler_mode = unsample/resample. Epsilon_dynamic_mean, etc. are only used with two latent inputs and a mask. Blend/hard_light/mean/mean_std etc. require low strengths, start with 0.01-0.02."}),
                    "guide_weight": ("FLOAT", {"default": 0.10, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide."}),
                    "guide_weight_scheduler": (["constant"] + get_res4lyf_scheduler_list(), {"default": "beta57"},),
                    "guide_end_step": ("INT", {"default": 15, "min": 1, "max": 10000}),
                    },
                "optional":
                    {
                    "model": ("MODEL", ),
                    "positive_inpaint": ("CONDITIONING", ),
                    "negative": ("CONDITIONING", ),
                    "latent_image": ("LATENT", ),
                    "mask": ("MASK", ),
                    }
                }

    RETURN_TYPES = ("MODEL","CONDITIONING","CONDITIONING","LATENT","GUIDES",)
    RETURN_NAMES = ("model","positive" ,"negative" ,"latent","guides",)
    CATEGORY = "RES4LYF/legacy/sampler_extensions"
    FUNCTION = "main"
    DEPRECATED = True

    def main(self, guide_weight_scheduler="constant", guide_weight_scheduler_bkg="constant", guide_end_step=10000, guide_bkg_end_step=30, guide_weight_scale=1.0, guide_weight_bkg_scale=1.0, guide=None, guide_bkg=None, guide_weight=1.0, guide_weight_bkg=1.0,
             guide_mode="epsilon", guide_weights=None, guide_weights_bkg=None, guide_mask_bkg=None,
             model=None, positive_inpaint=None, positive_bkg=None, negative=None, latent_image=None, mask=None,
             ):
        """Build (model, positive, negative, latent, guides) for simple inpainting runs."""
        default_dtype = torch.float64

        # The latent itself is the guide; its clone is the background guide.
        guide = latent_image
        guide_bkg = {'samples': latent_image['samples'].clone()}

        max_steps = 10000

        denoise, denoise_bkg = guide_weight_scale, guide_weight_bkg_scale

        # NOTE(review): guide_bkg is always assigned a dict above, so this
        # fallback cannot trigger as written -- kept for parity with the
        # other guide nodes. (`is None` replaces the original `== None`.)
        if guide_mode.startswith("epsilon_") and not guide_mode.startswith("epsilon_projection") and guide_bkg is None:
            print("Warning: need two latent inputs for guide_mode=",guide_mode," to work. Falling back to epsilon.")
            guide_mode = "epsilon"

        # Constant scheduler: flat weight vector, zero-padded out to max_steps.
        if guide_weight_scheduler == "constant":
            guide_weights = initialize_or_scale(None, guide_weight, guide_end_step).to(default_dtype)
            guide_weights = F.pad(guide_weights, (0, max_steps), value=0.0)

        if guide_weight_scheduler_bkg == "constant":
            guide_weights_bkg = initialize_or_scale(None, guide_weight_bkg, guide_bkg_end_step).to(default_dtype)
            guide_weights_bkg = F.pad(guide_weights_bkg, (0, max_steps), value=0.0)

        guides = (guide_mode, guide_weight, guide_weight_bkg, guide_weights, guide_weights_bkg, guide, guide_bkg, mask, guide_mask_bkg,
                  guide_weight_scheduler, guide_weight_scheduler_bkg, guide_end_step, guide_bkg_end_step, denoise, denoise_bkg)

        # The sampler starts from a zero latent; the real image travels via guides.
        latent = {'samples': torch.zeros_like(latent_image['samples'])}

        if (positive_inpaint is None) and (positive_bkg is None):
            positive = None
        else:
            # Default background prompt: zero Flux-shaped conditioning.
            # (Original had this None-check nested twice; collapsed to one.)
            if positive_bkg is None:
                positive_bkg = [[
                    torch.zeros((1, 256, 4096)),
                    {'pooled_output': torch.zeros((1, 768))}
                ]]
            # Region 1: inpaint prompt under `mask`; region 2: background under
            # the inverse mask returned by the first call.
            cond_regional, mask_inv = FluxRegionalPrompt().main(cond=positive_inpaint, mask=mask)
            cond_regional, _ = FluxRegionalPrompt().main(cond=positive_bkg, cond_regional=cond_regional, mask=mask_inv)

            positive, = FluxRegionalConditioning().main(conditioning_regional=cond_regional, self_attn_floor=1.0)
            model, = ReFluxPatcher().main(model, enable=True)

        return (model, positive, negative, latent, guides, )
##################################################################################################################################
class ClownsharKSamplerGuide:
    """Packs a single-guide GUIDES tuple for ClownsharKSampler.

    Fixes vs. original: `== None` replaced with `is None` for both guide_bkg
    and guide_weights (identity test, per PEP 8; also safe when guide_weights
    is a tensor, where `==` is elementwise and not a None-check).
    """
    @classmethod
    def INPUT_TYPES(s):
        return {"required":
                    {"guide_mode": (GUIDE_MODE_NAMES, {"default": 'epsilon_projection', "tooltip": "Recommended: epsilon or mean/mean_std with sampler_mode = standard, and unsample/resample with sampler_mode = unsample/resample. Epsilon_dynamic_mean, etc. are only used with two latent inputs and a mask. Blend/hard_light/mean/mean_std etc. require low strengths, start with 0.01-0.02."}),
                    "guide_weight": ("FLOAT", {"default": 0.75, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide."}),
                    #"guide_weight_bkg": ("FLOAT", {"default": 0.75, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide_bkg."}),
                    "guide_weight_scale": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step":0.01, "round": False, "tooltip": "Disables the guide for the next step when the denoised image is similar to the guide. Higher values will strengthen the effect."}),
                    #"guide_weight_bkg_scale": ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Disables the guide for the next step when the denoised image is similar to the guide. Higher values will strengthen the effect."}),
                    "guide_weight_scheduler": (["constant"] + get_res4lyf_scheduler_list(), {"default": "beta57"},),
                    #"guide_weight_scheduler_bkg": (["constant"] + comfy.samplers.SCHEDULER_NAMES + ["beta57"], {"default": "beta57"},),
                    "guide_end_step": ("INT", {"default": 15, "min": 1, "max": 10000}),
                    #"guide_bkg_end_step": ("INT", {"default": 15, "min": 1, "max": 10000}),
                    },
                "optional":
                    {
                    "guide": ("LATENT", ),
                    #"guide_bkg": ("LATENT", ),
                    "guide_mask": ("MASK", ),
                    #"guide_mask_bkg": ("MASK", ),
                    "guide_weights": ("SIGMAS", ),
                    #"guide_weights_bkg": ("SIGMAS", ),
                    }
                }

    RETURN_TYPES = ("GUIDES",)
    RETURN_NAMES = ("guides",)
    CATEGORY = "RES4LYF/legacy/sampler_extensions"
    FUNCTION = "main"
    DEPRECATED = True

    def main(self, guide_weight_scheduler="constant", guide_weight_scheduler_bkg="constant", guide_end_step=30, guide_bkg_end_step=30, guide_weight_scale=1.0, guide_weight_bkg_scale=1.0, guide=None, guide_bkg=None, guide_weight=0.0, guide_weight_bkg=0.0,
             guide_mode="blend", guide_weights=None, guide_weights_bkg=None, guide_mask=None, guide_mask_bkg=None,
             ):
        """Assemble and return the GUIDES tuple consumed by the sampler nodes."""
        default_dtype = torch.float64

        max_steps = 10000

        denoise, denoise_bkg = guide_weight_scale, guide_weight_bkg_scale

        # The epsilon_dynamic_* / *_from_bkg modes need a background guide;
        # fall back to plain epsilon when none was supplied.
        if guide_mode.startswith("epsilon_") and not guide_mode.startswith("epsilon_projection") and guide_bkg is None:
            print("Warning: need two latent inputs for guide_mode=",guide_mode," to work. Falling back to epsilon.")
            guide_mode = "epsilon"

        # Constant scheduler: flat weight vector, zero-padded out to max_steps.
        if guide_weight_scheduler == "constant" and guide_weights is None:
            guide_weights = initialize_or_scale(None, 1.0, guide_end_step).to(default_dtype)
            #guide_weights = initialize_or_scale(None, guide_weight, guide_end_step).to(default_dtype)
            guide_weights = F.pad(guide_weights, (0, max_steps), value=0.0)

        if guide_weight_scheduler_bkg == "constant":
            guide_weights_bkg = initialize_or_scale(None, 0.0, guide_bkg_end_step).to(default_dtype)
            #guide_weights_bkg = initialize_or_scale(None, guide_weight_bkg, guide_bkg_end_step).to(default_dtype)
            guide_weights_bkg = F.pad(guide_weights_bkg, (0, max_steps), value=0.0)

        guides = (guide_mode, guide_weight, guide_weight_bkg, guide_weights, guide_weights_bkg, guide, guide_bkg, guide_mask, guide_mask_bkg,
                  guide_weight_scheduler, guide_weight_scheduler_bkg, guide_end_step, guide_bkg_end_step, denoise, denoise_bkg)
        return (guides, )
class ClownsharKSamplerGuides:
    """Legacy node: packs foreground and background guide latents, masks and
    weight schedules into a GUIDES tuple consumed by the Clownshark
    samplers."""

    @classmethod
    def INPUT_TYPES(s):
        return {"required":
                    {"guide_mode": (GUIDE_MODE_NAMES, {"default": 'epsilon_projection', "tooltip": "Recommended: epsilon or mean/mean_std with sampler_mode = standard, and unsample/resample with sampler_mode = unsample/resample. Epsilon_dynamic_mean, etc. are only used with two latent inputs and a mask. Blend/hard_light/mean/mean_std etc. require low strengths, start with 0.01-0.02."}),
                     "guide_weight": ("FLOAT", {"default": 0.75, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide."}),
                     "guide_weight_bkg": ("FLOAT", {"default": 0.75, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide_bkg."}),
                     "guide_weight_scale": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step":0.01, "round": False, "tooltip": "Disables the guide for the next step when the denoised image is similar to the guide. Higher values will strengthen the effect."}),
                     "guide_weight_bkg_scale": ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Disables the guide for the next step when the denoised image is similar to the guide. Higher values will strengthen the effect."}),
                     "guide_weight_scheduler": (["constant"] + get_res4lyf_scheduler_list(), {"default": "beta57"},),
                     "guide_weight_scheduler_bkg": (["constant"] + get_res4lyf_scheduler_list(), {"default": "constant"},),
                     "guide_end_step": ("INT", {"default": 15, "min": 1, "max": 10000}),
                     "guide_bkg_end_step": ("INT", {"default": 15, "min": 1, "max": 10000}),
                     },
                "optional":
                    {
                     "guide": ("LATENT", ),
                     "guide_bkg": ("LATENT", ),
                     "guide_mask": ("MASK", ),
                     "guide_mask_bkg": ("MASK", ),
                     "guide_weights": ("SIGMAS", ),
                     "guide_weights_bkg": ("SIGMAS", ),
                     }
                }

    RETURN_TYPES = ("GUIDES",)
    RETURN_NAMES = ("guides",)
    CATEGORY = "RES4LYF/legacy/sampler_extensions"
    FUNCTION = "main"
    DEPRECATED = True

    def main(self, guide_weight_scheduler="constant", guide_weight_scheduler_bkg="constant", guide_end_step=30, guide_bkg_end_step=30, guide_weight_scale=1.0, guide_weight_bkg_scale=1.0, guide=None, guide_bkg=None, guide_weight=0.0, guide_weight_bkg=0.0,
             guide_mode="blend", guide_weights=None, guide_weights_bkg=None, guide_mask=None, guide_mask_bkg=None,
             ):
        """Assemble the legacy GUIDES tuple.

        Falls back from the two-latent epsilon_* modes to plain 'epsilon'
        when no background latent is supplied, and synthesizes constant
        per-step weight schedules (zero-padded out to max_steps) for either
        slot whose schedule tensor was not connected.
        """
        default_dtype = torch.float64
        max_steps = 10000

        denoise, denoise_bkg = guide_weight_scale, guide_weight_bkg_scale

        # epsilon_dynamic_mean etc. require two latent inputs (identity check
        # with `is None` instead of the previous `== None`).
        if guide_mode.startswith("epsilon_") and not guide_mode.startswith("epsilon_projection") and guide_bkg is None:
            print("Warning: need two latent inputs for guide_mode=",guide_mode," to work. Falling back to epsilon.")
            guide_mode = "epsilon"

        if guide_weight_scheduler == "constant" and guide_weights is None:
            guide_weights = initialize_or_scale(None, 1.0, guide_end_step).to(default_dtype)
            guide_weights = F.pad(guide_weights, (0, max_steps), value=0.0)

        if guide_weight_scheduler_bkg == "constant" and guide_weights_bkg is None:
            guide_weights_bkg = initialize_or_scale(None, 1.0, guide_bkg_end_step).to(default_dtype)
            guide_weights_bkg = F.pad(guide_weights_bkg, (0, max_steps), value=0.0)

        guides = (guide_mode, guide_weight, guide_weight_bkg, guide_weights, guide_weights_bkg, guide, guide_bkg, guide_mask, guide_mask_bkg,
                  guide_weight_scheduler, guide_weight_scheduler_bkg, guide_end_step, guide_bkg_end_step, denoise, denoise_bkg)
        return (guides, )
class ClownsharKSamplerAutomation:
    """Legacy node: bundles optional per-step eta / s_noise /
    unsample-resample schedules into a single AUTOMATION tuple."""

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {},
            "optional": {
                "etas": ("SIGMAS", ),
                "s_noises": ("SIGMAS", ),
                "unsample_resample_scales": ("SIGMAS", ),
            },
        }

    RETURN_TYPES = ("AUTOMATION",)
    RETURN_NAMES = ("automation",)
    CATEGORY = "RES4LYF/legacy/sampler_extensions"
    FUNCTION = "main"
    DEPRECATED = True

    def main(self, etas=None, s_noises=None, unsample_resample_scales=None,):
        """Pack the three optional schedules into one tuple (any may be None)."""
        return ((etas, s_noises, unsample_resample_scales), )
class ClownsharKSamplerAutomation_Advanced:
    """Legacy node: merges per-step schedules (etas, substep etas, s_noises,
    unsample/resample scales, frame weights) into an AUTOMATION dict,
    creating the dict when no upstream automation is connected."""

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {},
            "optional": {
                "automation": ("AUTOMATION", ),
                "etas": ("SIGMAS", ),
                "etas_substep": ("SIGMAS", ),
                "s_noises": ("SIGMAS", ),
                "unsample_resample_scales": ("SIGMAS", ),
                "frame_weights": ("SIGMAS", ),
                "frame_weights_bkg": ("SIGMAS", ),
            },
        }

    RETURN_TYPES = ("AUTOMATION",)
    RETURN_NAMES = ("automation",)
    CATEGORY = "RES4LYF/legacy/sampler_extensions"
    FUNCTION = "main"
    DEPRECATED = True

    def main(self, automation=None, etas=None, etas_substep=None, s_noises=None, unsample_resample_scales=None, frame_weights=None, frame_weights_bkg=None):
        """Write (or overwrite) the schedule entries on the automation dict.

        Mutates and returns the incoming dict when one is connected; the
        frame weights are stored as a (fg, bkg) pair under 'frame_weights_grp'.
        """
        automation = {} if automation is None else automation
        automation.update({
            'etas': etas,
            'etas_substep': etas_substep,
            's_noises': s_noises,
            'unsample_resample_scales': unsample_resample_scales,
            'frame_weights_grp': (frame_weights, frame_weights_bkg),
        })
        return (automation, )
class ClownsharKSamplerOptions:
    """Legacy node: collects noise/eta/solver settings into an OPTIONS dict,
    layering on top of an optional upstream OPTIONS input."""

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "noise_init_stdev": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step":0.01, "round": False, }),
                "noise_init_mean": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step":0.01, "round": False, }),
                "noise_type_init": (NOISE_GENERATOR_NAMES, {"default": "gaussian"}),
                "noise_type_sde": (NOISE_GENERATOR_NAMES, {"default": "brownian"}),
                "noise_mode_sde": (["hard", "hard_var", "hard_sq", "soft", "softer", "exp"], {"default": 'hard', "tooltip": "How noise scales with the sigma schedule. Hard is the most aggressive, the others start strong and drop rapidly."}),
                "eta": ("FLOAT", {"default": 0.25, "min": -100.0, "max": 100.0, "step":0.01, "round": False}),
                "s_noise": ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step":0.01, "round": False}),
                "d_noise": ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step":0.01}),
                "alpha_init": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.1}),
                "k_init": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 2}),
                "alpha_sde": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.1}),
                "k_sde": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 2}),
                "noise_seed": ("INT", {"default": -1, "min": -1, "max": 0xffffffffffffffff, "tooltip": "Seed for the SDE noise that is added after each step if eta or eta_var are non-zero. If set to -1, it will use the increment the seed most recently used by the workflow."}),
                "c1": ("FLOAT", {"default": 0.0, "min": -1.0, "max": 10000.0, "step": 0.01}),
                "c2": ("FLOAT", {"default": 0.5, "min": -1.0, "max": 10000.0, "step": 0.01}),
                "c3": ("FLOAT", {"default": 1.0, "min": -1.0, "max": 10000.0, "step": 0.01}),
                "t_fn_formula": ("STRING", {"default": "", "multiline": True}),
                "sigma_fn_formula": ("STRING", {"default": "", "multiline": True}),
            },
            "optional": {
                "options": ("OPTIONS",),
            }
        }

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    CATEGORY = "RES4LYF/legacy/sampler_extensions"
    FUNCTION = "main"
    DEPRECATED = True

    def main(self, noise_init_stdev, noise_init_mean, c1, c2, c3, eta, s_noise, d_noise, noise_type_init, noise_type_sde, noise_mode_sde, noise_seed,
             alpha_init, k_init, alpha_sde, k_sde, t_fn_formula=None, sigma_fn_formula=None, unsampler_type="linear",
             alphas=None, etas=None, s_noises=None, d_noises=None, c2s=None, c3s=None,
             options=None,
             ):
        """Merge all settings into the options dict (created when absent).

        Note: the UI's `noise_seed` input is stored under 'noise_seed_sde'.
        """
        options = {} if options is None else options
        options.update({
            'noise_init_stdev': noise_init_stdev,
            'noise_init_mean': noise_init_mean,
            'noise_type_init': noise_type_init,
            'noise_type_sde': noise_type_sde,
            'noise_mode_sde': noise_mode_sde,
            'eta': eta,
            's_noise': s_noise,
            'd_noise': d_noise,
            'alpha_init': alpha_init,
            'k_init': k_init,
            'alpha_sde': alpha_sde,
            'k_sde': k_sde,
            'noise_seed_sde': noise_seed,
            'c1': c1,
            'c2': c2,
            'c3': c3,
            't_fn_formula': t_fn_formula,
            'sigma_fn_formula': sigma_fn_formula,
            'unsampler_type': unsampler_type,
        })
        return (options,)
class ClownOptions_SDE_Noise:
    """Legacy node: stores precomputed SDE noise latents and a step count in
    the OPTIONS dict."""

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sde_noise_steps": ("INT", {"default": 1, "min": 1, "max": 10000}),
            },
            "optional": {
                "sde_noise": ("LATENT",),
                "options" : ("OPTIONS",),
            }
        }

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    CATEGORY = "RES4LYF/legacy/sampler_options"
    FUNCTION = "main"
    DEPRECATED = True

    def main(self, sde_noise_steps, sde_noise=None, options=None,):
        """Merge the SDE noise settings into the options dict.

        Fix: `sde_noise` is declared optional in INPUT_TYPES, so it needs a
        default value here; previously an unconnected input raised TypeError.
        """
        if options is None:
            options = {}
        options['sde_noise_steps'] = sde_noise_steps
        options['sde_noise'] = sde_noise
        return (options,)
class ClownOptions_FrameWeights:
    """Legacy node: stores a per-frame weight schedule (duplicated into the
    (fg, bkg) pair the samplers expect) in the OPTIONS dict."""

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "frame_weights": ("SIGMAS", ),
            },
            "optional": {
                "options": ("OPTIONS",),
            }
        }

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    CATEGORY = "RES4LYF/legacy/sampler_options"
    FUNCTION = "main"
    # Fix: DEPRECATED was assigned twice (redundant duplicate removed).
    DEPRECATED = True

    def main(self, frame_weights, options=None,):
        """Store (frame_weights, frame_weights) under 'frame_weights_grp'."""
        if options is None:
            options = {}
        frame_weights_grp = (frame_weights, frame_weights)
        options['frame_weights_grp'] = frame_weights_grp
        return (options,)
================================================
FILE: legacy/samplers_tiled.py
================================================
# tiled sampler code adapted from https://github.com/BlenderNeko/ComfyUI_TiledKSampler
# and heavily modified for use with https://github.com/ClownsharkBatwing/UltraCascade
import sys
import os
import copy
from functools import partial
from tqdm.auto import tqdm
import torch
sys.path.insert(0, os.path.join(os.path.dirname(os.path.realpath(__file__)), "comfy"))
import comfy.sd
import comfy.controlnet
import comfy.model_management
import comfy.sample
import comfy.sampler_helpers
import latent_preview
from nodes import MAX_RESOLUTION
#MAX_RESOLUTION=8192
import comfy.clip_vision
import folder_paths
from . import tiling
from .noise_classes import *
def initialize_or_scale(tensor, value, steps):
    """Return a constant schedule of length `steps` filled with `value` when
    no tensor is supplied; otherwise scale the supplied tensor by `value`."""
    if tensor is not None:
        return value * tensor
    return torch.full((steps,), value)
def cv_cond(cv_out, conditioning, strength, noise_augmentation):
    """Append an unCLIP (CLIP-vision) entry to every conditioning item.

    The original metadata dicts are left untouched; each output item carries
    a copied dict whose 'unclip_conditioning' list gains one new entry.
    """
    updated = []
    for item in conditioning:
        meta = item[1].copy()
        entry = {"clip_vision_output": cv_out, "strength": strength, "noise_augmentation": noise_augmentation}
        if "unclip_conditioning" in meta:
            # copy the list so the source conditioning's list is not mutated
            meta["unclip_conditioning"] = meta["unclip_conditioning"][:] + [entry]
        else:
            meta["unclip_conditioning"] = [entry]
        updated.append([item[0], meta])
    return updated
def recursion_to_list(obj, attr):
    """Generator: yield `obj`, then keep following `obj.<attr>` links until
    the attribute is missing or None.

    Note: the initial object is always yielded, even if it is None — this
    mirrors the original linked-list unrolling behavior.
    """
    node = obj
    yield node
    while True:
        node = getattr(node, attr, None)
        if node is None:
            return
        yield node
def copy_cond(cond):
    """Shallow-copy a conditioning list: embeddings are shared, but each
    metadata dict is copied so per-tile edits don't leak back."""
    out = []
    for emb, meta in cond:
        out.append([emb, meta.copy()])
    return out
def slice_cond(tile_h, tile_h_len, tile_w, tile_w_len, cond, area):
    """Clip one conditioning entry's spatial 'area'/'mask' to a tile.

    `area` is a (coords, mask) pair where coords = (h_len, w_len, h, w) in
    latent space. Returns (cond, ignore): ignore=True means this cond does
    not touch the tile and should be dropped. Mutates cond[1] in place.
    """
    tile_h_end = tile_h + tile_h_len
    tile_w_end = tile_w + tile_w_len
    coords = area[0]  # (h_len, w_len, h, w) or None
    mask = area[1]

    if coords is not None:
        h_len, w_len, h, w = coords
        h_end = h + h_len
        w_end = w + w_len
        overlaps = h < tile_h_end and h_end > tile_h and w < tile_w_end and w_end > tile_w
        if not overlaps:
            return (cond, True)
        # translate the area into tile-local coordinates, clamped to the tile
        new_h = max(0, h - tile_h)
        new_w = max(0, w - tile_w)
        new_h_end = min(tile_h_end, h_end - tile_h)
        new_w_end = min(tile_w_end, w_end - tile_w)
        cond[1]['area'] = (new_h_end - new_h, new_w_end - new_w, new_h, new_w)

    if mask is not None:
        new_mask = tiling.get_slice(mask, tile_h, tile_h_len, tile_w, tile_w_len)
        # an all-zero mask slice means this cond is inert on the tile
        if new_mask.sum().cpu() == 0.0 and 'mask' in cond[1]:
            return (cond, True)
        cond[1]['mask'] = new_mask

    return (cond, False)
def slice_gligen(tile_h, tile_h_len, tile_w, tile_w_len, cond, gligen):
    """Clip GLIGEN boxes to a tile, rewriting cond['gligen'] in place.

    Boxes with no overlap are discarded; if none survive, the 'gligen' key
    is removed from the cond dict entirely.
    """
    if gligen is None:
        return
    tile_h_end = tile_h + tile_h_len
    tile_w_end = tile_w + tile_w_len

    gligen_type = gligen[0]
    gligen_model = gligen[1]
    surviving = []
    for emb, h_len, w_len, h, w in gligen[2]:
        h_end = h + h_len
        w_end = w + w_len
        if not (h < tile_h_end and h_end > tile_h and w < tile_w_end and w_end > tile_w):
            continue  # box does not intersect this tile
        new_h = max(0, h - tile_h)
        new_w = max(0, w - tile_w)
        new_h_end = min(tile_h_end, h_end - tile_h)
        new_w_end = min(tile_w_end, w_end - tile_w)
        surviving.append((emb, new_h_end - new_h, new_w_end - new_w, new_h, new_w))

    if surviving:
        cond['gligen'] = (gligen_type, gligen_model, surviving)
    else:
        del cond['gligen']
def slice_cnet(h, h_len, w, w_len, model:comfy.controlnet.ControlBase, img):
    """Install the tile's slice of the control hint on a ControlNet model.

    Tile coordinates are in latent space; the hint image is sliced at 8x
    (latent -> pixel). When `img` is None the model's original hint is used.
    """
    source = img if img is not None else model.cond_hint_original
    hint = tiling.get_slice(source, h*8, h_len*8, w*8, w_len*8)
    if isinstance(model, comfy.controlnet.ControlLora):
        # ControlLora has no separate control_model dtype; cast to float
        model.cond_hint = hint.float().to(model.device)
    else:
        model.cond_hint = hint.to(model.control_model.dtype).to(model.device)
def slices_T2I(h, h_len, w, w_len, model:comfy.controlnet.ControlBase, img):
    """Install the tile's slice of the hint on a T2I adapter, clearing any
    cached control input first. Coordinates are latent-space (x8 to pixels);
    `img` None falls back to the adapter's original hint."""
    model.control_input = None
    source = img if img is not None else model.cond_hint_original
    model.cond_hint = tiling.get_slice(source, h*8, h_len*8, w*8, w_len*8).float().to(model.device)
# TODO: refactor some of the mess
def cnets_and_cnet_imgs(positive, negative, shape):
    """Collect the ControlNet models referenced by the conds and pre-resize
    their hint images to the pixel size implied by the latent `shape`.

    Returns (cnets, cnet_imgs); cnet_imgs[i] is None when the model's
    original hint already matches the target resolution.
    """
    controls = [c['control'] for (_, c) in positive + negative if 'control' in c]
    # unroll previous_controlnet chains and dedupe
    controls = list(set([x for m in controls for x in recursion_to_list(m, "previous_controlnet")]))
    # keep only genuine ControlNets (T2I adapters are handled separately)
    cnets = [x for x in controls if isinstance(x, comfy.controlnet.ControlNet)]

    target_h = shape[-2] * 8
    target_w = shape[-1] * 8
    cnet_imgs = []
    for m in cnets:
        hint = m.cond_hint_original
        if hint.shape[-2] != target_h or hint.shape[-1] != target_w:
            cnet_imgs.append(torch.nn.functional.interpolate(hint, (target_h, target_w), mode='nearest-exact').to('cpu'))
        else:
            cnet_imgs.append(None)
    return cnets, cnet_imgs
def T2Is_and_T2I_imgs(positive, negative, shape):
    # Collect T2I adapters referenced by the conds and pre-resize their hint
    # images to the pixel size implied by the latent `shape` (latent dims x 8).
    # Returns (T2Is, T2I_imgs); T2I_imgs[i] is None when the adapter's original
    # hint already matches and needs no resampling or channel reduction.
    T2Is = [c['control'] for (_, c) in positive + negative if 'control' in c]
    # unroll recursion
    T2Is = [x for m in T2Is for x in recursion_to_list(m, "previous_controlnet")]
    # filter down to only T2I
    T2Is = [x for x in T2Is if isinstance(x, comfy.controlnet.T2IAdapter)]
    T2I_imgs = [
        torch.nn.functional.interpolate(m.cond_hint_original, (shape[-2] * 8, shape[-1] * 8), mode='nearest-exact').to('cpu')
        if m.cond_hint_original.shape[-2] != shape[-2] * 8 or m.cond_hint_original.shape[-1] != shape[-1] * 8 or (m.channels_in == 1 and m.cond_hint_original.shape[1] != 1) else None
        for m in T2Is
    ]
    # Collapse multi-channel hints to one channel for 1-channel adapters.
    # NOTE(review): the bare `m.cond_hint_original.shape[1]` is truthy for any
    # nonzero channel count; the matching condition above uses `!= 1`, so it
    # looks like `!= 1` was intended here too — confirm against the upstream
    # TiledKSampler implementation before changing.
    T2I_imgs = [
        torch.mean(img, 1, keepdim=True) if img is not None and m.channels_in == 1 and m.cond_hint_original.shape[1] else img
        for m, img in zip(T2Is, T2I_imgs)
    ]
    return T2Is, T2I_imgs
def spatial_conds_posneg(positive, negative, shape, device): #cond area and mask
    """Extract a (area, prepared_mask) pair per conditioning entry.

    'area' is passed through as-is (or None); 'mask' is run through
    comfy.sample.prepare_mask against the latent shape/device (or None).
    Returns (pairs_for_positive, pairs_for_negative).
    """
    def extract(conds):
        pairs = []
        for c in conds:
            meta = c[1]
            area = meta['area'] if 'area' in meta else None
            mask = comfy.sample.prepare_mask(meta['mask'], shape, device) if 'mask' in meta else None
            pairs.append((area, mask))
        return pairs
    return extract(positive), extract(negative)
def gligen_posneg(positive, negative):
    """Pull each conditioning entry's 'gligen' payload (or None when absent).

    Returns (gligen_for_positive, gligen_for_negative), each aligned with
    the corresponding conditioning list.
    """
    def pick(conds):
        return [c[1].get('gligen') for c in conds]
    return pick(positive), pick(negative)
def cascade_tiles(x, input_x, tile_h, tile_w, tile_h_len, tile_w_len):
    """Rescale tile coordinates from x's grid onto input_x's grid.

    Uses the ratio of the two tensors' last-two (H, W) dimensions with
    integer floor division, so results are valid indices on input_x.
    Returns (tile_h, tile_w, tile_h_len, tile_w_len) in input_x coordinates.
    """
    src_h, src_w = x.shape[-2], x.shape[-1]
    dst_h, dst_w = input_x.shape[-2], input_x.shape[-1]
    return ((dst_h * tile_h) // src_h,
            (dst_w * tile_w) // src_w,
            (dst_h * tile_h_len) // src_h,
            (dst_w * tile_w_len) // src_w)
def sample_common(model, x, noise, noise_mask, noise_seed, tile_width, tile_height, tiling_strategy, cfg, positive, negative,
                  preview=False, sampler=None, sigmas=None,
                  clip_name=None, strength=1.0, noise_augment=1.0, image_cv=None, max_tile_batch_size=3,
                  guide=None, guide_type='residual', guide_weight=1.0, guide_weights=None,
                  ):
    """Core tiled sampling driver shared by the tiled sampler nodes.

    Partitions the latent `x` into tiles according to `tiling_strategy`
    ('padded', 'random', 'random strict', or 'simple'), slices every piece
    of spatial conditioning (ControlNets, T2I adapters, cond areas/masks,
    GLIGEN, optional CLIP-vision image) down to each tile, and denoises
    tile by tile via comfy.sample.sample_custom. Contains special handling
    for the UltraCascade stages 'up', 'c' and 'b' read from the model
    config; stage 'b' defers sampling and batches tiles together.

    Returns the recomposed latent tensor on CPU.

    NOTE(review): this function mutates `x`, `sigmas`, `positive`/`negative`
    and the controlnet models in place — callers are expected to pass clones.
    """
    device = comfy.model_management.get_torch_device()
    steps = len(sigmas)-1

    # Convert conds once and shallow-copy so additional models (cnets, gligen,
    # etc.) can be collected and loaded alongside the base model.
    conds0 = \
        {"positive": comfy.sampler_helpers.convert_cond(positive),
         "negative": comfy.sampler_helpers.convert_cond(negative)}
    conds = {}
    for k in conds0:
        conds[k] = list(map(lambda a: a.copy(), conds0[k]))
    modelPatches, inference_memory = comfy.sampler_helpers.get_additional_models(conds, model.model_dtype())
    comfy.model_management.load_models_gpu([model] + modelPatches, model.memory_required(noise.shape) + inference_memory)

    # `compression`: latent-to-pixel scale factor, per model family.
    if model.model.model_config.unet_config['stable_cascade_stage'] == 'up':
        compression = 1
        guide_weight = 1.0 if guide_weight is None else guide_weight
        guide_type = 'residual' if guide_type is None else guide_type
        guide = guide['samples'] if guide is not None else None
        guide_weights = initialize_or_scale(guide_weights, guide_weight, 10000)
        patch = model.model_options.get("transformer_options", {}).get("patches_replace", {}).get("ultracascade", {}).get("main") #CHANGED HERE
        if patch is not None:
            patch.update(x_lr=guide, guide_weights=guide_weights, guide_type=guide_type)
        else:
            # No ultracascade patch installed: configure the diffusion model
            # directly on a clone so the original model is left untouched.
            model = model.clone()
            model.model.diffusion_model.set_sigmas_prev(sigmas_prev=sigmas[:1])
            model.model.diffusion_model.set_guide_weights(guide_weights=guide_weights)
            model.model.diffusion_model.set_guide_type(guide_type=guide_type)
    elif model.model.model_config.unet_config['stable_cascade_stage'] == 'c':
        compression = 1
    elif model.model.model_config.unet_config['stable_cascade_stage'] == 'b':
        compression = 4
        # Stage B: inject the stage-C prior into positive conds and build a
        # zeroed-out negative. NOTE(review): here `guide` is still a LATENT
        # dict (indexed with ['samples']), unlike stage 'up' above which
        # unwraps it first — confirm callers always pass the dict form.
        c_pos, c_neg = [], []
        for t in positive:
            d_pos = t[1].copy()
            d_neg = t[1].copy()
            d_pos['stable_cascade_prior'] = guide['samples']
            pooled_output = d_neg.get("pooled_output", None)
            if pooled_output is not None:
                d_neg["pooled_output"] = torch.zeros_like(pooled_output)
            c_pos.append([t[0], d_pos])
            c_neg.append([torch.zeros_like(t[0]), d_neg])
        positive = c_pos
        negative = c_neg
        effnet_samples = positive[0][1]['stable_cascade_prior'].clone()
        # NOTE(review): `nn` is not imported at the top of this file;
        # presumably it arrives via `from .noise_classes import *` — verify.
        effnet_interpolated = nn.functional.interpolate(effnet_samples.clone().to(torch.float16).to(device), size=torch.Size((x.shape[-2] // 2, x.shape[-1] // 2,)), mode='bilinear', align_corners=True)
        effnet_full_map = model.model.diffusion_model.effnet_mapper(effnet_interpolated)
    else:
        compression = 8 #sd1.5, sdxl, sd3, flux, etc

    if image_cv is not None: #CLIP VISION LOAD
        clip_path = folder_paths.get_full_path("clip_vision", clip_name)
        clip_vision = comfy.clip_vision.load(clip_path)

    # Gather all spatial conditioning once, up front.
    cnets, cnet_imgs = cnets_and_cnet_imgs (positive, negative, x.shape)
    T2Is, T2I_imgs = T2Is_and_T2I_imgs (positive, negative, x.shape)
    spatial_conds_pos, spatial_conds_neg = spatial_conds_posneg(positive, negative, x.shape, device)
    gligen_pos, gligen_neg = gligen_posneg (positive, negative)

    # Clamp requested tile size to the actual image size (in pixels).
    tile_width = min(x.shape[-1] * compression, tile_width)
    tile_height = min(x.shape[2] * compression, tile_height)

    # Non-padded strategies noise the whole latent up-front.
    if tiling_strategy != 'padded':
        if noise_mask is not None:
            x += sigmas[0] * noise_mask * model.model.process_latent_out(noise)
        else:
            x += sigmas[0] * model.model.process_latent_out(noise)

    if tiling_strategy == 'random' or tiling_strategy == 'random strict':
        tiles = tiling.get_tiles_and_masks_rgrid(steps, x.shape, tile_height, tile_width, torch.manual_seed(noise_seed), compression=compression)
    elif tiling_strategy == 'padded':
        tiles = tiling.get_tiles_and_masks_padded(steps, x.shape, tile_height, tile_width, compression=compression)
    else:
        tiles = tiling.get_tiles_and_masks_simple(steps, x.shape, tile_height, tile_width, compression=compression)

    total_steps = sum([num_steps for img_pass in tiles for steps_list in img_pass for _,_,_,_,num_steps,_ in steps_list])
    current_step = [0]
    with tqdm(total=total_steps) as pbar_tqdm:
        pbar = comfy.utils.ProgressBar(total_steps)

        # Progress callback handed to sample_custom; step_inc > 1 when tiles
        # are batched (stage B). Note the `x` parameter shadows the outer `x`.
        def callback(step, x0, x, total_steps, step_inc=1):
            current_step[0] += step_inc
            preview_bytes = None
            if preview == True:
                previewer = latent_preview.get_previewer(device, model.model.latent_format)
                preview_bytes = previewer.decode_latent_to_preview_image("JPEG", x0)
            pbar.update_absolute(current_step[0], preview=preview_bytes)
            pbar_tqdm.update(step_inc)

        if tiling_strategy == "random strict":
            # staging copy: tiles within one pass never read each other's output
            x_next = x.clone()
        for img_pass in tiles: # img_pass is a set of non-intersecting tiles
            effnet_slices, effnet_map_slices, tiled_noise_list, tiled_latent_list, tiled_mask_list, tile_h_list, tile_w_list, tile_h_len_list, tile_w_len_list = [],[],[],[],[],[],[],[],[]
            for i in range(len(img_pass)):
                for iteration, (tile_h, tile_h_len, tile_w, tile_w_len, tile_steps, tile_mask) in enumerate(img_pass[i]):
                    # Combine the global noise mask with this tile's own mask.
                    tiled_mask = None
                    if noise_mask is not None:
                        tiled_mask = tiling.get_slice(noise_mask, tile_h, tile_h_len, tile_w, tile_w_len).to(device)
                    if tile_mask is not None:
                        if tiled_mask is not None:
                            tiled_mask *= tile_mask.to(device)
                        else:
                            tiled_mask = tile_mask.to(device)

                    if tiling_strategy == 'padded' or tiling_strategy == 'random strict':
                        tile_h, tile_h_len, tile_w, tile_w_len, tiled_mask = tiling.mask_at_boundary( tile_h, tile_h_len, tile_w, tile_w_len,
                                                                                                     tile_height, tile_width, x.shape[-2], x.shape[-1],
                                                                                                     tiled_mask, device, compression=compression)
                    if tiled_mask is not None and tiled_mask.sum().cpu() == 0.0:
                        # tile fully masked out: nothing to denoise
                        continue

                    tiled_latent = tiling.get_slice(x, tile_h, tile_h_len, tile_w, tile_w_len).to(device)
                    if tiling_strategy == 'padded':
                        tiled_noise = tiling.get_slice(noise, tile_h, tile_h_len, tile_w, tile_w_len).to(device)
                    else:
                        if tiled_mask is None or noise_mask is None:
                            # latent was already noised up-front; no extra noise per tile
                            tiled_noise = torch.zeros_like(tiled_latent)
                        else:
                            tiled_noise = tiling.get_slice(noise, tile_h, tile_h_len, tile_w, tile_w_len).to(device) * (1 - tiled_mask)

                    #TODO: all other condition based stuff like area sets and GLIGEN should also happen here

                    #cnets
                    for m, img in zip(cnets, cnet_imgs):
                        slice_cnet(tile_h, tile_h_len, tile_w, tile_w_len, m, img)
                    #T2I
                    for m, img in zip(T2Is, T2I_imgs):
                        slices_T2I(tile_h, tile_h_len, tile_w, tile_w_len, m, img)

                    pos = copy.deepcopy(positive)
                    neg = copy.deepcopy(negative)

                    #cond areas
                    pos = [slice_cond(tile_h, tile_h_len, tile_w, tile_w_len, c, area) for c, area in zip(pos, spatial_conds_pos)]
                    pos = [c for c, ignore in pos if not ignore]
                    neg = [slice_cond(tile_h, tile_h_len, tile_w, tile_w_len, c, area) for c, area in zip(neg, spatial_conds_neg)]
                    neg = [c for c, ignore in neg if not ignore]

                    #gligen
                    for cond, gligen in zip(pos, gligen_pos):
                        slice_gligen(tile_h, tile_h_len, tile_w, tile_w_len, cond, gligen)
                    for cond, gligen in zip(neg, gligen_neg):
                        slice_gligen(tile_h, tile_h_len, tile_w, tile_w_len, cond, gligen)

                    # Each pass consumes its own slice of the sigma schedule.
                    # NOTE(review): `sigmas` is re-sliced in place and carried
                    # across iterations — verify this matches the intended
                    # per-pass scheduling.
                    start_step = i * tile_steps
                    last_step = i * tile_steps + tile_steps
                    if last_step is not None and last_step < (len(sigmas) - 1):
                        sigmas = sigmas[:last_step + 1]
                    if start_step is not None:
                        if start_step < (len(sigmas) - 1):
                            sigmas = sigmas[start_step:]
                        else:
                            # schedule exhausted: bail out with what we have
                            if tiled_latent is not None:
                                return tiled_latent
                            else:
                                return torch.zeros_like(noise)

                    # SLICE, DICE, AND DENOISE
                    if image_cv is not None: #slice and dice ClipVision for tiling
                        image_cv = image_cv.permute(0,3,1,2)
                        tile_h_cascade, tile_w_cascade, tile_h_len_cascade, tile_w_len_cascade = cascade_tiles(x, image_cv, tile_h, tile_w, tile_h_len, tile_w_len)
                        image_slice = copy.deepcopy(image_cv)
                        image_slice = tiling.get_slice(image_slice, tile_h_cascade, tile_h_len_cascade, tile_w_cascade, tile_w_len_cascade).to(device)
                        image_slice = image_slice.permute(0,2,3,1)
                        image_cv = image_cv.permute(0,2,3,1)
                        cv_out_slice = clip_vision.encode_image(image_slice)
                        pos = cv_cond(cv_out_slice, pos, strength, noise_augment)

                    if model.model.model_config.unet_config['stable_cascade_stage'] == 'up': #slice and dice stage UP guide
                        tile_h_cascade, tile_w_cascade, tile_h_len_cascade, tile_w_len_cascade = cascade_tiles(x, guide, tile_h, tile_w, tile_h_len, tile_w_len)
                        guide_slice = copy.deepcopy(guide)
                        guide_slice = tiling.get_slice(guide_slice.clone(), tile_h_cascade, tile_h_len_cascade, tile_w_cascade, tile_w_len_cascade).to(device)
                        model.model.diffusion_model.set_x_lr(x_lr=guide_slice)
                        tile_result = comfy.sample.sample_custom(model, tiled_noise, cfg, sampler, sigmas, pos, neg, tiled_latent, noise_mask=tiled_mask, callback=callback, disable_pbar=True, seed=noise_seed)
                    elif model.model.model_config.unet_config['stable_cascade_stage'] == 'b': #slice and dice stage B conditioning
                        # Stage B only collects slices here; actual sampling
                        # happens batched after the tile loop below.
                        tile_h_cascade, tile_w_cascade, tile_h_len_cascade, tile_w_len_cascade = cascade_tiles(x, effnet_samples.clone(), tile_h, tile_w, tile_h_len, tile_w_len)
                        effnet_slice = tiling.get_slice(effnet_samples.clone(), tile_h_cascade, tile_h_len_cascade, tile_w_cascade, tile_w_len_cascade).to(device)
                        effnet_slices.append(effnet_slice)
                        tile_h_cascade, tile_w_cascade, tile_h_len_cascade, tile_w_len_cascade = cascade_tiles(x, effnet_full_map.clone(), tile_h, tile_w, tile_h_len, tile_w_len)
                        effnet_map_slice = tiling.get_slice(effnet_full_map.clone(), tile_h_cascade, tile_h_len_cascade, tile_w_cascade, tile_w_len_cascade).to(device)
                        effnet_map_slices.append(effnet_map_slice)
                    else: # not stage UP or stage B, default
                        tile_result = comfy.sample.sample_custom(model, tiled_noise, cfg, sampler, sigmas, pos, neg, tiled_latent, noise_mask=tiled_mask, callback=callback, disable_pbar=True, seed=noise_seed)

                    if model.model.model_config.unet_config['stable_cascade_stage'] != 'b':
                        tile_result = tile_result.cpu()
                        if tiled_mask is not None:
                            tiled_mask = tiled_mask.cpu()
                        if tiling_strategy == "random strict":
                            tiling.set_slice(x_next, tile_result, tile_h, tile_h_len, tile_w, tile_w_len, tiled_mask)
                        else:
                            tiling.set_slice(x, tile_result, tile_h, tile_h_len, tile_w, tile_w_len, tiled_mask)

                    # Record the tile for stage B's batched sampling pass.
                    tiled_noise_list .append(tiled_noise)
                    tiled_latent_list.append(tiled_latent)
                    tiled_mask_list .append(tiled_mask)
                    tile_h_list .append(tile_h)
                    tile_w_list .append(tile_w)
                    tile_h_len_list .append(tile_h_len)
                    tile_w_len_list .append(tile_w_len)

            #END OF NON-INTERSECTING SET OF TILES
            if tiling_strategy == "random strict": # IS THIS ONE LEVEL OVER??
                x = x_next.clone()

            if model.model.model_config.unet_config['stable_cascade_stage'] == 'b':
                # Stage B: sample all collected tiles in batches of
                # max_tile_batch_size. NOTE(review): `pos`/`neg` here are the
                # values left over from the final loop iteration above —
                # confirm that is intentional.
                for start_idx in range(0, len(tiled_latent_list), max_tile_batch_size):
                    end_idx = start_idx + max_tile_batch_size
                    #print("Tiled batch size: ", min(max_tile_batch_size, len(tiled_latent_list))) #end_idx - start_idx)
                    tiled_noise_batch = torch.cat(tiled_noise_list [start_idx:end_idx])
                    tiled_latent_batch = torch.cat(tiled_latent_list[start_idx:end_idx])
                    tiled_mask_batch = torch.cat(tiled_mask_list [start_idx:end_idx])
                    print("Tiled batch size: ", tiled_latent_batch.shape[0])
                    pos[0][1]['stable_cascade_prior'] = torch.cat(effnet_slices[start_idx:end_idx])
                    neg[0][1]['stable_cascade_prior'] = torch.cat(effnet_slices[start_idx:end_idx])
                    tile_result = comfy.sample.sample_custom(model, tiled_noise_batch, cfg, sampler, sigmas, pos, neg, tiled_latent_batch, noise_mask=tiled_mask_batch, callback=partial(callback, step_inc=tiled_latent_batch.shape[0]), disable_pbar=True, seed=noise_seed)
                    # Scatter each result tile back into the full latent.
                    for i in range(tile_result.shape[0]):
                        idx = start_idx + i
                        single_tile = tile_result[i].unsqueeze(dim=0)
                        single_mask = tiled_mask_batch[i].unsqueeze(dim=0)
                        tiling.set_slice(x, single_tile, tile_h_list[idx], tile_h_len_list[idx], tile_w_list[idx], tile_w_len_list[idx], single_mask.cpu())

    x = x.to('cpu')
    comfy.sampler_helpers.cleanup_additional_models(modelPatches)
    return x.cpu()
class UltraSharkSampler_Tiled: #this is for use with https://github.com/ClownsharkBatwing/UltraCascade
    """Tiled sampler node wrapping sample_common; prepares the noise tensor
    and the noise mask, then delegates the tiled denoising."""

    @classmethod
    def INPUT_TYPES(s):
        return {"required":
                    {
                    "add_noise": ("BOOLEAN", {"default": True}),
                    "noise_is_latent": ("BOOLEAN", {"default": False}),
                    "noise_type": (NOISE_GENERATOR_NAMES, ),
                    "alpha": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step":0.1, "round": 0.01}),
                    "k": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step":2.0, "round": 0.01}),
                    "noise_seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}),
                    "cfg": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 100.0}),
                    "guide_type": (['residual', 'weighted'], ),
                    "guide_weight": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": 0.01}),
                    "tile_width": ("INT", {"default": 1024, "min": 2, "max": MAX_RESOLUTION, "step": 1}),
                    "tile_height": ("INT", {"default": 1024, "min": 2, "max": MAX_RESOLUTION, "step": 1}),
                    "tiling_strategy": (["padded", "random", "random strict", 'simple'], ),
                    "max_tile_batch_size": ("INT", {"default": 64, "min": 1, "max": 256, "step": 1}),
                    "model": ("MODEL",),
                    "positive": ("CONDITIONING", ),
                    "negative": ("CONDITIONING", ),
                    "sampler": ("SAMPLER",),
                    "sigmas": ("SIGMAS",),
                    "latent_image": ("LATENT", ),
                    "clip_name": (folder_paths.get_filename_list("clip_vision"), {'default': "clip-vit-large-patch14.safetensors"}),
                    "strength": ("FLOAT", {"default": 1.0, "min": -10.0, "max": 10.0, "step": 0.01}),
                    "noise_augment": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01}),
                    },
                "optional": {
                    "latent_noise": ("LATENT", ),
                    "guide": ("LATENT", ),
                    "guide_weights": ("SIGMAS",),
                    "image_cv": ("IMAGE",),
                    },
                }

    RETURN_TYPES = ("LATENT",)
    FUNCTION = "sample"
    CATEGORY = "RES4LYF/legacy/samplers/ultracascade"
    DESCRIPTION = "For use with UltraCascade."
    DEPRECATED = True

    def sample(self, model, noise_seed, add_noise, noise_is_latent, noise_type, alpha, k, tile_width, tile_height, tiling_strategy, cfg, positive, negative, latent_image, latent_noise=None, sampler=None, sigmas=None, guide=None,
               clip_name=None, strength=1.0, noise_augment=1.0, image_cv=None, max_tile_batch_size=3,
               guide_type='residual', guide_weight=1.0, guide_weights=None,
               ):
        """Build the initial noise tensor and hand off to sample_common."""
        x = latent_image["samples"].clone()
        torch.manual_seed(noise_seed)

        # Choose the noise source: zeros, freshly generated, or a supplied latent.
        if not add_noise:
            noise = torch.zeros(x.size(), dtype=x.dtype, layout=x.layout, device="cpu")
        elif latent_noise is None:
            skip = latent_image["batch_index"] if "batch_index" in latent_image else None
            noise = prepare_noise(x, noise_seed, noise_type, skip, alpha, k)
        else:
            noise = latent_noise["samples"]

        if noise_is_latent: #add noise and latent together and normalize --> noise
            noise += x.cpu()
            noise.sub_(noise.mean()).div_(noise.std())

        noise_mask = latent_image["noise_mask"].clone() if "noise_mask" in latent_image else None

        latent_out = latent_image.copy()
        latent_out['samples'] = sample_common(model, x=x, noise=noise, noise_mask=noise_mask, noise_seed=noise_seed, tile_width=tile_width, tile_height=tile_height, tiling_strategy=tiling_strategy, cfg=cfg, positive=positive, negative=negative,
                                              preview=True, sampler=sampler, sigmas=sigmas,
                                              clip_name=clip_name, strength=strength, noise_augment=noise_augment, image_cv=image_cv, max_tile_batch_size=max_tile_batch_size,
                                              guide=guide, guide_type=guide_type, guide_weight=guide_weight, guide_weights=guide_weights,
                                              )
        return (latent_out,)
================================================
FILE: legacy/sigmas.py
================================================
import torch
import numpy as np
from math import *
import builtins
from scipy.interpolate import CubicSpline
import torch.nn.functional as F
import torch.nn as nn
import torch.optim as optim
from comfy.k_diffusion.sampling import get_sigmas_polyexponential, get_sigmas_karras
import comfy.samplers
def rescale_linear(input, input_min, input_max, output_min, output_max):
    """Map `input` linearly from [input_min, input_max] onto [output_min, output_max]."""
    normalized = (input - input_min) / (input_max - input_min)
    return normalized * (output_max - output_min) + output_min
class set_precision_sigmas:
    """Casts a sigma schedule to 16/32/64-bit floats; optionally also sets the
    global torch default dtype to the chosen precision."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", ),
                "precision": (["16", "32", "64"], ),
                "set_default": ("BOOLEAN", {"default": False})
            },
        }

    RETURN_TYPES = ("SIGMAS",)
    RETURN_NAMES = ("passthrough",)
    CATEGORY = "RES4LYF/precision"
    FUNCTION = "main"

    def main(self, precision="32", sigmas=None, set_default=False):
        # dispatch table instead of a match statement; unknown precisions pass through
        dtype_by_precision = {"16": torch.float16, "32": torch.float32, "64": torch.float64}
        target_dtype = dtype_by_precision.get(precision)
        if target_dtype is not None:
            if set_default is True:
                torch.set_default_dtype(target_dtype)
            sigmas = sigmas.to(target_dtype)
        return (sigmas, )
class SimpleInterpolator(nn.Module):
    """Tiny MLP (1 -> 16 -> 32 -> 1) used to fit a sigma schedule as a function
    of normalized step position."""

    def __init__(self):
        super(SimpleInterpolator, self).__init__()
        layers = [
            nn.Linear(1, 16),
            nn.ReLU(),
            nn.Linear(16, 32),
            nn.ReLU(),
            nn.Linear(32, 1),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        return self.net(x)
def train_interpolator(model, sigma_schedule, steps, epochs=5000, lr=0.01):
    """Fit a SimpleInterpolator MLP to `sigma_schedule` sampled at `steps` evenly
    spaced positions in [0, 1], by minimizing MSE with Adam.

    NOTE(review): the `model` argument is ignored — a fresh SimpleInterpolator is
    constructed inside the inference-mode-disabled context (tensors created while
    torch.inference_mode() is active cannot be trained, which appears to be why
    the model is re-created here). The locally trained model is what is returned.
    """
    with torch.inference_mode(False):
        model = SimpleInterpolator()
        sigma_schedule = sigma_schedule.clone()

        criterion = nn.MSELoss()
        optimizer = optim.Adam(model.parameters(), lr=lr)

        # inputs: normalized step positions; targets: the sigma values
        x_train = torch.linspace(0, 1, steps=steps).unsqueeze(1)
        y_train = sigma_schedule.unsqueeze(1)

        # disable inference mode for training
        model.train()
        for epoch in range(epochs):
            optimizer.zero_grad()

            # fwd pass
            outputs = model(x_train)
            loss = criterion(outputs, y_train)

            loss.backward()
            optimizer.step()

    return model
def interpolate_sigma_schedule_model(sigma_schedule, target_steps):
    """Resample `sigma_schedule` to `target_steps` entries by fitting a small MLP
    to the schedule and evaluating it at the new step positions."""
    schedule = sigma_schedule.float().detach()

    # fit an MLP mapping normalized position in [0, 1] -> sigma
    fitted = train_interpolator(SimpleInterpolator(), schedule, len(schedule))

    # query the fitted model at the target positions, without gradients
    fitted.eval()
    positions = torch.linspace(0, 1, target_steps).unsqueeze(1)
    with torch.no_grad():
        resampled = fitted(positions).squeeze()
    return resampled
class sigmas_interpolate:
    """Resample schedule `sigmas_0` to the length of `sigmas_1` using the selected
    interpolation mode: plain linear, nearest neighbor, polynomial least-squares
    fit, log-space linear ("exponential"), power-law fit, or a small trained MLP
    ("model"). Returns the resampled sigmas_0 together with sigmas_1 unchanged."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas_0": ("SIGMAS", {"forceInput": True}),
                "sigmas_1": ("SIGMAS", {"forceInput": True}),
                "mode": (["linear", "nearest", "polynomial", "exponential", "power", "model"],),
                "order": ("INT", {"default": 8, "min": 1,"max": 64,"step": 1}),
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS","SIGMAS",)
    RETURN_NAMES = ("sigmas_0", "sigmas_1")
    CATEGORY = "RES4LYF/sigmas"

    def interpolate_sigma_schedule_poly(self, sigma_schedule, target_steps):
        """Least-squares polynomial fit of degree self.order, evaluated at the target steps."""
        order = self.order
        sigma_schedule_np = sigma_schedule.cpu().numpy()

        # original steps (assumed evenly spaced on [0, 1])
        original_steps = np.linspace(0, 1, len(sigma_schedule_np))
        coefficients = np.polyfit(original_steps, sigma_schedule_np, deg=order)

        # evaluate the fitted polynomial at the target positions
        target_steps_np = np.linspace(0, 1, target_steps)
        interpolated_sigma_np = np.polyval(coefficients, target_steps_np)

        return torch.tensor(interpolated_sigma_np, device=sigma_schedule.device, dtype=sigma_schedule.dtype)

    def interpolate_sigma_schedule_constrained(self, sigma_schedule, target_steps):
        """Cubic-spline resample with zero first derivative clamped at both endpoints."""
        sigma_schedule_np = sigma_schedule.cpu().numpy()
        original_steps = np.linspace(0, 1, len(sigma_schedule_np))
        target_steps_np = np.linspace(0, 1, target_steps)

        cs = CubicSpline(original_steps, sigma_schedule_np, bc_type=((1, 0.0), (1, 0.0)))
        interpolated_sigma_np = cs(target_steps_np)

        return torch.tensor(interpolated_sigma_np, device=sigma_schedule.device, dtype=sigma_schedule.dtype)

    def interpolate_sigma_schedule_exp(self, sigma_schedule, target_steps):
        """Linear interpolation performed in log-sigma space (geometric interpolation)."""
        log_sigma_schedule = torch.log(sigma_schedule)

        original_steps = torch.linspace(0, 1, steps=len(sigma_schedule))
        target_steps = torch.linspace(0, 1, steps=target_steps)

        interpolated_log_sigma = F.interpolate(
            log_sigma_schedule.unsqueeze(0).unsqueeze(0), # add fake batch and channel dims
            size=target_steps.shape[0],
            mode='linear',
            align_corners=True
        ).squeeze()

        return torch.exp(interpolated_log_sigma)

    def interpolate_sigma_schedule_power(self, sigma_schedule, target_steps):
        """Fit y = a * x^b via linear regression in log-log space, then resample."""
        sigma_schedule_np = sigma_schedule.cpu().numpy()
        original_steps = np.linspace(1, len(sigma_schedule_np), len(sigma_schedule_np))

        log_x = np.log(original_steps)
        log_y = np.log(sigma_schedule_np)
        coefficients = np.polyfit(log_x, log_y, deg=1)  # degree 1: linear fit in log-log space
        a = np.exp(coefficients[1])                     # intercept -> multiplier
        b = coefficients[0]                             # slope -> exponent

        target_steps_np = np.linspace(1, len(sigma_schedule_np), target_steps)
        interpolated_sigma_np = a * (target_steps_np ** b)

        return torch.tensor(interpolated_sigma_np, device=sigma_schedule.device, dtype=sigma_schedule.dtype)

    def interpolate_sigma_schedule_linear(self, sigma_schedule, target_steps):
        """Plain linear resample."""
        return F.interpolate(sigma_schedule.unsqueeze(0).unsqueeze(0), target_steps, mode='linear').squeeze(0).squeeze(0)

    def interpolate_sigma_schedule_nearest(self, sigma_schedule, target_steps):
        """Nearest-neighbor resample via F.interpolate (currently unused by main)."""
        return F.interpolate(sigma_schedule.unsqueeze(0).unsqueeze(0), target_steps, mode='nearest').squeeze(0).squeeze(0)

    def interpolate_nearest_neighbor(self, sigma_schedule, target_steps):
        """Nearest-neighbor resample via searchsorted (used for the "nearest" mode)."""
        original_steps = torch.linspace(0, 1, steps=len(sigma_schedule))
        target_steps = torch.linspace(0, 1, steps=target_steps)

        indices = torch.searchsorted(original_steps, target_steps)
        indices = torch.clamp(indices, 0, len(sigma_schedule) - 1)  # clamp to valid range
        return sigma_schedule[indices]

    def main(self, sigmas_0, sigmas_1, mode, order):
        self.order = order

        # bugfix: this used to be an `if` / `if` / `elif ...` chain, so the
        # "linear" branch only worked because no later condition matched; a single
        # elif chain makes the dispatch explicit.
        if mode == "linear":
            interpolate = self.interpolate_sigma_schedule_linear
        elif mode == "nearest":
            interpolate = self.interpolate_nearest_neighbor
        elif mode == "polynomial":
            interpolate = self.interpolate_sigma_schedule_poly
        elif mode == "exponential":
            interpolate = self.interpolate_sigma_schedule_exp
        elif mode == "power":
            interpolate = self.interpolate_sigma_schedule_power
        elif mode == "model":
            # bugfix: MLP training requires grad, which torch.inference_mode()
            # forbids — the original exited the context *before* calling the
            # interpolator, so the override had no effect. Run the call inside it.
            with torch.inference_mode(False):
                return (interpolate_sigma_schedule_model(sigmas_0, len(sigmas_1)), sigmas_1,)

        sigmas_0 = interpolate(sigmas_0, len(sigmas_1))
        return (sigmas_0, sigmas_1,)
class sigmas_noise_inversion:
    """Produces paired schedules for noise inversion: the input flipped for
    unsampling (fwd) and the original for resampling (rev), each padded with
    zero "null" entries so the model skips noise scaling and returns an epsilon
    prediction rather than a computed denoised latent."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS","SIGMAS",)
    RETURN_NAMES = ("sigmas_fwd","sigmas_rev",)
    CATEGORY = "RES4LYF/sigmas"
    DESCRIPTION = "For use with unsampling. Connect sigmas_fwd to the unsampling (first) node, and sigmas_rev to the sampling (second) node."

    def main(self, sigmas):
        work = sigmas.clone().to(torch.float64)
        null = torch.zeros(1, device=work.device, dtype=work.dtype)

        # fwd: reversed schedule, terminated by a null entry
        sigmas_fwd = torch.cat([work.flip(dims=[0]), null])
        # rev: original schedule, null-padded on both ends
        sigmas_rev = torch.cat([null, work, null])
        return (sigmas_fwd, sigmas_rev,)
def compute_sigma_next_variance_floor(sigma):
    """Largest permissible next sigma for variance-locked SDE stepping: the
    positive root of s^2 + s - sigma = 0."""
    return (torch.sqrt(1 + 4 * sigma) - 1) / 2
class sigmas_variance_floor:
    """Clamps each step of a schedule so no step is larger than variance-locked
    SDE sampling permits; offending values are replaced with the maximum
    permissible next sigma plus a small epsilon."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"
    DESCRIPTION = ("Process a sigma schedule so that any steps that are too large for variance-locked SDE sampling are replaced with the maximum permissible value."
                   "Will be very difficult to approach sigma = 0 due to the nature of the math, as steps become very small much below approximately sigma = 0.15 to 0.2.")

    def main(self, sigmas):
        dtype = sigmas.dtype
        work = sigmas.clone().to(torch.float64)
        for idx in range(len(work) - 1):
            # positive root of s^2 + s - sigma = 0: the variance floor for the next step
            sigma_next = (-1 + torch.sqrt(1 + 4 * work[idx])) / 2
            needs_floor = work[idx+1] < sigma_next and work[idx+1] > 0.0
            if needs_floor:
                print("swapped i+1 with sigma_next+0.001: ", work[idx+1], sigma_next + 0.001)
                work[idx+1] = sigma_next + 0.001
        return (work.to(dtype),)
class sigmas_from_text:
    """Parses a sigma schedule from free text: values separated by commas and/or
    whitespace, returned as a float64 tensor."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "text": ("STRING", {"default": "", "multiline": True}),
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    RETURN_NAMES = ("sigmas",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, text):
        # accept comma- and/or whitespace-separated values
        values = [float(val) for val in text.replace(",", " ").split()]
        # bugfix: the device was hard-coded to 'cuda', which crashed on CPU-only
        # machines; fall back to CPU when CUDA is unavailable.
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        sigmas = torch.tensor(values, device=device, dtype=torch.float64)
        return (sigmas,)
class sigmas_concatenate:
    """Joins two sigma schedules end to end."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas_1": ("SIGMAS", {"forceInput": True}),
                "sigmas_2": ("SIGMAS", {"forceInput": True}),
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas_1, sigmas_2):
        joined = torch.cat((sigmas_1, sigmas_2))
        return (joined,)
class sigmas_truncate:
    """Keeps only the first `sigmas_until` entries of a schedule."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "sigmas_until": ("INT", {"default": 10, "min": 0,"max": 1000,"step": 1}),
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas, sigmas_until):
        head = sigmas[:sigmas_until]
        return (head,)
class sigmas_start:
    """Drops the first `sigmas_until` entries of a schedule, keeping the rest."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "sigmas_until": ("INT", {"default": 10, "min": 0,"max": 1000,"step": 1}),
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas, sigmas_until):
        tail = sigmas[sigmas_until:]
        return (tail,)
class sigmas_split:
    """Returns the slice [sigmas_start:sigmas_end] of a schedule."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "sigmas_start": ("INT", {"default": 0, "min": 0,"max": 1000,"step": 1}),
                "sigmas_end": ("INT", {"default": 1000, "min": 0,"max": 1000,"step": 1}),
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas, sigmas_start, sigmas_end):
        # cleanup: an unreachable duplicate slicing block after this return was removed
        return (sigmas[sigmas_start:sigmas_end],)
class sigmas_pad:
    """Appends a single constant value to the end of a schedule (commonly 0.0)."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "value": ("FLOAT", {"default": 0.0, "min": -10000,"max": 10000,"step": 0.01})
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas, value):
        # bugfix: create the pad element on the same device as the input so that
        # torch.cat does not fail for CUDA-resident schedules
        pad = torch.tensor([value], dtype=sigmas.dtype, device=sigmas.device)
        return (torch.cat((sigmas, pad)),)
class sigmas_unpad:
    """Removes the last entry of a schedule (inverse of sigmas_pad)."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas):
        trimmed = sigmas[:-1]
        return (trimmed,)
class sigmas_set_floor:
def __init__(self):
pass
@classmethod
def INPUT_TYPES(s):
return {
"required": {
"sigmas": ("SIGMAS", {"forceInput": True}),
"floor": ("FLOAT", {"default": 0.0291675, "min": -10000,"max": 10000,"step": 0.01}),
"new_floor": ("FLOAT", {"default": 0.0291675, "min": -10000,"max": 10000,"step": 0.01})
}
}
RETURN_TYPES = ("SIGMAS",)
FUNCTION = "set_floor"
CATEGORY = "RES4LYF/sigmas"
def set_floor(self, sigmas, floor, new_floor):
sigmas[sigmas <= floor] = new_floor
return (sigmas,)
class sigmas_delete_below_floor:
    """Removes every sigma strictly below `floor` (boolean-mask filtering returns
    a new tensor; the input is untouched)."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "floor": ("FLOAT", {"default": 0.0291675, "min": -10000,"max": 10000,"step": 0.01})
            }
        }

    RETURN_TYPES = ("SIGMAS",)
    FUNCTION = "delete_below_floor"
    CATEGORY = "RES4LYF/sigmas"

    def delete_below_floor(self, sigmas, floor):
        keep = sigmas >= floor
        return (sigmas[keep],)
class sigmas_delete_value:
    """Removes every entry exactly equal to `value` (exact float comparison)."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "value": ("FLOAT", {"default": 0.0, "min": -1000,"max": 1000,"step": 0.01})
            }
        }

    RETURN_TYPES = ("SIGMAS",)
    FUNCTION = "delete_value"
    CATEGORY = "RES4LYF/sigmas"

    def delete_value(self, sigmas, value):
        keep = sigmas != value
        return (sigmas[keep],)
class sigmas_delete_consecutive_duplicates:
    """Collapses runs of repeated consecutive values, keeping the last element of
    each run."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas_1": ("SIGMAS", {"forceInput": True})
            }
        }

    RETURN_TYPES = ("SIGMAS",)
    FUNCTION = "delete_consecutive_duplicates"
    CATEGORY = "RES4LYF/sigmas"

    def delete_consecutive_duplicates(self, sigmas_1):
        # keep elements whose successor differs; always keep the final element.
        # bugfix: the trailing True flag is created on the input's device (the
        # original used a CPU tensor, breaking torch.cat for CUDA schedules)
        changed = sigmas_1[:-1] != sigmas_1[1:]
        last = torch.ones(1, dtype=torch.bool, device=sigmas_1.device)
        mask = torch.cat((changed, last))
        return (sigmas_1[mask],)
class sigmas_cleanup:
    """Drops sigmas below `sigmin`, collapses consecutive duplicates, then
    re-appends a terminal 0.0 (which the floor cull removes)."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "sigmin": ("FLOAT", {"default": 0.0291675, "min": 0,"max": 1000,"step": 0.01})
            }
        }

    RETURN_TYPES = ("SIGMAS",)
    FUNCTION = "cleanup"
    CATEGORY = "RES4LYF/sigmas"

    def cleanup(self, sigmas, sigmin):
        sigmas_culled = sigmas[sigmas >= sigmin]

        # collapse consecutive duplicates, keeping the last of each run.
        # bugfix: mask and terminal zero are created with the input's device (and
        # the zero with the input's dtype) — the original used CPU int tensors
        changed = sigmas_culled[:-1] != sigmas_culled[1:]
        last = torch.ones(1, dtype=torch.bool, device=sigmas_culled.device)
        filtered_sigmas = sigmas_culled[torch.cat((changed, last))]

        zero = torch.zeros(1, dtype=filtered_sigmas.dtype, device=filtered_sigmas.device)
        return (torch.cat((filtered_sigmas, zero)),)
class sigmas_mult:
    """Scales a schedule by a constant, optionally also multiplying element-wise
    by a second schedule."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "multiplier": ("FLOAT", {"default": 1, "min": -10000,"max": 10000,"step": 0.01})
            },
            "optional": {
                "sigmas2": ("SIGMAS", {"forceInput": False})
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas, multiplier, sigmas2=None):
        if sigmas2 is None:
            return (sigmas * multiplier,)
        return (sigmas * sigmas2 * multiplier,)
class sigmas_modulus:
    """Element-wise remainder of the schedule by `divisor`."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "divisor": ("FLOAT", {"default": 1, "min": -1000,"max": 1000,"step": 0.01})
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas, divisor):
        remainder = sigmas % divisor
        return (remainder,)
class sigmas_quotient:
    """Element-wise floor division of the schedule by `divisor`."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "divisor": ("FLOAT", {"default": 1, "min": -1000,"max": 1000,"step": 0.01})
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas, divisor):
        quotient = sigmas // divisor
        return (quotient,)
class sigmas_add:
    """Adds a constant `addend` to every entry of the schedule."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "addend": ("FLOAT", {"default": 1, "min": -1000,"max": 1000,"step": 0.01})
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas, addend):
        shifted = sigmas + addend
        return (shifted,)
class sigmas_power:
    """Raises every entry of the schedule to the given `power`."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "power": ("FLOAT", {"default": 1, "min": -100,"max": 100,"step": 0.01})
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas, power):
        return (torch.pow(sigmas, power),)
class sigmas_abs:
    """Element-wise absolute value of the schedule."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True})
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas):
        return (sigmas.abs(),)
class sigmas2_mult:
    """Element-wise product of two sigma schedules."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas_1": ("SIGMAS", {"forceInput": True}),
                "sigmas_2": ("SIGMAS", {"forceInput": True}),
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas_1, sigmas_2):
        product = torch.mul(sigmas_1, sigmas_2)
        return (product,)
class sigmas2_add:
    """Element-wise sum of two sigma schedules."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas_1": ("SIGMAS", {"forceInput": True}),
                "sigmas_2": ("SIGMAS", {"forceInput": True}),
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas_1, sigmas_2):
        total = torch.add(sigmas_1, sigmas_2)
        return (total,)
class sigmas_rescale:
    """Linearly rescales a schedule so its max maps to `start` and its min maps
    to `end`; reversing start/end flips the schedule's direction."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "start": ("FLOAT", {"default": 1.0, "min": -10000,"max": 10000,"step": 0.01}),
                "end": ("FLOAT", {"default": 0.0, "min": -10000,"max": 10000,"step": 0.01}),
                "sigmas": ("SIGMAS", ),
            },
            "optional": {
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    RETURN_NAMES = ("sigmas_rescaled",)
    CATEGORY = "RES4LYF/sigmas"
    DESCRIPTION = ("Can be used to set denoise. Results are generally better than with the approach used by KSampler and most nodes with denoise values "
                   "(which slice the sigmas schedule according to step count, not the noise level). Will also flip the sigma schedule if the start and end values are reversed."
                   )

    def main(self, start=0, end=-1, sigmas=None):
        s_min = sigmas.min()
        s_max = sigmas.max()
        s_out_1 = ((sigmas - s_min) * (start - end)) / (s_max - s_min) + end
        return (s_out_1,)
class sigmas_math1:
    """Evaluate one user-supplied math expression `f1` over up to three sigma
    schedules.

    The expression is eval()'d with tensors a, b, c (the sliced optional inputs),
    x, y, z (scalar constants broadcast to the working length), s (a step-index
    ramp), plus numpy (also star-injected) and torch in scope. NOTE(review):
    eval of user text is the node's whole purpose (a math scratchpad), but it
    runs with numpy's full namespace — do not feed it untrusted workflow text.
    """
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "start": ("INT", {"default": 0, "min": 0,"max": 10000,"step": 1}),
                "stop": ("INT", {"default": 0, "min": 0,"max": 10000,"step": 1}),
                "trim": ("INT", {"default": 0, "min": -10000,"max": 0,"step": 1}),
                "x": ("FLOAT", {"default": 1, "min": -10000,"max": 10000,"step": 0.01}),
                "y": ("FLOAT", {"default": 1, "min": -10000,"max": 10000,"step": 0.01}),
                "z": ("FLOAT", {"default": 1, "min": -10000,"max": 10000,"step": 0.01}),
                "f1": ("STRING", {"default": "s", "multiline": True}),
                "rescale" : ("BOOLEAN", {"default": False}),
                "max1": ("FLOAT", {"default": 14.614642, "min": -10000,"max": 10000,"step": 0.01}),
                "min1": ("FLOAT", {"default": 0.0291675, "min": -10000,"max": 10000,"step": 0.01}),
            },
            "optional": {
                "a": ("SIGMAS", {"forceInput": False}),
                "b": ("SIGMAS", {"forceInput": False}),
                "c": ("SIGMAS", {"forceInput": False}),
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, start=0, stop=0, trim=0, a=None, b=None, c=None, x=1.0, y=1.0, z=1.0, f1="s", rescale=False, min1=1.0, max1=1.0):
        # stop == 0 means "use the length of the shortest connected schedule"
        if stop == 0:
            t_lens = [len(tensor) for tensor in [a, b, c] if tensor is not None]
            t_len = stop = min(t_lens) if t_lens else 0
        else:
            stop = stop + 1   # treat the user-facing `stop` as inclusive
            t_len = stop - start

        # trim is <= 0: shortens the working range from the end
        stop = stop + trim
        t_len = t_len + trim

        t_a = t_b = t_c = None
        if a is not None:
            t_a = a[start:stop]
        if b is not None:
            t_b = b[start:stop]
        if c is not None:
            t_c = c[start:stop]

        # step-index ramp `s` and scalar constants broadcast to the working length
        t_s = torch.arange(0.0, t_len)
        t_x = torch.full((t_len,), x)
        t_y = torch.full((t_len,), y)
        t_z = torch.full((t_len,), z)

        # builtins disabled except round; numpy's namespace is merged in wholesale
        eval_namespace = {"__builtins__": None, "round": builtins.round, "np": np, "a": t_a, "b": t_b, "c": t_c, "x": t_x, "y": t_y, "z": t_z, "s": t_s, "torch": torch}
        eval_namespace.update(np.__dict__)

        s_out_1 = eval(f1, eval_namespace)

        # optional linear rescale of the result to [min1, max1]
        if rescale == True:
            s_out_1 = ((s_out_1 - min(s_out_1)) * (max1 - min1)) / (max(s_out_1) - min(s_out_1)) + min1

        return (s_out_1,)
class sigmas_math3:
    """Three-expression variant of sigmas_math1: evaluates f1, f2, f3 over the
    same working tensors and returns three schedules.

    Same eval() environment as sigmas_math1 minus `round` (builtins are fully
    disabled here). NOTE(review): eval of user text is deliberate, but numpy's
    entire namespace is injected — keep inputs trusted.
    """
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "start": ("INT", {"default": 0, "min": 0,"max": 10000,"step": 1}),
                "stop": ("INT", {"default": 0, "min": 0,"max": 10000,"step": 1}),
                "trim": ("INT", {"default": 0, "min": -10000,"max": 0,"step": 1}),
            },
            "optional": {
                "a": ("SIGMAS", {"forceInput": False}),
                "b": ("SIGMAS", {"forceInput": False}),
                "c": ("SIGMAS", {"forceInput": False}),
                "x": ("FLOAT", {"default": 1, "min": -10000,"max": 10000,"step": 0.01}),
                "y": ("FLOAT", {"default": 1, "min": -10000,"max": 10000,"step": 0.01}),
                "z": ("FLOAT", {"default": 1, "min": -10000,"max": 10000,"step": 0.01}),
                "f1": ("STRING", {"default": "s", "multiline": True}),
                "rescale1" : ("BOOLEAN", {"default": False}),
                "max1": ("FLOAT", {"default": 14.614642, "min": -10000,"max": 10000,"step": 0.01}),
                "min1": ("FLOAT", {"default": 0.0291675, "min": -10000,"max": 10000,"step": 0.01}),
                "f2": ("STRING", {"default": "s", "multiline": True}),
                "rescale2" : ("BOOLEAN", {"default": False}),
                "max2": ("FLOAT", {"default": 14.614642, "min": -10000,"max": 10000,"step": 0.01}),
                "min2": ("FLOAT", {"default": 0.0291675, "min": -10000,"max": 10000,"step": 0.01}),
                "f3": ("STRING", {"default": "s", "multiline": True}),
                "rescale3" : ("BOOLEAN", {"default": False}),
                "max3": ("FLOAT", {"default": 14.614642, "min": -10000,"max": 10000,"step": 0.01}),
                "min3": ("FLOAT", {"default": 0.0291675, "min": -10000,"max": 10000,"step": 0.01}),
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS","SIGMAS","SIGMAS")
    CATEGORY = "RES4LYF/sigmas"

    def main(self, start=0, stop=0, trim=0, a=None, b=None, c=None, x=1.0, y=1.0, z=1.0, f1="s", f2="s", f3="s", rescale1=False, rescale2=False, rescale3=False, min1=1.0, max1=1.0, min2=1.0, max2=1.0, min3=1.0, max3=1.0):
        # stop == 0 means "use the length of the shortest connected schedule"
        if stop == 0:
            t_lens = [len(tensor) for tensor in [a, b, c] if tensor is not None]
            t_len = stop = min(t_lens) if t_lens else 0
        else:
            stop = stop + 1   # treat the user-facing `stop` as inclusive
            t_len = stop - start

        # trim is <= 0: shortens the working range from the end
        stop = stop + trim
        t_len = t_len + trim

        t_a = t_b = t_c = None
        if a is not None:
            t_a = a[start:stop]
        if b is not None:
            t_b = b[start:stop]
        if c is not None:
            t_c = c[start:stop]

        # step-index ramp `s` and scalar constants broadcast to the working length
        t_s = torch.arange(0.0, t_len)
        t_x = torch.full((t_len,), x)
        t_y = torch.full((t_len,), y)
        t_z = torch.full((t_len,), z)

        # builtins disabled; numpy's namespace is merged in wholesale
        eval_namespace = {"__builtins__": None, "np": np, "a": t_a, "b": t_b, "c": t_c, "x": t_x, "y": t_y, "z": t_z, "s": t_s, "torch": torch}
        eval_namespace.update(np.__dict__)

        s_out_1 = eval(f1, eval_namespace)
        s_out_2 = eval(f2, eval_namespace)
        s_out_3 = eval(f3, eval_namespace)

        # optional per-output linear rescale to [minN, maxN]
        if rescale1 == True:
            s_out_1 = ((s_out_1 - min(s_out_1)) * (max1 - min1)) / (max(s_out_1) - min(s_out_1)) + min1
        if rescale2 == True:
            s_out_2 = ((s_out_2 - min(s_out_2)) * (max2 - min2)) / (max(s_out_2) - min(s_out_2)) + min2
        if rescale3 == True:
            s_out_3 = ((s_out_3 - min(s_out_3)) * (max3 - min3)) / (max(s_out_3) - min(s_out_3)) + min3

        return s_out_1, s_out_2, s_out_3
class sigmas_iteration_karras:
    """Builds an up-then-down "mountain" schedule from two karras legs and a
    matching momentum schedule (descending leg negated); optionally appends to
    existing momentum/sigma chains for iterative resampling."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "steps_up": ("INT", {"default": 30, "min": 0,"max": 10000,"step": 1}),
                "steps_down": ("INT", {"default": 30, "min": 0,"max": 10000,"step": 1}),
                "rho_up": ("FLOAT", {"default": 3, "min": -10000,"max": 10000,"step": 0.01}),
                "rho_down": ("FLOAT", {"default": 4, "min": -10000,"max": 10000,"step": 0.01}),
                "s_min_start": ("FLOAT", {"default":0.0291675, "min": -10000,"max": 10000,"step": 0.01}),
                "s_max": ("FLOAT", {"default": 2, "min": -10000,"max": 10000,"step": 0.01}),
                "s_min_end": ("FLOAT", {"default": 0.0291675, "min": -10000,"max": 10000,"step": 0.01}),
            },
            "optional": {
                "momentums": ("SIGMAS", {"forceInput": False}),
                "sigmas": ("SIGMAS", {"forceInput": False}),
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS","SIGMAS")
    RETURN_NAMES = ("momentums","sigmas")
    CATEGORY = "RES4LYF/schedulers"

    def main(self, steps_up, steps_down, rho_up, rho_down, s_min_start, s_max, s_min_end, sigmas=None, momentums=None):
        # ascending leg: karras schedule with its trailing zero dropped, reversed
        s_up = torch.flip(get_sigmas_karras(steps_up, s_min_start, s_max, rho_up)[:-1], dims=[0])
        # descending leg: trailing zero dropped
        s_down = get_sigmas_karras(steps_down, s_min_end, s_max, rho_down)[:-1]

        sigmas_new = torch.cat((s_up, s_down), dim=0)
        # momentums negate the descending leg
        momentums_new = torch.cat((s_up, -1 * s_down), dim=0)

        sigmas = sigmas_new if sigmas is None else torch.cat([sigmas, sigmas_new])
        momentums = momentums_new if momentums is None else torch.cat([momentums, momentums_new])
        return (momentums, sigmas)
class sigmas_iteration_polyexp:
    """Polyexponential counterpart of sigmas_iteration_karras: builds an
    up-then-down schedule plus matching momentum schedule (descending leg
    negated); optionally appends to existing chains."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "steps_up": ("INT", {"default": 30, "min": 0,"max": 10000,"step": 1}),
                "steps_down": ("INT", {"default": 30, "min": 0,"max": 10000,"step": 1}),
                "rho_up": ("FLOAT", {"default": 0.6, "min": -10000,"max": 10000,"step": 0.01}),
                "rho_down": ("FLOAT", {"default": 0.8, "min": -10000,"max": 10000,"step": 0.01}),
                "s_min_start": ("FLOAT", {"default":0.0291675, "min": -10000,"max": 10000,"step": 0.01}),
                "s_max": ("FLOAT", {"default": 2, "min": -10000,"max": 10000,"step": 0.01}),
                "s_min_end": ("FLOAT", {"default": 0.0291675, "min": -10000,"max": 10000,"step": 0.01}),
            },
            "optional": {
                "momentums": ("SIGMAS", {"forceInput": False}),
                "sigmas": ("SIGMAS", {"forceInput": False}),
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS","SIGMAS")
    RETURN_NAMES = ("momentums","sigmas")
    CATEGORY = "RES4LYF/schedulers"

    def main(self, steps_up, steps_down, rho_up, rho_down, s_min_start, s_max, s_min_end, sigmas=None, momentums=None):
        # ascending leg: polyexponential schedule with trailing zero dropped, reversed
        s_up = torch.flip(get_sigmas_polyexponential(steps_up, s_min_start, s_max, rho_up)[:-1], dims=[0])
        # descending leg: trailing zero dropped
        s_down = get_sigmas_polyexponential(steps_down, s_min_end, s_max, rho_down)[:-1]

        sigmas_new = torch.cat((s_up, s_down), dim=0)
        # momentums negate the descending leg
        momentums_new = torch.cat((s_up, -1 * s_down), dim=0)

        sigmas = sigmas_new if sigmas is None else torch.cat([sigmas, sigmas_new])
        momentums = momentums_new if momentums is None else torch.cat([momentums, momentums_new])
        return (momentums, sigmas)
class tan_scheduler:
    """Arctangent-shaped schedule: an atan sigmoid over `steps` points, rescaled
    to run from `start` down to `end`; `offset` positions the knee, `slope`
    sets its sharpness."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "steps": ("INT", {"default": 20, "min": 0,"max": 100000,"step": 1}),
                "offset": ("FLOAT", {"default": 20, "min": 0,"max": 100000,"step": 0.1}),
                "slope": ("FLOAT", {"default": 20, "min": -100000,"max": 100000,"step": 0.1}),
                "start": ("FLOAT", {"default": 20, "min": -100000,"max": 100000,"step": 0.1}),
                "end": ("FLOAT", {"default": 20, "min": -100000,"max": 100000,"step": 0.1}),
                "sgm" : ("BOOLEAN", {"default": False}),
                "pad" : ("BOOLEAN", {"default": False}),
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/schedulers"

    def main(self, steps, slope, offset, start, end, sgm, pad):
        # unit atan curve in (0, 1), decreasing in x
        def unit(x):
            return ((2/pi)*atan(-slope*(x-offset))+1)/2

        # normalization endpoints taken over the *original* step count
        smin = unit(steps-1)
        srange = unit(0) - smin
        sscale = start - end

        # sgm adds one extra sample, then drops the final one
        count = steps + 1 if sgm else steps
        sigmas = [(unit(x) - smin) * (1/srange) * sscale + end for x in range(count)]
        if sgm:
            sigmas = sigmas[:-1]

        if pad:
            sigmas = sigmas + [0]
        return (torch.tensor(sigmas),)
class tan_scheduler_2stage:
    """Two-stage arctangent schedule: one atan curve from `start` to `middle`,
    a second from `middle` to `end`, joined at step `midpoint`."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "steps": ("INT", {"default": 40, "min": 0,"max": 100000,"step": 1}),
                "midpoint": ("INT", {"default": 20, "min": 0,"max": 100000,"step": 1}),
                "pivot_1": ("INT", {"default": 10, "min": 0,"max": 100000,"step": 1}),
                "pivot_2": ("INT", {"default": 30, "min": 0,"max": 100000,"step": 1}),
                "slope_1": ("FLOAT", {"default": 1, "min": -100000,"max": 100000,"step": 0.1}),
                "slope_2": ("FLOAT", {"default": 1, "min": -100000,"max": 100000,"step": 0.1}),
                "start": ("FLOAT", {"default": 1.0, "min": -100000,"max": 100000,"step": 0.1}),
                "middle": ("FLOAT", {"default": 0.5, "min": -100000,"max": 100000,"step": 0.1}),
                "end": ("FLOAT", {"default": 0.0, "min": -100000,"max": 100000,"step": 0.1}),
                "pad" : ("BOOLEAN", {"default": False}),
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    RETURN_NAMES = ("sigmas",)
    CATEGORY = "RES4LYF/schedulers"

    def get_tan_sigmas(self, steps, slope, pivot, start, end):
        """One atan curve over `steps` points, rescaled to run from `start` to `end`."""
        def unit(x):
            return ((2/pi)*atan(-slope*(x-pivot))+1)/2

        smin = unit(steps-1)
        srange = unit(0) - smin
        sscale = start - end
        return [(unit(x) - smin) * (1/srange) * sscale + end for x in range(steps)]

    def main(self, steps, midpoint, start, middle, end, pivot_1, pivot_2, slope_1, slope_2, pad):
        steps += 2
        stage_2_len = steps - midpoint
        stage_1_len = steps - stage_2_len

        # first leg ends exactly at `middle`, which the second leg begins with,
        # so the duplicated join point is dropped from the first leg
        first_leg = self.get_tan_sigmas(stage_1_len, slope_1, pivot_1, start, middle)[:-1]
        second_leg = self.get_tan_sigmas(stage_2_len, slope_2, pivot_2 - stage_1_len, middle, end)
        if pad:
            second_leg = second_leg + [0]

        return (torch.tensor(first_leg + second_leg),)
class tan_scheduler_2stage_simple:
    """Simplified two-stage arctangent scheduler: pivots are given as fractions
    of the total step count, the midpoint is derived as their mean, and slopes
    are normalized relative to a 40-step baseline."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "steps": ("INT", {"default": 40, "min": 0,"max": 100000,"step": 1}),
                "pivot_1": ("FLOAT", {"default": 1, "min": -100000,"max": 100000,"step": 0.01}),
                "pivot_2": ("FLOAT", {"default": 1, "min": -100000,"max": 100000,"step": 0.01}),
                "slope_1": ("FLOAT", {"default": 1, "min": -100000,"max": 100000,"step": 0.01}),
                "slope_2": ("FLOAT", {"default": 1, "min": -100000,"max": 100000,"step": 0.01}),
                "start": ("FLOAT", {"default": 1.0, "min": -100000,"max": 100000,"step": 0.01}),
                "middle": ("FLOAT", {"default": 0.5, "min": -100000,"max": 100000,"step": 0.01}),
                "end": ("FLOAT", {"default": 0.0, "min": -100000,"max": 100000,"step": 0.01}),
                "pad" : ("BOOLEAN", {"default": False}),
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    RETURN_NAMES = ("sigmas",)
    CATEGORY = "RES4LYF/schedulers"

    def get_tan_sigmas(self, steps, slope, pivot, start, end):
        """One atan curve over `steps` points, rescaled to run from `start` to `end`."""
        def unit(x):
            return ((2/pi)*atan(-slope*(x-pivot))+1)/2

        smin = unit(steps-1)
        srange = unit(0) - smin
        sscale = start - end
        return [(unit(x) - smin) * (1/srange) * sscale + end for x in range(steps)]

    def main(self, steps, start, middle, end, pivot_1, pivot_2, slope_1, slope_2, pad):
        steps += 2

        # fractional pivots -> absolute step indices; midpoint halfway between them
        midpoint = int( (steps*pivot_1 + steps*pivot_2) / 2 )
        pivot_1 = int(steps * pivot_1)
        pivot_2 = int(steps * pivot_2)

        # slopes are calibrated for a 40-step schedule; rescale for other lengths
        slope_1 = slope_1 / (steps/40)
        slope_2 = slope_2 / (steps/40)

        stage_2_len = steps - midpoint
        stage_1_len = steps - stage_2_len

        # the join point `middle` appears in both legs; drop it from the first
        first_leg = self.get_tan_sigmas(stage_1_len, slope_1, pivot_1, start, middle)[:-1]
        second_leg = self.get_tan_sigmas(stage_2_len, slope_2, pivot_2 - stage_1_len, middle, end)
        if pad:
            second_leg = second_leg + [0]

        return (torch.tensor(first_leg + second_leg),)
class linear_quadratic_advanced:
    """Thin wrapper node around the module-level get_sigmas() helper, selecting
    the "linear_quadratic" scheduler with a configurable inflection point."""
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "model": ("MODEL",),
                "steps": ("INT", {"default": 40, "min": 0,"max": 100000,"step": 1}),
                "denoise": ("FLOAT", {"default": 1.0, "min": -100000,"max": 100000,"step": 0.01}),
                "inflection_percent": ("FLOAT", {"default": 0.5, "min": 0,"max": 1,"step": 0.01}),
            },
            # "optional": {
            # }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    RETURN_NAMES = ("sigmas",)
    CATEGORY = "RES4LYF/schedulers"

    def main(self, steps, denoise, inflection_percent, model=None):
        # get_sigmas() is defined later in this module; inflection_percent maps
        # to its lq_inflection_percent parameter
        sigmas = get_sigmas(model, "linear_quadratic", steps, denoise, inflection_percent)
        return (sigmas, )
class constant_scheduler:
    """Piecewise-constant schedule of steps+1 entries: `value_start` up to the
    cutoff index, `value_end` afterward."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "steps": ("INT", {"default": 40, "min": 0,"max": 100000,"step": 1}),
                "value_start": ("FLOAT", {"default": 1.0, "min": -100000,"max": 100000,"step": 0.01}),
                "value_end": ("FLOAT", {"default": 0.0, "min": -100000,"max": 100000,"step": 0.01}),
                "cutoff_percent": ("FLOAT", {"default": 1.0, "min": 0,"max": 1,"step": 0.01}),
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    RETURN_NAMES = ("sigmas",)
    CATEGORY = "RES4LYF/schedulers"

    def main(self, steps, value_start, value_end, cutoff_percent):
        total = steps + 1
        cutoff_step = int(round(steps * cutoff_percent)) + 1

        # head length mimics slice semantics (never longer than the full schedule)
        head = torch.full((min(cutoff_step, total),), value_start)
        tail = torch.full((total - cutoff_step,), value_end)
        return (torch.cat((head, tail), dim=0),)
def get_sigmas_simple_exponential(model, steps):
    """Subsample the model's sigma table at `steps` evenly spaced points
    (ComfyUI "simple" style), append 0.0, then scale by a linear ramp 1 -> 0.

    NOTE(review): torch.exp(torch.log(...)) is numerically ~identity; it is kept
    verbatim to preserve the original bit-level behavior.
    """
    sampling = model.model_sampling
    stride = len(sampling.sigmas) / steps
    picked = [float(sampling.sigmas[-(1 + int(i * stride))]) for i in range(steps)]
    picked += [0.0]
    sigs = torch.FloatTensor(picked)
    ramp = torch.exp(torch.log(torch.linspace(1, 0, steps + 1)))
    return sigs * ramp
# Registry of additional scheduler callables keyed by display name; each entry
# takes (model, steps) and returns a 1-D sigma tensor. Presumably consumed by
# scheduler-selection code elsewhere in the package — not visible from here.
extra_schedulers = {
    "simple_exponential": get_sigmas_simple_exponential
}
def get_sigmas(model, scheduler, steps, denoise, lq_inflection_percent=0.5): #adapted from comfyui
    """Compute a sigma schedule for `model` using the named scheduler.

    Args:
        model: ComfyUI model patcher (has ``.model``) or wrapped sampler model
            (has ``.inner_model``).
        scheduler: scheduler name; "beta57" and "linear_quadratic" are handled
            locally, anything else is delegated to comfy.samplers.calculate_sigmas.
        steps: number of steps the caller actually runs (last steps+1 sigmas kept).
        denoise: values < 1.0 stretch the schedule (total_steps = steps/denoise)
            so the returned tail covers only part of the noise range; <= 0.0
            yields an empty schedule.
        lq_inflection_percent: linear fraction for the linear_quadratic schedule.

    Returns:
        A 1-D tensor of sigmas (length steps+1, or empty when denoise <= 0).
    """
    total_steps = steps
    if denoise < 1.0:
        if denoise <= 0.0:
            # Fix: this branch previously returned a 1-tuple while every other
            # path returns a bare tensor; callers (e.g. node main() methods)
            # wrap the result themselves, so the tuple was double-wrapped.
            return torch.FloatTensor([])
        total_steps = int(steps/denoise)

    #model_sampling = model.get_model_object("model_sampling")
    if hasattr(model, "model"):
        model_sampling = model.model.model_sampling
    elif hasattr(model, "inner_model"):
        model_sampling = model.inner_model.inner_model.model_sampling
    else:
        # Previously fell through to a NameError on model_sampling; fail with a
        # clear, catchable message instead.
        raise AttributeError("get_sigmas: model has neither 'model' nor 'inner_model'")

    if scheduler == "beta57":
        sigmas = comfy.samplers.beta_scheduler(model_sampling, total_steps, alpha=0.5, beta=0.7)
    elif scheduler == "linear_quadratic":
        linear_steps = int(total_steps * lq_inflection_percent)
        sigmas = comfy.samplers.linear_quadratic_schedule(model_sampling, total_steps, threshold_noise=0.025, linear_steps=linear_steps)
    else:
        sigmas = comfy.samplers.calculate_sigmas(model_sampling, scheduler, total_steps).cpu()

    # Keep only the tail that the caller will actually step through.
    sigmas = sigmas[-(steps + 1):]
    return sigmas
================================================
FILE: legacy/tiling.py
================================================
import torch
import itertools
import numpy as np
# tiled sampler code adapted from https://github.com/BlenderNeko/ComfyUI_TiledKSampler
# for use with https://github.com/ClownsharkBatwing/UltraCascade
def grouper(n, iterable):
    """Yield lists of up to ``n`` consecutive items from ``iterable``; the final
    chunk may be shorter. Yields nothing for an empty iterable (or n == 0)."""
    it = iter(iterable)
    while chunk := list(itertools.islice(it, n)):
        yield chunk
def create_batches(n, iterable):
    """Group consecutive tiles that share the same (h_len, w_len) — elements 1
    and 3 of each tile tuple — then split each run into batches of up to n."""
    same_size_runs = itertools.groupby(iterable, key=lambda tile: (tile[1], tile[3]))
    for _, run in same_size_runs:
        yield from grouper(n, run)
def get_slice(tensor, h, h_len, w, w_len):
    """Return a view of the (h:h+h_len, w:w+w_len) window over the last two dims."""
    return tensor.narrow(-2, h, h_len).narrow(-1, w, w_len)
def set_slice(tensor1, tensor2, h, h_len, w, w_len, mask=None):
    """Write tensor2 into tensor1's (h:h+h_len, w:w+w_len) window in place.
    When mask is given, blend: keep (1-mask) of the old values, add mask of the new."""
    region = (slice(None), slice(None), slice(h, h + h_len), slice(w, w + w_len))
    if mask is None:
        tensor1[region] = tensor2
    else:
        tensor1[region] = tensor1[region] * (1 - mask) + tensor2 * mask
def get_tiles_and_masks_simple(steps, latent_shape, tile_height, tile_width, compression=4):
    """Single-pass regular tiling grid over the latent, no blend masks.

    tile_height/tile_width are in pixels; `compression` converts them to latent
    units. Returns the nested passes structure: [[ [tile, ...] ]] where each
    tile is (h, h_len, w, w_len, steps, mask=None). Edge tiles are clipped.
    """
    lat_h = latent_shape[-2]
    lat_w = latent_shape[-1]
    t_h = int(tile_height // compression)  # pixel -> latent size (was // 8)
    t_w = int(tile_width // compression)

    tiles = []
    for y in np.arange(0, lat_h, t_h):
        for x in np.arange(0, lat_w, t_w):
            y0 = int(y)
            x0 = int(x)
            tiles.append((y0, min(t_h, lat_h - y0), x0, min(t_w, lat_w - x0), steps, None))

    return [[tiles]]
def get_tiles_and_masks_padded(steps, latent_shape, tile_height, tile_width, compression=4):
    """Four-pass overlapping tiling with feathered blend masks.

    Pass 1 is the base grid; passes 2-4 are shifted by half a tile vertically,
    horizontally, and both, so tile seams from one pass fall in the interior of
    another. Each tile is (h, h_len, w, w_len, steps, mask) where mask is a
    [batch, 1, h_len, w_len] float tensor selecting which region the tile owns.
    """
    batch_size = latent_shape[0]
    latent_size_h = latent_shape[-2]
    latent_size_w = latent_shape[-1]
    tile_size_h = int(tile_height // compression) #CHANGED FROM 8
    tile_size_w = int(tile_width // compression) #CHANGED FROM 8
    #if compression > 1:
    # Round tile sizes down to multiples of 4 so the quarter-tile feather
    # boundaries below land on integer offsets.
    tile_size_h = int((tile_size_h // 4) * 4) #MIGHT BE A PROBLEM WITH STAGE C?
    tile_size_w = int((tile_size_w // 4) * 4)

    # masks: 3x3 grid of one-hot border regions; the center cell [1][1] covers
    # the interior (quarter-tile inset), the others cover edges/corners.
    mask_h = [0, tile_size_h // 4, tile_size_h - tile_size_h // 4, tile_size_h]
    mask_w = [0, tile_size_w // 4, tile_size_w - tile_size_w // 4, tile_size_w]
    masks = [[] for _ in range(3)]
    for i in range(3):
        for j in range(3):
            mask = torch.zeros((batch_size, 1, tile_size_h, tile_size_w), dtype=torch.float32, device='cpu')
            mask[:, :, mask_h[i]:mask_h[i+1],
                 mask_w[j]:mask_w[j+1]] = 1.0
            masks[i].append(mask)

    def create_mask(h_ind, w_ind, h_ind_max, w_ind_max, mask_h, mask_w, h_len, w_len):
        # Interior tiles own only the center region; tiles on a grid edge also
        # own the adjacent border strips (and corners) since no neighbor covers them.
        mask = masks[1][1]
        if not (h_ind == 0 or h_ind == h_ind_max or w_ind == 0 or w_ind == w_ind_max):
            return get_slice(mask, 0, h_len, 0, w_len)
        mask = mask.clone()
        if h_ind == 0 and mask_h:
            mask += masks[0][1]
        if h_ind == h_ind_max and mask_h:
            mask += masks[2][1]
        if w_ind == 0 and mask_w:
            mask += masks[1][0]
        if w_ind == w_ind_max and mask_w:
            mask += masks[1][2]
        if h_ind == 0 and w_ind == 0 and mask_h and mask_w:
            mask += masks[0][0]
        if h_ind == 0 and w_ind == w_ind_max and mask_h and mask_w:
            mask += masks[0][2]
        if h_ind == h_ind_max and w_ind == 0 and mask_h and mask_w:
            mask += masks[2][0]
        if h_ind == h_ind_max and w_ind == w_ind_max and mask_h and mask_w:
            mask += masks[2][2]
        return get_slice(mask, 0, h_len, 0, w_len)

    h = np.arange(0, latent_size_h, tile_size_h)
    h_shift = np.arange(tile_size_h // 2, latent_size_h - tile_size_h // 2, tile_size_h)
    w = np.arange(0, latent_size_w, tile_size_w)
    # FIX: upper bound previously used `tile_size_h // 2` (copy-paste from the
    # h_shift line), which produced a spurious extra shifted column whenever
    # tile_size_h != tile_size_w.
    w_shift = np.arange(tile_size_w // 2, latent_size_w - tile_size_w // 2, tile_size_w)

    def create_tile(hs, ws, mask_h, mask_w, i, j):
        h = int(hs[i])
        w = int(ws[j])
        h_len = min(tile_size_h, latent_size_h - h)
        w_len = min(tile_size_w, latent_size_w - w)
        mask = create_mask(i, j, len(hs)-1, len(ws)-1, mask_h, mask_w, h_len, w_len)
        return (h, h_len, w, w_len, steps, mask)

    passes = [
        [[create_tile(h, w, True, True, i, j) for i in range(len(h)) for j in range(len(w))]],
        [[create_tile(h_shift, w, False, True, i, j) for i in range(len(h_shift)) for j in range(len(w))]],
        [[create_tile(h, w_shift, True, False, i, j) for i in range(len(h)) for j in range(len(w_shift))]],
        [[create_tile(h_shift, w_shift, False, False, i, j) for i in range(len(h_shift)) for j in range(len(w_shift))]],
    ]
    return passes
def mask_at_boundary(h, h_len, w, w_len, tile_size_h, tile_size_w, latent_size_h, latent_size_w, mask, device='cpu', compression=4):
    """Expand a clipped boundary tile back to the full tile size.

    If the tile already has full size (or spans the whole latent) it is returned
    unchanged. Otherwise the window is shifted back inside the latent so a full
    tile fits, and a mask is built that marks only the originally-covered region
    (or carries `mask` into it when one was supplied).
    """
    tile_size_h = int(tile_size_h // compression)  # pixel -> latent (was // 8)
    tile_size_w = int(tile_size_w // compression)

    full_h = h_len == tile_size_h or h_len == latent_size_h
    full_w = w_len == tile_size_w or w_len == latent_size_w
    if full_h and full_w:
        return h, h_len, w, w_len, mask

    # Negative offsets pull the window start back so h+tile fits in the latent.
    h_offset = min(0, latent_size_h - (h + tile_size_h))
    w_offset = min(0, latent_size_w - (w + tile_size_w))

    h_end = h_len if h_offset == 0 else tile_size_h
    w_end = w_len if w_offset == 0 else tile_size_w

    new_mask = torch.zeros((1, 1, tile_size_h, tile_size_w), dtype=torch.float32, device=device)
    new_mask[:, :, -h_offset:h_end, -w_offset:w_end] = 1.0 if mask is None else mask
    return h + h_offset, tile_size_h, w + w_offset, tile_size_w, new_mask
def get_tiles_and_masks_rgrid(steps, latent_shape, tile_height, tile_width, generator, compression=4):
def calc_coords(latent_size, tile_size, jitter):
tile_coords = int((latent_size + jitter - 1) // tile_size + 1)
tile_coords = [np.clip(tile_size * c - jitter, 0, latent_size) for c in range(tile_coords + 1)]
tile_coords = [(c1, c2-c1) for c1, c2 in zip(tile_coords, tile_coords[1:])]
return tile_coords
#calc stuff
batch_size = latent_shape[0]
latent_size_h = latent_shape[-2]
latent_size_w = latent_shape[-1]
tile_size_h = int(tile_height // compression) #CHANGED FROM 8
tile_size_w = int(tile_width // compression) #CHANGED FROM 8
tiles_all = []
for s in range(steps):
rands = torch.rand((2,), dtype=torch.float32, generator=generator, device='cpu').numpy()
jitter_w1 = int(rands[0] * tile_size_w)
jitter_w2 = int(((rands[0] + .5) % 1.0) * tile_size_w)
jitter_h1 = int(rands[1] * tile_size_h)
jitter_h2 = int(((rands[1] + .5) % 1.0) * tile_size_h)
#calc number of tiles
tiles_h = [
calc_coords(latent_size_h, tile_size_h, jitter_h1),
calc_coords(latent_size_h, tile_size_h, jitter_h2)
]
tiles_w = [
calc_coords(latent_size_w, tile_size_w, jitter_w1),
calc_coords(latent_size_w, tile_size_w, jitter_w2)
]
tiles = []
if s % 2 == 0:
for i, h in enumerate(tiles_h[0]):
for w in tiles_w[i%2]:
tiles.append((int(h[0]), int(h[1]), int(w[0]), int(w[1]), 1, None))
else:
for i, w in enumerate(tiles_w[0]):
for h in tiles_h[i%2]:
tiles.append((int(h[0]), int(h[1]), int(w[0]), int(w[1]), 1, None))
tiles_all.append(tiles)
return [tiles_all]
================================================
FILE: lightricks/model.py
================================================
import torch
from torch import nn
import torch.nn.functional as F
import comfy.ldm.modules.attention
import comfy.ldm.common_dit
from einops import rearrange
import math
from typing import Dict, Optional, Tuple, List
from .symmetric_patchifier import SymmetricPatchifier, latent_to_pixel_coords
from ..helper import ExtraOptions
def get_timestep_embedding(
    timesteps: torch.Tensor,
    embedding_dim: int,
    flip_sin_to_cos: bool = False,
    downscale_freq_shift: float = 1,
    scale: float = 1,
    max_period: int = 10000,
):
    """Sinusoidal timestep embeddings, as in DDPM.

    Args:
        timesteps: 1-D tensor of N (possibly fractional) timestep indices.
        embedding_dim: output width; odd widths are zero-padded by one column.
        flip_sin_to_cos: emit [cos, sin] instead of the default [sin, cos].
        downscale_freq_shift: shifts the frequency-spacing denominator.
        scale: scalar multiplier applied before the sin/cos.
        max_period: controls the lowest embedding frequency.

    Returns:
        [N, embedding_dim] tensor of embeddings.
    """
    assert len(timesteps.shape) == 1, "Timesteps should be a 1d-array"

    half_dim = embedding_dim // 2
    # Geometric frequency ladder: exp(-log(max_period) * k / (half_dim - shift)).
    exponent = -math.log(max_period) * torch.arange(
        start=0, end=half_dim, dtype=torch.float32, device=timesteps.device
    )
    exponent = exponent / (half_dim - downscale_freq_shift)
    frequencies = torch.exp(exponent)

    # Outer product timestep x frequency, then optional scaling.
    args = timesteps[:, None].float() * frequencies[None, :]
    args = scale * args

    emb = torch.cat([torch.sin(args), torch.cos(args)], dim=-1)
    if flip_sin_to_cos:
        emb = torch.cat([emb[:, half_dim:], emb[:, :half_dim]], dim=-1)
    if embedding_dim % 2 == 1:
        emb = torch.nn.functional.pad(emb, (0, 1, 0, 0))
    return emb
class TimestepEmbedding(nn.Module):
    """Two-layer MLP (Linear -> SiLU -> Linear) that maps a sinusoidal timestep
    projection to the model's conditioning width, optionally adding a projected
    condition signal to the input first.

    NOTE(review): act_fn/post_act_fn other than the defaults are not wired up —
    a non-None post_act_fn leaves self.post_act unset (AttributeError in
    forward), matching the original behavior.
    """

    def __init__(
        self,
        in_channels: int,
        time_embed_dim: int,
        act_fn: str = "silu",
        out_dim: int = None,
        post_act_fn: Optional[str] = None,
        cond_proj_dim=None,
        sample_proj_bias=True,
        dtype=None, device=None, operations=None,
    ):
        super().__init__()

        self.linear_1 = operations.Linear(in_channels, time_embed_dim, sample_proj_bias, dtype=dtype, device=device)

        # Optional bias-free projection for an extra conditioning signal.
        if cond_proj_dim is not None:
            self.cond_proj = operations.Linear(cond_proj_dim, in_channels, bias=False, dtype=dtype, device=device)
        else:
            self.cond_proj = None

        self.act = nn.SiLU()

        time_embed_dim_out = time_embed_dim if out_dim is None else out_dim
        self.linear_2 = operations.Linear(time_embed_dim, time_embed_dim_out, sample_proj_bias, dtype=dtype, device=device)

        if post_act_fn is None:
            self.post_act = None
        # else:
        #     self.post_act = get_activation(post_act_fn)

    def forward(self, sample, condition=None):
        """Embed `sample` ([N, in_channels]); add projected `condition` first if given."""
        if condition is not None:
            sample = sample + self.cond_proj(condition)
        hidden = self.linear_1(sample)
        if self.act is not None:
            hidden = self.act(hidden)
        hidden = self.linear_2(hidden)
        if self.post_act is not None:
            hidden = self.post_act(hidden)
        return hidden
class Timesteps(nn.Module):
    """Thin module wrapper around get_timestep_embedding with fixed settings."""

    def __init__(self, num_channels: int, flip_sin_to_cos: bool, downscale_freq_shift: float, scale: int = 1):
        super().__init__()
        # Stored verbatim; all work happens in forward().
        self.num_channels = num_channels
        self.flip_sin_to_cos = flip_sin_to_cos
        self.downscale_freq_shift = downscale_freq_shift
        self.scale = scale

    def forward(self, timesteps):
        """Return [N, num_channels] sinusoidal embeddings for a 1-D timestep tensor."""
        return get_timestep_embedding(
            timesteps,
            self.num_channels,
            flip_sin_to_cos=self.flip_sin_to_cos,
            downscale_freq_shift=self.downscale_freq_shift,
            scale=self.scale,
        )
class PixArtAlphaCombinedTimestepSizeEmbeddings(nn.Module):
    """
    PixArt-Alpha timestep embedder: 256-channel sinusoidal projection followed
    by a TimestepEmbedding MLP. Size/aspect conditioning inputs are accepted
    for interface compatibility but ignored here.
    Reference:
    https://github.com/PixArt-alpha/PixArt-alpha/blob/0f55e922376d8b797edd44d25d0e7464b260dcab/diffusion/model/nets/PixArtMS.py#L164C9-L168C29
    """

    def __init__(self, embedding_dim, size_emb_dim, use_additional_conditions: bool = False, dtype=None, device=None, operations=None):
        super().__init__()
        self.outdim = size_emb_dim
        self.time_proj = Timesteps(num_channels=256, flip_sin_to_cos=True, downscale_freq_shift=0)
        self.timestep_embedder = TimestepEmbedding(in_channels=256, time_embed_dim=embedding_dim, dtype=dtype, device=device, operations=operations)

    def forward(self, timestep, resolution, aspect_ratio, batch_size, hidden_dtype):
        """Return (N, embedding_dim) timestep embeddings; other args unused."""
        projected = self.time_proj(timestep)
        return self.timestep_embedder(projected.to(dtype=hidden_dtype))  # (N, D)
class AdaLayerNormSingle(nn.Module):
    r"""
    adaLN-single conditioning head, as proposed in PixArt-Alpha
    (https://arxiv.org/abs/2310.00426; Section 2.3): embeds the timestep, then
    projects it to 6 * embedding_dim modulation parameters. No normalization
    or modulation is applied here — consumers split and apply the output.

    Parameters:
        embedding_dim (`int`): The size of each embedding vector.
        use_additional_conditions (`bool`): To use additional conditions for normalization or not.
    """

    def __init__(self, embedding_dim: int, use_additional_conditions: bool = False, dtype=None, device=None, operations=None):
        super().__init__()
        self.emb = PixArtAlphaCombinedTimestepSizeEmbeddings(
            embedding_dim, size_emb_dim=embedding_dim // 3, use_additional_conditions=use_additional_conditions, dtype=dtype, device=device, operations=operations
        )
        self.silu = nn.SiLU()
        self.linear = operations.Linear(embedding_dim, 6 * embedding_dim, bias=True, dtype=dtype, device=device)

    def forward(
        self,
        timestep: torch.Tensor,
        added_cond_kwargs: Optional[Dict[str, torch.Tensor]] = None,
        batch_size: Optional[int] = None,
        hidden_dtype: Optional[torch.dtype] = None,
    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
        """Return (modulation parameters [N, 6*D], raw embedded timestep [N, D])."""
        # Falsy (None or empty) kwargs fall back to the no-condition defaults.
        added_cond_kwargs = added_cond_kwargs or {"resolution": None, "aspect_ratio": None}
        embedded_timestep = self.emb(timestep, **added_cond_kwargs, batch_size=batch_size, hidden_dtype=hidden_dtype)
        return self.linear(self.silu(embedded_timestep)), embedded_timestep
class PixArtAlphaTextProjection(nn.Module):
    """
    Projects caption embeddings through Linear -> activation -> Linear.
    Adapted from https://github.com/PixArt-alpha/PixArt-alpha/blob/master/diffusion/model/nets/PixArt_blocks.py

    Raises:
        ValueError: if act_fn is neither "gelu_tanh" nor "silu".
    """

    def __init__(self, in_features, hidden_size, out_features=None, act_fn="gelu_tanh", dtype=None, device=None, operations=None):
        super().__init__()
        out_features = hidden_size if out_features is None else out_features
        self.linear_1 = operations.Linear(in_features=in_features, out_features=hidden_size, bias=True, dtype=dtype, device=device)
        if act_fn == "gelu_tanh":
            self.act_1 = nn.GELU(approximate="tanh")
        elif act_fn == "silu":
            self.act_1 = nn.SiLU()
        else:
            raise ValueError(f"Unknown activation function: {act_fn}")
        self.linear_2 = operations.Linear(in_features=hidden_size, out_features=out_features, bias=True, dtype=dtype, device=device)

    def forward(self, caption):
        """Map [..., in_features] caption embeddings to [..., out_features]."""
        return self.linear_2(self.act_1(self.linear_1(caption)))
class GELU_approx(nn.Module):
    """Linear projection followed by tanh-approximated GELU."""

    def __init__(self, dim_in, dim_out, dtype=None, device=None, operations=None):
        super().__init__()
        self.proj = operations.Linear(dim_in, dim_out, dtype=dtype, device=device)

    def forward(self, x):
        projected = self.proj(x)
        return torch.nn.functional.gelu(projected, approximate="tanh")
class FeedForward(nn.Module):
    """GELU MLP: project to dim*mult, dropout, project back to dim_out.
    NOTE(review): the `glu` flag is accepted but unused in this implementation."""

    def __init__(self, dim, dim_out, mult=4, glu=False, dropout=0., dtype=None, device=None, operations=None):
        super().__init__()
        hidden_dim = int(dim * mult)
        self.net = nn.Sequential(
            GELU_approx(dim, hidden_dim, dtype=dtype, device=device, operations=operations),
            nn.Dropout(dropout),
            operations.Linear(hidden_dim, dim_out, dtype=dtype, device=device),
        )

    def forward(self, x):
        return self.net(x)
def apply_rotary_emb(input_tensor, freqs_cis): #TODO: remove duplicate funcs and pick the best/fastest one
    """Apply rotary position embedding: rotate each (even, odd) channel pair of
    `input_tensor` by the angles encoded in freqs_cis = (cos, sin)."""
    cos_freqs, sin_freqs = freqs_cis[0], freqs_cis[1]

    # View the last dim as (d, 2) pairs and build the 90-degree rotated partner
    # (-x2, x1) for each pair, then flatten back.
    pairs = rearrange(input_tensor, "... (d r) -> ... d r", r=2)
    first, second = pairs.unbind(dim=-1)
    rotated_pairs = torch.stack((-second, first), dim=-1)
    rotated = rearrange(rotated_pairs, "... d r -> ... (d r)")

    return input_tensor * cos_freqs + rotated * sin_freqs
class CrossAttention(nn.Module):
    """Multi-head attention with RMS-normed q/k, optional rotary embedding (pe)
    and optional additive attention mask. With context=None it acts as
    self-attention over x."""

    def __init__(self, query_dim, context_dim=None, heads=8, dim_head=64, dropout=0., attn_precision=None, dtype=None, device=None, operations=None):
        super().__init__()
        inner_dim = dim_head * heads
        context_dim = query_dim if context_dim is None else context_dim
        self.attn_precision = attn_precision
        self.heads = heads
        self.dim_head = dim_head

        self.q_norm = operations.RMSNorm(inner_dim, dtype=dtype, device=device)
        self.k_norm = operations.RMSNorm(inner_dim, dtype=dtype, device=device)

        self.to_q = operations.Linear(query_dim, inner_dim, bias=True, dtype=dtype, device=device)
        self.to_k = operations.Linear(context_dim, inner_dim, bias=True, dtype=dtype, device=device)
        self.to_v = operations.Linear(context_dim, inner_dim, bias=True, dtype=dtype, device=device)

        self.to_out = nn.Sequential(operations.Linear(inner_dim, query_dim, dtype=dtype, device=device), nn.Dropout(dropout))

    def forward(self, x, context=None, mask=None, pe=None):
        context = x if context is None else context
        q = self.q_norm(self.to_q(x))
        k = self.k_norm(self.to_k(context))
        v = self.to_v(context)

        # Rotary embedding applies only to q/k (self-attention path).
        if pe is not None:
            q = apply_rotary_emb(q, pe)
            k = apply_rotary_emb(k, pe)

        if mask is None:
            attn_out = comfy.ldm.modules.attention.optimized_attention(q, k, v, self.heads, attn_precision=self.attn_precision)
        else:
            attn_out = comfy.ldm.modules.attention.optimized_attention_masked(q, k, v, self.heads, mask, attn_precision=self.attn_precision)
        return self.to_out(attn_out)
class BasicTransformerBlock(nn.Module):
    # DiT-style block: modulated self-attention, cross-attention to the text
    # context, then a gated feed-forward. Modulation parameters come from the
    # per-token `timestep` tensor combined with a learned scale_shift_table
    # (adaLN-single, 6 vectors: shift/scale/gate for attn and for the MLP).
    def __init__(self, dim, n_heads, d_head, context_dim=None, attn_precision=None, dtype=None, device=None, operations=None):
        super().__init__()
        self.attn_precision = attn_precision
        # Self-attention (context_dim=None -> attends over x itself; pe applied here).
        self.attn1 = CrossAttention(query_dim=dim, heads=n_heads, dim_head=d_head, context_dim=None, attn_precision=self.attn_precision, dtype=dtype, device=device, operations=operations)
        self.ff = FeedForward(dim, dim_out=dim, glu=True, dtype=dtype, device=device, operations=operations)
        # Cross-attention to the caption/context tokens (no rotary embedding).
        self.attn2 = CrossAttention(query_dim=dim, context_dim=context_dim, heads=n_heads, dim_head=d_head, attn_precision=self.attn_precision, dtype=dtype, device=device, operations=operations)
        # Learned base values for the 6 modulation vectors; weights are loaded
        # from the checkpoint (torch.empty here is never used uninitialized).
        self.scale_shift_table = nn.Parameter(torch.empty(6, dim, device=device, dtype=dtype))

    def forward(self, x, context=None, attention_mask=None, timestep=None, pe=None):
        # `timestep` is reshaped to [B, tokens_or_1, 6, dim] and added to the
        # learned table, then split into the 6 adaLN-single vectors.
        shift_msa, scale_msa, gate_msa, shift_mlp, scale_mlp, gate_mlp = (self.scale_shift_table[None, None].to(device=x.device, dtype=x.dtype) + timestep.reshape(x.shape[0], timestep.shape[1], self.scale_shift_table.shape[0], -1)).unbind(dim=2)
        # NOTE(review): x is updated in place (+=), so the caller's tensor is
        # mutated — callers appear to rely on passing a fresh tensor; confirm.
        x += self.attn1(comfy.ldm.common_dit.rms_norm(x) * (1 + scale_msa) + shift_msa, pe=pe) * gate_msa
        x += self.attn2(x, context=context, mask=attention_mask)
        y = comfy.ldm.common_dit.rms_norm(x) * (1 + scale_mlp) + shift_mlp
        x += self.ff(y) * gate_mlp
        return x
def get_fractional_positions(indices_grid, max_pos):
    """Normalize a [b, 3, n] coordinate grid by the per-axis maxima in max_pos.
    Returns [b, n, 3] with the three axes moved to the last dimension."""
    normalized_axes = [indices_grid[:, axis] / max_pos[axis] for axis in range(3)]
    return torch.stack(normalized_axes, dim=-1)
def precompute_freqs_cis(indices_grid, dim, out_dtype, theta=10000.0, max_pos=[20, 2048, 2048]):
    # Precompute rotary-embedding tables for a 3-axis (t, h, w) coordinate grid.
    # indices_grid: [b, 3, n] coordinates; returns (cos, sin), each [b, n, dim]
    # in out_dtype, suitable for apply_rotary_emb.
    dtype = torch.float32 #self.dtype

    fractional_positions = get_fractional_positions(indices_grid, max_pos)

    start = 1
    end = theta
    device = fractional_positions.device

    # Log-spaced frequencies from theta^0 = 1 up to theta^1 = theta; dim // 6
    # per axis (3 axes x 2 channels per frequency pair = 6).
    indices = theta ** (
        torch.linspace(
            math.log(start, theta),
            math.log(end, theta),
            dim // 6,
            device=device,
            dtype=dtype,
        )
    )
    indices = indices.to(dtype=dtype)

    indices = indices * math.pi / 2

    # Map fractional positions from [0, 1] to [-1, 1], take the outer product
    # with the frequencies, then flatten the 3 axes into one frequency dim.
    freqs = (
        (indices * (fractional_positions.unsqueeze(-1) * 2 - 1))
        .transpose(-1, -2)
        .flatten(2)
    )

    # Each frequency is duplicated so it applies to an (even, odd) channel pair.
    cos_freq = freqs.cos().repeat_interleave(2, dim=-1)
    sin_freq = freqs.sin().repeat_interleave(2, dim=-1)
    if dim % 6 != 0:
        # Pad the leading channels with the identity rotation (cos=1, sin=0).
        cos_padding = torch.ones_like(cos_freq[:, :, : dim % 6])
        sin_padding = torch.zeros_like(cos_freq[:, :, : dim % 6])
        cos_freq = torch.cat([cos_padding, cos_freq], dim=-1)
        sin_freq = torch.cat([sin_padding, sin_freq], dim=-1)
    return cos_freq.to(out_dtype), sin_freq.to(out_dtype)
class ReLTXVModel(torch.nn.Module):
def __init__(self,
             in_channels=128,
             cross_attention_dim=2048,
             attention_head_dim=64,
             num_attention_heads=32,

             caption_channels=4096,
             num_layers=28,


             positional_embedding_theta=10000.0,
             positional_embedding_max_pos=[20, 2048, 2048],
             causal_temporal_positioning=False,
             vae_scale_factors=(8, 32, 32),
             dtype=None, device=None, operations=None, **kwargs):
    """LTX-Video transformer: patchify -> adaLN-single conditioned DiT blocks
    -> modulated projection back to latent channels. Extra positional-embedding
    kwargs are accepted for checkpoint-config compatibility but unused here."""
    super().__init__()
    self.generator = None
    self.vae_scale_factors = vae_scale_factors
    self.dtype = dtype
    self.out_channels = in_channels
    self.inner_dim = num_attention_heads * attention_head_dim
    self.causal_temporal_positioning = causal_temporal_positioning

    # Fix: state read by the style-transfer path in forward() was never
    # initialized, so the first forward() raised AttributeError unless some
    # external code assigned these attributes beforehand. Defaults preserve
    # the intended "unset" semantics (forward checks `is None`).
    self.style_dtype = None
    self.y0_adain_embed = None
    self.mu_s = None
    self.y0_color = None

    self.patchify_proj = operations.Linear(in_channels, self.inner_dim, bias=True, dtype=dtype, device=device)

    self.adaln_single = AdaLayerNormSingle(
        self.inner_dim, use_additional_conditions=False, dtype=dtype, device=device, operations=operations
    )
    # self.adaln_single.linear = operations.Linear(self.inner_dim, 4 * self.inner_dim, bias=True, dtype=dtype, device=device)

    self.caption_projection = PixArtAlphaTextProjection(
        in_features=caption_channels, hidden_size=self.inner_dim, dtype=dtype, device=device, operations=operations
    )

    self.transformer_blocks = nn.ModuleList(
        [
            BasicTransformerBlock(
                self.inner_dim,
                num_attention_heads,
                attention_head_dim,
                context_dim=cross_attention_dim,
                # attn_precision=attn_precision,
                dtype=dtype, device=device, operations=operations
            )
            for d in range(num_layers)
        ]
    )

    self.scale_shift_table = nn.Parameter(torch.empty(2, self.inner_dim, dtype=dtype, device=device))
    self.norm_out = operations.LayerNorm(self.inner_dim, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device)
    self.proj_out = operations.Linear(self.inner_dim, self.out_channels, dtype=dtype, device=device)

    self.patchifier = SymmetricPatchifier(1)
def forward(self, x, timestep, context, attention_mask, frame_rate=25, transformer_options={}, keyframe_idxs=None, **kwargs):
    # LTX-Video transformer forward, extended with an optional style-guide
    # post-pass: when y0_style_pos / y0_style_neg reference latents are present
    # in transformer_options, the denoised estimate is re-colored toward the
    # reference in patch-embedding space (AdaIN or WCT) and eps is re-derived.
    patches_replace = transformer_options.get("patches_replace", {})
    SIGMA = timestep[0].unsqueeze(0) #/ 1000
    EO = transformer_options.get("ExtraOptions", ExtraOptions(""))

    # Style-guide references and weights (absent / 0.0 disables each path).
    y0_style_pos = transformer_options.get("y0_style_pos")
    y0_style_neg = transformer_options.get("y0_style_neg")

    y0_style_pos_weight = transformer_options.get("y0_style_pos_weight", 0.0)
    y0_style_pos_synweight = transformer_options.get("y0_style_pos_synweight", 0.0)
    # synweight is expressed relative to the main weight.
    y0_style_pos_synweight *= y0_style_pos_weight

    y0_style_neg_weight = transformer_options.get("y0_style_neg_weight", 0.0)
    y0_style_neg_synweight = transformer_options.get("y0_style_neg_synweight", 0.0)
    y0_style_neg_synweight *= y0_style_neg_weight

    # Keep the pre-patchify input for the style pass (x is overwritten below).
    x_orig = x.clone()
    orig_shape = list(x.shape)

    # Patchify video latents and derive per-token pixel coordinates for RoPE.
    x, latent_coords = self.patchifier.patchify(x)
    pixel_coords = latent_to_pixel_coords(
        latent_coords=latent_coords,
        scale_factors=self.vae_scale_factors,
        causal_fix=self.causal_temporal_positioning,
    )

    if keyframe_idxs is not None:
        # Overwrite coordinates for trailing keyframe tokens with caller-supplied ones.
        pixel_coords[:, :, -keyframe_idxs.shape[2]:] = keyframe_idxs

    # Temporal axis expressed in seconds (frame index / frame_rate).
    fractional_coords = pixel_coords.to(torch.float32)
    fractional_coords[:, 0] = fractional_coords[:, 0] * (1.0 / frame_rate)

    x = self.patchify_proj(x)
    timestep = timestep * 1000.0

    # Convert a boolean/integer mask to additive float form for attention.
    if attention_mask is not None and not torch.is_floating_point(attention_mask):
        attention_mask = (attention_mask - 1).to(x.dtype).reshape((attention_mask.shape[0], 1, -1, attention_mask.shape[-1])) * torch.finfo(x.dtype).max

    pe = precompute_freqs_cis(fractional_coords, dim=self.inner_dim, out_dtype=x.dtype)

    batch_size = x.shape[0]
    timestep, embedded_timestep = self.adaln_single(
        timestep.flatten(),
        {"resolution": None, "aspect_ratio": None},
        batch_size=batch_size,
        hidden_dtype=x.dtype,
    )
    # Second dimension is 1 or number of tokens (if timestep_per_token)
    timestep = timestep.view(batch_size, -1, timestep.shape[-1])
    embedded_timestep = embedded_timestep.view(
        batch_size, -1, embedded_timestep.shape[-1]
    )

    # 2. Blocks
    if self.caption_projection is not None:
        batch_size = x.shape[0]
        context = self.caption_projection(context)
        context = context.view(
            batch_size, -1, x.shape[-1]
        )

    blocks_replace = patches_replace.get("dit", {})
    for i, block in enumerate(self.transformer_blocks):
        if ("double_block", i) in blocks_replace:
            # Patched path: hand the block to an external wrapper (ComfyUI patches).
            def block_wrap(args):
                out = {}
                out["img"] = block(args["img"], context=args["txt"], attention_mask=args["attention_mask"], timestep=args["vec"], pe=args["pe"])
                return out
            out = blocks_replace[("double_block", i)]({"img": x, "txt": context, "attention_mask": attention_mask, "vec": timestep, "pe": pe}, {"original_block": block_wrap})
            x = out["img"]
        else:
            x = block(
                x,
                context=context,
                attention_mask=attention_mask,
                timestep=timestep,
                pe=pe
            )

    # 3. Output
    scale_shift_values = (
        self.scale_shift_table[None, None].to(device=x.device, dtype=x.dtype) + embedded_timestep[:, :, None]
    )
    shift, scale = scale_shift_values[:, :, 0], scale_shift_values[:, :, 1]
    x = self.norm_out(x)
    # Modulation
    x = x * (1 + scale) + shift
    x = self.proj_out(x)

    x = self.patchifier.unpatchify(
        latents=x,
        output_height=orig_shape[3],
        output_width=orig_shape[4],
        output_num_frames=orig_shape[2],
        out_channels=orig_shape[1] // math.prod(self.patchifier.patch_size),
    )

    eps = x
    # NOTE(review): self.style_dtype / self.y0_adain_embed are read below but
    # are not set in __init__ — presumably assigned externally before the first
    # call; confirm (otherwise this raises AttributeError).
    dtype = eps.dtype if self.style_dtype is None else self.style_dtype
    pinv_dtype = torch.float32 if dtype != torch.float64 else dtype
    W_inv = None

    #if eps.shape[0] == 2 or (eps.shape[0] == 1): #: and not UNCOND):
    # ---- Positive style guide: re-color denoised toward y0_style_pos. ----
    if y0_style_pos is not None and y0_style_pos_weight != 0.0:
        y0_style_pos = y0_style_pos.to(torch.float32)
        x = x_orig.clone().to(torch.float32)
        eps = eps.to(torch.float32)
        eps_orig = eps.clone()

        sigma = SIGMA #t_orig[0].to(torch.float32) / 1000
        # Epsilon-parameterization: denoised estimate from the current eps.
        denoised = x - sigma * eps

        img, img_latent_coords = self.patchifier.patchify(denoised)
        img_y0_adain, img_y0_adain_latent_coords = self.patchifier.patchify(y0_style_pos)

        # Project both into the patch-embedding space of patchify_proj.
        W = self.patchify_proj.weight.data.to(torch.float32) # shape [2560, 64]
        b = self.patchify_proj.bias .data.to(torch.float32) # shape [2560]

        denoised_embed = F.linear(img .to(W), W, b).to(img)
        y0_adain_embed = F.linear(img_y0_adain.to(W), W, b).to(img_y0_adain)

        if transformer_options['y0_style_method'] == "AdaIN":
            # Channel-statistics matching; optional extra round-trips through
            # the (pseudo-)inverse projection for tighter matching.
            denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed)
            for adain_iter in range(EO("style_iter", 0)):
                denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed)
                denoised_embed = (denoised_embed - b) @ torch.linalg.pinv(W.to(pinv_dtype)).T.to(dtype)
                denoised_embed = F.linear(denoised_embed.to(W), W, b).to(img)
                denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed)

        elif transformer_options['y0_style_method'] == "WCT":
            # Whitening-coloring transform; the style's coloring matrix is
            # cached on self and rebuilt only when the reference changes.
            if self.y0_adain_embed is None or self.y0_adain_embed.shape != y0_adain_embed.shape or torch.norm(self.y0_adain_embed - y0_adain_embed) > 0:
                self.y0_adain_embed = y0_adain_embed

                f_s = y0_adain_embed[0].clone()
                self.mu_s = f_s.mean(dim=0, keepdim=True)
                f_s_centered = f_s - self.mu_s

                cov = (f_s_centered.T.double() @ f_s_centered.double()) / (f_s_centered.size(0) - 1)

                S_eig, U_eig = torch.linalg.eigh(cov + 1e-5 * torch.eye(cov.size(0), dtype=cov.dtype, device=cov.device))
                S_eig_sqrt = S_eig.clamp(min=0).sqrt() # eigenvalues -> singular values

                whiten = U_eig @ torch.diag(S_eig_sqrt) @ U_eig.T
                self.y0_color = whiten.to(f_s_centered)

            for wct_i in range(eps.shape[0]):
                f_c = denoised_embed[wct_i].clone()
                mu_c = f_c.mean(dim=0, keepdim=True)
                f_c_centered = f_c - mu_c

                cov = (f_c_centered.T.double() @ f_c_centered.double()) / (f_c_centered.size(0) - 1)

                S_eig, U_eig = torch.linalg.eigh(cov + 1e-5 * torch.eye(cov.size(0), dtype=cov.dtype, device=cov.device))
                inv_sqrt_eig = S_eig.clamp(min=0).rsqrt()

                whiten = U_eig @ torch.diag(inv_sqrt_eig) @ U_eig.T
                whiten = whiten.to(f_c_centered)

                f_c_whitened = f_c_centered @ whiten.T
                f_cs = f_c_whitened @ self.y0_color.T + self.mu_s

                denoised_embed[wct_i] = f_cs

        # Back from embedding space to latent patches via the pseudo-inverse.
        denoised_approx = (denoised_embed - b.to(denoised_embed)) @ torch.linalg.pinv(W).T.to(denoised_embed)
        denoised_approx = denoised_approx.to(eps)

        denoised_approx = self.patchifier.unpatchify(
            latents=denoised_approx,
            output_height=orig_shape[3],
            output_width=orig_shape[4],
            output_num_frames=orig_shape[2],
            out_channels=orig_shape[1] // math.prod(self.patchifier.patch_size),
        )

        # Re-derive eps from the restyled denoised estimate, then blend with
        # the original eps (main weight on cond, synweight on uncond).
        eps = (x - denoised_approx) / sigma

        #UNCOND = transformer_options['cond_or_uncond'][cond_iter] == 1

        if eps.shape[0] == 1 and transformer_options['cond_or_uncond'][0] == 1:
            eps[0] = eps_orig[0] + y0_style_pos_synweight * (eps[0] - eps_orig[0])
            #if eps.shape[0] == 2:
            #    eps[1] = eps_orig[1] + y0_style_neg_synweight * (eps[1] - eps_orig[1])
        else: #if not UNCOND:
            if eps.shape[0] == 2:
                eps[1] = eps_orig[1] + y0_style_pos_weight * (eps[1] - eps_orig[1])
                eps[0] = eps_orig[0] + y0_style_pos_synweight * (eps[0] - eps_orig[0])
            else:
                eps[0] = eps_orig[0] + y0_style_pos_weight * (eps[0] - eps_orig[0])

        eps = eps.float()

    #if eps.shape[0] == 2 or (eps.shape[0] == 1): # and UNCOND):
    # ---- Negative style guide: same pipeline against y0_style_neg. ----
    if y0_style_neg is not None and y0_style_neg_weight != 0.0:
        y0_style_neg = y0_style_neg.to(torch.float32)
        x = x_orig.clone().to(torch.float32)
        eps = eps.to(torch.float32)
        eps_orig = eps.clone()

        sigma = SIGMA #t_orig[0].to(torch.float32) / 1000
        denoised = x - sigma * eps

        img, img_latent_coords = self.patchifier.patchify(denoised)
        img_y0_adain, img_y0_adain_latent_coords = self.patchifier.patchify(y0_style_neg)

        W = self.patchify_proj.weight.data.to(torch.float32) # shape [2560, 64]
        b = self.patchify_proj.bias .data.to(torch.float32) # shape [2560]

        denoised_embed = F.linear(img .to(W), W, b).to(img)
        y0_adain_embed = F.linear(img_y0_adain.to(W), W, b).to(img_y0_adain)

        if transformer_options['y0_style_method'] == "AdaIN":
            denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed)
            for adain_iter in range(EO("style_iter", 0)):
                denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed)
                denoised_embed = (denoised_embed - b) @ torch.linalg.pinv(W.to(pinv_dtype)).T.to(dtype)
                denoised_embed = F.linear(denoised_embed.to(W), W, b).to(img)
                denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed)

        elif transformer_options['y0_style_method'] == "WCT":
            # NOTE(review): shares self.y0_adain_embed / self.y0_color cache
            # with the positive branch — running both in one call re-fits the
            # cache against the negative reference; confirm this is intended.
            if self.y0_adain_embed is None or self.y0_adain_embed.shape != y0_adain_embed.shape or torch.norm(self.y0_adain_embed - y0_adain_embed) > 0:
                self.y0_adain_embed = y0_adain_embed

                f_s = y0_adain_embed[0].clone()
                self.mu_s = f_s.mean(dim=0, keepdim=True)
                f_s_centered = f_s - self.mu_s

                cov = (f_s_centered.T.double() @ f_s_centered.double()) / (f_s_centered.size(0) - 1)

                S_eig, U_eig = torch.linalg.eigh(cov + 1e-5 * torch.eye(cov.size(0), dtype=cov.dtype, device=cov.device))
                S_eig_sqrt = S_eig.clamp(min=0).sqrt() # eigenvalues -> singular values

                whiten = U_eig @ torch.diag(S_eig_sqrt) @ U_eig.T
                self.y0_color = whiten.to(f_s_centered)

            for wct_i in range(eps.shape[0]):
                f_c = denoised_embed[wct_i].clone()
                mu_c = f_c.mean(dim=0, keepdim=True)
                f_c_centered = f_c - mu_c

                cov = (f_c_centered.T.double() @ f_c_centered.double()) / (f_c_centered.size(0) - 1)

                S_eig, U_eig = torch.linalg.eigh(cov + 1e-5 * torch.eye(cov.size(0), dtype=cov.dtype, device=cov.device))
                inv_sqrt_eig = S_eig.clamp(min=0).rsqrt()

                whiten = U_eig @ torch.diag(inv_sqrt_eig) @ U_eig.T
                whiten = whiten.to(f_c_centered)

                f_c_whitened = f_c_centered @ whiten.T
                f_cs = f_c_whitened @ self.y0_color.T + self.mu_s

                denoised_embed[wct_i] = f_cs

        denoised_approx = (denoised_embed - b.to(denoised_embed)) @ torch.linalg.pinv(W).T.to(denoised_embed)
        denoised_approx = denoised_approx.to(eps)

        #denoised_approx = rearrange(denoised_approx, "b (h w) (c ph pw) -> b c (h ph) (w pw)", h=h_len, w=w_len, ph=2, pw=2)[:,:,:h,:w]
        #denoised_approx = self.unpatchify(denoised_approx, (h + 1) // self.patch_size, (w + 1) // self.patch_size)[:,:,:h,:w]

        denoised_approx = self.patchifier.unpatchify(
            latents=denoised_approx,
            output_height=orig_shape[3],
            output_width=orig_shape[4],
            output_num_frames=orig_shape[2],
            out_channels=orig_shape[1] // math.prod(self.patchifier.patch_size),
        )

        # NOTE(review): unlike the positive branch, eps is re-derived from
        # denoised_approx only in the else-branch below — the lone-uncond case
        # blends eps_orig with itself (a no-op); confirm this asymmetry.
        if eps.shape[0] == 1 and not transformer_options['cond_or_uncond'][0] == 1:
            eps[0] = eps_orig[0] + y0_style_neg_synweight * (eps[0] - eps_orig[0])
        else:
            eps = (x - denoised_approx) / sigma
            eps[0] = eps_orig[0] + y0_style_neg_weight * (eps[0] - eps_orig[0])
            if eps.shape[0] == 2:
                eps[1] = eps_orig[1] + y0_style_neg_synweight * (eps[1] - eps_orig[1])

        eps = eps.float()

    return eps
def adain_seq_inplace(content: torch.Tensor, style: torch.Tensor, eps: float = 1e-7) -> torch.Tensor:
    """In-place AdaIN along dim 1: re-statisticize `content` to match `style`.

    `content` is normalized to zero mean / unit std over dim 1 and then
    rescaled to `style`'s mean/std. The tensor is modified in place and
    returned for chaining.
    """
    c_mean = content.mean(1, keepdim=True)
    c_std  = content.std(1, keepdim=True).add_(eps)   # eps keeps the divide safe
    s_mean = style.mean(1, keepdim=True)
    s_std  = style.std(1, keepdim=True).add_(eps)

    # Whiten, then color with the style statistics — all in place.
    content.sub_(c_mean).div_(c_std).mul_(s_std).add_(s_mean)
    return content
def adain_seq(content: torch.Tensor, style: torch.Tensor, eps: float = 1e-7) -> torch.Tensor:
    """Out-of-place AdaIN along dim 1: returns `content` with `style`'s statistics."""
    mean_c = content.mean(1, keepdim=True)
    std_c  = content.std(1, keepdim=True) + eps
    mean_s = style.mean(1, keepdim=True)
    std_s  = style.std(1, keepdim=True) + eps
    return (content - mean_c) / std_c * std_s + mean_s
================================================
FILE: lightricks/symmetric_patchifier.py
================================================
from abc import ABC, abstractmethod
from typing import Tuple
import torch
from einops import rearrange
from torch import Tensor
def latent_to_pixel_coords(
    latent_coords: Tensor, scale_factors: Tuple[int, int, int], causal_fix: bool = False
) -> Tensor:
    """
    Scale latent token coordinates up to pixel coordinates using the VAE's
    per-axis compression factors.

    Args:
        latent_coords (Tensor): [batch_size, 3, num_latents] latent corner
            coordinates of each token (rows: frame, height, width).
        scale_factors (Tuple[int, int, int]): Scale factors of the VAE's latent space.
        causal_fix (bool): Account for the first frame's different temporal
            scale in a causal VAE. Defaults to False for backwards compatibility.

    Returns:
        Tensor: Pixel coordinates with the same shape as `latent_coords`.
    """
    scales = torch.tensor(scale_factors, device=latent_coords.device)
    pixel_coords = latent_coords * scales[None, :, None]
    if causal_fix:
        # The causal VAE encodes the first frame at temporal scale 1, so shift
        # the temporal axis back by (scale - 1) and clamp at zero.
        pixel_coords[:, 0] = (pixel_coords[:, 0] + 1 - scale_factors[0]).clamp(min=0)
    return pixel_coords
class Patchifier(ABC):
    """Abstract base for converting video latents to/from token sequences.

    Temporal patch size is fixed at 1; spatial patches are square with side
    `patch_size`.
    """

    def __init__(self, patch_size: int):
        super().__init__()
        self._patch_size = (1, patch_size, patch_size)

    @abstractmethod
    def patchify(
        self, latents: Tensor, frame_rates: Tensor, scale_grid: bool
    ) -> Tuple[Tensor, Tensor]:
        pass

    @abstractmethod
    def unpatchify(
        self,
        latents: Tensor,
        output_height: int,
        output_width: int,
        output_num_frames: int,
        out_channels: int,
    ) -> Tuple[Tensor, Tensor]:
        pass

    @property
    def patch_size(self):
        return self._patch_size

    def get_latent_coords(
        self, latent_num_frames, latent_height, latent_width, batch_size, device
    ):
        """
        Return a tensor of shape [batch_size, 3, num_patches] containing the
        top-left corner latent coordinates of each latent patch.
        The tensor is repeated for each batch element.
        """
        axes = [
            torch.arange(0, extent, step, device=device)
            for extent, step in zip(
                (latent_num_frames, latent_height, latent_width), self._patch_size
            )
        ]
        grid = torch.stack(torch.meshgrid(*axes, indexing="ij"), dim=0)
        # Add a batch axis, tile per batch element, then flatten (f, h, w)
        # into a single patch axis: b c f h w -> b c (f h w).
        coords = grid.unsqueeze(0).repeat(batch_size, 1, 1, 1, 1)
        return coords.reshape(batch_size, grid.shape[0], -1)
class SymmetricPatchifier(Patchifier):
    """Patchifier that folds each (1, p, p) latent patch into the token dim."""

    # NOTE(review): signature is narrower than the Patchifier ABC (no
    # frame_rates / scale_grid) — callers here pass latents only.
    def patchify(
        self,
        latents: Tensor,
    ) -> Tuple[Tensor, Tensor]:
        batch, _, frames, height, width = latents.shape
        latent_coords = self.get_latent_coords(
            frames, height, width, batch, latents.device
        )
        # Fold each spatio-temporal patch into the per-token channel dim.
        tokens = rearrange(
            latents,
            "b c (f p1) (h p2) (w p3) -> b (f h w) (c p1 p2 p3)",
            p1=self._patch_size[0],
            p2=self._patch_size[1],
            p3=self._patch_size[2],
        )
        return tokens, latent_coords

    def unpatchify(
        self,
        latents: Tensor,
        output_height: int,
        output_width: int,
        output_num_frames: int,
        out_channels: int,
    ) -> Tuple[Tensor, Tensor]:
        # Convert pixel-space extents to latent-grid counts.
        grid_h = output_height // self._patch_size[1]
        grid_w = output_width // self._patch_size[2]
        restored = rearrange(
            latents,
            "b (f h w) (c p q) -> b c f (h p) (w q) ",
            f=output_num_frames,
            h=grid_h,
            w=grid_w,
            p=self._patch_size[1],
            q=self._patch_size[2],
        )
        return restored
================================================
FILE: lightricks/vae/causal_conv3d.py
================================================
from typing import Tuple, Union
import torch
import torch.nn as nn
import comfy.ops
ops = comfy.ops.disable_weight_init
class CausalConv3d(nn.Module):
    """3D convolution with replicate-frame temporal padding.

    Spatial padding is symmetric and handled by the underlying conv; temporal
    padding is applied manually in `forward`, either entirely at the front
    (causal) or split between front and back (non-causal).
    """

    def __init__(
        self,
        in_channels,
        out_channels,
        kernel_size: int = 3,
        stride: Union[int, Tuple[int]] = 1,
        dilation: int = 1,
        groups: int = 1,
        spatial_padding_mode: str = "zeros",
        **kwargs,
    ):
        super().__init__()

        self.in_channels = in_channels
        self.out_channels = out_channels

        # Cubic kernel; only the temporal extent is needed for manual padding.
        kernel_size = (kernel_size, kernel_size, kernel_size)
        self.time_kernel_size = kernel_size[0]

        # Dilation is applied on the temporal axis only.
        dilation = (dilation, 1, 1)

        # Zero temporal padding here — it is done by frame replication in forward().
        padding = (0, kernel_size[1] // 2, kernel_size[2] // 2)
        self.conv = ops.Conv3d(
            in_channels,
            out_channels,
            kernel_size,
            stride=stride,
            dilation=dilation,
            padding=padding,
            padding_mode=spatial_padding_mode,
            groups=groups,
        )

    def forward(self, x, causal: bool = True):
        if causal:
            # Repeat the first frame so the kernel never sees future frames.
            pad_front = x[:, :, :1, :, :].repeat(
                (1, 1, self.time_kernel_size - 1, 1, 1)
            )
            x = torch.concatenate((pad_front, x), dim=2)
        else:
            # Split replicate-padding between the first and last frames.
            half = (self.time_kernel_size - 1) // 2
            pad_front = x[:, :, :1, :, :].repeat((1, 1, half, 1, 1))
            pad_back = x[:, :, -1:, :, :].repeat((1, 1, half, 1, 1))
            x = torch.concatenate((pad_front, x, pad_back), dim=2)
        return self.conv(x)

    @property
    def weight(self):
        return self.conv.weight
================================================
FILE: lightricks/vae/causal_video_autoencoder.py
================================================
from __future__ import annotations
import torch
from torch import nn
from functools import partial
import math
from einops import rearrange
from typing import List, Optional, Tuple, Union
from .conv_nd_factory import make_conv_nd, make_linear_nd
from .pixel_norm import PixelNorm
from ..model import PixArtAlphaCombinedTimestepSizeEmbeddings
import comfy.ops
ops = comfy.ops.disable_weight_init
class Encoder(nn.Module):
    r"""
    The `Encoder` layer of a variational autoencoder that encodes its input into a latent representation.

    Args:
        dims (`int` or `Tuple[int, int]`, *optional*, defaults to 3):
            The number of dimensions to use in convolutions.
        in_channels (`int`, *optional*, defaults to 3):
            The number of input channels.
        out_channels (`int`, *optional*, defaults to 3):
            The number of output channels.
        blocks (`List[Tuple[str, int]]`, *optional*, defaults to `[("res_x", 1)]`):
            The blocks to use. Each block is a tuple of the block name and the number of layers.
        base_channels (`int`, *optional*, defaults to 128):
            The number of output channels for the first convolutional layer.
        norm_num_groups (`int`, *optional*, defaults to 32):
            The number of groups for normalization.
        patch_size (`int`, *optional*, defaults to 1):
            The patch size to use. Should be a power of 2.
        norm_layer (`str`, *optional*, defaults to `group_norm`):
            The normalization layer to use. Can be either `group_norm` or `pixel_norm`.
        latent_log_var (`str`, *optional*, defaults to `per_channel`):
            The number of channels for the log variance. Can be either `per_channel`, `uniform`, `constant` or `none`.
        spatial_padding_mode (`str`, *optional*, defaults to `zeros`):
            Padding mode forwarded to the spatial convolutions.
    """

    def __init__(
        self,
        dims: Union[int, Tuple[int, int]] = 3,
        in_channels: int = 3,
        out_channels: int = 3,
        blocks: List[Tuple[str, int | dict]] = [("res_x", 1)],
        base_channels: int = 128,
        norm_num_groups: int = 32,
        patch_size: Union[int, Tuple[int]] = 1,
        norm_layer: str = "group_norm",  # group_norm, pixel_norm
        latent_log_var: str = "per_channel",
        spatial_padding_mode: str = "zeros",
    ):
        super().__init__()
        self.patch_size = patch_size
        self.norm_layer = norm_layer
        self.latent_channels = out_channels
        self.latent_log_var = latent_log_var
        self.blocks_desc = blocks

        # Patchification folds a patch_size x patch_size spatial window into channels.
        in_channels = in_channels * patch_size**2
        output_channel = base_channels

        self.conv_in = make_conv_nd(
            dims=dims,
            in_channels=in_channels,
            out_channels=output_channel,
            kernel_size=3,
            stride=1,
            padding=1,
            causal=True,
            spatial_padding_mode=spatial_padding_mode,
        )

        self.down_blocks = nn.ModuleList([])

        # Build the down path. `output_channel` tracks the running channel
        # width; multiplier blocks widen it in place before building the block.
        for block_name, block_params in blocks:
            input_channel = output_channel
            # Plain ints are shorthand for {"num_layers": n}.
            if isinstance(block_params, int):
                block_params = {"num_layers": block_params}

            if block_name == "res_x":
                # Stack of residual blocks at constant width.
                block = UNetMidBlock3D(
                    dims=dims,
                    in_channels=input_channel,
                    num_layers=block_params["num_layers"],
                    resnet_eps=1e-6,
                    resnet_groups=norm_num_groups,
                    norm_layer=norm_layer,
                    spatial_padding_mode=spatial_padding_mode,
                )
            elif block_name == "res_x_y":
                # Single residual block that changes the channel width.
                output_channel = block_params.get("multiplier", 2) * output_channel
                block = ResnetBlock3D(
                    dims=dims,
                    in_channels=input_channel,
                    out_channels=output_channel,
                    eps=1e-6,
                    groups=norm_num_groups,
                    norm_layer=norm_layer,
                    spatial_padding_mode=spatial_padding_mode,
                )
            elif block_name == "compress_time":
                # Strided conv halving the temporal axis only.
                block = make_conv_nd(
                    dims=dims,
                    in_channels=input_channel,
                    out_channels=output_channel,
                    kernel_size=3,
                    stride=(2, 1, 1),
                    causal=True,
                    spatial_padding_mode=spatial_padding_mode,
                )
            elif block_name == "compress_space":
                # Strided conv halving height and width only.
                block = make_conv_nd(
                    dims=dims,
                    in_channels=input_channel,
                    out_channels=output_channel,
                    kernel_size=3,
                    stride=(1, 2, 2),
                    causal=True,
                    spatial_padding_mode=spatial_padding_mode,
                )
            elif block_name == "compress_all":
                # Strided conv halving time, height and width.
                block = make_conv_nd(
                    dims=dims,
                    in_channels=input_channel,
                    out_channels=output_channel,
                    kernel_size=3,
                    stride=(2, 2, 2),
                    causal=True,
                    spatial_padding_mode=spatial_padding_mode,
                )
            elif block_name == "compress_all_x_y":
                # compress_all that also widens the channel count.
                output_channel = block_params.get("multiplier", 2) * output_channel
                block = make_conv_nd(
                    dims=dims,
                    in_channels=input_channel,
                    out_channels=output_channel,
                    kernel_size=3,
                    stride=(2, 2, 2),
                    causal=True,
                    spatial_padding_mode=spatial_padding_mode,
                )
            elif block_name == "compress_all_res":
                # Space-to-depth downsample (with pooled skip) over all axes.
                output_channel = block_params.get("multiplier", 2) * output_channel
                block = SpaceToDepthDownsample(
                    dims=dims,
                    in_channels=input_channel,
                    out_channels=output_channel,
                    stride=(2, 2, 2),
                    spatial_padding_mode=spatial_padding_mode,
                )
            elif block_name == "compress_space_res":
                output_channel = block_params.get("multiplier", 2) * output_channel
                block = SpaceToDepthDownsample(
                    dims=dims,
                    in_channels=input_channel,
                    out_channels=output_channel,
                    stride=(1, 2, 2),
                    spatial_padding_mode=spatial_padding_mode,
                )
            elif block_name == "compress_time_res":
                output_channel = block_params.get("multiplier", 2) * output_channel
                block = SpaceToDepthDownsample(
                    dims=dims,
                    in_channels=input_channel,
                    out_channels=output_channel,
                    stride=(2, 1, 1),
                    spatial_padding_mode=spatial_padding_mode,
                )
            else:
                raise ValueError(f"unknown block: {block_name}")

            self.down_blocks.append(block)

        # out
        # NOTE(review): an unrecognized norm_layer silently leaves
        # self.conv_norm_out undefined (forward would then fail) — confirm
        # callers only pass the three supported values.
        if norm_layer == "group_norm":
            self.conv_norm_out = nn.GroupNorm(
                num_channels=output_channel, num_groups=norm_num_groups, eps=1e-6
            )
        elif norm_layer == "pixel_norm":
            self.conv_norm_out = PixelNorm()
        elif norm_layer == "layer_norm":
            self.conv_norm_out = LayerNorm(output_channel, eps=1e-6)

        self.conv_act = nn.SiLU()

        # Extra output channels carry the log-variance of the latent
        # distribution, depending on the chosen parameterization.
        conv_out_channels = out_channels
        if latent_log_var == "per_channel":
            conv_out_channels *= 2
        elif latent_log_var == "uniform":
            conv_out_channels += 1
        elif latent_log_var == "constant":
            conv_out_channels += 1
        elif latent_log_var != "none":
            raise ValueError(f"Invalid latent_log_var: {latent_log_var}")
        self.conv_out = make_conv_nd(
            dims,
            output_channel,
            conv_out_channels,
            3,
            padding=1,
            causal=True,
            spatial_padding_mode=spatial_padding_mode,
        )

        self.gradient_checkpointing = False

    def forward(self, sample: torch.FloatTensor) -> torch.FloatTensor:
        r"""The forward method of the `Encoder` class."""

        # Fold spatial patches into channels before the first convolution.
        sample = patchify(sample, patch_size_hw=self.patch_size, patch_size_t=1)
        sample = self.conv_in(sample)

        # When checkpointing is off (or in eval), checkpoint_fn is the
        # identity, so checkpoint_fn(block) is just the block itself.
        checkpoint_fn = (
            partial(torch.utils.checkpoint.checkpoint, use_reentrant=False)
            if self.gradient_checkpointing and self.training
            else lambda x: x
        )

        for down_block in self.down_blocks:
            sample = checkpoint_fn(down_block)(sample)

        sample = self.conv_norm_out(sample)
        sample = self.conv_act(sample)
        sample = self.conv_out(sample)

        if self.latent_log_var == "uniform":
            # Broadcast the single predicted log-var channel so the output has
            # mean + per-channel log-var (2 * latent_channels total).
            last_channel = sample[:, -1:, ...]
            num_dims = sample.dim()

            if num_dims == 4:
                # For shape (B, C, H, W)
                repeated_last_channel = last_channel.repeat(
                    1, sample.shape[1] - 2, 1, 1
                )
                sample = torch.cat([sample, repeated_last_channel], dim=1)
            elif num_dims == 5:
                # For shape (B, C, F, H, W)
                repeated_last_channel = last_channel.repeat(
                    1, sample.shape[1] - 2, 1, 1, 1
                )
                sample = torch.cat([sample, repeated_last_channel], dim=1)
            else:
                raise ValueError(f"Invalid input shape: {sample.shape}")
        elif self.latent_log_var == "constant":
            # Drop the extra predicted channel and append a constant log-var
            # equal to the distribution's clamp floor (effectively zero var).
            sample = sample[:, :-1, ...]
            approx_ln_0 = (
                -30
            )  # this is the minimal clamp value in DiagonalGaussianDistribution objects
            sample = torch.cat(
                [sample, torch.ones_like(sample, device=sample.device) * approx_ln_0],
                dim=1,
            )

        return sample
class Decoder(nn.Module):
    r"""
    The `Decoder` layer of a variational autoencoder that decodes its latent representation into an output sample.

    Args:
        dims (`int` or `Tuple[int, int]`, *optional*, defaults to 3):
            The number of dimensions to use in convolutions.
        in_channels (`int`, *optional*, defaults to 3):
            The number of input channels.
        out_channels (`int`, *optional*, defaults to 3):
            The number of output channels.
        blocks (`List[Tuple[str, int]]`, *optional*, defaults to `[("res_x", 1)]`):
            The blocks to use. Each block is a tuple of the block name and the number of layers.
        base_channels (`int`, *optional*, defaults to 128):
            The number of output channels for the first convolutional layer.
        norm_num_groups (`int`, *optional*, defaults to 32):
            The number of groups for normalization.
        patch_size (`int`, *optional*, defaults to 1):
            The patch size to use. Should be a power of 2.
        norm_layer (`str`, *optional*, defaults to `group_norm`):
            The normalization layer to use. Can be either `group_norm` or `pixel_norm`.
        causal (`bool`, *optional*, defaults to `True`):
            Whether to use causal convolutions or not.
        timestep_conditioning (`bool`, *optional*, defaults to `False`):
            Whether to condition the decoder (ada-norm style) on a denoising timestep.
    """

    def __init__(
        self,
        dims,
        in_channels: int = 3,
        out_channels: int = 3,
        blocks: List[Tuple[str, int | dict]] = [("res_x", 1)],
        base_channels: int = 128,
        layers_per_block: int = 2,
        norm_num_groups: int = 32,
        patch_size: int = 1,
        norm_layer: str = "group_norm",
        causal: bool = True,
        timestep_conditioning: bool = False,
        spatial_padding_mode: str = "zeros",
    ):
        super().__init__()
        self.patch_size = patch_size
        self.layers_per_block = layers_per_block
        # The final unpatchify unfolds channels back into pixels.
        out_channels = out_channels * patch_size**2
        self.causal = causal
        self.blocks_desc = blocks

        # Compute output channel to be product of all channel-multiplier blocks
        # (walking the reversed block list so conv_in starts at the widest point).
        output_channel = base_channels
        for block_name, block_params in list(reversed(blocks)):
            block_params = block_params if isinstance(block_params, dict) else {}
            if block_name == "res_x_y":
                output_channel = output_channel * block_params.get("multiplier", 2)
            if block_name == "compress_all":
                output_channel = output_channel * block_params.get("multiplier", 1)

        self.conv_in = make_conv_nd(
            dims,
            in_channels,
            output_channel,
            kernel_size=3,
            stride=1,
            padding=1,
            causal=True,
            spatial_padding_mode=spatial_padding_mode,
        )

        self.up_blocks = nn.ModuleList([])

        # Mirror of the encoder's down path; `output_channel` shrinks as
        # multiplier blocks are undone.
        for block_name, block_params in list(reversed(blocks)):
            input_channel = output_channel
            if isinstance(block_params, int):
                block_params = {"num_layers": block_params}

            if block_name == "res_x":
                block = UNetMidBlock3D(
                    dims=dims,
                    in_channels=input_channel,
                    num_layers=block_params["num_layers"],
                    resnet_eps=1e-6,
                    resnet_groups=norm_num_groups,
                    norm_layer=norm_layer,
                    inject_noise=block_params.get("inject_noise", False),
                    timestep_conditioning=timestep_conditioning,
                    spatial_padding_mode=spatial_padding_mode,
                )
            elif block_name == "attn_res_x":
                # NOTE(review): UNetMidBlock3D in this file does not accept an
                # `attention_head_dim` argument — this branch would raise a
                # TypeError if ever used; confirm against upstream configs.
                block = UNetMidBlock3D(
                    dims=dims,
                    in_channels=input_channel,
                    num_layers=block_params["num_layers"],
                    resnet_groups=norm_num_groups,
                    norm_layer=norm_layer,
                    inject_noise=block_params.get("inject_noise", False),
                    timestep_conditioning=timestep_conditioning,
                    attention_head_dim=block_params["attention_head_dim"],
                    spatial_padding_mode=spatial_padding_mode,
                )
            elif block_name == "res_x_y":
                # Residual block that narrows the channel width.
                output_channel = output_channel // block_params.get("multiplier", 2)
                block = ResnetBlock3D(
                    dims=dims,
                    in_channels=input_channel,
                    out_channels=output_channel,
                    eps=1e-6,
                    groups=norm_num_groups,
                    norm_layer=norm_layer,
                    inject_noise=block_params.get("inject_noise", False),
                    timestep_conditioning=False,
                    spatial_padding_mode=spatial_padding_mode,
                )
            elif block_name == "compress_time":
                # Undo temporal compression via depth-to-space.
                block = DepthToSpaceUpsample(
                    dims=dims,
                    in_channels=input_channel,
                    stride=(2, 1, 1),
                    spatial_padding_mode=spatial_padding_mode,
                )
            elif block_name == "compress_space":
                block = DepthToSpaceUpsample(
                    dims=dims,
                    in_channels=input_channel,
                    stride=(1, 2, 2),
                    spatial_padding_mode=spatial_padding_mode,
                )
            elif block_name == "compress_all":
                output_channel = output_channel // block_params.get("multiplier", 1)
                block = DepthToSpaceUpsample(
                    dims=dims,
                    in_channels=input_channel,
                    stride=(2, 2, 2),
                    residual=block_params.get("residual", False),
                    out_channels_reduction_factor=block_params.get("multiplier", 1),
                    spatial_padding_mode=spatial_padding_mode,
                )
            else:
                raise ValueError(f"unknown layer: {block_name}")

            self.up_blocks.append(block)

        if norm_layer == "group_norm":
            self.conv_norm_out = nn.GroupNorm(
                num_channels=output_channel, num_groups=norm_num_groups, eps=1e-6
            )
        elif norm_layer == "pixel_norm":
            self.conv_norm_out = PixelNorm()
        elif norm_layer == "layer_norm":
            self.conv_norm_out = LayerNorm(output_channel, eps=1e-6)

        self.conv_act = nn.SiLU()
        self.conv_out = make_conv_nd(
            dims,
            output_channel,
            out_channels,
            3,
            padding=1,
            causal=True,
            spatial_padding_mode=spatial_padding_mode,
        )

        self.gradient_checkpointing = False

        self.timestep_conditioning = timestep_conditioning

        if timestep_conditioning:
            # Learned multiplier mapping [0, 1] timesteps into embedder range.
            self.timestep_scale_multiplier = nn.Parameter(
                torch.tensor(1000.0, dtype=torch.float32)
            )
            # Produces (shift, scale) for the final ada-norm before conv_out.
            self.last_time_embedder = PixArtAlphaCombinedTimestepSizeEmbeddings(
                output_channel * 2, 0, operations=ops,
            )
            self.last_scale_shift_table = nn.Parameter(torch.empty(2, output_channel))

    # def forward(self, sample: torch.FloatTensor, target_shape) -> torch.FloatTensor:
    def forward(
        self,
        sample: torch.FloatTensor,
        timestep: Optional[torch.Tensor] = None,
    ) -> torch.FloatTensor:
        r"""The forward method of the `Decoder` class."""
        batch_size = sample.shape[0]

        sample = self.conv_in(sample, causal=self.causal)

        # Identity when gradient checkpointing is inactive (see Encoder).
        checkpoint_fn = (
            partial(torch.utils.checkpoint.checkpoint, use_reentrant=False)
            if self.gradient_checkpointing and self.training
            else lambda x: x
        )

        scaled_timestep = None
        if self.timestep_conditioning:
            assert (
                timestep is not None
            ), "should pass timestep with timestep_conditioning=True"
            scaled_timestep = timestep * self.timestep_scale_multiplier.to(dtype=sample.dtype, device=sample.device)

        for up_block in self.up_blocks:
            # Only residual mid-blocks consume the timestep embedding.
            if self.timestep_conditioning and isinstance(up_block, UNetMidBlock3D):
                sample = checkpoint_fn(up_block)(
                    sample, causal=self.causal, timestep=scaled_timestep
                )
            else:
                sample = checkpoint_fn(up_block)(sample, causal=self.causal)

        sample = self.conv_norm_out(sample)

        if self.timestep_conditioning:
            embedded_timestep = self.last_time_embedder(
                timestep=scaled_timestep.flatten(),
                resolution=None,
                aspect_ratio=None,
                batch_size=sample.shape[0],
                hidden_dtype=sample.dtype,
            )
            embedded_timestep = embedded_timestep.view(
                batch_size, embedded_timestep.shape[-1], 1, 1, 1
            )
            # Split the 2*C embedding into (shift, scale) and add the learned
            # table, broadcasting over frames/height/width.
            ada_values = self.last_scale_shift_table[
                None, ..., None, None, None
            ].to(device=sample.device, dtype=sample.dtype) + embedded_timestep.reshape(
                batch_size,
                2,
                -1,
                embedded_timestep.shape[-3],
                embedded_timestep.shape[-2],
                embedded_timestep.shape[-1],
            )
            shift, scale = ada_values.unbind(dim=1)
            sample = sample * (1 + scale) + shift

        sample = self.conv_act(sample)
        sample = self.conv_out(sample, causal=self.causal)

        # Unfold channel patches back into pixel space.
        sample = unpatchify(sample, patch_size_hw=self.patch_size, patch_size_t=1)

        return sample
class UNetMidBlock3D(nn.Module):
    """
    Mid-block of `num_layers` stacked `ResnetBlock3D`s at constant width,
    optionally conditioned on a timestep embedding.

    Args:
        dims: Number of dimensions used by the convolutions.
        in_channels: Channel count (input and output widths are equal).
        dropout: Dropout rate inside the resnet blocks.
        num_layers: Number of residual blocks to stack.
        resnet_eps: Epsilon for the resnet normalization layers.
        resnet_groups: Group count for group-norm in the resnet blocks;
            when None, falls back to `min(in_channels // 4, 32)`.
        norm_layer: `group_norm` or `pixel_norm`.
        inject_noise: Inject per-channel spatial noise in the resnet blocks.
        timestep_conditioning: Condition the resnet blocks on a timestep.

    Returns:
        The output of the final residual block, shaped like the input.
    """

    def __init__(
        self,
        dims: Union[int, Tuple[int, int]],
        in_channels: int,
        dropout: float = 0.0,
        num_layers: int = 1,
        resnet_eps: float = 1e-6,
        resnet_groups: int = 32,
        norm_layer: str = "group_norm",
        inject_noise: bool = False,
        timestep_conditioning: bool = False,
        spatial_padding_mode: str = "zeros",
    ):
        super().__init__()
        if resnet_groups is None:
            resnet_groups = min(in_channels // 4, 32)

        self.timestep_conditioning = timestep_conditioning

        if timestep_conditioning:
            # 4 * channels: each resnet block unbinds (shift1, scale1, shift2, scale2).
            self.time_embedder = PixArtAlphaCombinedTimestepSizeEmbeddings(
                in_channels * 4, 0, operations=ops,
            )

        self.res_blocks = nn.ModuleList(
            ResnetBlock3D(
                dims=dims,
                in_channels=in_channels,
                out_channels=in_channels,
                eps=resnet_eps,
                groups=resnet_groups,
                dropout=dropout,
                norm_layer=norm_layer,
                inject_noise=inject_noise,
                timestep_conditioning=timestep_conditioning,
                spatial_padding_mode=spatial_padding_mode,
            )
            for _ in range(num_layers)
        )

    def forward(
        self,
        hidden_states: torch.FloatTensor,
        causal: bool = True,
        timestep: Optional[torch.Tensor] = None,
    ) -> torch.FloatTensor:
        embedded = None
        if self.timestep_conditioning:
            assert (
                timestep is not None
            ), "should pass timestep with timestep_conditioning=True"
            bsz = hidden_states.shape[0]
            embedded = self.time_embedder(
                timestep=timestep.flatten(),
                resolution=None,
                aspect_ratio=None,
                batch_size=bsz,
                hidden_dtype=hidden_states.dtype,
            )
            # Broadcast the embedding over (frames, height, width).
            embedded = embedded.view(bsz, embedded.shape[-1], 1, 1, 1)

        for block in self.res_blocks:
            hidden_states = block(hidden_states, causal=causal, timestep=embedded)

        return hidden_states
class SpaceToDepthDownsample(nn.Module):
    """Downsample by folding a stride-sized neighborhood into channels,
    adding a group-averaged space-to-depth skip to the conv path."""

    def __init__(self, dims, in_channels, out_channels, stride, spatial_padding_mode):
        super().__init__()
        self.stride = stride
        # How many folded channels get averaged into each skip output channel.
        self.group_size = in_channels * math.prod(stride) // out_channels
        self.conv = make_conv_nd(
            dims=dims,
            in_channels=in_channels,
            out_channels=out_channels // math.prod(stride),
            kernel_size=3,
            stride=1,
            causal=True,
            spatial_padding_mode=spatial_padding_mode,
        )

    def forward(self, x, causal: bool = True):
        if self.stride[0] == 2:
            # duplicate first frames for padding
            x = torch.cat([x[:, :, :1, :, :], x], dim=2)

        # Skip connection: fold space into depth, then mean over channel groups.
        skip = rearrange(
            x,
            "b c (d p1) (h p2) (w p3) -> b (c p1 p2 p3) d h w",
            p1=self.stride[0],
            p2=self.stride[1],
            p3=self.stride[2],
        )
        skip = rearrange(skip, "b (c g) d h w -> b c g d h w", g=self.group_size)
        skip = skip.mean(dim=2)

        # Conv path, folded the same way so the two paths align.
        out = self.conv(x, causal=causal)
        out = rearrange(
            out,
            "b c (d p1) (h p2) (w p3) -> b (c p1 p2 p3) d h w",
            p1=self.stride[0],
            p2=self.stride[1],
            p3=self.stride[2],
        )
        return out + skip
class DepthToSpaceUpsample(nn.Module):
    """Upsample by expanding channels into space, optionally adding a
    channel-repeated depth-to-space residual path."""

    def __init__(
        self,
        dims,
        in_channels,
        stride,
        residual=False,
        out_channels_reduction_factor=1,
        spatial_padding_mode="zeros",
    ):
        super().__init__()
        self.stride = stride
        self.out_channels = (
            math.prod(stride) * in_channels // out_channels_reduction_factor
        )
        self.conv = make_conv_nd(
            dims=dims,
            in_channels=in_channels,
            out_channels=self.out_channels,
            kernel_size=3,
            stride=1,
            causal=True,
            spatial_padding_mode=spatial_padding_mode,
        )
        self.residual = residual
        self.out_channels_reduction_factor = out_channels_reduction_factor

    def forward(self, x, causal: bool = True, timestep: Optional[torch.Tensor] = None):
        skip = None
        if self.residual:
            # Reshape and duplicate the input to match the output shape.
            skip = rearrange(
                x,
                "b (c p1 p2 p3) d h w -> b c (d p1) (h p2) (w p3)",
                p1=self.stride[0],
                p2=self.stride[1],
                p3=self.stride[2],
            )
            repeats = math.prod(self.stride) // self.out_channels_reduction_factor
            skip = skip.repeat(1, repeats, 1, 1, 1)
            if self.stride[0] == 2:
                # Drop the duplicated leading frame on the temporal axis.
                skip = skip[:, :, 1:, :, :]

        x = self.conv(x, causal=causal)
        x = rearrange(
            x,
            "b (c p1 p2 p3) d h w -> b c (d p1) (h p2) (w p3)",
            p1=self.stride[0],
            p2=self.stride[1],
            p3=self.stride[2],
        )
        if self.stride[0] == 2:
            x = x[:, :, 1:, :, :]

        if self.residual:
            x = x + skip
        return x
class LayerNorm(nn.Module):
    """LayerNorm over the channel axis of (b, c, d, h, w) tensors."""

    def __init__(self, dim, eps, elementwise_affine=True) -> None:
        super().__init__()
        self.norm = ops.LayerNorm(dim, eps=eps, elementwise_affine=elementwise_affine)

    def forward(self, x):
        # Move channels last, normalize, then restore the original layout.
        x = x.permute(0, 2, 3, 4, 1)
        x = self.norm(x)
        return x.permute(0, 4, 1, 2, 3)
class ResnetBlock3D(nn.Module):
    r"""
    A Resnet block.

    Parameters:
        in_channels (`int`): The number of channels in the input.
        out_channels (`int`, *optional*, default to be `None`):
            The number of output channels for the first conv layer. If None, same as `in_channels`.
        dropout (`float`, *optional*, defaults to `0.0`): The dropout probability to use.
        groups (`int`, *optional*, default to `32`): The number of groups to use for the first normalization layer.
        eps (`float`, *optional*, defaults to `1e-6`): The epsilon to use for the normalization.
        norm_layer (`str`, *optional*, defaults to `group_norm`):
            Normalization flavor: `group_norm`, `pixel_norm` or `layer_norm`.
        inject_noise (`bool`, *optional*, defaults to `False`):
            Add learned, per-channel-scaled spatial noise after each conv.
        timestep_conditioning (`bool`, *optional*, defaults to `False`):
            Apply ada-norm style (shift, scale) modulation from a timestep embedding.
    """

    def __init__(
        self,
        dims: Union[int, Tuple[int, int]],
        in_channels: int,
        out_channels: Optional[int] = None,
        dropout: float = 0.0,
        groups: int = 32,
        eps: float = 1e-6,
        norm_layer: str = "group_norm",
        inject_noise: bool = False,
        timestep_conditioning: bool = False,
        spatial_padding_mode: str = "zeros",
    ):
        super().__init__()
        self.in_channels = in_channels
        out_channels = in_channels if out_channels is None else out_channels
        self.out_channels = out_channels
        self.inject_noise = inject_noise

        if norm_layer == "group_norm":
            self.norm1 = nn.GroupNorm(
                num_groups=groups, num_channels=in_channels, eps=eps, affine=True
            )
        elif norm_layer == "pixel_norm":
            self.norm1 = PixelNorm()
        elif norm_layer == "layer_norm":
            self.norm1 = LayerNorm(in_channels, eps=eps, elementwise_affine=True)

        self.non_linearity = nn.SiLU()

        self.conv1 = make_conv_nd(
            dims,
            in_channels,
            out_channels,
            kernel_size=3,
            stride=1,
            padding=1,
            causal=True,
            spatial_padding_mode=spatial_padding_mode,
        )

        if inject_noise:
            self.per_channel_scale1 = nn.Parameter(torch.zeros((in_channels, 1, 1)))

        if norm_layer == "group_norm":
            self.norm2 = nn.GroupNorm(
                num_groups=groups, num_channels=out_channels, eps=eps, affine=True
            )
        elif norm_layer == "pixel_norm":
            self.norm2 = PixelNorm()
        elif norm_layer == "layer_norm":
            self.norm2 = LayerNorm(out_channels, eps=eps, elementwise_affine=True)

        self.dropout = torch.nn.Dropout(dropout)

        self.conv2 = make_conv_nd(
            dims,
            out_channels,
            out_channels,
            kernel_size=3,
            stride=1,
            padding=1,
            causal=True,
            spatial_padding_mode=spatial_padding_mode,
        )

        if inject_noise:
            # NOTE(review): sized by in_channels but applied after conv2, whose
            # output is out_channels wide — only consistent when
            # in_channels == out_channels (true for the inject_noise configs
            # seen in this file); matches checkpoint layout, so left as is.
            self.per_channel_scale2 = nn.Parameter(torch.zeros((in_channels, 1, 1)))

        # 1x1 projection (or identity) so the residual add is shape-compatible.
        self.conv_shortcut = (
            make_linear_nd(
                dims=dims, in_channels=in_channels, out_channels=out_channels
            )
            if in_channels != out_channels
            else nn.Identity()
        )

        self.norm3 = (
            LayerNorm(in_channels, eps=eps, elementwise_affine=True)
            if in_channels != out_channels
            else nn.Identity()
        )

        self.timestep_conditioning = timestep_conditioning

        if timestep_conditioning:
            # Rows unbind as (shift1, scale1, shift2, scale2) in forward().
            self.scale_shift_table = nn.Parameter(
                torch.randn(4, in_channels) / in_channels**0.5
            )

    def _feed_spatial_noise(
        self, hidden_states: torch.FloatTensor, per_channel_scale: torch.FloatTensor
    ) -> torch.FloatTensor:
        spatial_shape = hidden_states.shape[-2:]
        device = hidden_states.device
        dtype = hidden_states.dtype

        # similar to the "explicit noise inputs" method in style-gan
        # the same (h, w) noise map is shared across frames and batch.
        spatial_noise = torch.randn(spatial_shape, device=device, dtype=dtype)[None]
        scaled_noise = (spatial_noise * per_channel_scale)[None, :, None, ...]
        hidden_states = hidden_states + scaled_noise

        return hidden_states

    def forward(
        self,
        input_tensor: torch.FloatTensor,
        causal: bool = True,
        timestep: Optional[torch.Tensor] = None,
    ) -> torch.FloatTensor:
        hidden_states = input_tensor
        batch_size = hidden_states.shape[0]

        hidden_states = self.norm1(hidden_states)
        if self.timestep_conditioning:
            assert (
                timestep is not None
            ), "should pass timestep with timestep_conditioning=True"
            # Combine the learned table with the 4*C timestep embedding and
            # split into two (shift, scale) pairs, one per norm site.
            ada_values = self.scale_shift_table[
                None, ..., None, None, None
            ].to(device=hidden_states.device, dtype=hidden_states.dtype) + timestep.reshape(
                batch_size,
                4,
                -1,
                timestep.shape[-3],
                timestep.shape[-2],
                timestep.shape[-1],
            )
            shift1, scale1, shift2, scale2 = ada_values.unbind(dim=1)

            hidden_states = hidden_states * (1 + scale1) + shift1

        hidden_states = self.non_linearity(hidden_states)

        hidden_states = self.conv1(hidden_states, causal=causal)

        if self.inject_noise:
            hidden_states = self._feed_spatial_noise(
                hidden_states, self.per_channel_scale1.to(device=hidden_states.device, dtype=hidden_states.dtype)
            )

        hidden_states = self.norm2(hidden_states)

        if self.timestep_conditioning:
            hidden_states = hidden_states * (1 + scale2) + shift2

        hidden_states = self.non_linearity(hidden_states)

        hidden_states = self.dropout(hidden_states)

        hidden_states = self.conv2(hidden_states, causal=causal)

        if self.inject_noise:
            hidden_states = self._feed_spatial_noise(
                hidden_states, self.per_channel_scale2.to(device=hidden_states.device, dtype=hidden_states.dtype)
            )

        input_tensor = self.norm3(input_tensor)

        batch_size = input_tensor.shape[0]

        input_tensor = self.conv_shortcut(input_tensor)

        output_tensor = input_tensor + hidden_states

        return output_tensor
def patchify(x, patch_size_hw, patch_size_t=1):
    """Fold spatial (and optionally temporal) patches into the channel axis.

    Equivalent to the einops patterns previously used here:
        4D: "b c (h q) (w r) -> b (c r q) h w"
        5D: "b c (f p) (h q) (w r) -> b (c p r q) f h w"
    implemented with native torch view/permute/reshape so the helper has no
    third-party dependency.

    Args:
        x: (B, C, H, W) or (B, C, F, H, W) tensor whose spatial (and temporal)
            extents are divisible by the patch sizes.
        patch_size_hw: Square spatial patch side.
        patch_size_t: Temporal patch size (5D inputs only).

    Returns:
        The patchified tensor; `x` itself when both patch sizes are 1.

    Raises:
        ValueError: If `x` is neither 4- nor 5-dimensional.
    """
    if patch_size_hw == 1 and patch_size_t == 1:
        return x
    q = r = patch_size_hw
    p = patch_size_t
    if x.dim() == 4:
        b, c, height, width = x.shape
        # Split each axis into (outer, patch) factors, move patch factors next
        # to the channel axis in (r, q) order, then merge.
        x = x.view(b, c, height // q, q, width // r, r)
        x = x.permute(0, 1, 5, 3, 2, 4)  # b c r q h w
        x = x.reshape(b, c * r * q, height // q, width // r)
    elif x.dim() == 5:
        b, c, frames, height, width = x.shape
        x = x.view(b, c, frames // p, p, height // q, q, width // r, r)
        x = x.permute(0, 1, 3, 7, 5, 2, 4, 6)  # b c p r q f h w
        x = x.reshape(b, c * p * r * q, frames // p, height // q, width // r)
    else:
        raise ValueError(f"Invalid input shape: {x.shape}")
    return x
def unpatchify(x, patch_size_hw, patch_size_t=1):
    """Inverse of `patchify`: unfold channel patches back into space/time.

    Equivalent to the einops patterns previously used here:
        4D: "b (c r q) h w -> b c (h q) (w r)"
        5D: "b (c p r q) f h w -> b c (f p) (h q) (w r)"
    implemented with native torch view/permute/reshape.

    Args:
        x: (B, C*r*q, H, W) or (B, C*p*r*q, F, H, W) patchified tensor.
        patch_size_hw: Square spatial patch side.
        patch_size_t: Temporal patch size (5D inputs only).

    Returns:
        The unpatchified tensor; `x` itself when both patch sizes are 1.

    Raises:
        ValueError: If `x` is neither 4- nor 5-dimensional (previously this
            case silently returned `x` unchanged, unlike `patchify`).
    """
    if patch_size_hw == 1 and patch_size_t == 1:
        return x
    q = r = patch_size_hw
    p = patch_size_t
    if x.dim() == 4:
        b, packed, h, w = x.shape
        c = packed // (r * q)
        # Unpack channels as (c, r, q), interleave patch factors back into
        # their spatial axes, then merge.
        x = x.view(b, c, r, q, h, w)
        x = x.permute(0, 1, 4, 3, 5, 2)  # b c h q w r
        x = x.reshape(b, c, h * q, w * r)
    elif x.dim() == 5:
        b, packed, f, h, w = x.shape
        c = packed // (p * r * q)
        x = x.view(b, c, p, r, q, f, h, w)
        x = x.permute(0, 1, 5, 2, 6, 4, 7, 3)  # b c f p h q w r
        x = x.reshape(b, c, f * p, h * q, w * r)
    else:
        raise ValueError(f"Invalid input shape: {x.shape}")
    return x
class processor(nn.Module):
    """Holds per-channel latent statistics and applies/inverts normalization.

    The buffer names contain dashes, so they must be accessed through
    `get_buffer` rather than attribute lookup.
    """

    def __init__(self):
        super().__init__()
        for stat_name in (
            "std-of-means",
            "mean-of-means",
            "mean-of-stds",
            "mean-of-stds_over_std-of-means",
            "channel",
        ):
            self.register_buffer(stat_name, torch.empty(128))

    def _stat(self, name, like):
        # Broadcast a 128-entry statistic over (b, c, f, h, w) on `like`'s device/dtype.
        return self.get_buffer(name).view(1, -1, 1, 1, 1).to(like)

    def un_normalize(self, x):
        return x * self._stat("std-of-means", x) + self._stat("mean-of-means", x)

    def normalize(self, x):
        return (x - self._stat("mean-of-means", x)) / self._stat("std-of-means", x)
class VideoVAE(nn.Module):
    """LTX-Video VAE wrapper: builds an Encoder/Decoder pair from a config dict
    (or a built-in version-based default) and (un)normalizes latents using the
    per-channel statistics held in `processor`."""
    def __init__(self, version=0, config=None):
        super().__init__()
        if config is None:
            config = self.guess_config(version)
        self.timestep_conditioning = config.get("timestep_conditioning", False)
        double_z = config.get("double_z", True)
        latent_log_var = config.get(
            "latent_log_var", "per_channel" if double_z else "none"
        )
        self.encoder = Encoder(
            dims=config["dims"],
            in_channels=config.get("in_channels", 3),
            out_channels=config["latent_channels"],
            # Prefer the dedicated encoder block list; fall back to the shared
            # "blocks" key used by version-0 configs. (The previous code nested
            # a second, redundant config.get("encoder_blocks", ...) here.)
            blocks=config.get("encoder_blocks", config.get("blocks")),
            patch_size=config.get("patch_size", 1),
            latent_log_var=latent_log_var,
            norm_layer=config.get("norm_layer", "group_norm"),
            spatial_padding_mode=config.get("spatial_padding_mode", "zeros"),
        )
        self.decoder = Decoder(
            dims=config["dims"],
            in_channels=config["latent_channels"],
            out_channels=config.get("out_channels", 3),
            # Same fallback pattern as the encoder, for "decoder_blocks".
            blocks=config.get("decoder_blocks", config.get("blocks")),
            patch_size=config.get("patch_size", 1),
            norm_layer=config.get("norm_layer", "group_norm"),
            causal=config.get("causal_decoder", False),
            timestep_conditioning=self.timestep_conditioning,
            spatial_padding_mode=config.get("spatial_padding_mode", "zeros"),
        )
        self.per_channel_statistics = processor()
    def guess_config(self, version):
        """Return the built-in config dict for a known VAE `version`:
        0 (original), 1, or anything else (latest layout)."""
        if version == 0:
            config = {
                "_class_name": "CausalVideoAutoencoder",
                "dims": 3,
                "in_channels": 3,
                "out_channels": 3,
                "latent_channels": 128,
                "blocks": [
                    ["res_x", 4],
                    ["compress_all", 1],
                    ["res_x_y", 1],
                    ["res_x", 3],
                    ["compress_all", 1],
                    ["res_x_y", 1],
                    ["res_x", 3],
                    ["compress_all", 1],
                    ["res_x", 3],
                    ["res_x", 4],
                ],
                "scaling_factor": 1.0,
                "norm_layer": "pixel_norm",
                "patch_size": 4,
                "latent_log_var": "uniform",
                "use_quant_conv": False,
                "causal_decoder": False,
            }
        elif version == 1:
            config = {
                "_class_name": "CausalVideoAutoencoder",
                "dims": 3,
                "in_channels": 3,
                "out_channels": 3,
                "latent_channels": 128,
                "decoder_blocks": [
                    ["res_x", {"num_layers": 5, "inject_noise": True}],
                    ["compress_all", {"residual": True, "multiplier": 2}],
                    ["res_x", {"num_layers": 6, "inject_noise": True}],
                    ["compress_all", {"residual": True, "multiplier": 2}],
                    ["res_x", {"num_layers": 7, "inject_noise": True}],
                    ["compress_all", {"residual": True, "multiplier": 2}],
                    ["res_x", {"num_layers": 8, "inject_noise": False}]
                ],
                "encoder_blocks": [
                    ["res_x", {"num_layers": 4}],
                    ["compress_all", {}],
                    ["res_x_y", 1],
                    ["res_x", {"num_layers": 3}],
                    ["compress_all", {}],
                    ["res_x_y", 1],
                    ["res_x", {"num_layers": 3}],
                    ["compress_all", {}],
                    ["res_x", {"num_layers": 3}],
                    ["res_x", {"num_layers": 4}]
                ],
                "scaling_factor": 1.0,
                "norm_layer": "pixel_norm",
                "patch_size": 4,
                "latent_log_var": "uniform",
                "use_quant_conv": False,
                "causal_decoder": False,
                "timestep_conditioning": True,
            }
        else:
            config = {
                "_class_name": "CausalVideoAutoencoder",
                "dims": 3,
                "in_channels": 3,
                "out_channels": 3,
                "latent_channels": 128,
                "encoder_blocks": [
                    ["res_x", {"num_layers": 4}],
                    ["compress_space_res", {"multiplier": 2}],
                    ["res_x", {"num_layers": 6}],
                    ["compress_time_res", {"multiplier": 2}],
                    ["res_x", {"num_layers": 6}],
                    ["compress_all_res", {"multiplier": 2}],
                    ["res_x", {"num_layers": 2}],
                    ["compress_all_res", {"multiplier": 2}],
                    ["res_x", {"num_layers": 2}]
                ],
                "decoder_blocks": [
                    ["res_x", {"num_layers": 5, "inject_noise": False}],
                    ["compress_all", {"residual": True, "multiplier": 2}],
                    ["res_x", {"num_layers": 5, "inject_noise": False}],
                    ["compress_all", {"residual": True, "multiplier": 2}],
                    ["res_x", {"num_layers": 5, "inject_noise": False}],
                    ["compress_all", {"residual": True, "multiplier": 2}],
                    ["res_x", {"num_layers": 5, "inject_noise": False}]
                ],
                "scaling_factor": 1.0,
                "norm_layer": "pixel_norm",
                "patch_size": 4,
                "latent_log_var": "uniform",
                "use_quant_conv": False,
                "causal_decoder": False,
                "timestep_conditioning": True
            }
        return config
    def encode(self, x):
        """Encode pixel frames to normalized latent means.

        Raises ValueError unless the frame count is 1 + 8*k (temporal
        compression requirement)."""
        frames_count = x.shape[2]
        if ((frames_count - 1) % 8) != 0:
            raise ValueError("Invalid number of frames: Encode input must have 1 + 8 * x frames (e.g., 1, 9, 17, ...). Please check your input.")
        # The encoder outputs means and log-variances stacked on the channel
        # axis; only the means are used here.
        means, logvar = torch.chunk(self.encoder(x), 2, dim=1)
        return self.per_channel_statistics.normalize(means)
    def decode(self, x, timestep=0.05, noise_scale=0.025):
        """Decode latents to pixels; when timestep-conditioned, blend in a
        small amount of fresh noise before decoding."""
        if self.timestep_conditioning: #TODO: seed
            x = torch.randn_like(x) * noise_scale + (1.0 - noise_scale) * x
        return self.decoder(self.per_channel_statistics.un_normalize(x), timestep=timestep)
================================================
FILE: lightricks/vae/conv_nd_factory.py
================================================
from typing import Tuple, Union
from .dual_conv3d import DualConv3d
from .causal_conv3d import CausalConv3d
import comfy.ops
ops = comfy.ops.disable_weight_init
def make_conv_nd(
    dims: Union[int, Tuple[int, int]],
    in_channels: int,
    out_channels: int,
    kernel_size: int,
    stride=1,
    padding=0,
    dilation=1,
    groups=1,
    bias=True,
    causal=False,
    spatial_padding_mode="zeros",
    temporal_padding_mode="zeros",
):
    """Build the convolution for the requested dimensionality.

    dims == 2 -> Conv2d; dims == 3 -> Conv3d (CausalConv3d when `causal`);
    dims == (2, 1) -> DualConv3d (factorized spatial + temporal convs).
    """
    # Mixed spatial/temporal padding modes are only handled by the causal path.
    if not (causal or spatial_padding_mode == temporal_padding_mode):
        raise NotImplementedError("spatial and temporal padding modes must be equal")
    shared_kwargs = dict(
        in_channels=in_channels,
        out_channels=out_channels,
        kernel_size=kernel_size,
        stride=stride,
        padding=padding,
        bias=bias,
    )
    if dims == 2:
        return ops.Conv2d(
            dilation=dilation,
            groups=groups,
            padding_mode=spatial_padding_mode,
            **shared_kwargs,
        )
    if dims == 3:
        if causal:
            return CausalConv3d(
                dilation=dilation,
                groups=groups,
                spatial_padding_mode=spatial_padding_mode,
                **shared_kwargs,
            )
        return ops.Conv3d(
            dilation=dilation,
            groups=groups,
            padding_mode=spatial_padding_mode,
            **shared_kwargs,
        )
    if dims == (2, 1):
        # DualConv3d takes neither dilation nor groups here, mirroring the
        # original call.
        return DualConv3d(
            padding_mode=spatial_padding_mode,
            **shared_kwargs,
        )
    raise ValueError(f"unsupported dimensions: {dims}")
def make_linear_nd(
    dims: int,
    in_channels: int,
    out_channels: int,
    bias=True,
):
    """Per-position linear layer implemented as a kernel-size-1 convolution of
    the dimensionality matching `dims` (2 -> Conv2d; 3 or (2, 1) -> Conv3d)."""
    if dims == 2:
        conv_cls = ops.Conv2d
    elif dims == 3 or dims == (2, 1):
        conv_cls = ops.Conv3d
    else:
        raise ValueError(f"unsupported dimensions: {dims}")
    return conv_cls(
        in_channels=in_channels, out_channels=out_channels, kernel_size=1, bias=bias
    )
================================================
FILE: lightricks/vae/dual_conv3d.py
================================================
import math
from typing import Tuple, Union
import torch
import torch.nn as nn
import torch.nn.functional as F
from einops import rearrange
class DualConv3d(nn.Module):
    """3D convolution factorized into a spatial (1, kh, kw) conv followed by a
    temporal (kt, 1, 1) conv, with an equivalent fused-2D/1D execution path.

    Fix: torch.nn.functional.conv1d/2d/3d accept no `padding_mode` keyword
    (only the nn.ConvNd modules do), so the previous code raised a TypeError
    on every forward call. Non-"zeros" modes are now emulated with an explicit
    F.pad before each functional conv.
    """

    def __init__(
        self,
        in_channels,
        out_channels,
        kernel_size,
        stride: Union[int, Tuple[int, int, int]] = 1,
        padding: Union[int, Tuple[int, int, int]] = 0,
        dilation: Union[int, Tuple[int, int, int]] = 1,
        groups=1,
        bias=True,
        padding_mode="zeros",
    ):
        super(DualConv3d, self).__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.padding_mode = padding_mode
        # Ensure kernel_size, stride, padding, and dilation are tuples of length 3
        if isinstance(kernel_size, int):
            kernel_size = (kernel_size, kernel_size, kernel_size)
        if kernel_size == (1, 1, 1):
            raise ValueError(
                "kernel_size must be greater than 1. Use make_linear_nd instead."
            )
        if isinstance(stride, int):
            stride = (stride, stride, stride)
        if isinstance(padding, int):
            padding = (padding, padding, padding)
        if isinstance(dilation, int):
            dilation = (dilation, dilation, dilation)
        # Set parameters for convolutions
        self.groups = groups
        self.bias = bias
        # Define the size of the channels after the first convolution
        intermediate_channels = (
            out_channels if in_channels < out_channels else in_channels
        )
        # Define parameters for the first (spatial) convolution
        self.weight1 = nn.Parameter(
            torch.Tensor(
                intermediate_channels,
                in_channels // groups,
                1,
                kernel_size[1],
                kernel_size[2],
            )
        )
        self.stride1 = (1, stride[1], stride[2])
        self.padding1 = (0, padding[1], padding[2])
        self.dilation1 = (1, dilation[1], dilation[2])
        if bias:
            self.bias1 = nn.Parameter(torch.Tensor(intermediate_channels))
        else:
            self.register_parameter("bias1", None)
        # Define parameters for the second (temporal) convolution
        self.weight2 = nn.Parameter(
            torch.Tensor(
                out_channels, intermediate_channels // groups, kernel_size[0], 1, 1
            )
        )
        self.stride2 = (stride[0], 1, 1)
        self.padding2 = (padding[0], 0, 0)
        self.dilation2 = (dilation[0], 1, 1)
        if bias:
            self.bias2 = nn.Parameter(torch.Tensor(out_channels))
        else:
            self.register_parameter("bias2", None)
        # Initialize weights and biases
        self.reset_parameters()

    def reset_parameters(self):
        """Kaiming-uniform weights; biases uniform in +/- 1/sqrt(fan_in)."""
        nn.init.kaiming_uniform_(self.weight1, a=math.sqrt(5))
        nn.init.kaiming_uniform_(self.weight2, a=math.sqrt(5))
        if self.bias:
            fan_in1, _ = nn.init._calculate_fan_in_and_fan_out(self.weight1)
            bound1 = 1 / math.sqrt(fan_in1)
            nn.init.uniform_(self.bias1, -bound1, bound1)
            fan_in2, _ = nn.init._calculate_fan_in_and_fan_out(self.weight2)
            bound2 = 1 / math.sqrt(fan_in2)
            nn.init.uniform_(self.bias2, -bound2, bound2)

    @staticmethod
    def _explicit_pad_arg(padding):
        # F.pad wants per-side sizes ordered innermost-dim first:
        # (w_left, w_right, h_left, h_right, d_left, d_right).
        arg = []
        for p in reversed(padding):
            arg.extend((p, p))
        return tuple(arg)

    def _apply_padding_mode(self, x, padding):
        """Return (possibly padded x, padding to pass to the functional conv).

        Functional convs only do zero padding, so non-"zeros" modes are applied
        here via F.pad and the conv then receives zero padding."""
        if self.padding_mode == "zeros" or all(p == 0 for p in padding):
            return x, padding
        x = F.pad(x, self._explicit_pad_arg(padding), mode=self.padding_mode)
        return x, tuple(0 for _ in padding)

    def forward(self, x, use_conv3d=False, skip_time_conv=False):
        if use_conv3d:
            return self.forward_with_3d(x=x, skip_time_conv=skip_time_conv)
        else:
            return self.forward_with_2d(x=x, skip_time_conv=skip_time_conv)

    def forward_with_3d(self, x, skip_time_conv):
        # First (spatial) convolution
        x, pad1 = self._apply_padding_mode(x, self.padding1)
        x = F.conv3d(x, self.weight1, self.bias1, self.stride1, pad1, self.dilation1, self.groups)
        if skip_time_conv:
            return x
        # Second (temporal) convolution
        x, pad2 = self._apply_padding_mode(x, self.padding2)
        x = F.conv3d(x, self.weight2, self.bias2, self.stride2, pad2, self.dilation2, self.groups)
        return x

    def forward_with_2d(self, x, skip_time_conv):
        b, c, d, h, w = x.shape
        # First 2D convolution over (h, w), batching the depth dimension
        x = rearrange(x, "b c d h w -> (b d) c h w")
        # Squeeze the depth dimension out of weight1 since it's 1
        weight1 = self.weight1.squeeze(2)
        # Select stride, padding, and dilation for the 2D convolution
        stride1 = (self.stride1[1], self.stride1[2])
        padding1 = (self.padding1[1], self.padding1[2])
        dilation1 = (self.dilation1[1], self.dilation1[2])
        x, padding1 = self._apply_padding_mode(x, padding1)
        x = F.conv2d(x, weight1, self.bias1, stride1, padding1, dilation1, self.groups)
        _, _, h, w = x.shape
        if skip_time_conv:
            return rearrange(x, "(b d) c h w -> b c d h w", b=b)
        # Second convolution, treated as a 1D convolution across the 'd' dimension
        x = rearrange(x, "(b d) c h w -> (b h w) c d", b=b)
        # Reshape weight2 to match the expected dimensions for conv1d
        weight2 = self.weight2.squeeze(-1).squeeze(-1)
        stride2 = self.stride2[0]
        padding2 = self.padding2[0]
        dilation2 = self.dilation2[0]
        x, (padding2,) = self._apply_padding_mode(x, (padding2,))
        x = F.conv1d(x, weight2, self.bias2, stride2, padding2, dilation2, self.groups)
        x = rearrange(x, "(b h w) c d -> b c d h w", b=b, h=h, w=w)
        return x

    @property
    def weight(self):
        return self.weight2
def test_dual_conv3d_consistency():
    """Sanity check: the fused-3D and decomposed-2D/1D paths of DualConv3d
    must produce (numerically) identical outputs."""
    conv = DualConv3d(
        in_channels=3,
        out_channels=5,
        kernel_size=(3, 3, 3),
        stride=(2, 2, 2),
        padding=(1, 1, 1),
        bias=True,
    )
    sample = torch.randn(1, 3, 10, 10, 10)
    via_3d = conv(sample, use_conv3d=True)
    via_2d = conv(sample, use_conv3d=False)
    assert torch.allclose(
        via_3d, via_2d, atol=1e-6
    ), "Outputs are not consistent between 3D and 2D convolutions."
================================================
FILE: lightricks/vae/pixel_norm.py
================================================
import torch
from torch import nn
class PixelNorm(nn.Module):
    """Normalize each position by its RMS over dimension `dim` (default: the
    channel axis), with `eps` for numerical stability."""
    def __init__(self, dim=1, eps=1e-8):
        super().__init__()
        self.dim = dim
        self.eps = eps

    def forward(self, x):
        rms = torch.sqrt(torch.mean(x * x, dim=self.dim, keepdim=True) + self.eps)
        return x / rms
================================================
FILE: loaders.py
================================================
import folder_paths
import torch
import comfy.samplers
import comfy.sample
import comfy.sampler_helpers
import comfy.model_sampling
import comfy.latent_formats
import comfy.sd
import comfy.clip_vision
import comfy.supported_models
from comfy.utils import load_torch_file
# Loader nodes for models, CLIP text encoders, VAEs, CLIP vision, and style models.
# adapted from https://github.com/comfyanonymous/ComfyUI/blob/master/nodes.py
# CLIP text-encoder families accepted by comfy.sd.load_clip; these are
# upper-cased at use time to look up the matching comfy.sd.CLIPType member.
clip_types = ["stable_diffusion", "stable_cascade", "sd3", "stable_audio", "hunyuan_dit", "flux", "mochi", "ltxv", "hunyuan_video", "pixart", "cosmos", "lumina2", "wan", "hidream", "chroma", "ace"]
class BaseModelLoader:
    """Shared helpers for the RES4LYF loader nodes: file discovery, TAESD
    assembly, checkpoint loading, and CLIP/VAE resolution."""
    @staticmethod
    def load_taesd(name):
        """Assemble a single TAESD state dict from the separate encoder/decoder
        files in the "vae_approx" folder; `name` is the filename prefix
        ("taesd", "taesdxl", "taesd3", or "taef1")."""
        sd = {}
        approx_vaes = folder_paths.get_filename_list("vae_approx")
        # next() raises StopIteration if a matching encoder/decoder file is missing.
        encoder = next(filter(lambda a: a.startswith(f"{name}_encoder."), approx_vaes))
        decoder = next(filter(lambda a: a.startswith(f"{name}_decoder."), approx_vaes))
        # Keys are prefixed so the combined dict is recognized as TAESD downstream.
        enc = comfy.utils.load_torch_file(folder_paths.get_full_path_or_raise("vae_approx", encoder))
        for k in enc:
            sd[f"taesd_encoder.{k}"] = enc[k]
        dec = comfy.utils.load_torch_file(folder_paths.get_full_path_or_raise("vae_approx", decoder))
        for k in dec:
            sd[f"taesd_decoder.{k}"] = dec[k]
        # VAE scale and shift mapping
        vae_params = {
            "taesd": (0.18215, 0.0),
            "taesdxl": (0.13025, 0.0),
            "taesd3": (1.5305, 0.0609),
            "taef1": (0.3611, 0.1159)
        }
        if name in vae_params:
            scale, shift = vae_params[name]
            sd["vae_scale"] = torch.tensor(scale)
            sd["vae_shift"] = torch.tensor(shift)
        return sd
    @staticmethod
    def guess_clip_type(model):
        """Return the upper-cased CLIP type name for a comfy.model_base model
        instance, suitable for getattr(comfy.sd.CLIPType, ...)."""
        import comfy.model_base as mb
        # Ordered isinstance checks: more specific classes are listed before
        # their bases (e.g. SDXLRefiner before SDXL) so the first hit wins.
        type_map = [
            (mb.SDXLRefiner, "sdxl"),
            (mb.SDXL, "sdxl"),
            (mb.SD15_instructpix2pix, "stable_diffusion"),
            (mb.SDXL_instructpix2pix, "sdxl"),
            (mb.StableCascade_C, "stable_cascade"),
            (mb.StableCascade_B, "stable_cascade"),
            (mb.Flux, "flux"),
            (mb.LTXV, "ltxv"),
            (mb.HunyuanDiT, "hunyuan_dit"),
            (mb.HunyuanVideo, "hunyuan_video"),
            (mb.HunyuanVideoI2V, "hunyuan_video"),
            (mb.HunyuanVideoSkyreelsI2V, "hunyuan_video"),
            (mb.PixArt, "pixart"),
            (mb.CosmosVideo, "cosmos"),
            (mb.Lumina2, "lumina2"),
            (mb.WAN21, "wan"),
            (mb.WAN21_Vace, "wan"),
            (mb.WAN21_Camera, "wan"),
            (mb.HiDream, "hidream"),
            (mb.Chroma, "chroma"),
            (mb.ACEStep, "ace"),
            (mb.SD3, "sd3"),
            (mb.GenmoMochi, "mochi"),
        ]
        for cls, clip_type in type_map:
            if isinstance(model, cls):
                return clip_type.upper()
        # fallback
        # Substring match against the model's class name when no isinstance hit.
        known_types = {
            "stable_diffusion", "stable_cascade", "sd3", "stable_audio", "hunyuan_dit", "flux", "mochi", "ltxv",
            "hunyuan_video", "pixart", "cosmos", "lumina2", "wan", "hidream", "chroma", "ace"
        }
        class_name = model.__class__.__name__.lower()
        for t in known_types:
            if t in class_name:
                return t.upper()
        default_clip_type = "stable_diffusion"
        return default_clip_type.upper()
    @staticmethod
    def get_model_files():
        """List checkpoint and diffusion-model files with known weight extensions."""
        return [f for f in folder_paths.get_filename_list("checkpoints") +
                folder_paths.get_filename_list("diffusion_models")
                if f.endswith((".ckpt", ".safetensors", ".sft", ".pt"))]
    @staticmethod
    def get_weight_options():
        """Weight-dtype choices surfaced in the node UI."""
        return ["default", "fp8_e4m3fn", "fp8_e4m3fn_fast", "fp8_e5m2"]
    @staticmethod
    def get_clip_options():
        """Text-encoder choices; ".use_ckpt_clip" selects the checkpoint's own CLIP."""
        return [".use_ckpt_clip"] + folder_paths.get_filename_list("text_encoders")
    @staticmethod
    def vae_list():
        """List standalone VAE files, appending a TAESD entry only when both
        its encoder and decoder halves are present in "vae_approx"."""
        vaes = folder_paths.get_filename_list("vae")
        approx_vaes = folder_paths.get_filename_list("vae_approx")
        # Presence flags for each TAESD variant's encoder/decoder halves.
        sdxl_taesd_enc = False
        sdxl_taesd_dec = False
        sd1_taesd_enc = False
        sd1_taesd_dec = False
        sd3_taesd_enc = False
        sd3_taesd_dec = False
        f1_taesd_enc = False
        f1_taesd_dec = False
        for v in approx_vaes:
            if v.startswith("taesd_decoder."):
                sd1_taesd_dec = True
            elif v.startswith("taesd_encoder."):
                sd1_taesd_enc = True
            elif v.startswith("taesdxl_decoder."):
                sdxl_taesd_dec = True
            elif v.startswith("taesdxl_encoder."):
                sdxl_taesd_enc = True
            elif v.startswith("taesd3_decoder."):
                sd3_taesd_dec = True
            elif v.startswith("taesd3_encoder."):
                sd3_taesd_enc = True
            elif v.startswith("taef1_encoder."):
                f1_taesd_enc = True
            elif v.startswith("taef1_decoder."):
                f1_taesd_dec = True
        if sd1_taesd_dec and sd1_taesd_enc:
            vaes.append("taesd")
        if sdxl_taesd_dec and sdxl_taesd_enc:
            vaes.append("taesdxl")
        if sd3_taesd_dec and sd3_taesd_enc:
            vaes.append("taesd3")
        if f1_taesd_dec and f1_taesd_enc:
            vaes.append("taef1")
        return vaes
    def process_weight_dtype(self, weight_dtype):
        """Translate a UI weight-dtype string into comfy model_options."""
        model_options = {}
        if weight_dtype == "fp8_e4m3fn":
            model_options["dtype"] = torch.float8_e4m3fn
        elif weight_dtype == "fp8_e4m3fn_fast":
            model_options["dtype"] = torch.float8_e4m3fn
            model_options["fp8_optimizations"] = True
        elif weight_dtype == "fp8_e5m2":
            model_options["dtype"] = torch.float8_e5m2
        return model_options
    def load_checkpoint(self, model_name, output_vae, output_clip, model_options):
        """Load `model_name` first as a full checkpoint from "checkpoints",
        falling back to a bare diffusion model from "diffusion_models" when the
        file is not found there or has no detectable checkpoint type.
        Returns the tuple from load_checkpoint_guess_config, or a 1-tuple
        (model,) for bare diffusion models."""
        try:
            ckpt_path = folder_paths.get_full_path_or_raise("checkpoints", model_name)
            out = None
            try:
                out = comfy.sd.load_checkpoint_guess_config(
                    ckpt_path,
                    output_vae=output_vae,
                    output_clip=output_clip,
                    embedding_directory=folder_paths.get_folder_paths("embeddings"),
                    model_options=model_options
                )
            except RuntimeError as e:
                # Matching on the message text: comfy raises a generic
                # RuntimeError when the file is not a full checkpoint.
                if "ERROR: Could not detect model type of:" in str(e):
                    error_msg = ""
                    if output_vae is True:
                        error_msg += "Model/Checkpoint file does not contain a VAE\n"
                    if output_clip is True:
                        error_msg += "Model/Checkpoint file does not contain a CLIP\n"
                    if error_msg != "":
                        raise ValueError(error_msg)
                    else:
                        # Bare diffusion model: wrap in a 1-tuple so callers can
                        # still index [0] for the model.
                        out = (comfy.sd.load_diffusion_model(ckpt_path, model_options),)
                else:
                    raise e
            return out
        except FileNotFoundError:
            ckpt_path = folder_paths.get_full_path_or_raise("diffusion_models", model_name)
            model = comfy.sd.load_diffusion_model(ckpt_path, model_options=model_options)
            return (model, )
    def load_vae(self, vae_name, ckpt_out):
        """Resolve `vae_name` to a VAE object: the checkpoint's own VAE, an
        assembled TAESD, None, or a standalone VAE file."""
        if vae_name == ".use_ckpt_vae":
            if ckpt_out[2] is None:
                raise ValueError("Model does not have a VAE")
            return ckpt_out[2]
        elif vae_name in ["taesd", "taesdxl", "taesd3", "taef1"]:
            sd = self.load_taesd(vae_name)
            return comfy.sd.VAE(sd=sd)
        elif vae_name == ".none":
            return None
        else:
            vae_path = folder_paths.get_full_path_or_raise("vae", vae_name)
            sd = comfy.utils.load_torch_file(vae_path)
            return comfy.sd.VAE(sd=sd)
def load_clipvision(ckpt_path):
    """Load a CLIP vision model from `ckpt_path` via comfy.clip_vision.load.

    The previous version also called load_torch_file(ckpt_path) into an unused
    local, reading the whole checkpoint from disk a second time; that dead
    load has been removed.
    """
    return comfy.clip_vision.load(ckpt_path)
class FluxLoader(BaseModelLoader):
    """All-in-one loader node for Flux: model, CLIP text encoder(s), VAE,
    optional CLIP vision, and optional style model."""
    @classmethod
    def INPUT_TYPES(s):
        return {"required": {
            "model_name": (s.get_model_files(),),
            "weight_dtype": (s.get_weight_options(),),
            "clip_name1": (s.get_clip_options(),),
            "clip_name2_opt": ([".none"] + folder_paths.get_filename_list("text_encoders"),),
            "vae_name": ([".use_ckpt_vae"] + s.vae_list(),),
            "clip_vision_name": ([".none"] + folder_paths.get_filename_list("clip_vision"),),
            "style_model_name": ([".none"] + folder_paths.get_filename_list("style_models"),),
        }}
    RETURN_TYPES = ("MODEL", "CLIP", "VAE", "CLIP_VISION", "STYLE_MODEL")
    RETURN_NAMES = ("model", "clip", "vae", "clip_vision", "style_model")
    FUNCTION = "main"
    CATEGORY = "RES4LYF/loaders"
    def main(self, model_name, weight_dtype, clip_name1, clip_name2_opt, vae_name, clip_vision_name, style_model_name):
        """Load the checkpoint plus the requested auxiliary models and return
        (model, clip, vae, clip_vision, style_model); unselected auxiliaries
        are returned as None."""
        model_options = self.process_weight_dtype(weight_dtype)
        # NOTE(review): seeds the process-wide torch RNG state on every load —
        # presumably intentional for reproducibility; confirm.
        torch.manual_seed(42)
        torch.cuda.manual_seed_all(42)
        if clip_name1 == ".use_ckpt_clip" and clip_name2_opt != ".none":
            raise ValueError("Cannot specify both \".use_ckpt_clip\" and another clip")
        output_vae = vae_name == ".use_ckpt_vae"
        output_clip = clip_name1 == ".use_ckpt_clip"
        ckpt_out = self.load_checkpoint(model_name, output_vae, output_clip, model_options)
        if clip_name1 == ".use_ckpt_clip":
            if ckpt_out[1] is None:
                raise ValueError("Model does not have a clip")
            clip = ckpt_out[1]
        else:
            # External text encoder files, always loaded with the FLUX CLIP type.
            clip_paths = [folder_paths.get_full_path_or_raise("text_encoders", clip_name1)]
            if clip_name2_opt != ".none":
                clip_paths.append(folder_paths.get_full_path_or_raise("text_encoders", clip_name2_opt))
            clip = comfy.sd.load_clip(clip_paths,
                                      embedding_directory=folder_paths.get_folder_paths("embeddings"),
                                      clip_type=comfy.sd.CLIPType.FLUX)
        clip_vision = None if clip_vision_name == ".none" else \
            load_clipvision(folder_paths.get_full_path_or_raise("clip_vision", clip_vision_name))
        style_model = None if style_model_name == ".none" else \
            comfy.sd.load_style_model(folder_paths.get_full_path_or_raise("style_models", style_model_name))
        vae = self.load_vae(vae_name, ckpt_out)
        return (ckpt_out[0], clip, vae, clip_vision, style_model)
class SD35Loader(BaseModelLoader):
    """Combined checkpoint/CLIP/VAE loader node for SD3.5-family models."""

    RETURN_TYPES = ("MODEL", "CLIP", "VAE")
    RETURN_NAMES = ("model", "clip", "vae")
    FUNCTION = "main"
    CATEGORY = "RES4LYF/loaders"

    @classmethod
    def INPUT_TYPES(s):
        text_encoders = folder_paths.get_filename_list("text_encoders")
        return {"required": {
            "model_name": (s.get_model_files(),),
            "weight_dtype": (s.get_weight_options(),),
            "clip_name1": (s.get_clip_options(),),
            "clip_name2_opt": ([".none"] + text_encoders,),
            "clip_name3_opt": ([".none"] + text_encoders,),
            "vae_name": ([".use_ckpt_vae"] + folder_paths.get_filename_list("vae") + ["taesd", "taesdxl", "taesd3", "taef1"],),
        }}

    def main(self, model_name, weight_dtype, clip_name1, clip_name2_opt, clip_name3_opt, vae_name):
        """Load the model, resolve CLIP (from the checkpoint or external
        text-encoder files with the SD3 CLIP type) and the VAE."""
        model_options = self.process_weight_dtype(weight_dtype)
        torch.manual_seed(42)
        torch.cuda.manual_seed_all(42)
        extra_clips = [n for n in (clip_name2_opt, clip_name3_opt) if n != ".none"]
        use_ckpt_clip = clip_name1 == ".use_ckpt_clip"
        if use_ckpt_clip and extra_clips:
            raise ValueError("Cannot specify both \".use_ckpt_clip\" and another clip")
        output_vae = vae_name == ".use_ckpt_vae"
        ckpt_out = self.load_checkpoint(model_name, output_vae, use_ckpt_clip, model_options)
        if use_ckpt_clip:
            if ckpt_out[1] is None:
                raise ValueError("Model does not have a clip")
            clip = ckpt_out[1]
        else:
            clip_paths = [folder_paths.get_full_path_or_raise("text_encoders", clip_name1)]
            clip_paths += [folder_paths.get_full_path_or_raise("text_encoders", n) for n in extra_clips]
            clip = comfy.sd.load_clip(
                clip_paths,
                embedding_directory=folder_paths.get_folder_paths("embeddings"),
                clip_type=comfy.sd.CLIPType.SD3,
            )
        vae = self.load_vae(vae_name, ckpt_out)
        return (ckpt_out[0], clip, vae)
class RES4LYFModelLoader(BaseModelLoader):
    """Generic loader node: any supported model family, up to four text
    encoders, explicit or auto-detected CLIP type, and an optional VAE."""
    @classmethod
    def INPUT_TYPES(s):
        return {"required": {
            "model_name": (s.get_model_files(),),
            "weight_dtype": (s.get_weight_options(),),
            "clip_name1_opt": ([".none"] + s.get_clip_options(),),
            "clip_name2_opt": ([".none"] + folder_paths.get_filename_list("text_encoders"),),
            "clip_name3_opt": ([".none"] + folder_paths.get_filename_list("text_encoders"),),
            "clip_name4_opt": ([".none"] + folder_paths.get_filename_list("text_encoders"),),
            "clip_type": ([".auto"] + clip_types,),
            "vae_name": ([".none", ".use_ckpt_vae"] + folder_paths.get_filename_list("vae") + ["taesd", "taesdxl", "taesd3", "taef1"],),
        }}
    RETURN_TYPES = ("MODEL", "CLIP", "VAE")
    RETURN_NAMES = ("model", "clip", "vae")
    FUNCTION = "main"
    CATEGORY = "RES4LYF/loaders"
    def main(self, model_name, weight_dtype, clip_name1_opt, clip_name2_opt, clip_name3_opt, clip_name4_opt, clip_type, vae_name):
        """Load the model, then resolve CLIP (checkpoint CLIP, none, or
        external files with an explicit or guessed CLIP type) and the VAE."""
        model_options = self.process_weight_dtype(weight_dtype)
        # NOTE(review): seeds the process-wide torch RNG state on every load —
        # presumably intentional for reproducibility; confirm.
        torch.manual_seed(42)
        torch.cuda.manual_seed_all(42)
        if clip_name1_opt == ".use_ckpt_clip" and (clip_name2_opt != ".none" or clip_name3_opt != ".none" or clip_name4_opt != ".none"):
            raise ValueError("Cannot specify both \".use_ckpt_clip\" and another clip")
        output_vae = vae_name == ".use_ckpt_vae"
        output_clip = clip_name1_opt == ".use_ckpt_clip"
        ckpt_out = self.load_checkpoint(model_name, output_vae, output_clip, model_options)
        if clip_name1_opt == ".use_ckpt_clip":
            if ckpt_out[1] is None:
                raise ValueError("Model does not have a clip")
            clip = ckpt_out[1]
        elif clip_name1_opt == ".none":
            clip = None
        else:
            clip_paths = [folder_paths.get_full_path_or_raise("text_encoders", clip_name1_opt)]
            for clip_name in [clip_name2_opt, clip_name3_opt, clip_name4_opt]:
                if clip_name != ".none":
                    clip_paths.append(folder_paths.get_full_path_or_raise("text_encoders", clip_name))
            # ".auto": guess the CLIP type from the loaded model's class; any
            # unknown name falls back to STABLE_DIFFUSION via the getattr default.
            if "auto" in clip_type and ckpt_out[0].model is not None:
                sdCLIPType = getattr(comfy.sd.CLIPType, self.guess_clip_type(ckpt_out[0].model), comfy.sd.CLIPType.STABLE_DIFFUSION)
            else:
                sdCLIPType = getattr(comfy.sd.CLIPType, clip_type.upper(), comfy.sd.CLIPType.STABLE_DIFFUSION)
            clip = comfy.sd.load_clip(clip_paths,
                                      embedding_directory=folder_paths.get_folder_paths("embeddings"),
                                      clip_type=sdCLIPType)
        vae = self.load_vae(vae_name, ckpt_out)
        return (ckpt_out[0], clip, vae)
from .style_transfer import Retrojector
import torch.nn as nn
class LayerPatcher:
    """Overwrite specific tensors of a loaded diffusion model (input embedder,
    gate tensors, final layer) with weights read from standalone .safetensors
    files. The model is mutated in place and returned."""
    @classmethod
    def INPUT_TYPES(s):
        return {"required": {
            "model": ("MODEL",),
            "embedder": (s.get_model_patches(),),
            "gates": (s.get_model_patches(),),
            "last_layer": (s.get_model_patches(),),
            "dtype": (["bfloat16", "float16", "float32", "float64"], {"default": "float64"}),
            #"retrojector": (s.get_model_patches(),),
        }}
    RETURN_TYPES = ("MODEL",)
    RETURN_NAMES = ("model",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/patchers"
    @staticmethod
    def get_model_patches():
        """Safetensors files from "diffusion_models" usable as patch sources."""
        return [f for f in folder_paths.get_filename_list("diffusion_models") if f.endswith((".safetensors", ".sft"))]
    def main(self, model, embedder, gates, last_layer, retrojector=None, dtype="float64"):
        """Load the three patch files and copy their tensors into the model.

        NOTE(review): ComfyUI passes inputs by keyword, so the retrojector/dtype
        parameter order differing from INPUT_TYPES is harmless — confirm if
        calling this manually. NOTE(review): the .cuda() calls below assume a
        CUDA device is available.
        """
        dtype = getattr(torch, dtype)
        embedder = comfy.utils.load_torch_file(folder_paths.get_full_path_or_raise("diffusion_models", embedder))
        last_layer = comfy.utils.load_torch_file(folder_paths.get_full_path_or_raise("diffusion_models", last_layer))
        #retrojector = comfy.utils.load_torch_file(folder_paths.get_full_path_or_raise("diffusion_models", retrojector))
        gates = comfy.utils.load_torch_file(folder_paths.get_full_path_or_raise("diffusion_models", gates))
        m = model.model.diffusion_model
        if embedder:
            # Rebuild the input projection at the requested dtype, then copy in
            # the patch weights and bias.
            m.x_embedder.proj = nn.Linear(
                m.x_embedder.proj.in_features,
                m.x_embedder.proj.out_features,
                bias=True,
                device=m.x_embedder.proj.weight.data.device,
                dtype=dtype
            )
            m.x_embedder.proj.weight.data = embedder['x_embedder.proj.weight'].to(dtype).cuda()
            m.x_embedder.proj.bias.data = embedder['x_embedder.proj.bias'].to(dtype).cuda()
        if gates:
            # Each key in the gates file is a dotted path into the model.
            for key, tensor in gates.items():
                #print(f"Patching {key} with shape {tensor.shape}")
                set_nested_attr(model=m, key=key, value=tensor, dtype=dtype)
        if last_layer:
            m.final_layer.linear.weight.data = last_layer['final_layer.linear.weight'].to(dtype).cuda()
            m.final_layer.linear.bias.data = last_layer['final_layer.linear.bias'].to(dtype).cuda()
            m.final_layer.adaLN_modulation[1].weight.data = last_layer['final_layer.adaLN_modulation.1.weight'].to(dtype).cuda()
            m.final_layer.adaLN_modulation[1].bias.data = last_layer['final_layer.adaLN_modulation.1.bias'].to(dtype).cuda()
        #if retrojector:
        #    m.Retrojector = Retrojector(model.model.diffusion_model.img_in, pinv_dtype=style_dtype, dtype=style_dtype)
        #    m.final_layer.linear.weight.data = last_layer['final_layer.linear.weight']
        #    m.final_layer.linear.bias.data = last_layer['final_layer.linear.bias']
        #    m.final_layer.adaLN_modulation[1].weight.data = last_layer['final_layer.adaLN_modulation.1.weight']
        #    m.final_layer.adaLN_modulation[1].bias.data = last_layer['final_layer.adaLN_modulation.1.bias']
        return (model,)
def set_nested_attr(model, key, value, dtype):
    """Copy `value` into the tensor addressed by dotted path `key` on `model`.

    Numeric path segments index into sequences (e.g. "blocks.0.w"); the value
    is moved to the destination tensor's device and converted to `dtype`
    before the in-place copy.
    """
    *path, leaf = key.split(".")
    target = model
    for segment in path:
        target = target[int(segment)] if segment.isdigit() else getattr(target, segment)
    dest = getattr(target, leaf)
    dest.data.copy_(value.to(dest.device, dtype=dtype))
================================================
FILE: misc_scripts/replace_metadata.py
================================================
#!/usr/bin/env python3
import argparse
from PIL import Image
from PIL.PngImagePlugin import PngInfo
def extract_metadata(image_path):
    """Return the metadata (`info` dict) of the image at `image_path`.

    Uses a context manager so the underlying file handle is closed promptly
    instead of leaking until garbage collection (the `info` dict is populated
    at open time and remains valid after close).
    """
    with Image.open(image_path) as image:
        metadata = image.info
    return metadata
def replace_metadata(source_image_path, target_image_path, output_image_path):
    """Write `target_image_path` to `output_image_path`, carrying the PNG text
    metadata extracted from `source_image_path`.

    Values are stringified with str(); NOTE(review): binary entries (e.g.
    icc_profile) would be stored as their str() representation — confirm that
    is acceptable for the intended workflow-metadata use case.
    """
    metadata = extract_metadata(source_image_path)
    png_info = PngInfo()
    for key, value in metadata.items():
        png_info.add_text(key, str(value))
    # Context manager closes the target file handle deterministically.
    with Image.open(target_image_path) as target_image:
        target_image.save(output_image_path, pnginfo=png_info)
def main():
    """CLI entry point: parse paths and copy PNG metadata from source to output."""
    parser = argparse.ArgumentParser(description="Copy metadata from one PNG image to another.")
    arg_specs = [
        ("source", "Path to the source PNG image with the metadata."),
        ("target", "Path to the target PNG image to replace metadata."),
        ("output", "Path for the output PNG image with replaced metadata."),
    ]
    for arg_name, arg_help in arg_specs:
        parser.add_argument(arg_name, type=str, help=arg_help)
    args = parser.parse_args()
    replace_metadata(args.source, args.target, args.output)
    print(f"Metadata from '{args.source}' has been copied to '{args.output}'.")


if __name__ == "__main__":
    main()
================================================
FILE: models.py
================================================
import torch
import types
from typing import Optional, Callable, Tuple, Dict, Any, Union, TYPE_CHECKING, TypeVar
import re
import folder_paths
import os
import json
import math
import comfy.samplers
import comfy.sample
import comfy.sampler_helpers
import comfy.utils
import comfy.model_management
from comfy.cli_args import args
from .flux.redux import ReReduxImageEncoder
from comfy.ldm.flux.redux import ReduxImageEncoder
from comfy.ldm.flux.model import Flux
from comfy.ldm.flux.layers import SingleStreamBlock, DoubleStreamBlock
from .flux.model import ReFlux
from .flux.layers import SingleStreamBlock as ReSingleStreamBlock, DoubleStreamBlock as ReDoubleStreamBlock
from comfy.ldm.flux.model import Flux
from comfy.ldm.flux.layers import SingleStreamBlock, DoubleStreamBlock
from comfy.ldm.hidream.model import HiDreamImageTransformer2DModel
from comfy.ldm.hidream.model import HiDreamImageBlock, HiDreamImageSingleTransformerBlock, HiDreamImageTransformerBlock, HiDreamAttention
from .hidream.model import HDModel
from .hidream.model import HDBlock, HDBlockDouble, HDBlockSingle, HDAttention, HDMoEGate, HDMOEFeedForwardSwiGLU, HDFeedForwardSwiGLU, HDLastLayer
from comfy.ldm.modules.diffusionmodules.mmdit import OpenAISignatureMMDITWrapper, JointBlock
from .sd35.mmdit import ReOpenAISignatureMMDITWrapper, ReJointBlock
from comfy.ldm.aura.mmdit import MMDiT, DiTBlock, MMDiTBlock, SingleAttention, DoubleAttention
from .aura.mmdit import ReMMDiT, ReDiTBlock, ReMMDiTBlock, ReSingleAttention, ReDoubleAttention
from comfy.ldm.wan.model import WanAttentionBlock, WanI2VCrossAttention, WanModel, WanSelfAttention, WanT2VCrossAttention
from .wan.model import ReWanAttentionBlock, ReWanI2VCrossAttention, ReWanModel, ReWanRawSelfAttention, ReWanSelfAttention, ReWanSlidingSelfAttention, ReWanT2VSlidingCrossAttention, ReWanT2VCrossAttention, ReWanT2VRawCrossAttention
from comfy.ldm.chroma.model import Chroma
from comfy.ldm.chroma.layers import SingleStreamBlock as ChromaSingleStreamBlock, DoubleStreamBlock as ChromaDoubleStreamBlock
from .chroma.model import ReChroma
from .chroma.layers import ReChromaSingleStreamBlock, ReChromaDoubleStreamBlock
from comfy.ldm.lightricks.model import LTXVModel
#from comfy.ldm.chroma.layers import SingleStreamBlock as ChromaSingleStreamBlock, DoubleStreamBlock as ChromaDoubleStreamBlock
from .lightricks.model import ReLTXVModel
#from .chroma.layers import ReChromaSingleStreamBlock, ReChromaDoubleStreamBlock
from comfy.ldm.modules.diffusionmodules.openaimodel import UNetModel, ResBlock
from comfy.ldm.modules.attention import SpatialTransformer, BasicTransformerBlock, CrossAttention
from .sd.openaimodel import ReUNetModel, ReResBlock
from .sd.attention import ReBasicTransformerBlock, ReCrossAttention, ReSpatialTransformer
from .latents import get_orthogonal, get_cosine_similarity
from .style_transfer import StyleWCT, WaveletStyleWCT, Retrojector, StyleMMDiT_Model
from .res4lyf import RESplain
from .helper import parse_range_string
from comfy.model_sampling import *
class PRED:
    """Groups the comfy.model_sampling prediction classes by family and maps a
    model_sampling instance to the family class found in its MRO."""
    TYPE_VP    = {CONST}
    TYPE_VE    = {EPS}
    TYPE_VPRED = {V_PREDICTION, EDM}
    TYPE_X0    = {X0, IMG_TO_IMG}
    TYPE_ALL   = TYPE_VP | TYPE_VE | TYPE_VPRED | TYPE_X0

    @classmethod
    def get_type(cls, model_sampling):
        """Return the first base class of `model_sampling` that belongs to a
        known prediction family, or None if no family matches."""
        for base in type(model_sampling).__mro__:
            if base in cls.TYPE_ALL:
                return base
        return None
def time_snr_shift_exponential(alpha, t):
    """Exponential timestep/SNR shift: exp(alpha) / (exp(alpha) + (1/t - 1)).

    alpha = 0 is the identity (returns t). Changes from the original: exp(alpha)
    is computed once instead of twice, and the no-op `** 1.0` is dropped.
    """
    e_alpha = math.exp(alpha)
    return e_alpha / (e_alpha + (1.0 / t - 1.0))
def time_snr_shift_linear(alpha, t):
    """Linear timestep shift: alpha*t / (1 + (alpha - 1)*t).

    alpha == 1.0 is the identity and is short-circuited.
    """
    if alpha == 1.0:
        return t
    numerator = alpha * t
    denominator = 1 + (alpha - 1) * t
    return numerator / denominator
# Values accepted by torch.compile(mode=...); surfaced as a dropdown choice in
# the TorchCompileModels node below.
COMPILE_MODES = ["default", "max-autotune", "max-autotune-no-cudagraphs", "reduce-overhead"]
class TorchCompileModels:
    """ComfyUI node: wraps each transformer block of a diffusion model with
    torch.compile and patches the compiled blocks back in via add_object_patch.

    Supports any architecture that stores its blocks under one of the attribute
    names in BLOCK_LIST_ATTRS (Flux, SD3.5, HiDream, WAN, Aura, ...).
    """

    # Attribute names under which supported architectures keep their block lists.
    BLOCK_LIST_ATTRS = (
        "double_blocks", "single_blocks",
        "double_layers", "single_layers",
        "double_stream_blocks", "single_stream_blocks",
        "joint_blocks", "blocks",
    )

    def __init__(self):
        # Guard so the blocks are compiled at most once per node instance.
        self._compiled = False

    @classmethod
    def INPUT_TYPES(s):
        return {"required": {
            "model"                   : ("MODEL",),
            "backend"                 : (["inductor", "cudagraphs"],),
            "fullgraph"               : ("BOOLEAN", {"default": False, "tooltip": "Enable full graph mode"}),
            "mode"                    : (COMPILE_MODES, {"default": "default"}),
            "dynamic"                 : ("BOOLEAN", {"default": False, "tooltip": "Enable dynamic mode"}),
            "dynamo_cache_size_limit" : ("INT", {"default": 64, "min": 0, "max": 1024, "step": 1, "tooltip": "torch._dynamo.config.cache_size_limit"}),
            "triton_max_block_x"      : ("INT", {"default": 0, "min": 0, "max": 4294967296, "step": 1})
        }}

    RETURN_TYPES = ("MODEL",)
    RETURN_NAMES = ("model",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/model_patches"

    def main(self,
            model,
            backend                 = "inductor",
            mode                    = "default",
            fullgraph               = False,
            dynamic                 = False,
            dynamo_cache_size_limit = 64,
            triton_max_block_x      = 0,
            ):
        """Clone the model, compile every block list found on the diffusion
        model, and return the patched clone.

        Raises RuntimeError if no known block list is found or compilation fails.
        """
        m = model.clone()
        diffusion_model = m.get_model_object("diffusion_model")

        torch._dynamo.config.cache_size_limit = dynamo_cache_size_limit

        if triton_max_block_x > 0:
            import os
            # Fix: previously the value was hard-coded to "4096", ignoring the
            # user-supplied triton_max_block_x.
            os.environ["TRITON_MAX_BLOCK_X"] = str(triton_max_block_x)

        if not self._compiled:
            try:
                for attr in self.BLOCK_LIST_ATTRS:
                    if hasattr(diffusion_model, attr):
                        for i, block in enumerate(getattr(diffusion_model, attr)):
                            m.add_object_patch(
                                f"diffusion_model.{attr}.{i}",
                                torch.compile(block, mode=mode, dynamic=dynamic, fullgraph=fullgraph, backend=backend),
                            )
                        self._compiled = True

                if not self._compiled:
                    raise RuntimeError("Model not compiled. Verify that this is a Flux, SD3.5, HiDream, WAN, or Aura model!")

                compile_settings = {
                    "backend"  : backend,
                    "mode"     : mode,
                    "fullgraph": fullgraph,
                    "dynamic"  : dynamic,
                }
                setattr(m.model, "compile_settings", compile_settings)
            except Exception as e:
                # Fix: a bare `except:` also swallowed KeyboardInterrupt and
                # discarded the original traceback; chain the cause instead.
                raise RuntimeError("Failed to compile model. Verify that this is a Flux, SD3.5, HiDream, WAN, or Aura model!") from e

        return (m, )
class ReWanPatcherAdvanced:
    """ComfyUI node: toggles WAN regional conditioning by re-classing the model
    and its attention blocks to their Re* variants in place."""

    def __init__(self):
        # Last-known sliding-window settings; compared against incoming values to
        # decide whether an already-patched model needs its windows refreshed.
        # NOTE(review): these are never written after __init__, so the refresh
        # branch fires whenever the inputs differ from these defaults — confirm
        # whether they were meant to be updated after patching.
        self.sliding_window_size      = 0
        self.sliding_window_self_attn = "false"

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "model"                    : ("MODEL",),
                #"self_attn_blocks"        : ("STRING", {"default": "0,1,2,3,4,5,6,7,8,9,", "multiline": True}),
                "self_attn_blocks"         : ("STRING", {"default": "all", "multiline": True}),
                "cross_attn_blocks"        : ("STRING", {"default": "all", "multiline": True}),
                "enable"                   : ("BOOLEAN", {"default": True}),
                "sliding_window_self_attn" : (['false', 'standard', 'circular'], {"default": "false"}),
                "sliding_window_frames"    : ("INT", {"default": 60, "min": 4, "max": 0xffffffffffffffff, "step": 4, "tooltip": "How many real frames each frame sees. Divide frames by 4 to get real frames."}),
            }
        }

    RETURN_TYPES = ("MODEL",)
    RETURN_NAMES = ("model",)
    CATEGORY     = "RES4LYF/model_patches"
    FUNCTION     = "main"

    def main(self, model, self_attn_blocks, cross_attn_blocks, sliding_window_self_attn="false", sliding_window_frames=60, style_dtype="float32", enable=True, force=False):
        """Patch (or unpatch) a WAN model's self/cross attention classes.

        Raises ValueError if the model is not a WAN model.
        """
        style_dtype = getattr(torch, style_dtype) if style_dtype != "default" else None
        # Style state is attached to the shared diffusion model in place, before cloning.
        model.model.diffusion_model.style_dtype    = style_dtype
        model.model.diffusion_model.proj_weights   = None
        model.model.diffusion_model.y0_adain_embed = None

        # 4 real frames per latent frame.
        sliding_window_size = sliding_window_frames // 4

        self_attn_blocks  = parse_range_string(self_attn_blocks)
        cross_attn_blocks = parse_range_string(cross_attn_blocks)

        T2V = type(model.model.model_config) is comfy.supported_models.WAN21_T2V

        if (enable or force) and model.model.diffusion_model.__class__ == WanModel:
            m = model.clone()
            m.model.diffusion_model.__class__ = ReWanModel
            m.model.diffusion_model.threshold_inv = False
            for i, block in enumerate(m.model.diffusion_model.blocks):
                block.__class__ = ReWanAttentionBlock
                if i in self_attn_blocks:
                    if sliding_window_self_attn != "false":
                        block.self_attn.__class__    = ReWanSlidingSelfAttention
                        block.self_attn.winderz      = sliding_window_size
                        block.self_attn.winderz_type = sliding_window_self_attn
                    else:
                        block.self_attn.__class__    = ReWanSelfAttention
                        block.self_attn.winderz_type = "false"
                else:
                    block.self_attn.__class__ = ReWanRawSelfAttention
                if i in cross_attn_blocks:
                    if T2V:
                        if False: # sliding cross-attention currently disabled (sliding_window_self_attn != "false")
                            block.cross_attn.__class__    = ReWanT2VSlidingCrossAttention
                            block.cross_attn.winderz      = sliding_window_size
                            block.cross_attn.winderz_type = sliding_window_self_attn
                        else:
                            block.cross_attn.__class__ = ReWanT2VCrossAttention
                    else:
                        block.cross_attn.__class__ = ReWanI2VCrossAttention
                block.idx            = i
                block.self_attn.idx  = i
                block.cross_attn.idx = i # 40 total blocks (i == 39)
        elif enable and (sliding_window_self_attn != self.sliding_window_self_attn or sliding_window_size != self.sliding_window_size) and model.model.diffusion_model.__class__ == ReWanModel:
            # Already patched: only refresh the sliding-window parameters.
            m = model.clone()
            for i, block in enumerate(m.model.diffusion_model.blocks):
                if i in self_attn_blocks:
                    block.self_attn.winderz      = sliding_window_size
                    block.self_attn.winderz_type = sliding_window_self_attn
        elif not enable and model.model.diffusion_model.__class__ == ReWanModel:
            m = model.clone()
            m.model.diffusion_model.__class__ = WanModel
            for i, block in enumerate(m.model.diffusion_model.blocks):
                block.__class__            = WanAttentionBlock
                block.self_attn.__class__  = WanSelfAttention
                # NOTE(review): I2V models are also reverted to the T2V cross-attention
                # class here — confirm this is intended.
                block.cross_attn.__class__ = WanT2VCrossAttention
                block.idx = i
        elif model.model.diffusion_model.__class__ not in {ReWanModel, WanModel}:
            raise ValueError("This node is for enabling regional conditioning for WAN only!")
        else:
            # Fix: `m = model` was not guarded by an else (unlike ReFluxPatcherAdvanced),
            # so a no-op call could leave m unset or discard the patched clone above.
            m = model
        return (m,)
class ReWanPatcher(ReWanPatcherAdvanced):
    """Simplified WAN patcher: applies regional conditioning to every block."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "model"  : ("MODEL",),
                "enable" : ("BOOLEAN", {"default": True}),
            }
        }

    def main(self, model, enable=True, force=False):
        # Delegate to the advanced patcher with "all" ranges for both attention types.
        return super().main(
            model             = model,
            self_attn_blocks  = "all",
            cross_attn_blocks = "all",
            enable            = enable,
            force             = force,
        )
class ReDoubleStreamBlockNoMask(ReDoubleStreamBlock):
    # Variant for double-stream blocks excluded from regional conditioning:
    # discards any incoming attention mask before delegating to the parent.
    def forward(self, c, mask=None):
        return super().forward(c, mask=None)
class ReSingleStreamBlockNoMask(ReSingleStreamBlock):
    # Variant for single-stream blocks excluded from regional conditioning:
    # discards any incoming attention mask before delegating to the parent.
    def forward(self, c, mask=None):
        return super().forward(c, mask=None)
class ReFluxPatcherAdvanced:
    """ComfyUI node: toggles Flux regional conditioning by re-classing the model
    and the selected double/single stream blocks to their Re* variants."""

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "model"               : ("MODEL",),
                "doublestream_blocks" : ("STRING", {"default": "all", "multiline": True}),
                "singlestream_blocks" : ("STRING", {"default": "all", "multiline": True}),
                "style_dtype"         : (["default", "bfloat16", "float16", "float32", "float64"], {"default": "float64"}),
                "enable"              : ("BOOLEAN", {"default": True}),
            }
        }

    RETURN_TYPES = ("MODEL",)
    RETURN_NAMES = ("model",)
    CATEGORY     = "RES4LYF/model_patches"
    FUNCTION     = "main"

    def main(self, model, doublestream_blocks, singlestream_blocks, style_dtype, enable=True, force=False):
        """Patch (enable) or unpatch (disable) a Flux model's block classes.

        Raises ValueError if the model is not Flux."""
        double_ids = parse_range_string(doublestream_blocks)
        single_ids = parse_range_string(singlestream_blocks)

        dtype = None if style_dtype == "default" else getattr(torch, style_dtype)

        # Style-transfer state is attached to the (shared) diffusion model in place.
        dm = model.model.diffusion_model
        dm.style_dtype    = dtype
        dm.proj_weights   = None
        dm.y0_adain_embed = None
        dm.adain_pw_cache = None
        dm.StyleWCT       = StyleWCT()
        dm.Retrojector    = Retrojector(dm.img_in, pinv_dtype=dtype, dtype=dtype)

        if (enable or force) and dm.__class__ == Flux:
            m = model.clone()
            patched = m.model.diffusion_model
            patched.__class__ = ReFlux
            patched.threshold_inv = False
            for i, blk in enumerate(patched.double_blocks):
                blk.__class__ = ReDoubleStreamBlock if i in double_ids else ReDoubleStreamBlockNoMask
                blk.idx = i
            for i, blk in enumerate(patched.single_blocks):
                blk.__class__ = ReSingleStreamBlock if i in single_ids else ReSingleStreamBlockNoMask
                blk.idx = i
        elif not enable and dm.__class__ == ReFlux:
            m = model.clone()
            reverted = m.model.diffusion_model
            reverted.__class__ = Flux
            for i, blk in enumerate(reverted.double_blocks):
                blk.__class__ = DoubleStreamBlock
                blk.idx = i
            for i, blk in enumerate(reverted.single_blocks):
                blk.__class__ = SingleStreamBlock
                blk.idx = i
        elif dm.__class__ not in {ReFlux, Flux}:
            raise ValueError("This node is for enabling regional conditioning for Flux only!")
        else:
            m = model
        return (m,)
class ReFluxPatcher(ReFluxPatcherAdvanced):
    """Simplified Flux patcher: patches every double/single stream block."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "model"       : ("MODEL",),
                "style_dtype" : (["default", "bfloat16", "float16", "float32", "float64"], {"default": "float64"}),
                "enable"      : ("BOOLEAN", {"default": True}),
            }
        }

    def main(self, model, style_dtype="float32", enable=True, force=False):
        # Delegate with "all" block ranges.
        return super().main(
            model               = model,
            doublestream_blocks = "all",
            singlestream_blocks = "all",
            style_dtype         = style_dtype,
            enable              = enable,
            force               = force,
        )
class ReReduxPatcher:
    """ComfyUI node: toggles the Re* class on a Redux style model (no clone is
    made — the style model is modified in place)."""

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "style_model" : ("STYLE_MODEL",),
                "style_dtype" : (["default", "bfloat16", "float16", "float32", "float64"], {"default": "float64"}),
                "enable"      : ("BOOLEAN", {"default": True}),
            }
        }

    RETURN_TYPES = ("STYLE_MODEL",)
    RETURN_NAMES = ("style_model",)
    CATEGORY     = "RES4LYF/model_patches"
    FUNCTION     = "main"
    EXPERIMENTAL = True

    def main(self, style_model, style_dtype, enable=True, force=False):
        """Swap the encoder class; raises ValueError for non-Redux models."""
        sm = style_model.model
        sm.style_dtype     = None if style_dtype == "default" else getattr(torch, style_dtype)
        sm.proj_weights    = None
        sm.y0_adain_embed  = None

        if (enable or force) and sm.__class__ == ReduxImageEncoder:
            m = style_model  # intentionally not cloned
            m.model.__class__ = ReReduxImageEncoder
            m.model.threshold_inv = False
        elif not enable and sm.__class__ == ReReduxImageEncoder:
            m = style_model  # intentionally not cloned
            m.model.__class__ = ReduxImageEncoder
        elif sm.__class__ not in {ReReduxImageEncoder, ReduxImageEncoder}:
            raise ValueError("This node is for enabling style conditioning for Redux only!")
        else:
            m = style_model
        return (m,)
class ReChromaDoubleStreamBlockNoMask(ReChromaDoubleStreamBlock):
    # Variant for Chroma double-stream blocks excluded from regional conditioning:
    # discards any incoming attention mask before delegating to the parent.
    def forward(self, c, mask=None):
        return super().forward(c, mask=None)
class ReChromaSingleStreamBlockNoMask(ReChromaSingleStreamBlock):
    # Variant for Chroma single-stream blocks excluded from regional conditioning:
    # discards any incoming attention mask before delegating to the parent.
    def forward(self, c, mask=None):
        return super().forward(c, mask=None)
class ReChromaPatcherAdvanced:
    """ComfyUI node: toggles Chroma regional conditioning by re-classing the
    model and the selected double/single stream blocks to their Re* variants."""

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "model"               : ("MODEL",),
                "doublestream_blocks" : ("STRING", {"default": "all", "multiline": True}),
                "singlestream_blocks" : ("STRING", {"default": "all", "multiline": True}),
                "style_dtype"         : (["default", "bfloat16", "float16", "float32", "float64"], {"default": "float64"}),
                "enable"              : ("BOOLEAN", {"default": True}),
            }
        }

    RETURN_TYPES = ("MODEL",)
    RETURN_NAMES = ("model",)
    CATEGORY     = "RES4LYF/model_patches"
    FUNCTION     = "main"

    def main(self, model, doublestream_blocks, singlestream_blocks, style_dtype, enable=True, force=False):
        """Patch (enable) or unpatch (disable) a Chroma model's block classes.

        Raises ValueError if the model is not Chroma."""
        double_ids = parse_range_string(doublestream_blocks)
        single_ids = parse_range_string(singlestream_blocks)

        dtype = None if style_dtype == "default" else getattr(torch, style_dtype)

        # Style-transfer state is attached to the (shared) diffusion model in place.
        dm = model.model.diffusion_model
        dm.style_dtype    = dtype
        dm.proj_weights   = None
        dm.y0_adain_embed = None
        dm.StyleWCT       = StyleWCT()
        dm.Retrojector    = Retrojector(dm.img_in, pinv_dtype=dtype, dtype=dtype)

        if (enable or force) and dm.__class__ == Chroma:
            m = model.clone()
            patched = m.model.diffusion_model
            patched.__class__ = ReChroma
            patched.threshold_inv = False
            for i, blk in enumerate(patched.double_blocks):
                blk.__class__ = ReChromaDoubleStreamBlock if i in double_ids else ReChromaDoubleStreamBlockNoMask
                blk.idx = i
            for i, blk in enumerate(patched.single_blocks):
                blk.__class__ = ReChromaSingleStreamBlock if i in single_ids else ReChromaSingleStreamBlockNoMask
                blk.idx = i
        elif not enable and dm.__class__ == ReChroma:
            m = model.clone()
            reverted = m.model.diffusion_model
            reverted.__class__ = Chroma
            for i, blk in enumerate(reverted.double_blocks):
                blk.__class__ = DoubleStreamBlock
                blk.idx = i
            for i, blk in enumerate(reverted.single_blocks):
                blk.__class__ = SingleStreamBlock
                blk.idx = i
        elif dm.__class__ not in {ReChroma, Chroma}:
            raise ValueError("This node is for enabling regional conditioning for Chroma only!")
        else:
            m = model
        return (m,)
class ReChromaPatcher(ReChromaPatcherAdvanced):
    """Simplified Chroma patcher: patches every double/single stream block."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "model"       : ("MODEL",),
                "style_dtype" : (["default", "bfloat16", "float16", "float32", "float64"], {"default": "float64"}),
                "enable"      : ("BOOLEAN", {"default": True}),
            }
        }

    def main(self, model, style_dtype="float32", enable=True, force=False):
        # Delegate with "all" block ranges.
        return super().main(
            model               = model,
            doublestream_blocks = "all",
            singlestream_blocks = "all",
            style_dtype         = style_dtype,
            enable              = enable,
            force               = force,
        )
"""class ReLTXVDoubleStreamBlockNoMask(ReLTXVDoubleStreamBlock):
def forward(self, c, mask=None):
return super().forward(c, mask=None)
class ReLTXVSingleStreamBlockNoMask(ReLTXVSingleStreamBlock):
def forward(self, c, mask=None):
return super().forward(c, mask=None)"""
class ReLTXVPatcherAdvanced:
    # ComfyUI node: toggles the Re* (regional-conditioning) model class for LTXV.
    # Unlike the Flux/Chroma patchers, per-block re-classing is commented out below
    # (the dead code still references the Chroma block classes), so only the
    # top-level diffusion model class is swapped.
    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "model"               : ("MODEL",),
                "doublestream_blocks" : ("STRING", {"default": "all", "multiline": True}),
                "singlestream_blocks" : ("STRING", {"default": "all", "multiline": True}),
                "style_dtype"         : (["default", "bfloat16", "float16", "float32", "float64"], {"default": "float64"}),
                "enable"              : ("BOOLEAN", {"default": True}),
            }
        }

    RETURN_TYPES = ("MODEL",)
    RETURN_NAMES = ("model",)
    CATEGORY     = "RES4LYF/model_patches"
    FUNCTION     = "main"

    def main(self, model, doublestream_blocks, singlestream_blocks, style_dtype, enable=True, force=False):
        # NOTE(review): the parsed block ranges are currently unused because the
        # per-block patching below is commented out — kept for interface parity
        # with the other patchers.
        doublestream_blocks = parse_range_string(doublestream_blocks)
        singlestream_blocks = parse_range_string(singlestream_blocks)

        # Resolve the requested torch dtype ("default" -> None).
        style_dtype = getattr(torch, style_dtype) if style_dtype != "default" else None

        # Style-transfer state is attached to the (shared) diffusion model in place,
        # before cloning, so it is present regardless of the branch taken below.
        model.model.diffusion_model.style_dtype = style_dtype
        model.model.diffusion_model.proj_weights = None
        model.model.diffusion_model.y0_adain_embed = None
        model.model.diffusion_model.StyleWCT = StyleWCT()
        model.model.diffusion_model.Retrojector = Retrojector(model.model.diffusion_model.patchify_proj, pinv_dtype=style_dtype, dtype=style_dtype)

        if (enable or force) and model.model.diffusion_model.__class__ == LTXVModel:
            m = model.clone()
            m.model.diffusion_model.__class__ = ReLTXVModel
            m.model.diffusion_model.threshold_inv = False
            # Dead code kept from a copy of the Chroma patcher (note the class names).
            """for i, block in enumerate(m.model.diffusion_model.double_blocks):
                if i in doublestream_blocks:
                    block.__class__ = ReChromaDoubleStreamBlock
                else:
                    block.__class__ = ReChromaDoubleStreamBlockNoMask
                block.idx = i
            for i, block in enumerate(m.model.diffusion_model.single_blocks):
                if i in singlestream_blocks:
                    block.__class__ = ReChromaSingleStreamBlock
                else:
                    block.__class__ = ReChromaSingleStreamBlockNoMask
                block.idx = i"""
        elif not enable and model.model.diffusion_model.__class__ == ReLTXVModel:
            m = model.clone()
            m.model.diffusion_model.__class__ = LTXVModel
            """for i, block in enumerate(m.model.diffusion_model.double_blocks):
                block.__class__ = DoubleStreamBlock
                block.idx = i
            for i, block in enumerate(m.model.diffusion_model.single_blocks):
                block.__class__ = SingleStreamBlock
                block.idx = i"""
        #elif model.model.diffusion_model.__class__ != LTXVModel and model.model.diffusion_model.__class__ != ReLTXVModel:
        elif model.model.diffusion_model.__class__ not in {ReLTXVModel, LTXVModel}:
            raise ValueError("This node is for enabling regional conditioning for LTXV only!")
        else:
            m = model
        return (m,)
class ReLTXVPatcher(ReLTXVPatcherAdvanced):
    """Simplified LTXV patcher: delegates with "all" block ranges."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "model"       : ("MODEL",),
                "style_dtype" : (["default", "bfloat16", "float16", "float32", "float64"], {"default": "float64"}),
                "enable"      : ("BOOLEAN", {"default": True}),
            }
        }

    def main(self, model, style_dtype="float32", enable=True, force=False):
        # Delegate to the advanced patcher.
        return super().main(
            model               = model,
            doublestream_blocks = "all",
            singlestream_blocks = "all",
            style_dtype         = style_dtype,
            enable              = enable,
            force               = force,
        )
class ReSDPatcherAdvanced:
    """ComfyUI node: toggles regional-conditioning (Re*) classes on SD1.5/SDXL
    UNets by re-classing ResBlocks, SpatialTransformers, their transformer
    blocks, and both attention layers in place."""

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "model"               : ("MODEL",),
                "doublestream_blocks" : ("STRING", {"default": "all", "multiline": True}),
                "singlestream_blocks" : ("STRING", {"default": "all", "multiline": True}),
                "style_dtype"         : (["default", "bfloat16", "float16", "float32", "float64"], {"default": "float64"}),
                "enable"              : ("BOOLEAN", {"default": True}),
            }
        }

    RETURN_TYPES = ("MODEL",)
    RETURN_NAMES = ("model",)
    CATEGORY     = "RES4LYF/model_patches"
    FUNCTION     = "main"
    #EXPERIMENTAL = True

    @staticmethod
    def _swap_layer_classes(container, res_from, res_to, st_from, st_to, tb_to, attn_to):
        # Re-class ResBlocks and SpatialTransformers (including each transformer
        # block and both of its attention layers) inside one block container.
        for layer in container:
            if isinstance(layer, res_from):
                layer.__class__ = res_to
            if isinstance(layer, st_from):
                layer.__class__ = st_to
                for tb in layer.transformer_blocks:
                    tb.__class__       = tb_to
                    tb.attn1.__class__ = attn_to
                    tb.attn2.__class__ = attn_to

    def main(self, model, doublestream_blocks, singlestream_blocks, style_dtype, enable=True, force=False):
        """Patch (enable) or unpatch (disable) a UNet model.

        NOTE: doublestream_blocks/singlestream_blocks are parsed but (as in the
        original implementation) not used to filter which layers get patched.
        Raises ValueError if the model is not an SD1.5/SDXL UNet.
        """
        doublestream_blocks = parse_range_string(doublestream_blocks)
        singlestream_blocks = parse_range_string(singlestream_blocks)

        style_dtype = getattr(torch, style_dtype) if style_dtype != "default" else None
        # Style-transfer state is attached to the (shared) diffusion model in place.
        model.model.diffusion_model.style_dtype    = style_dtype
        model.model.diffusion_model.proj_weights   = None
        model.model.diffusion_model.y0_adain_embed = None
        model.model.diffusion_model.StyleWCT       = StyleWCT()
        model.model.diffusion_model.Retrojector    = Retrojector(model.model.diffusion_model.input_blocks[0][0], pinv_dtype=style_dtype, dtype=style_dtype, patch_size=1)

        if (enable or force) and model.model.diffusion_model.__class__ == UNetModel:
            m = model.clone()
            dm = m.model.diffusion_model
            dm.__class__ = ReUNetModel
            dm.threshold_inv = False
            for container in (*dm.input_blocks, dm.middle_block, *dm.output_blocks):
                self._swap_layer_classes(container,
                                         ResBlock, ReResBlock,
                                         SpatialTransformer, ReSpatialTransformer,
                                         ReBasicTransformerBlock, ReCrossAttention)
        elif not enable and model.model.diffusion_model.__class__ == ReUNetModel:
            m = model.clone()
            dm = m.model.diffusion_model
            dm.__class__ = UNetModel
            # Fix: the original revert path indexed output_blocks[i[j]] (a TypeError,
            # since i is an int); the shared helper indexes each container correctly.
            for container in (*dm.input_blocks, dm.middle_block, *dm.output_blocks):
                self._swap_layer_classes(container,
                                         ReResBlock, ResBlock,
                                         ReSpatialTransformer, SpatialTransformer,
                                         BasicTransformerBlock, CrossAttention)
        elif model.model.diffusion_model.__class__ not in {ReUNetModel, UNetModel}:
            raise ValueError("This node is for enabling regional conditioning for SD1.5 and SDXL only!")
        else:
            m = model
        return (m,)
class ReSDPatcher(ReSDPatcherAdvanced):
    """Simplified SD1.5/SDXL patcher: delegates with "all" block ranges."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "model"       : ("MODEL",),
                "style_dtype" : (["default", "bfloat16", "float16", "float32", "float64"], {"default": "float64"}),
                "enable"      : ("BOOLEAN", {"default": True}),
            }
        }

    def main(self, model, style_dtype="float32", enable=True, force=False):
        # Delegate to the advanced patcher.
        return super().main(
            model               = model,
            doublestream_blocks = "all",
            singlestream_blocks = "all",
            style_dtype         = style_dtype,
            enable              = enable,
            force               = force,
        )
class HDBlockDoubleNoMask(HDBlockDouble):
    # Variant for HiDream double-stream blocks excluded from regional
    # conditioning: discards any incoming attention mask before delegating.
    def forward(self, c, mask=None):
        return super().forward(c, mask=None)
class HDBlockSingleNoMask(HDBlockSingle):
    # Variant for HiDream single-stream blocks excluded from regional
    # conditioning: discards any incoming attention mask before delegating.
    def forward(self, c, mask=None):
        return super().forward(c, mask=None)
class ReHiDreamPatcherAdvanced:
    # ComfyUI node: toggles HiDream regional conditioning by re-classing the model,
    # its double/single stream blocks, attention, and MoE feed-forward layers to
    # their HD* variants in place.
    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "model"                : ("MODEL",),
                "double_stream_blocks" : ("STRING", {"default": "all", "multiline": True}),
                "single_stream_blocks" : ("STRING", {"default": "all", "multiline": True}),
                "style_dtype"          : (["default", "bfloat16", "float16", "float32", "float64"], {"default": "float64"}),
                "enable"               : ("BOOLEAN", {"default": True}),
            }
        }

    RETURN_TYPES = ("MODEL",)
    RETURN_NAMES = ("model",)
    CATEGORY     = "RES4LYF/model_patches"
    FUNCTION     = "main"

    def main(self, model, double_stream_blocks, single_stream_blocks, style_dtype, enable=True, force=False):
        # Parse "all"/range strings into the sets of block indices that keep masking support.
        double_stream_blocks = parse_range_string(double_stream_blocks)
        single_stream_blocks = parse_range_string(single_stream_blocks)

        # Resolve the requested torch dtype ("default" -> None).
        style_dtype = getattr(torch, style_dtype) if style_dtype != "default" else None

        # Style-transfer state is attached to the (shared) diffusion model in place,
        # before cloning, so it is present regardless of the branch taken below.
        model.model.diffusion_model.style_dtype = style_dtype
        model.model.diffusion_model.proj_weights = None
        model.model.diffusion_model.y0_adain_embed = None
        model.model.diffusion_model.StyleWCT = StyleWCT()
        model.model.diffusion_model.WaveletStyleWCT = WaveletStyleWCT()
        model.model.diffusion_model.Retrojector = Retrojector(model.model.diffusion_model.x_embedder.proj, pinv_dtype=style_dtype, dtype=style_dtype)
        #model.model.diffusion_model.Endojector = Retrojector(model.model.diffusion_model.final_layer.linear, pinv_dtype=style_dtype, dtype=style_dtype, ENDO=True)
        #model.model.diffusion_model.Style = StyleMMDiT_HiDream()
        #model.model.diffusion_model.Style.Retrojector = Retrojector(model.model.diffusion_model.x_embedder.proj, pinv_dtype=style_dtype, dtype=style_dtype)

        # Shared dict: every block and attention layer gets the same sort_buffer object.
        sort_buffer = {}

        if (enable or force) and model.model.diffusion_model.__class__ == HiDreamImageTransformer2DModel:
            m = model.clone()
            m.model.diffusion_model.__class__ = HDModel
            m.model.diffusion_model.threshold_inv = False
            m.model.diffusion_model.final_layer.__class__ = HDLastLayer
            # NOTE(review): the final-layer weights are cast to bfloat16 here and are
            # NOT restored by the disable branch below — confirm this is intended.
            m.model.diffusion_model.final_layer.linear.weight.data = m.model.diffusion_model.final_layer.linear.weight.data.to(torch.bfloat16)
            m.model.diffusion_model.final_layer.linear.bias.data = m.model.diffusion_model.final_layer.linear.bias.data.to(torch.bfloat16)
            for i, block in enumerate(m.model.diffusion_model.double_stream_blocks):
                block.__class__ = HDBlock
                # Blocks outside the selected range get the mask-free variant.
                if i in double_stream_blocks:
                    block.block.__class__ = HDBlockDouble
                else:
                    block.block.__class__ = HDBlockDoubleNoMask
                block.block.attn1.__class__ = HDAttention
                block.block.ff_i.__class__ = HDMOEFeedForwardSwiGLU
                block.block.ff_i.shared_experts.__class__ = HDFeedForwardSwiGLU
                for j in range(len(block.block.ff_i.experts)):
                    block.block.ff_i.experts[j].__class__ = HDFeedForwardSwiGLU
                block.block.ff_i.gate.__class__ = HDMoEGate
                block.block.ff_t.__class__ = HDFeedForwardSwiGLU
                block.block.attn1.single_stream = False
                block.block.attn1.double_stream = True
                block.block.sort_buffer = sort_buffer
                block.block.attn1.sort_buffer = sort_buffer
                block.idx = i
                block.block.idx = i
                block.block.attn1.idx = i
            for i, block in enumerate(m.model.diffusion_model.single_stream_blocks):
                block.__class__ = HDBlock
                if i in single_stream_blocks:
                    block.block.__class__ = HDBlockSingle
                else:
                    block.block.__class__ = HDBlockSingleNoMask
                block.block.attn1.__class__ = HDAttention
                block.block.ff_i.__class__ = HDMOEFeedForwardSwiGLU
                block.block.ff_i.shared_experts.__class__ = HDFeedForwardSwiGLU
                for j in range(len(block.block.ff_i.experts)):
                    block.block.ff_i.experts[j].__class__ = HDFeedForwardSwiGLU
                block.block.ff_i.gate.__class__ = HDMoEGate
                block.block.attn1.single_stream = True
                block.block.attn1.double_stream = False
                block.block.sort_buffer = sort_buffer
                block.block.attn1.sort_buffer = sort_buffer
                block.idx = i
                block.block.idx = i
                block.block.attn1.idx = i
        elif not enable and model.model.diffusion_model.__class__ == HDModel:
            m = model.clone()
            m.model.diffusion_model.__class__ = HiDreamImageTransformer2DModel
            # NOTE(review): only blocks inside the given ranges are reverted here, and
            # the MoE / final-layer classes are not restored — confirm intended.
            for i, block in enumerate(m.model.diffusion_model.double_stream_blocks):
                if i in double_stream_blocks:
                    block.__class__ = HiDreamImageBlock
                    block.block.__class__ = HiDreamImageTransformerBlock
                    block.block.attn1.__class__ = HiDreamAttention
                    block.idx = i
            for i, block in enumerate(m.model.diffusion_model.single_stream_blocks):
                if i in single_stream_blocks:
                    block.__class__ = HiDreamImageBlock
                    block.block.__class__ = HiDreamImageSingleTransformerBlock
                    block.block.attn1.__class__ = HiDreamAttention
                    block.idx = i
        #elif model.model.diffusion_model.__class__ != HDModel and model.model.diffusion_model.__class__ != HiDreamImageTransformer2DModel:
        elif model.model.diffusion_model.__class__ not in {HDModel, HiDreamImageTransformer2DModel}:
            raise ValueError("This node is for enabling regional conditioning for HiDream only!")
        else:
            m = model
        return (m,)
class ReHiDreamPatcher(ReHiDreamPatcherAdvanced):
    """Simplified HiDream patcher: patches every double/single stream block."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "model"       : ("MODEL",),
                "style_dtype" : (["default", "bfloat16", "float16", "float32", "float64"], {"default": "float64"}),
                "enable"      : ("BOOLEAN", {"default": True}),
            }
        }

    def main(self, model, style_dtype="default", enable=True, force=False):
        # Delegate with "all" block ranges.
        return super().main(
            model                = model,
            double_stream_blocks = "all",
            single_stream_blocks = "all",
            style_dtype          = style_dtype,
            enable               = enable,
            force                = force,
        )
class ReJointBlockNoMask(ReJointBlock):
    # Variant for SD3.5 joint blocks excluded from regional conditioning:
    # discards any incoming attention mask before delegating to the parent.
    def forward(self, c, mask=None):
        return super().forward(c, mask=None)
class ReSD35PatcherAdvanced:
    """ComfyUI node: toggles SD3.5 regional conditioning by re-classing the
    MMDiT wrapper and its joint blocks to their Re* variants in place."""

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "model"        : ("MODEL",),
                "joint_blocks" : ("STRING", {"default": "all", "multiline": True}),
                "style_dtype"  : (["default", "bfloat16", "float16", "float32", "float64"], {"default": "float64"}),
                "enable"       : ("BOOLEAN", {"default": True}),
            }
        }

    RETURN_TYPES = ("MODEL",)
    RETURN_NAMES = ("model",)
    CATEGORY     = "RES4LYF/model_patches"
    FUNCTION     = "main"

    def main(self, model, joint_blocks, style_dtype, enable=True, force=False):
        """Patch (enable) or unpatch (disable) an SD3.5 model's joint blocks.

        Raises ValueError if the model is not SD3.5.
        """
        style_dtype = getattr(torch, style_dtype) if style_dtype != "default" else None
        # Style-transfer state is attached to the (shared) diffusion model in place.
        model.model.diffusion_model.style_dtype    = style_dtype
        model.model.diffusion_model.proj_weights   = None
        model.model.diffusion_model.y0_adain_embed = None
        model.model.diffusion_model.StyleWCT       = StyleWCT()
        model.model.diffusion_model.Retrojector    = Retrojector(model.model.diffusion_model.x_embedder.proj, pinv_dtype=style_dtype, dtype=style_dtype)

        joint_blocks = parse_range_string(joint_blocks)

        if (enable or force) and model.model.diffusion_model.__class__ == OpenAISignatureMMDITWrapper:
            m = model.clone()
            m.model.diffusion_model.__class__ = ReOpenAISignatureMMDITWrapper
            m.model.diffusion_model.threshold_inv = False
            for i, block in enumerate(m.model.diffusion_model.joint_blocks):
                if i in joint_blocks:
                    block.__class__ = ReJointBlock
                else:
                    # Fix: was a bare `ReJointBlockNoMask` expression that did nothing —
                    # excluded blocks must actually be re-classed to the mask-free variant.
                    block.__class__ = ReJointBlockNoMask
                block.idx = i
        elif not enable and model.model.diffusion_model.__class__ == ReOpenAISignatureMMDITWrapper:
            m = model.clone()
            m.model.diffusion_model.__class__ = OpenAISignatureMMDITWrapper
            for i, block in enumerate(m.model.diffusion_model.joint_blocks):
                block.__class__ = JointBlock
                block.idx = i
        elif model.model.diffusion_model.__class__ not in {ReOpenAISignatureMMDITWrapper, OpenAISignatureMMDITWrapper}:
            raise ValueError("This node is for enabling regional conditioning for SD3.5 only!")
        else:
            # Fix: `m = model` was not guarded by an else (unlike ReFluxPatcherAdvanced),
            # so it could clobber the patched clone or leave m unset.
            m = model
        return (m,)
class ReSD35Patcher(ReSD35PatcherAdvanced):
    """Simplified SD3.5 regional-conditioning patcher: always patches every joint block."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "model"       : ("MODEL",),
                "style_dtype" : (["default", "bfloat16", "float16", "float32", "float64"], {"default": "float64"}),
                "enable"      : ("BOOLEAN", {"default": True}),
            }
        }

    def main(self, model, style_dtype="float32", enable=True, force=False):
        # Delegate to the advanced node with the full joint-block range selected.
        return super().main(
            model        = model,
            joint_blocks = "all",
            style_dtype  = style_dtype,
            enable       = enable,
            force        = force,
        )
class ReDoubleAttentionNoMask(ReDoubleAttention):
    """ReDoubleAttention variant that always drops the regional mask."""

    def forward(self, c, mask=None):
        # Any provided mask is intentionally discarded.
        return super().forward(c, mask=None)
class ReSingleAttentionNoMask(ReSingleAttention):
    """ReSingleAttention variant that always drops the regional mask."""

    def forward(self, c, mask=None):
        # Any provided mask is intentionally discarded.
        return super().forward(c, mask=None)
class ReAuraPatcherAdvanced:
    # Enables/disables regional conditioning for AuraFlow by hot-swapping the
    # diffusion model wrapper and the attention classes of selected blocks.
    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "model"              : ("MODEL",),
                "doublelayer_blocks" : ("STRING", {"default": "all", "multiline": True}),
                "singlelayer_blocks" : ("STRING", {"default": "all", "multiline": True}),
                "style_dtype"        : (["default", "bfloat16", "float16", "float32", "float64"], {"default": "float64"}),
                "enable"             : ("BOOLEAN", {"default": True}),
            }
        }

    RETURN_TYPES = ("MODEL",)
    RETURN_NAMES = ("model",)
    CATEGORY     = "RES4LYF/model_patches"
    FUNCTION     = "main"

    def main(self, model, doublelayer_blocks, singlelayer_blocks, style_dtype, enable=True, force=False):
        # Range strings (e.g. "all", "0-3,7") select which blocks keep mask-aware attention.
        doublelayer_blocks = parse_range_string(doublelayer_blocks)
        singlelayer_blocks = parse_range_string(singlelayer_blocks)

        style_dtype = getattr(torch, style_dtype) if style_dtype != "default" else None
        # Style-transfer state is installed on the shared diffusion_model before
        # the class check below, so it happens regardless of enable/disable.
        model.model.diffusion_model.style_dtype    = style_dtype
        model.model.diffusion_model.proj_weights   = None
        model.model.diffusion_model.y0_adain_embed = None
        model.model.diffusion_model.StyleWCT       = StyleWCT()
        model.model.diffusion_model.Retrojector    = Retrojector(model.model.diffusion_model.init_x_linear, pinv_dtype=style_dtype, dtype=style_dtype)

        if (enable or force) and model.model.diffusion_model.__class__ == MMDiT:
            # Stock AuraFlow model: swap in the regional (Re*) classes in place.
            m = model.clone()
            m.model.diffusion_model.__class__     = ReMMDiT
            m.model.diffusion_model.threshold_inv = False
            for i, block in enumerate(m.model.diffusion_model.double_layers):
                block.__class__ = ReMMDiTBlock
                if i in doublelayer_blocks:
                    block.attn.__class__ = ReDoubleAttention
                else:
                    block.attn.__class__ = ReDoubleAttentionNoMask
                block.idx = i
            for i, block in enumerate(m.model.diffusion_model.single_layers):
                block.__class__ = ReDiTBlock
                if i in singlelayer_blocks:
                    block.attn.__class__ = ReSingleAttention
                else:
                    block.attn.__class__ = ReSingleAttentionNoMask
                block.idx = i
        elif not enable and model.model.diffusion_model.__class__ == ReMMDiT:
            # Previously patched model: restore the stock classes.
            m = model.clone()
            m.model.diffusion_model.__class__ = MMDiT
            for i, block in enumerate(m.model.diffusion_model.double_layers):
                block.__class__      = MMDiTBlock
                block.attn.__class__ = DoubleAttention
                block.idx            = i
            for i, block in enumerate(m.model.diffusion_model.single_layers):
                block.__class__      = DiTBlock
                block.attn.__class__ = SingleAttention
                block.idx            = i
        elif model.model.diffusion_model.__class__ not in {ReMMDiT, MMDiT}:
            raise ValueError("This node is for enabling regional conditioning for AuraFlow only!")
        # NOTE(review): this reassignment replaces any clone created above; the
        # in-place class swaps still take effect because the clone shares the
        # same diffusion_model object — confirm this fall-through is intended.
        m = model
        return (m,)
class ReAuraPatcher(ReAuraPatcherAdvanced):
    """Simplified AuraFlow regional-conditioning patcher: patches all blocks."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "model"       : ("MODEL",),
                "style_dtype" : (["default", "bfloat16", "float16", "float32", "float64"], {"default": "float64"}),
                "enable"      : ("BOOLEAN", {"default": True}),
            }
        }

    def main(self, model, style_dtype="float32", enable=True, force=False):
        # Delegate to the advanced node with every double/single layer selected.
        return super().main(
            model              = model,
            doublelayer_blocks = "all",
            singlelayer_blocks = "all",
            style_dtype        = style_dtype,
            enable             = enable,
            force              = force,
        )
class FluxOrthoCFGPatcher:
    # EXPERIMENTAL: orthogonalizes the positive/negative conditioning pair for
    # Flux CFG by monkey-patching Flux.forward at the *class* level.
    @classmethod
    def INPUT_TYPES(s):
        return {"required": {
            "model": ("MODEL",),
            "enable": ("BOOLEAN", {"default": True}),
            "ortho_T5": ("BOOLEAN", {"default": True}),
            "ortho_clip_L": ("BOOLEAN", {"default": True}),
            "zero_clip_L": ("BOOLEAN", {"default": True}),
            }
        }

    RETURN_TYPES = ("MODEL",)
    RETURN_NAMES = ("model",)
    CATEGORY     = "RES4LYF/model_patches"
    FUNCTION     = "main"
    EXPERIMENTAL = True

    # Captured once at class-creation time so the patched forward can delegate.
    original_forward = Flux.forward

    @staticmethod
    def new_forward(self, x, timestep, context, y, guidance, control=None, transformer_options={}, **kwargs):
        # Iteratively re-orthogonalize until cosine similarity reaches exactly 0
        # (capped at 500 refinement passes). NOTE(review): the `!= 0` float
        # comparison rarely terminates early — confirm the iteration count is
        # intended rather than a single orthogonalization step.
        for _ in range(500):
            if self.ortho_T5 and get_cosine_similarity(context[0], context[1]) != 0:
                # context[0]/context[1] are assumed to be the cond/uncond T5 embeds — TODO confirm.
                context[0] = get_orthogonal(context[0], context[1])
            if self.ortho_clip_L and get_cosine_similarity(y[0], y[1]) != 0:
                y[0] = get_orthogonal(y[0].unsqueeze(0), y[1].unsqueeze(0)).squeeze(0)
        RESplain("postcossim1: ", get_cosine_similarity(context[0], context[1]))
        RESplain("postcossim2: ", get_cosine_similarity(y[0], y[1]))
        if self.zero_clip_L:
            # Zero the pooled CLIP-L vector for the cond branch.
            y[0] = torch.zeros_like(y[0])
        return FluxOrthoCFGPatcher.original_forward(self, x, timestep, context, y, guidance, control, transformer_options, **kwargs)

    def main(self, model, enable=True, ortho_T5=True, ortho_clip_L=True, zero_clip_L=True):
        m = model.clone()
        if enable:
            # Flags are read by new_forward via `self` (the diffusion model).
            m.model.diffusion_model.ortho_T5     = ortho_T5
            m.model.diffusion_model.ortho_clip_L = ortho_clip_L
            m.model.diffusion_model.zero_clip_L  = zero_clip_L
            # NOTE(review): assigning a bound MethodType to the Flux *class*
            # pins `self` to this specific diffusion model for every Flux
            # instance in the session — confirm intended.
            Flux.forward = types.MethodType(FluxOrthoCFGPatcher.new_forward, m.model.diffusion_model)
        else:
            # Restore the stock forward when disabled.
            Flux.forward = FluxOrthoCFGPatcher.original_forward
        return (m,)
class FluxGuidanceDisable:
    # Disables (or re-enables) Flux distilled-guidance embedding, and can zero
    # the pooled CLIP conditioning by monkey-patching Flux.forward.
    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "model": ("MODEL",),
                "disable": ("BOOLEAN", {"default": True}),
                "zero_clip_L": ("BOOLEAN", {"default": True}),
            }
        }

    RETURN_TYPES = ("MODEL",)
    RETURN_NAMES = ("model",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/model_patches"

    # Captured once at class-creation time so the patch can delegate to it.
    original_forward = Flux.forward

    @staticmethod
    def new_forward(self, x, timestep, context, y, guidance, control=None, transformer_options={}, **kwargs):
        # Replace the pooled conditioning with zeros, then run the stock forward.
        y = torch.zeros_like(y)
        return FluxGuidanceDisable.original_forward(self, x, timestep, context, y, guidance, control, transformer_options, **kwargs)

    def main(self, model, disable=True, zero_clip_L=True):
        m = model.clone()
        if disable:
            # Turn off the distilled-guidance embedding on the shared params object.
            m.model.diffusion_model.params.guidance_embed = False
        else:
            m.model.diffusion_model.params.guidance_embed = True
        #m.model.diffusion_model.zero_clip_L = zero_clip_L
        if zero_clip_L:
            # NOTE(review): patches the Flux class globally and is never
            # restored when zero_clip_L is False — affects every Flux model in
            # the session; confirm intended.
            Flux.forward = types.MethodType(FluxGuidanceDisable.new_forward, m.model.diffusion_model)
        return (m,)
class ModelSamplingAdvanced:
    """Set the flow "shift" using either exponential scaling (default for
    SD3.5M and Flux) or linear scaling (default for SD3.5L and SD3 2B beta),
    rebuilding the model's sampling object and its sigma schedule."""
    @classmethod
    def INPUT_TYPES(s):
        return {"required": {
            "model": ("MODEL",),
            "scaling": (["exponential", "linear"], {"default": 'exponential'}),
            "shift": ("FLOAT", {"default": 3.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False}),
            }
        }

    RETURN_TYPES = ("MODEL",)
    RETURN_NAMES = ("model",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/model_shift"

    def sigma_exponential(self, timestep):
        """Exponential time/SNR shift of a (scaled) timestep tensor."""
        return time_snr_shift_exponential(self.timestep_shift, timestep / self.multiplier)

    def sigma_linear(self, timestep):
        """Linear time/SNR shift of a (scaled) timestep tensor."""
        return time_snr_shift_linear(self.timestep_shift, timestep / self.multiplier)

    def main(self, model, scaling, shift):
        """Return a clone whose model_sampling is rebuilt with the given shift.

        Raises ValueError for model families this node does not support.
        """
        m = model.clone()
        self.timestep_shift = shift
        self.multiplier     = 1000
        timesteps           = 1000
        sampling_base       = None
        # Every supported family uses CONST sampling.
        sampling_type       = comfy.model_sampling.CONST

        config = m.model.model_config
        # BUGFIX: the WAN21 check below was a stray `if` that restarted the
        # chain; all checks are now a single mutually-exclusive elif chain.
        if isinstance(config, (comfy.supported_models.Flux, comfy.supported_models.FluxSchnell, comfy.supported_models.Chroma)):
            self.multiplier = 1
            timesteps       = 10000
            sampling_base   = comfy.model_sampling.ModelSamplingFlux
        elif isinstance(config, comfy.supported_models.AuraFlow):
            self.multiplier = 1
            sampling_base   = comfy.model_sampling.ModelSamplingDiscreteFlow
        elif isinstance(config, (comfy.supported_models.SD3,
                                 comfy.supported_models.HiDream,
                                 comfy.supported_models.HunyuanVideo,
                                 comfy.supported_models.WAN21_T2V,
                                 comfy.supported_models.WAN21_I2V)):
            # These families share multiplier=1000, 1000 timesteps, DiscreteFlow.
            sampling_base = comfy.model_sampling.ModelSamplingDiscreteFlow
        elif isinstance(config, (comfy.supported_models.CosmosT2V, comfy.supported_models.CosmosI2V)):
            self.multiplier = 1
            sampling_base   = comfy.model_sampling.ModelSamplingContinuousEDM
        elif isinstance(config, comfy.supported_models.LTXV):
            self.multiplier = 1000  # incorrect?
            sampling_base   = comfy.model_sampling.ModelSamplingFlux

        if sampling_base is None:
            raise ValueError("Model not supported by ModelSamplingAdvanced")

        # Intentionally shadows the node class name, matching ComfyUI convention.
        class ModelSamplingAdvanced(sampling_base, sampling_type):
            pass

        m.object_patches['model_sampling'] = m.model.model_sampling = ModelSamplingAdvanced(m.model.model_config)
        # Write through __dict__ to bypass any property setters.
        m.model.model_sampling.__dict__['shift']      = self.timestep_shift
        m.model.model_sampling.__dict__['multiplier'] = self.multiplier

        # Recompute the sigma schedule over [1/timesteps, 1] with the new shift.
        s_range = torch.arange(1, timesteps + 1, 1).to(torch.float64)
        if scaling == "exponential":
            ts = self.sigma_exponential((s_range / timesteps) * self.multiplier)
        elif scaling == "linear":
            ts = self.sigma_linear((s_range / timesteps) * self.multiplier)
        m.model.model_sampling.register_buffer('sigmas', ts)
        m.object_patches['model_sampling'].sigmas = m.model.model_sampling.sigmas
        return (m,)
class ModelSamplingAdvancedResolution:
    """Like ModelSamplingAdvanced, but derives the shift from the latent
    resolution via a linear interpolation between base_shift and max_shift
    (exponential scaling is the default for SD3.5M and Flux; linear for
    SD3.5L and SD3 2B beta)."""
    @classmethod
    def INPUT_TYPES(s):
        return {"required": {
            "model": ("MODEL",),
            "scaling": (["exponential", "linear"], {"default": 'exponential'}),
            "max_shift": ("FLOAT", {"default": 1.35, "min": -100.0, "max": 100.0, "step":0.01, "round": False}),
            "base_shift": ("FLOAT", {"default": 0.85, "min": -100.0, "max": 100.0, "step":0.01, "round": False}),
            "latent_image": ("LATENT",),
            }
        }

    RETURN_TYPES = ("MODEL",)
    RETURN_NAMES = ("model",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/model_shift"

    def sigma_exponential(self, timestep):
        """Exponential time/SNR shift of a (scaled) timestep tensor."""
        return time_snr_shift_exponential(self.timestep_shift, timestep / self.multiplier)

    def sigma_linear(self, timestep):
        """Linear time/SNR shift of a (scaled) timestep tensor."""
        return time_snr_shift_linear(self.timestep_shift, timestep / self.multiplier)

    def main(self, model, scaling, max_shift, base_shift, latent_image):
        """Return a clone with a resolution-dependent shift applied.

        Raises ValueError for model families this node does not support.
        """
        m = model.clone()

        height, width = latent_image['samples'].shape[-2:]
        # Linear interpolation of shift vs. token count, anchored at 256 and 4096
        # tokens (same formula as ComfyUI's ModelSamplingFlux node).
        x1 = 256
        x2 = 4096
        mm = (max_shift - base_shift) / (x2 - x1)
        b  = base_shift - mm * x1
        shift = (1 * width * height / (8 * 8 * 2 * 2)) * mm + b

        self.timestep_shift = shift
        self.multiplier     = 1000
        timesteps           = 1000
        # BUGFIX: sampling_base was previously left undefined for unsupported
        # models, producing a NameError instead of a clear ValueError.
        sampling_base       = None
        sampling_type       = comfy.model_sampling.CONST

        config = m.model.model_config
        # BUGFIX: the WAN21 check below was a stray `if` that restarted the
        # chain; all checks are now a single mutually-exclusive elif chain.
        if isinstance(config, (comfy.supported_models.Flux, comfy.supported_models.FluxSchnell, comfy.supported_models.Chroma)):
            self.multiplier = 1
            timesteps       = 10000
            sampling_base   = comfy.model_sampling.ModelSamplingFlux
        elif isinstance(config, comfy.supported_models.AuraFlow):
            self.multiplier = 1
            sampling_base   = comfy.model_sampling.ModelSamplingDiscreteFlow
        elif isinstance(config, (comfy.supported_models.SD3,
                                 comfy.supported_models.HiDream,
                                 comfy.supported_models.HunyuanVideo,
                                 comfy.supported_models.WAN21_T2V,
                                 comfy.supported_models.WAN21_I2V)):
            # These families share multiplier=1000, 1000 timesteps, DiscreteFlow.
            sampling_base = comfy.model_sampling.ModelSamplingDiscreteFlow
        elif isinstance(config, (comfy.supported_models.CosmosT2V, comfy.supported_models.CosmosI2V)):
            self.multiplier = 1
            sampling_base   = comfy.model_sampling.ModelSamplingContinuousEDM
        elif isinstance(config, comfy.supported_models.LTXV):
            sampling_base = comfy.model_sampling.ModelSamplingFlux

        if sampling_base is None:
            raise ValueError("Model not supported by ModelSamplingAdvancedResolution")

        # Intentionally shadows the node class name, matching ComfyUI convention.
        class ModelSamplingAdvanced(sampling_base, sampling_type):
            pass

        m.object_patches['model_sampling'] = m.model.model_sampling = ModelSamplingAdvanced(m.model.model_config)
        # Write through __dict__ to bypass any property setters.
        m.model.model_sampling.__dict__['shift']      = self.timestep_shift
        m.model.model_sampling.__dict__['multiplier'] = self.multiplier

        # Recompute the sigma schedule over [1/timesteps, 1] with the new shift.
        s_range = torch.arange(1, timesteps + 1, 1).to(torch.float64)
        if scaling == "exponential":
            ts = self.sigma_exponential((s_range / timesteps) * self.multiplier)
        elif scaling == "linear":
            ts = self.sigma_linear((s_range / timesteps) * self.multiplier)
        m.model.model_sampling.register_buffer('sigmas', ts)
        m.object_patches['model_sampling'].sigmas = m.model.model_sampling.sigmas
        return (m,)
# Code adapted from https://github.com/comfyanonymous/ComfyUI/
class UNetSave:
    """Output node that writes only the diffusion-model weights to a .safetensors file."""

    def __init__(self):
        self.output_dir = folder_paths.get_output_directory()

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "model": ("MODEL",),
                "filename_prefix": ("STRING", {"default": "models/ComfyUI"}),
            },
            "hidden": {"prompt": "PROMPT", "extra_pnginfo": "EXTRA_PNGINFO"},
        }

    RETURN_TYPES = ()
    FUNCTION     = "save"
    OUTPUT_NODE  = True
    CATEGORY     = "RES4LYF/model_merging"
    DESCRIPTION  = "Save a .safetensors containing only the model data."

    def save(self, model, filename_prefix, prompt=None, extra_pnginfo=None):
        # Passing clip=None and vae=None keeps everything but the UNet out of the file.
        save_checkpoint(
            model,
            clip            = None,
            vae             = None,
            filename_prefix = filename_prefix,
            output_dir      = self.output_dir,
            prompt          = prompt,
            extra_pnginfo   = extra_pnginfo,
        )
        return {}
def save_checkpoint(
        model,
        clip            = None,
        vae             = None,
        clip_vision     = None,
        filename_prefix = None,
        output_dir      = None,
        prompt          = None,
        extra_pnginfo   = None,
):
    """Build modelspec/workflow metadata and save a checkpoint to the output dir.

    Adapted from ComfyUI's model-merging save node. Components passed as None
    (clip, vae, clip_vision) are simply omitted from the saved file.
    """
    full_output_folder, filename, counter, subfolder, filename_prefix = folder_paths.get_save_image_path(filename_prefix, output_dir)
    prompt_info = ""
    if prompt is not None:
        prompt_info = json.dumps(prompt)

    metadata = {}

    # Tag the architecture for model types that have a modelspec identifier.
    enable_modelspec = True
    if isinstance(model.model, comfy.model_base.SDXL):
        if isinstance(model.model, comfy.model_base.SDXL_instructpix2pix):
            metadata["modelspec.architecture"] = "stable-diffusion-xl-v1-edit"
        else:
            metadata["modelspec.architecture"] = "stable-diffusion-xl-v1-base"
    elif isinstance(model.model, comfy.model_base.SDXLRefiner):
        metadata["modelspec.architecture"] = "stable-diffusion-xl-v1-refiner"
    elif isinstance(model.model, comfy.model_base.SVD_img2vid):
        metadata["modelspec.architecture"] = "stable-video-diffusion-img2vid-v1"
    elif isinstance(model.model, comfy.model_base.SD3):
        metadata["modelspec.architecture"] = "stable-diffusion-v3-medium" #TODO: other SD3 variants
    else:
        enable_modelspec = False

    if enable_modelspec:
        metadata["modelspec.sai_model_spec"] = "1.0.0"
        metadata["modelspec.implementation"] = "sgm"
        metadata["modelspec.title"] = "{} {}".format(filename, counter)

    #TODO:
    # "stable-diffusion-v1", "stable-diffusion-v1-inpainting", "stable-diffusion-v2-512",
    # "stable-diffusion-v2-768-v", "stable-diffusion-v2-unclip-l", "stable-diffusion-v2-unclip-h",
    # "v2-inpainting"

    # v-pred EDM models carry their sigma range as extra tensor keys.
    extra_keys = {}
    model_sampling = model.get_model_object("model_sampling")
    if isinstance(model_sampling, comfy.model_sampling.ModelSamplingContinuousEDM):
        if isinstance(model_sampling, comfy.model_sampling.V_PREDICTION):
            extra_keys["edm_vpred.sigma_max"] = torch.tensor(model_sampling.sigma_max).float()
            extra_keys["edm_vpred.sigma_min"] = torch.tensor(model_sampling.sigma_min).float()

    if model.model.model_type == comfy.model_base.ModelType.EPS:
        metadata["modelspec.predict_key"] = "epsilon"
    elif model.model.model_type == comfy.model_base.ModelType.V_PREDICTION:
        metadata["modelspec.predict_key"] = "v"

    if not args.disable_metadata:
        metadata["prompt"] = prompt_info
        if extra_pnginfo is not None:
            for x in extra_pnginfo:
                metadata[x] = json.dumps(extra_pnginfo[x])

    # BUGFIX: the output name previously hard-coded "(unknown)" instead of
    # using the filename derived from the user's filename_prefix.
    output_checkpoint = f"{filename}_{counter:05}_.safetensors"
    output_checkpoint = os.path.join(full_output_folder, output_checkpoint)

    sd_save_checkpoint(output_checkpoint, model, clip, vae, clip_vision, metadata=metadata, extra_keys=extra_keys)
def sd_save_checkpoint(output_path, model, clip=None, vae=None, clip_vision=None, metadata=None, extra_keys=None):
    """Materialize patched weights and write the combined state dict to disk.

    Omitting clip/vae/clip_vision saves only the UNet weights. `extra_keys`
    are merged verbatim into the state dict before saving.
    """
    # BUGFIX (idiom): avoid a mutable default argument for extra_keys.
    if extra_keys is None:
        extra_keys = {}

    clip_sd = None
    load_models = [model]
    if clip is not None:
        load_models.append(clip.load_model())
        clip_sd = clip.get_sd()

    # force_patch_weights bakes LoRA/patches into the weights before saving.
    comfy.model_management.load_models_gpu(load_models, force_patch_weights=True)
    clip_vision_sd = clip_vision.get_sd() if clip_vision is not None else None
    vae_sd = vae.get_sd() if vae is not None else None #THIS ALLOWS SAVING UNET ONLY

    sd = model.model.state_dict_for_saving(clip_sd, vae_sd, clip_vision_sd)
    for k, v in extra_keys.items():
        sd[k] = v

    # safetensors requires contiguous tensors.
    for k in sd:
        t = sd[k]
        if not t.is_contiguous():
            sd[k] = t.contiguous()

    comfy.utils.save_torch_file(sd, output_path, metadata=metadata)
# Code adapted from https://github.com/kijai/ComfyUI-KJNodes
class TorchCompileModelFluxAdvanced:
    """Selectively torch.compile the double/single transformer blocks of a Flux model."""

    def __init__(self):
        # Guard so repeated executions of the node do not re-compile.
        self._compiled = False

    @classmethod
    def INPUT_TYPES(s):
        return {"required": {
            "model": ("MODEL",),
            "backend": (["inductor", "cudagraphs"],),
            "fullgraph": ("BOOLEAN", {"default": False, "tooltip": "Enable full graph mode"}),
            "mode": (["default", "max-autotune", "max-autotune-no-cudagraphs", "reduce-overhead"], {"default": "default"}),
            "double_blocks": ("STRING", {"default": "0-18", "multiline": True}),
            "single_blocks": ("STRING", {"default": "0-37", "multiline": True}),
            "dynamic": ("BOOLEAN", {"default": False, "tooltip": "Enable dynamic mode"}),
        }}

    RETURN_TYPES = ("MODEL",)
    RETURN_NAMES = ("model",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/model_patches"

    def parse_blocks(self, blocks_str):
        """Parse a comma list of indices and inclusive ranges ("0-3,7") into a flat list."""
        blocks = []
        for part in blocks_str.split(','):
            part = part.strip()
            if '-' in part:
                start, end = map(int, part.split('-'))
                blocks.extend(range(start, end + 1))  # ranges are inclusive
            else:
                blocks.append(int(part))
        return blocks

    def main(self,
            model,
            backend       = "inductor",
            mode          = "default",
            fullgraph     = False,
            single_blocks = "0-37",
            double_blocks = "0-18",
            dynamic       = False,
            ):
        """Return a clone with the selected blocks wrapped by torch.compile."""
        single_block_list = self.parse_blocks(single_blocks)
        double_block_list = self.parse_blocks(double_blocks)

        m = model.clone()
        diffusion_model = m.get_model_object("diffusion_model")

        if not self._compiled:
            try:
                for i, block in enumerate(diffusion_model.double_blocks):
                    if i in double_block_list:
                        m.add_object_patch(f"diffusion_model.double_blocks.{i}", torch.compile(block, mode=mode, dynamic=dynamic, fullgraph=fullgraph, backend=backend))
                for i, block in enumerate(diffusion_model.single_blocks):
                    if i in single_block_list:
                        m.add_object_patch(f"diffusion_model.single_blocks.{i}", torch.compile(block, mode=mode, dynamic=dynamic, fullgraph=fullgraph, backend=backend))
                self._compiled = True
                compile_settings = {
                    "backend": backend,
                    "mode": mode,
                    "fullgraph": fullgraph,
                    "dynamic": dynamic,
                }
                setattr(m.model, "compile_settings", compile_settings)
            except Exception as e:
                # BUGFIX: was a bare `except:` that discarded the real error
                # (and caught KeyboardInterrupt); chain the cause instead.
                raise RuntimeError("Failed to compile model. Verify that this is a Flux model!") from e

        return (m, )
# rest of the layers that are not patched
# diffusion_model.final_layer = torch.compile(diffusion_model.final_layer, mode=mode, fullgraph=fullgraph, backend=backend)
# diffusion_model.guidance_in = torch.compile(diffusion_model.guidance_in, mode=mode, fullgraph=fullgraph, backend=backend)
# diffusion_model.img_in = torch.compile(diffusion_model.img_in, mode=mode, fullgraph=fullgraph, backend=backend)
# diffusion_model.time_in = torch.compile(diffusion_model.time_in, mode=mode, fullgraph=fullgraph, backend=backend)
# diffusion_model.txt_in = torch.compile(diffusion_model.txt_in, mode=mode, fullgraph=fullgraph, backend=backend)
# diffusion_model.vector_in = torch.compile(diffusion_model.vector_in, mode=mode, fullgraph=fullgraph, backend=backend)
# @torch.compile(mode="default", dynamic=False, fullgraph=False, backend="inductor")
class TorchCompileModelAura:
    """torch.compile every double/single layer of an AuraFlow model."""

    def __init__(self):
        # Guard so repeated executions of the node do not re-compile.
        self._compiled = False

    @classmethod
    def INPUT_TYPES(s):
        return {"required": {
            "model": ("MODEL",),
            "backend": (["inductor", "cudagraphs"],),
            "fullgraph": ("BOOLEAN", {"default": False, "tooltip": "Enable full graph mode"}),
            "mode": (COMPILE_MODES , {"default": "default"}),
            "dynamic": ("BOOLEAN", {"default": False, "tooltip": "Enable dynamic mode"}),
            "dynamo_cache_size_limit": ("INT", {"default": 64, "min": 0, "max": 1024, "step": 1, "tooltip": "torch._dynamo.config.cache_size_limit"}),
        }}

    RETURN_TYPES = ("MODEL",)
    RETURN_NAMES = ("model",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/model_patches"

    def main(self,
            model,
            backend                 = "inductor",
            mode                    = "default",
            fullgraph               = False,
            dynamic                 = False,
            dynamo_cache_size_limit = 64,
            ):
        """Return a clone with all double/single layers wrapped by torch.compile."""
        m = model.clone()
        diffusion_model = m.get_model_object("diffusion_model")
        # Raise dynamo's recompile cache limit (global process-wide setting).
        torch._dynamo.config.cache_size_limit = dynamo_cache_size_limit

        if not self._compiled:
            try:
                for i, block in enumerate(diffusion_model.double_layers):
                    m.add_object_patch(f"diffusion_model.double_layers.{i}", torch.compile(block, mode=mode, dynamic=dynamic, fullgraph=fullgraph, backend=backend))
                for i, block in enumerate(diffusion_model.single_layers):
                    m.add_object_patch(f"diffusion_model.single_layers.{i}", torch.compile(block, mode=mode, dynamic=dynamic, fullgraph=fullgraph, backend=backend))
                self._compiled = True
                compile_settings = {
                    "backend": backend,
                    "mode": mode,
                    "fullgraph": fullgraph,
                    "dynamic": dynamic,
                }
                setattr(m.model, "compile_settings", compile_settings)
            except Exception as e:
                # BUGFIX: was a bare `except:` that discarded the real error
                # (and caught KeyboardInterrupt); chain the cause instead.
                raise RuntimeError("Failed to compile model. Verify that this is an AuraFlow model!") from e

        return (m, )
class TorchCompileModelSD35:
    """torch.compile every joint block of an SD3.5 model."""

    def __init__(self):
        # Guard so repeated executions of the node do not re-compile.
        self._compiled = False

    @classmethod
    def INPUT_TYPES(s):
        return {"required": {
            "model": ("MODEL",),
            "backend": (["inductor", "cudagraphs"],),
            "fullgraph": ("BOOLEAN", {"default": False, "tooltip": "Enable full graph mode"}),
            "mode": (COMPILE_MODES , {"default": "default"}),
            "dynamic": ("BOOLEAN", {"default": False, "tooltip": "Enable dynamic mode"}),
            "dynamo_cache_size_limit": ("INT", {"default": 64, "min": 0, "max": 1024, "step": 1, "tooltip": "torch._dynamo.config.cache_size_limit"}),
        }}

    RETURN_TYPES = ("MODEL",)
    RETURN_NAMES = ("model",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/model_patches"

    def main(self,
            model,
            backend                 = "inductor",
            mode                    = "default",
            fullgraph               = False,
            dynamic                 = False,
            dynamo_cache_size_limit = 64,
            ):
        """Return a clone with all joint blocks wrapped by torch.compile."""
        m = model.clone()
        diffusion_model = m.get_model_object("diffusion_model")
        # Raise dynamo's recompile cache limit (global process-wide setting).
        torch._dynamo.config.cache_size_limit = dynamo_cache_size_limit

        if not self._compiled:
            try:
                for i, block in enumerate(diffusion_model.joint_blocks):
                    m.add_object_patch(f"diffusion_model.joint_blocks.{i}", torch.compile(block, mode=mode, dynamic=dynamic, fullgraph=fullgraph, backend=backend))
                self._compiled = True
                compile_settings = {
                    "backend"  : backend,
                    "mode"     : mode,
                    "fullgraph": fullgraph,
                    "dynamic"  : dynamic,
                }
                setattr(m.model, "compile_settings", compile_settings)
            except Exception as e:
                # BUGFIX: was a bare `except:` that discarded the real error
                # (and caught KeyboardInterrupt); chain the cause instead.
                raise RuntimeError("Failed to compile model. Verify that this is a SD3.5 model!") from e

        return (m, )
class ClownpileModelWanVideo:
    """torch.compile WAN video transformer blocks, skipping blocks whose large
    regional self-attn masks make compilation too VRAM-hungry."""

    def __init__(self):
        # Guard so repeated executions of the node do not re-compile.
        self._compiled = False

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "model"                     : ("MODEL",),
                "backend"                   : (["inductor","cudagraphs"], {"default" : "inductor"}),
                "fullgraph"                 : ("BOOLEAN", {"default" : False, "tooltip" : "Enable full graph mode"}),
                "mode"                      : (COMPILE_MODES, {"default": "default"}),
                "dynamic"                   : ("BOOLEAN", {"default" : False, "tooltip" : "Enable dynamic mode"}),
                "dynamo_cache_size_limit"   : ("INT", {"default" : 64, "min" : 0, "max": 1024, "step": 1, "tooltip": "torch._dynamo.config.cache_size_limit"}),
                #"compile_self_attn_blocks" : ("INT", {"default" : 0, "min" : 0, "max": 100, "step" : 1, "tooltip": "Maximum blocks to compile. These use huge amounts of VRAM with large attention masks."}),
                "skip_self_attn_blocks"     : ("STRING", {"default" : "0,1,2,3,4,5,6,7,8,9,", "multiline": True, "tooltip": "For WAN only: select self-attn blocks to disable. Due to the size of the self-attn masks, VRAM required to compile blocks using regional WAN is excessive. List any blocks selected in the ReWanPatcher node."}),
                "compile_transformer_blocks": ("BOOLEAN", {"default" : True, "tooltip" : "Compile all transformer blocks"}),
                "force_recompile"           : ("BOOLEAN", {"default": False, "tooltip": "Force recompile."}),
            },
        }

    RETURN_TYPES = ("MODEL",)
    FUNCTION     = "patch"
    CATEGORY     = "RES4LYF/model"
    EXPERIMENTAL = True

    def patch(self, model, backend, fullgraph, mode, dynamic, dynamo_cache_size_limit, skip_self_attn_blocks, compile_transformer_blocks, force_recompile):
        """Return a clone with the non-skipped transformer blocks compiled."""
        m = model.clone()
        diffusion_model = m.get_model_object("diffusion_model")
        # Raise dynamo's recompile cache limit (global process-wide setting).
        torch._dynamo.config.cache_size_limit = dynamo_cache_size_limit

        skip_self_attn_blocks = parse_range_string(skip_self_attn_blocks)

        if force_recompile:
            self._compiled = False

        if not self._compiled:
            try:
                if compile_transformer_blocks:
                    for i, block in enumerate(diffusion_model.blocks):
                        if i not in skip_self_attn_blocks:
                            compiled_block = torch.compile(block, fullgraph=fullgraph, dynamic=dynamic, backend=backend, mode=mode)
                            m.add_object_patch(f"diffusion_model.blocks.{i}", compiled_block)
                self._compiled = True
                compile_settings = {
                    "backend": backend,
                    "mode": mode,
                    "fullgraph": fullgraph,
                    "dynamic": dynamic,
                }
                setattr(m.model, "compile_settings", compile_settings)
            except Exception as e:
                # BUGFIX: was a bare `except:` that discarded the real error
                # (and caught KeyboardInterrupt); chain the cause instead.
                raise RuntimeError("Failed to compile model. Verify that this is a WAN model!") from e

        return (m, )
================================================
FILE: nodes_latents.py
================================================
import torch.nn.functional as F
import copy
import comfy.samplers
import comfy.sample
import comfy.sampler_helpers
import comfy.utils
import itertools
import torch
import math
from nodes import MAX_RESOLUTION
#MAX_RESOLUTION=8192
from .helper import ExtraOptions, initialize_or_scale, extra_options_flag, get_extra_options_list
from .latents import latent_meancenter_channels, latent_stdize_channels, get_edge_mask, apply_to_state_info_tensors
from .beta.noise_classes import NOISE_GENERATOR_NAMES, NOISE_GENERATOR_CLASSES, prepare_noise
def fp_or(tensor1, tensor2):
    """Fuzzy-logic OR of two mask tensors: the elementwise maximum."""
    return torch.maximum(tensor1, tensor2)
def fp_and(tensor1, tensor2):
    """Fuzzy-logic AND of two mask tensors: the elementwise minimum."""
    return torch.minimum(tensor1, tensor2)
class AdvancedNoise:
    """Node producing a lazy NOISE object (Noise_RandomNoise) with the chosen
    generator type and fractal parameters (alpha, k)."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required":{
                "alpha": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step":0.1, "round": 0.01}),
                "k": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step":2.0, "round": 0.01}),
                "noise_seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}),
                "noise_type": (NOISE_GENERATOR_NAMES, ),
            },
        }

    RETURN_TYPES = ("NOISE",)
    FUNCTION     = "get_noise"
    CATEGORY     = "RES4LYF/noise"

    def get_noise(self, noise_seed, noise_type, alpha, k):
        # Package the parameters into a generator object; noise is produced
        # later when the sampler calls generate_noise().
        generator = Noise_RandomNoise(noise_seed, noise_type, alpha, k)
        return (generator,)
class Noise_RandomNoise:
    """Lazy noise generator: stores parameters and produces noise matched to a latent."""

    def __init__(self, seed, noise_type, alpha, k):
        self.seed       = seed
        self.noise_type = noise_type
        self.alpha      = alpha
        self.k          = k

    def generate_noise(self, input_latent):
        """Produce seeded noise shaped like input_latent['samples']."""
        samples    = input_latent["samples"]
        batch_inds = input_latent.get("batch_index")  # None when absent
        return prepare_noise(samples, self.seed, self.noise_type, batch_inds, self.alpha, self.k)
class LatentNoised:
    """Add noise to a latent, optionally normalized to the latent's statistics
    and/or restricted to a mask region."""

    @classmethod
    def INPUT_TYPES(cls):
        return {"required":
                    {
                    "add_noise": ("BOOLEAN", {"default": True}),
                    "noise_is_latent": ("BOOLEAN", {"default": False}),
                    "noise_type": (NOISE_GENERATOR_NAMES, ),
                    "alpha": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step":0.1, "round": 0.01}),
                    "k": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step":2.0, "round": 0.01}),
                    "noise_seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}),
                    "latent_image": ("LATENT", ),
                    "noise_strength": ("FLOAT", {"default": 1.0, "min": -20.0, "max": 20.0, "step": 0.01, "round": 0.01}),
                    "normalize": (["false", "true"], {"default": "false"}),
                    },
                "optional":
                    {
                    "latent_noise": ("LATENT", ),
                    "mask": ("MASK", ),
                    }
                }

    RETURN_TYPES = ("LATENT",)
    RETURN_NAMES = ("latent_noised",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/noise"

    def main(self,
            add_noise,
            noise_is_latent,
            noise_type,
            noise_seed,
            alpha,
            k,
            latent_image,
            noise_strength,
            normalize,
            latent_noise = None,
            mask         = None
            ):
        """Return latent_image with noise added (zeros when add_noise is False)."""
        latent_out = latent_image.copy()
        samples    = latent_out["samples"].clone()

        torch.manual_seed(noise_seed)  # NOTE: sets the global torch RNG seed

        if not add_noise:
            noise = torch.zeros(samples.size(), dtype=samples.dtype, layout=samples.layout, device="cpu")
        elif latent_noise is None:
            batch_inds = latent_out["batch_index"] if "batch_index" in latent_out else None
            noise = prepare_noise(samples, noise_seed, noise_type, batch_inds, alpha, k)
        else:
            noise = latent_noise["samples"]

        if normalize == "true":
            # Rescale the noise to the latent's mean/std.
            latent_mean = samples.mean()
            latent_std  = samples.std()
            noise = noise * latent_std + latent_mean

        if noise_is_latent:
            # BUGFIX: previously used `+=`/`sub_`/`div_`, which mutated a
            # caller-supplied latent_noise tensor in place; use out-of-place ops.
            noise = noise + samples.cpu()
            noise = (noise - noise.mean()) / noise.std()

        noise = noise * noise_strength

        if mask is not None:
            noise = self._apply_mask(noise, mask, samples)

        latent_out["samples"] = samples.cpu() + noise
        return (latent_out,)

    @staticmethod
    def _apply_mask(noise, mask, samples):
        """Zero the noise outside the mask, resizing/repeating the mask to the
        latent's spatial (and, for 5-D video latents, temporal) shape."""
        if len(samples.shape) == 5:
            b, c, t, h, w = samples.shape
            mask_resized = F.interpolate(mask.reshape((-1, 1, mask.shape[-2], mask.shape[-1])),
                                        size=(h, w),
                                        mode="bilinear")
            # Tile or truncate the mask batch to match the latent batch.
            if mask_resized.shape[0] < b:
                mask_resized = mask_resized.repeat((b - 1) // mask_resized.shape[0] + 1, 1, 1, 1)[:b]
            elif mask_resized.shape[0] > b:
                mask_resized = mask_resized[:b]
            mask_expanded = mask_resized.expand((-1, c, -1, -1))
            # Broadcast the spatial mask across every frame.
            mask_temporal = mask_expanded.unsqueeze(2).expand(-1, -1, t, -1, -1).to(samples.device)
            return mask_temporal * noise + (1 - mask_temporal) * torch.zeros_like(noise)

        mask = F.interpolate(mask.reshape((-1, 1, mask.shape[-2], mask.shape[-1])),
                            size=(samples.shape[2], samples.shape[3]),
                            mode="bilinear")
        mask = mask.expand((-1, samples.shape[1], -1, -1)).to(samples.device)
        # Tile or truncate the mask batch to match the latent batch.
        if mask.shape[0] < samples.shape[0]:
            mask = mask.repeat((samples.shape[0] - 1) // mask.shape[0] + 1, 1, 1, 1)[:samples.shape[0]]
        elif mask.shape[0] > samples.shape[0]:
            mask = mask[:samples.shape[0]]
        return mask * noise + (1 - mask) * torch.zeros_like(noise)
class LatentNoiseList:
    """Produce a list of latents, each perturbed by fractal noise with
    per-step alpha/k values (from sigmas inputs or scalar defaults)."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "latent": ("LATENT",),
                "alpha": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.001}),
                "k_flip": ("BOOLEAN", {"default": False}),
                "steps": ("INT", {"default": 0, "min": -10000, "max": 10000}),
                "seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}),
            },
            "optional": {
                "alphas": ("SIGMAS", ),
                "ks": ("SIGMAS", ),
            }
        }

    RETURN_TYPES   = ("LATENT",)
    RETURN_NAMES   = ("latent_list",)
    OUTPUT_IS_LIST = (True,)
    FUNCTION       = "main"
    CATEGORY       = "RES4LYF/noise"

    def main(self,
            seed,
            latent,
            alpha,
            k_flip,
            steps,
            alphas = None,
            ks     = None
            ):
        """Return `steps` noisy copies of `latent` (one per alpha/k pair)."""
        alphas = initialize_or_scale(alphas, alpha, steps)
        ks     = initialize_or_scale(ks, -1 if k_flip else 1, steps)

        latent_samples = latent["samples"]
        # steps == 0 means "one latent per provided alpha value".
        step_count = len(alphas) if steps == 0 else steps

        sampler = NOISE_GENERATOR_CLASSES.get('fractal')(x=latent_samples, seed=seed)

        latent_list = []
        for i in range(step_count):
            step_noise = sampler(alpha=alphas[i].item(), k=ks[i].item(), scale=0.1)
            latent_list.append({"samples": latent_samples + step_noise})

        return (latent_list, )
class MaskToggle:
    """Gate a mask connection: forward it when enabled, emit None when disabled."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "enable": ("BOOLEAN", {"default": True}),
                "mask": ("MASK", ),
            },
        }

    RETURN_TYPES = ("MASK",)
    RETURN_NAMES = ("mask",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/masks"

    def main(self, enable=True, mask=None):
        """Return the mask unchanged unless the toggle is off."""
        return (None if enable == False else mask, )
class latent_to_raw_x:
    """Stash the latent's samples (promoted to float64) as state_info['raw_x']."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "latent": ("LATENT", ),
            },
        }

    RETURN_TYPES = ("LATENT",)
    RETURN_NAMES = ("latent_raw_x",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/latents"

    def main(self, latent,):
        """Mutate the latent dict in place, adding/overwriting state_info['raw_x']."""
        state = latent.setdefault('state_info', {})
        state['raw_x'] = latent['samples'].to(torch.float64)
        return (latent,)
# Adapted from https://github.com/comfyanonymous/ComfyUI/blob/5ee381c058d606209dcafb568af20196e7884fc8/comfy_extras/nodes_wan.py
class TrimVideoLatent_state_info:
    """Drop leading frames from a video latent, applying the trim to every
    matching tensor tracked in state_info as well."""

    @classmethod
    def INPUT_TYPES(s):
        return {"required": {"samples": ("LATENT",),
                             "trim_amount": ("INT", {"default": 0, "min": 0, "max": 99999}),
                             }}

    RETURN_TYPES = ("LATENT",)
    FUNCTION = "op"
    CATEGORY = "RES4LYF/latents"
    EXPERIMENTAL = True

    @staticmethod
    def _trim_tensor(tensor, trim_amount):
        """Remove the first `trim_amount` frames along the temporal dim (-3);
        tensors too short to trim are returned untouched."""
        total = tensor.shape[-3]
        if total <= trim_amount:
            return tensor
        return tensor.narrow(-3, trim_amount, total - trim_amount)

    def op(self, samples, trim_amount):
        shape = samples["samples"].shape
        trimmed = apply_to_state_info_tensors(samples, shape, self._trim_tensor, trim_amount)
        return (trimmed,)
# Adapted from https://github.com/comfyanonymous/ComfyUI/blob/05df2df489f6b237f63c5f7d42a943ae2be417e9/nodes.py
class LatentUpscaleBy_state_info:
    """Spatially upscale a latent by a factor, applying the same upscale to the
    tensors tracked in state_info."""

    upscale_methods = ["nearest-exact", "bilinear", "area", "bicubic", "bislerp"]

    @classmethod
    def INPUT_TYPES(s):
        return {"required": { "samples": ("LATENT",), "upscale_method": (s.upscale_methods,),
                              "scale_by": ("FLOAT", {"default": 1.5, "min": 0.01, "max": 8.0, "step": 0.01}),}}

    RETURN_TYPES = ("LATENT",)
    FUNCTION = "op"
    CATEGORY = "latent"

    @staticmethod
    def _upscale_tensor(tensor, upscale_method, scale_by):
        """Upscale the spatial dims (H, W) of `tensor` by `scale_by`.

        Fix: marked @staticmethod. op() hands this to apply_to_state_info_tensors
        as `self._upscale_tensor`, which is called as func(tensor, *args); as a
        bound instance method, `self` was injected as an extra first argument
        (compare TrimVideoLatent_state_info._trim_tensor, which is static).
        """
        width = round(tensor.shape[-1] * scale_by)
        height = round(tensor.shape[-2] * scale_by)
        return comfy.utils.common_upscale(tensor, width, height, upscale_method, "disabled")

    def op(self, samples, upscale_method, scale_by):
        ref_shape = samples["samples"].shape
        samples_out = apply_to_state_info_tensors(samples, ref_shape, self._upscale_tensor, upscale_method, scale_by)
        return (samples_out,)
class latent_clear_state_info:
    """Strip everything but 'samples' from a latent dict (drops state_info etc.)."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "latent": ("LATENT", ),
            },
        }

    RETURN_TYPES = ("LATENT",)
    RETURN_NAMES = ("latent",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/latents"

    def main(self, latent,):
        """Return a fresh dict keeping only the 'samples' entry, if present."""
        if 'samples' in latent:
            return ({'samples': latent['samples']},)
        return ({},)
class latent_replace_state_info:
    """Deep-copy a latent and selectively edit its state_info: optionally null
    out 'raw_x' and/or override 'end_step'."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "latent": ("LATENT", ),
                "clear_raw_x": ("BOOLEAN", {"default": False}),
                "replace_end_step": ("INT", {"default": 0, "min": -10000, "max": 10000}),
            },
        }

    RETURN_TYPES = ("LATENT",)
    RETURN_NAMES = ("latent",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/latents"

    def main(self, latent, clear_raw_x, replace_end_step):
        """Returns a modified deep copy; replace_end_step == 0 means "leave end_step alone"."""
        result = copy.deepcopy(latent)
        state = result.setdefault('state_info', {})
        if clear_raw_x:
            state['raw_x'] = None
        if replace_end_step != 0:
            state['end_step'] = replace_end_step
        return (result,)
class latent_display_state_info:
    """Output node: render a human-readable one-line-per-key summary of a
    latent's 'state_info' dict (tensor shapes/dtypes/stats, scalars verbatim)."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "latent": ("LATENT", ),
            },
        }

    RETURN_TYPES = ("STRING",)
    FUNCTION = "execute"
    CATEGORY = "RES4LYF/latents"
    OUTPUT_NODE = True

    def execute(self, latent):
        # Build one "key: description" line per state_info entry; tensors are
        # summarized rather than printed in full.
        text = ""
        if 'state_info' in latent:
            for key, value in latent['state_info'].items():
                if isinstance(value, torch.Tensor):
                    if value.numel() == 0:
                        value_text = "empty tensor"
                    elif value.numel() == 1:
                        # Scalar tensor: show the value itself.
                        # NOTE(review): the "str(...)" label looks like a leftover;
                        # the formatted value is numeric, not a string repr.
                        if value.dtype == torch.bool:
                            value_text = f"bool({value.item()})"
                        else:
                            value_text = f"str({value.item():.3f}), dtype: {value.dtype}"
                    else:
                        shape_str = str(list(value.shape)).replace(" ", "")
                        dtype = value.dtype
                        if torch.is_floating_point(value) is False:
                            if value.dtype == torch.bool:
                                # Boolean mask: report true/false element counts.
                                value_text = f"shape: {shape_str}, dtype: {dtype}, true: {value.sum().item()}, false: {(~value).sum().item()}"
                            else:
                                # Integer (or other non-float) tensor: report value range.
                                max_val = value.float().max().item()
                                min_val = value.float().min().item()
                                value_text = f"shape: {shape_str}, dtype: {dtype}, max: {max_val}, min: {min_val}"
                        else:
                            # Float tensor: report distribution statistics.
                            mean = value.float().mean().item()
                            std = value.float().std().item()
                            value_text = f"shape: {shape_str}, dtype: {dtype}, mean: {mean:.3f}, std: {std:.3f}"
                else:
                    # Non-tensor entries (ints, lists, dicts, ...) are shown verbatim.
                    value_text = str(value)
                text += f"{key}: {value_text}\n"
        else:
            text = "No state info in latent"
        # "ui" drives the on-node display; "result" feeds the STRING output.
        return {"ui": {"text": text}, "result": (text,)}
class latent_transfer_state_info:
    """Copy state_info from one latent onto another (deep copy, so the two
    latents never alias sampler state)."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "latent_to": ("LATENT", ),
                "latent_from": ("LATENT", ),
            },
        }

    RETURN_TYPES = ("LATENT",)
    RETURN_NAMES = ("latent",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/latents"

    def main(self, latent_to, latent_from):
        """Overwrite latent_to['state_info'] with a deep copy of latent_from's.

        Fix: a latent_from with no 'state_info' key previously raised KeyError;
        an empty dict is transferred instead.
        """
        latent_to['state_info'] = copy.deepcopy(latent_from.get('state_info', {}))
        return (latent_to,)
class latent_mean_channels_from_to:
    """Shift each channel of latent_to so its spatial mean matches latent_from's."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "latent_to": ("LATENT", ),
                "latent_from": ("LATENT", ),
            },
        }

    RETURN_TYPES = ("LATENT",)
    RETURN_NAMES = ("latent",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/latents"

    def main(self, latent_to, latent_from):
        """Recenter latent_to's per-channel means onto latent_from's."""
        dst_means = latent_to['samples'].mean(dim=(-2, -1), keepdim=True)
        src_means = latent_from['samples'].mean(dim=(-2, -1), keepdim=True)
        latent_to['samples'] = latent_to['samples'] - dst_means + src_means
        return (latent_to,)
class latent_get_channel_means:
    """Expose a latent's per-channel spatial means as a SIGMAS-style 1-D tensor."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "latent": ("LATENT", ),
            },
        }

    RETURN_TYPES = ("SIGMAS",)
    RETURN_NAMES = ("channel_means",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/latents"

    def main(self, latent):
        """Mean over H and W per channel, batch dim squeezed away."""
        means = latent['samples'].mean(dim=(-2, -1))
        return (means.squeeze(0),)
class latent_to_cuda:
    """Move a latent's samples tensor between CUDA and CPU."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "latent": ("LATENT", ),
                "to_cuda": ("BOOLEAN", {"default": True}),
            },
        }

    RETURN_TYPES = ("LATENT",)
    RETURN_NAMES = ("passthrough",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/latents"

    def main(self, latent, to_cuda):
        """Move samples to CUDA when to_cuda is True, else to CPU.

        Fix: to_cuda is a bool, but the old code matched it against the strings
        "True"/"False", so neither branch ever ran (the node was a silent no-op);
        it also called .to() on the latent dict instead of its 'samples' tensor.
        """
        latent['samples'] = latent['samples'].to('cuda' if to_cuda else 'cpu')
        return (latent,)
class latent_batch:
    """Tile a latent into a batch of identical copies."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "latent": ("LATENT", ),
                "batch_size": ("INT", {"default": 0, "min": -10000, "max": 10000}),
            },
        }

    RETURN_TYPES = ("LATENT",)
    RETURN_NAMES = ("latent_batch",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/latents"

    def main(self, latent, batch_size):
        """Return a batch of `batch_size` copies of the input latent.

        Fixes: the output buffer's channel count was hard-coded to 4, breaking
        any model whose latents have a different channel count; the input's
        dtype and device are now preserved instead of defaulting to float32/CPU.
        """
        samples = latent["samples"]
        b, c, h, w = samples.shape
        batch_latents = torch.zeros([batch_size, c, h, w], dtype=samples.dtype, device=samples.device)
        for i in range(batch_size):
            # broadcast assignment; assumes b == 1 as in the original code
            batch_latents[i] = samples
        return ({"samples": batch_latents}, )
class MaskFloatToBoolean:
    """Binarize a float mask: nonzero -> 1, zero -> 0, original dtype kept."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "mask": ("MASK",),
            },
            "optional": {
            },
        }

    RETURN_TYPES = ("MASK",)
    RETURN_NAMES = ("binary_mask",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/masks"

    def main(self, mask=None,):
        """Hard-threshold via bool cast, then restore the incoming dtype."""
        binarized = mask.bool()
        return (binarized.to(mask.dtype),)
class MaskEdge:
    """Build an edge-band mask that hugs the boundary of an input mask, with
    separately scalable inner and outer bands."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "dilation": ("INT", {"default": 20, "min": -10000, "max": 10000}),
                "mode": [["percent", "absolute"], {"default": "percent"}],
                "internal": ("FLOAT", {"default": 1.0, "min": -1.0, "max": 10000.0, "step": 0.01}),
                "external": ("FLOAT", {"default": 1.0, "min": -1.0, "max": 10000.0, "step": 0.01}),
                #"blur": ("BOOLEAN", {"default": False}),
                "mask": ("MASK",),
            },
            "optional": {
            },
        }

    RETURN_TYPES = ("MASK",)
    RETURN_NAMES = ("edge_mask",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/masks"

    def main(self, dilation=20, mode="percent", internal=1.0, external=1.0, blur=False, mask=None,):
        """Return the union of an inner band (inside the mask) and an outer band
        (outside it). A zero-width band degenerates to the mask / its complement."""
        orig_dtype = mask.dtype
        mask = mask.float()

        if mode == "percent":
            # interpret dilation as a percentage of the mask's effective linear
            # size (sqrt of its area in pixels)
            dilation = (dilation / 100) * int(mask.sum() ** 0.5)

        inner_px = int(internal * dilation)
        outer_px = int(external * dilation)

        if inner_px > 0:
            inner_band = fp_and(get_edge_mask(mask, inner_px), mask)
        else:
            inner_band = mask

        if outer_px > 0:
            outer_band = fp_and(get_edge_mask(mask, outer_px), 1 - mask)
        else:
            outer_band = 1 - mask

        edge_mask = fp_or(inner_band, outer_band)
        return (edge_mask.to(orig_dtype),)
class Frame_Select_Latent_Raw:
    """Pick one temporal frame out of state_info['raw_x'], keeping a singleton
    time dim. NOTE(review): returns the raw tensor rather than a
    {"samples": ...} dict — the other *_Raw nodes pass raw tensors around."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "frames": ("IMAGE",),
                "select": ("INT", {"default": 0, "min": 0, "max": 10000}),
            },
            "optional": {
            },
        }

    RETURN_TYPES = ("LATENT",)
    RETURN_NAMES = ("latent",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/latents"

    def main(self, frames=None, select=0):
        """Extract frame `select` along dim 2 of the stored raw_x tensor."""
        raw = frames['state_info']['raw_x']
        frame = raw[:, :, select, :, :].clone().unsqueeze(dim=2)
        return (frame,)
class Frames_Slice_Latent_Raw:
    """Slice the temporal range [start, stop) out of state_info['raw_x'].
    NOTE(review): returns the raw tensor (no {"samples": ...} wrapper),
    matching the other *_Raw nodes."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "frames": ("LATENT",),
                "start": ("INT", {"default": 0, "min": 0, "max": 10000}),
                "stop": ("INT", {"default": 1, "min": 1, "max": 10000}),
            },
            "optional": {
            },
        }

    RETURN_TYPES = ("LATENT",)
    RETURN_NAMES = ("latent",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/latents"

    def main(self, frames=None, start=0, stop=1):
        """Clone the selected temporal slice of the stored raw_x tensor."""
        raw = frames['state_info']['raw_x']
        return (raw[:, :, start:stop, :, :].clone(),)
class Frames_Concat_Latent_Raw:
    """Join two raw frame tensors along the temporal dim (dim=2).
    NOTE(review): inputs are raw tensors as produced by the other *_Raw nodes,
    despite the LATENT socket type."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "frames_0": ("LATENT",),
                "frames_1": ("LATENT",),
            },
            "optional": {
            },
        }

    RETURN_TYPES = ("LATENT",)
    RETURN_NAMES = ("latent",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/latents"

    def main(self, frames_0, frames_1):
        """Concatenate the two tensors temporally and return a fresh copy."""
        joined = torch.cat((frames_0, frames_1), dim=2)
        return (joined.clone(),)
class Frame_Select_Latent:
    """Pick one temporal frame from a video latent, keeping a singleton time dim."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "frames": ("IMAGE",),
                "select": ("INT", {"default": 0, "min": 0, "max": 10000}),
            },
            "optional": {
            },
        }

    RETURN_TYPES = ("LATENT",)
    RETURN_NAMES = ("latent",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/latents"

    def main(self, frames=None, select=0):
        """Extract frame `select` along dim 2 of the latent's samples."""
        samples = frames['samples']
        frame = samples[:, :, select, :, :].clone().unsqueeze(dim=2)
        return ({"samples": frame},)
class Frames_Slice_Latent:
    """Slice the temporal range [start, stop) out of a video latent."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "frames": ("LATENT",),
                "start": ("INT", {"default": 0, "min": 0, "max": 10000}),
                "stop": ("INT", {"default": 1, "min": 1, "max": 10000}),
            },
            "optional": {
            },
        }

    RETURN_TYPES = ("LATENT",)
    RETURN_NAMES = ("latent",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/latents"

    def main(self, frames=None, start=0, stop=1):
        """Clone the selected temporal slice of the latent's samples."""
        samples = frames['samples']
        return ({"samples": samples[:, :, start:stop, :, :].clone()},)
class Frames_Concat_Latent:
    """Join two video latents along the temporal dim (dim=2)."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "frames_0": ("LATENT",),
                "frames_1": ("LATENT",),
            },
            "optional": {
            },
        }

    RETURN_TYPES = ("LATENT",)
    RETURN_NAMES = ("latent",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/latents"

    def main(self, frames_0, frames_1):
        """Concatenate the two latents' samples temporally into a fresh tensor."""
        joined = torch.cat((frames_0['samples'], frames_1['samples']), dim=2)
        return ({"samples": joined.clone()},)
class Frames_Concat_Masks:
    """Concatenate up to 10 mask tensors along the temporal dim (-3) to build a
    temporal mask."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "frames_0": ("MASK",),
                "frames_1": ("MASK",),
            },
            "optional": {
                "frames_2": ("MASK",),
                "frames_3": ("MASK",),
                "frames_4": ("MASK",),
                "frames_5": ("MASK",),
                "frames_6": ("MASK",),
                "frames_7": ("MASK",),
                "frames_8": ("MASK",),
                "frames_9": ("MASK",),
            },
        }

    RETURN_TYPES = ("MASK",)
    RETURN_NAMES = ("temporal_mask",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/masks"

    def main(self, frames_0, frames_1, frames_2=None, frames_3=None, frames_4=None,
             frames_5=None, frames_6=None, frames_7=None, frames_8=None, frames_9=None):
        """Concatenate the connected inputs in order; unconnected optional
        inputs are skipped (gaps allowed). A 3-D result gains a leading batch
        dim so the output is always 4-D.

        Improvement: the previous version repeated eight near-identical
        torch.cat lines, each with a redundant .clone(); one cat over the
        collected list produces the same result.
        """
        extras = (frames_2, frames_3, frames_4, frames_5,
                  frames_6, frames_7, frames_8, frames_9)
        parts = [frames_0, frames_1] + [f for f in extras if f is not None]
        frames_concat = torch.cat(parts, dim=-3)
        if frames_concat.ndim == 3:
            frames_concat = frames_concat.unsqueeze(0)
        return (frames_concat,)
class Frames_Masks_Uninterpolate:
    """Downsample a per-raw-frame temporal mask to one entry per latent frame
    chunk by keeping evenly spaced frames."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "raw_temporal_mask": ("MASK",),
                "frame_chunk_size" : ("INT", {"default": 4, "min": 1, "max": 10000, "step": 1}),
            },
            "optional": {
            },
        }

    RETURN_TYPES = ("MASK",)
    RETURN_NAMES = ("temporal_mask",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/masks"

    def main(self, raw_temporal_mask, frame_chunk_size):
        """Keep frames at evenly spaced indices spanning [0, last], one per
        chunk of `frame_chunk_size` raw frames, and add a leading batch dim."""
        raw_frames = raw_temporal_mask.shape[-3]
        last = raw_frames - 1
        kept_count = last // frame_chunk_size + 1
        keep = torch.linspace(0, last, steps=kept_count).long()
        return (raw_temporal_mask[..., keep, :, :].unsqueeze(0),)
class Frames_Masks_ZeroOut:
    """Force a single temporal frame of a mask fully on, in place.

    NOTE(review): despite the node name, the selected frame is set to 1.0, not
    0.0 — presumably 1.0 means "fully active" in this mask convention; confirm
    against the sampler's mask handling."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "temporal_mask": ("MASK",),
                "zero_out_frame" : ("INT", {"default": 0, "min": 0, "max": 10000, "step": 1}),
            },
            "optional": {
            },
        }

    RETURN_TYPES = ("MASK",)
    RETURN_NAMES = ("temporal_mask",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/masks"

    def main(self, temporal_mask, zero_out_frame):
        """Overwrite the chosen temporal slice with 1.0 and return the same tensor."""
        frame = slice(zero_out_frame, zero_out_frame + 1)
        temporal_mask[..., frame, :, :] = 1.0
        return (temporal_mask,)
class Frames_Latent_ReverseOrder:
def __init__(self):
pass
@classmethod
def INPUT_TYPES(cls):
return {
"required": {
"frames": ("LATENT",),
},
"optional": {
},
}
RETURN_TYPES = ("LATENT",)
RETURN_NAMES = ("frames_reversed",)
FUNCTION = "main"
CATEGORY = "RES4LYF/masks"
def main(self, frames,):
samples = frames['samples']
flipped_frames = torch.zeros_like(samples)
t_len = samples.shape[-3]
for i in range(t_len):
flipped_frames[:,:,t_len-i-1,:,:] = samples[:,:,i,:,:]
return ( {"samples": flipped_frames },)
#return ( {"samples": torch.flip(frames['samples'], dims=[-3]) },)
class LatentPhaseMagnitude:
    """Blend two latent batches in the 2-D FFT domain: per-channel weighted
    power means of phase and magnitude, with optional per-latent normalization,
    standardization, and mean-centering before/after the mix."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "latent_0_batch": ("LATENT",),
                "latent_1_batch": ("LATENT",),
                "phase_mix_power": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.001}),
                "magnitude_mix_power": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.001}),
                "phase_luminosity": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.001}),
                "phase_cyan_red": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.001}),
                "phase_lime_purple": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.001}),
                "phase_pattern_structure": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.001}),
                "magnitude_luminosity": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.001}),
                "magnitude_cyan_red": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.001}),
                "magnitude_lime_purple": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.001}),
                "magnitude_pattern_structure": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.001}),
                "latent_0_normal": ("BOOLEAN", {"default": True}),
                "latent_1_normal": ("BOOLEAN", {"default": True}),
                "latent_out_normal": ("BOOLEAN", {"default": True}),
                "latent_0_stdize": ("BOOLEAN", {"default": True}),
                "latent_1_stdize": ("BOOLEAN", {"default": True}),
                "latent_out_stdize": ("BOOLEAN", {"default": True}),
                "latent_0_meancenter": ("BOOLEAN", {"default": True}),
                "latent_1_meancenter": ("BOOLEAN", {"default": True}),
                "latent_out_meancenter": ("BOOLEAN", {"default": True}),
            },
            "optional": {
                "phase_mix_powers": ("SIGMAS", ),
                "magnitude_mix_powers": ("SIGMAS", ),
                "phase_luminositys": ("SIGMAS", ),
                "phase_cyan_reds": ("SIGMAS", ),
                "phase_lime_purples": ("SIGMAS", ),
                "phase_pattern_structures": ("SIGMAS", ),
                "magnitude_luminositys": ("SIGMAS", ),
                "magnitude_cyan_reds": ("SIGMAS", ),
                "magnitude_lime_purples": ("SIGMAS", ),
                "magnitude_pattern_structures": ("SIGMAS", ),
            }
        }

    RETURN_TYPES = ("LATENT",)
    RETURN_NAMES = ("latent",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/latents"

    @staticmethod
    def latent_repeat(latent, batch_size):
        """Tile a single latent (1,C,H,W) out to (batch_size,C,H,W)."""
        b, c, h, w = latent.shape
        batch_latents = torch.zeros((batch_size, c, h, w), dtype=latent.dtype, layout=latent.layout, device=latent.device)
        for i in range(batch_size):
            batch_latents[i] = latent
        return batch_latents

    @staticmethod
    def mix_latent_phase_magnitude(latent_0,
                                   latent_1,
                                   power_phase,
                                   power_magnitude,
                                   phase_luminosity,
                                   phase_cyan_red,
                                   phase_lime_purple,
                                   phase_pattern_structure,
                                   magnitude_luminosity,
                                   magnitude_cyan_red,
                                   magnitude_lime_purple,
                                   magnitude_pattern_structure,
                                   ):
        """Mix the two latents' FFT phase and magnitude per channel.

        Each channel i is combined as a weighted power mean:
        ((a*(1-w))**p + (b*w)**p)**(1/p). With p == 1 this reduces to a linear
        blend. FFT is done in float64 (fp32 FFT round-trip error is visible in
        latents); result is cast back to the promoted input dtype.
        """
        dtype = torch.promote_types(latent_0.dtype, latent_1.dtype)
        latent_0 = latent_0.double()
        latent_1 = latent_1.double()

        fft_0 = torch.fft.fft2(latent_0)
        fft_1 = torch.fft.fft2(latent_1)
        phase_0, phase_1 = torch.angle(fft_0), torch.angle(fft_1)
        mag_0, mag_1 = torch.abs(fft_0), torch.abs(fft_1)

        # DC corruption...? handle separately?? (kept from original review notes)
        chan_weights_phase = [phase_luminosity, phase_cyan_red, phase_lime_purple, phase_pattern_structure]
        chan_weights_magnitude = [magnitude_luminosity, magnitude_cyan_red, magnitude_lime_purple, magnitude_pattern_structure]

        mixed_phase = torch.zeros_like(latent_0)
        mixed_magnitude = torch.zeros_like(latent_0)
        for i in range(4):
            wp = chan_weights_phase[i]
            wm = chan_weights_magnitude[i]
            mixed_phase[:, i] = ((phase_0[:, i] * (1 - wp)) ** power_phase + (phase_1[:, i] * wp) ** power_phase) ** (1 / power_phase)
            mixed_magnitude[:, i] = ((mag_0[:, i] * (1 - wm)) ** power_magnitude + (mag_1[:, i] * wm) ** power_magnitude) ** (1 / power_magnitude)

        new_fft = mixed_magnitude * torch.exp(1j * mixed_phase)
        # inverse FFT back to the spatial domain
        return torch.fft.ifft2(new_fft).real.to(dtype)

    def main(self,
             latent_0_batch,
             latent_1_batch,
             phase_mix_power,
             magnitude_mix_power,
             phase_luminosity,
             phase_cyan_red,
             phase_lime_purple,
             phase_pattern_structure,
             magnitude_luminosity,
             magnitude_cyan_red,
             magnitude_lime_purple,
             magnitude_pattern_structure,
             latent_0_normal,
             latent_1_normal,
             latent_out_normal,
             latent_0_stdize,
             latent_1_stdize,
             latent_out_stdize,
             latent_0_meancenter,
             latent_1_meancenter,
             latent_out_meancenter,
             phase_mix_powers=None,
             magnitude_mix_powers=None,
             phase_luminositys=None,
             phase_cyan_reds=None,
             phase_lime_purples=None,
             phase_pattern_structures=None,
             magnitude_luminositys=None,
             magnitude_cyan_reds=None,
             magnitude_lime_purples=None,
             magnitude_pattern_structures=None,
             latent_1_repeat=None,  # unused; kept as a trailing default for compatibility
             ):
        """Mix the two batches per-latent and return the result.

        Fix: `latent_1_repeat` was a *required leading* parameter that does not
        exist in INPUT_TYPES, so ComfyUI's kwargs call to main() raised
        TypeError before any work was done; it is unused and now optional.
        """
        latent_0_batch = latent_0_batch["samples"].double()
        latent_1_batch = latent_1_batch["samples"].double().to(latent_0_batch.device)

        batch_size = latent_0_batch.shape[0]
        if latent_1_batch.shape[0] == 1:
            latent_1_batch = self.latent_repeat(latent_1_batch, batch_size)

        # expand scalar settings into per-batch schedules unless SIGMAS are connected
        magnitude_mix_powers = initialize_or_scale(magnitude_mix_powers, magnitude_mix_power, batch_size)
        phase_mix_powers = initialize_or_scale(phase_mix_powers, phase_mix_power, batch_size)
        phase_luminositys = initialize_or_scale(phase_luminositys, phase_luminosity, batch_size)
        phase_cyan_reds = initialize_or_scale(phase_cyan_reds, phase_cyan_red, batch_size)
        phase_lime_purples = initialize_or_scale(phase_lime_purples, phase_lime_purple, batch_size)
        phase_pattern_structures = initialize_or_scale(phase_pattern_structures, phase_pattern_structure, batch_size)
        magnitude_luminositys = initialize_or_scale(magnitude_luminositys, magnitude_luminosity, batch_size)
        magnitude_cyan_reds = initialize_or_scale(magnitude_cyan_reds, magnitude_cyan_red, batch_size)
        magnitude_lime_purples = initialize_or_scale(magnitude_lime_purples, magnitude_lime_purple, batch_size)
        magnitude_pattern_structures = initialize_or_scale(magnitude_pattern_structures, magnitude_pattern_structure, batch_size)

        mixed_phase_magnitude_batch = torch.zeros(latent_0_batch.shape, device=latent_0_batch.device)

        if latent_0_normal == True:
            latent_0_batch = latent_normalize_channels(latent_0_batch)
        if latent_1_normal == True:
            latent_1_batch = latent_normalize_channels(latent_1_batch)
        if latent_0_meancenter == True:
            latent_0_batch = latent_meancenter_channels(latent_0_batch)
        if latent_1_meancenter == True:
            latent_1_batch = latent_meancenter_channels(latent_1_batch)
        if latent_0_stdize == True:
            latent_0_batch = latent_stdize_channels(latent_0_batch)
        if latent_1_stdize == True:
            latent_1_batch = latent_stdize_channels(latent_1_batch)

        for i in range(batch_size):
            mixed_phase_magnitude = self.mix_latent_phase_magnitude(
                latent_0_batch[i:i+1],
                latent_1_batch[i:i+1],
                phase_mix_powers[i].item(),
                magnitude_mix_powers[i].item(),
                phase_luminositys[i].item(),
                phase_cyan_reds[i].item(),
                phase_lime_purples[i].item(),
                phase_pattern_structures[i].item(),
                magnitude_luminositys[i].item(),
                magnitude_cyan_reds[i].item(),
                magnitude_lime_purples[i].item(),
                magnitude_pattern_structures[i].item(),
            )
            if latent_out_normal == True:
                mixed_phase_magnitude = latent_normalize_channels(mixed_phase_magnitude)
            if latent_out_stdize == True:
                mixed_phase_magnitude = latent_stdize_channels(mixed_phase_magnitude)
            if latent_out_meancenter == True:
                mixed_phase_magnitude = latent_meancenter_channels(mixed_phase_magnitude)
            mixed_phase_magnitude_batch[i, :, :, :] = mixed_phase_magnitude

        return ({"samples": mixed_phase_magnitude_batch}, )
class LatentPhaseMagnitudeMultiply:
    """Scale each latent channel's FFT phase and magnitude by per-channel
    factors, then transform back to the spatial domain."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "latent_0_batch": ("LATENT",),
                "phase_luminosity": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.001}),
                "phase_cyan_red": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.001}),
                "phase_lime_purple": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.001}),
                "phase_pattern_structure": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.001}),
                "magnitude_luminosity": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.001}),
                "magnitude_cyan_red": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.001}),
                "magnitude_lime_purple": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.001}),
                "magnitude_pattern_structure": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.001}),
                "latent_0_normal": ("BOOLEAN", {"default": False}),
                "latent_out_normal": ("BOOLEAN", {"default": False}),
            },
            "optional": {
                "phase_luminositys": ("SIGMAS", ),
                "phase_cyan_reds": ("SIGMAS", ),
                "phase_lime_purples": ("SIGMAS", ),
                "phase_pattern_structures": ("SIGMAS", ),
                "magnitude_luminositys": ("SIGMAS", ),
                "magnitude_cyan_reds": ("SIGMAS", ),
                "magnitude_lime_purples": ("SIGMAS", ),
                "magnitude_pattern_structures": ("SIGMAS", ),
            }
        }

    RETURN_TYPES = ("LATENT",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/latents"

    @staticmethod
    def latent_repeat(latent, batch_size):
        """Tile a single latent (1,C,H,W) out to (batch_size,C,H,W)."""
        b, c, h, w = latent.shape
        tiled = torch.zeros((batch_size, c, h, w), dtype=latent.dtype, layout=latent.layout, device=latent.device)
        for i in range(batch_size):
            tiled[i] = latent
        return tiled

    @staticmethod
    def mix_latent_phase_magnitude(latent_0,
                                   phase_luminosity,
                                   phase_cyan_red,
                                   phase_lime_purple,
                                   phase_pattern_structure,
                                   magnitude_luminosity,
                                   magnitude_cyan_red,
                                   magnitude_lime_purple,
                                   magnitude_pattern_structure
                                   ):
        """Multiply each channel's FFT phase and magnitude by its weight and
        invert. FFT runs in float64 (fp32 FFT round-trip error is visible);
        the result is cast back to the input dtype."""
        out_dtype = latent_0.dtype
        x = latent_0.double()

        spectrum = torch.fft.fft2(x)
        phase = torch.angle(spectrum)
        magnitude = torch.abs(spectrum)

        phase_weights = (phase_luminosity, phase_cyan_red, phase_lime_purple, phase_pattern_structure)
        magnitude_weights = (magnitude_luminosity, magnitude_cyan_red, magnitude_lime_purple, magnitude_pattern_structure)

        scaled_phase = torch.zeros_like(x)
        scaled_magnitude = torch.zeros_like(x)
        for ch in range(4):
            scaled_phase[:, ch] = phase[:, ch] * phase_weights[ch]
            scaled_magnitude[:, ch] = magnitude[:, ch] * magnitude_weights[ch]

        rebuilt = scaled_magnitude * torch.exp(1j * scaled_phase)
        return torch.fft.ifft2(rebuilt).real.to(out_dtype)

    def main(self,
             latent_0_batch, latent_0_normal, latent_out_normal,
             phase_luminosity, phase_cyan_red, phase_lime_purple, phase_pattern_structure,
             magnitude_luminosity, magnitude_cyan_red, magnitude_lime_purple, magnitude_pattern_structure,
             phase_luminositys=None, phase_cyan_reds=None, phase_lime_purples=None, phase_pattern_structures=None,
             magnitude_luminositys=None, magnitude_cyan_reds=None, magnitude_lime_purples=None, magnitude_pattern_structures=None
             ):
        """Apply the per-channel phase/magnitude scaling latent-by-latent.

        SIGMAS inputs override the scalar weights; scalars are expanded into
        per-batch schedules via initialize_or_scale.
        """
        samples = latent_0_batch["samples"].double()
        batch_size = samples.shape[0]

        phase_luminositys = initialize_or_scale(phase_luminositys, phase_luminosity, batch_size)
        phase_cyan_reds = initialize_or_scale(phase_cyan_reds, phase_cyan_red, batch_size)
        phase_lime_purples = initialize_or_scale(phase_lime_purples, phase_lime_purple, batch_size)
        phase_pattern_structures = initialize_or_scale(phase_pattern_structures, phase_pattern_structure, batch_size)
        magnitude_luminositys = initialize_or_scale(magnitude_luminositys, magnitude_luminosity, batch_size)
        magnitude_cyan_reds = initialize_or_scale(magnitude_cyan_reds, magnitude_cyan_red, batch_size)
        magnitude_lime_purples = initialize_or_scale(magnitude_lime_purples, magnitude_lime_purple, batch_size)
        magnitude_pattern_structures = initialize_or_scale(magnitude_pattern_structures, magnitude_pattern_structure, batch_size)

        if latent_0_normal == True:
            samples = latent_normalize_channels(samples)

        out_batch = torch.zeros(samples.shape, device=samples.device)
        for i in range(batch_size):
            mixed = self.mix_latent_phase_magnitude(
                samples[i:i+1],
                phase_luminositys[i].item(),
                phase_cyan_reds[i].item(),
                phase_lime_purples[i].item(),
                phase_pattern_structures[i].item(),
                magnitude_luminositys[i].item(),
                magnitude_cyan_reds[i].item(),
                magnitude_lime_purples[i].item(),
                magnitude_pattern_structures[i].item())
            if latent_out_normal == True:
                mixed = latent_normalize_channels(mixed)
            out_batch[i, :, :, :] = mixed

        return ({"samples": out_batch}, )
class LatentPhaseMagnitudeOffset:
@classmethod
def INPUT_TYPES(cls):
return {
"required": {
"latent_0_batch": ("LATENT",),
"phase_luminosity": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.001}),
"phase_cyan_red": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.001}),
"phase_lime_purple": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.001}),
"phase_pattern_structure": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.001}),
"magnitude_luminosity": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.001}),
"magnitude_cyan_red": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.001}),
"magnitude_lime_purple": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.001}),
"magnitude_pattern_structure": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.001}),
"latent_0_normal": ("BOOLEAN", {"default": False}),
"latent_out_normal": ("BOOLEAN", {"default": False}),
},
"optional": {
"phase_luminositys": ("SIGMAS", ),
"phase_cyan_reds": ("SIGMAS", ),
"phase_lime_purples": ("SIGMAS", ),
"phase_pattern_structures": ("SIGMAS", ),
"magnitude_luminositys": ("SIGMAS", ),
"magnitude_cyan_reds": ("SIGMAS", ),
"magnitude_lime_purples": ("SIGMAS", ),
"magnitude_pattern_structures": ("SIGMAS", ),
}
}
RETURN_TYPES = ("LATENT",)
FUNCTION = "main"
CATEGORY = "RES4LYF/latents"
@staticmethod
def latent_repeat(latent, batch_size):
b, c, h, w = latent.shape
batch_latents = torch.zeros((batch_size, c, h, w), dtype=latent.dtype, layout=latent.layout, device=latent.device)
for i in range(batch_size):
batch_latents[i] = latent
return batch_latents
@staticmethod
def mix_latent_phase_magnitude(latent_0,
phase_luminosity,
phase_cyan_red,
phase_lime_purple,
phase_pattern_structure,
magnitude_luminosity,
magnitude_cyan_red,
magnitude_lime_purple,
magnitude_pattern_structure
):
dtype = latent_0.dtype
# avoid big accuracy problems with fp32 FFT!
latent_0 = latent_0.double()
latent_0_fft = torch.fft.fft2(latent_0)
latent_0_phase = torch.angle(latent_0_fft)
latent_0_magnitude = torch.abs(latent_0_fft)
# create new complex FFT using a weighted mix of phases
chan_weights_phase = [w for w in [phase_luminosity, phase_cyan_red, phase_lime_purple, phase_pattern_structure ]]
chan_weights_magnitude = [w for w in [magnitude_luminosity, magnitude_cyan_red, magnitude_lime_purple, magnitude_pattern_structure]]
mixed_phase = torch.zeros_like(latent_0, dtype=latent_0.dtype, layout=latent_0.layout, device=latent_0.device)
mixed_magnitude = torch.zeros_like(latent_0, dtype=latent_0.dtype, layout=latent_0.layout, device=latent_0.device)
for i in range(4):
mixed_phase[:, i] = latent_0_phase[:,i] + chan_weights_phase[i]
mixed_magnitude[:, i] = latent_0_magnitude[:,i] + chan_weights_magnitude[i]
new_fft = mixed_magnitude * torch.exp(1j * mixed_phase)
# inverse FFT to convert back to spatial domain
mixed_phase_magnitude = torch.fft.ifft2(new_fft).real
return mixed_phase_magnitude.to(dtype)
def main(self,
latent_0_batch, latent_0_normal, latent_out_normal,
phase_luminosity, phase_cyan_red, phase_lime_purple, phase_pattern_structure,
magnitude_luminosity, magnitude_cyan_red, magnitude_lime_purple, magnitude_pattern_structure,
phase_luminositys=None, phase_cyan_reds=None, phase_lime_purples=None, phase_pattern_structures=None,
magnitude_luminositys=None, magnitude_cyan_reds=None, magnitude_lime_purples=None, magnitude_pattern_structures=None
):
latent_0_batch = latent_0_batch["samples"].double()
batch_size = latent_0_batch.shape[0]
phase_luminositys = initialize_or_scale(phase_luminositys, phase_luminosity, batch_size)
phase_cyan_reds = initialize_or_scale(phase_cyan_reds, phase_cyan_red, batch_size)
phase_lime_purples = initialize_or_scale(phase_lime_purples, phase_lime_purple, batch_size)
phase_pattern_structures = initialize_or_scale(phase_pattern_structures, phase_pattern_structure, batch_size)
magnitude_luminositys = initialize_or_scale(magnitude_luminositys, magnitude_luminosity, batch_size)
magnitude_cyan_reds = initialize_or_scale(magnitude_cyan_reds, magnitude_cyan_red, batch_size)
magnitude_lime_purples = initialize_or_scale(magnitude_lime_purples, magnitude_lime_purple, batch_size)
magnitude_pattern_structures = initialize_or_scale(magnitude_pattern_structures, magnitude_pattern_structure, batch_size)
mixed_phase_magnitude_batch = torch.zeros(latent_0_batch.shape, device=latent_0_batch.device)
if latent_0_normal == True:
latent_0_batch = latent_normalize_channels(latent_0_batch)
for i in range(batch_size):
mixed_phase_magnitude = self.mix_latent_phase_magnitude(latent_0_batch[i:i+1],
phase_luminositys[i] .item(),
phase_cyan_reds[i] .item(),
phase_lime_purples[i] .item(),
phase_pattern_structures[i] .item(),
magnitude_luminositys[i] .item(),
magnitude_cyan_reds[i] .item(),
magnitude_lime_purples[i] .item(),
magnitude_pattern_structures[i].item()
)
if latent_out_normal == True:
mixed_phase_magnitude = latent_normalize_channels(mixed_phase_magnitude)
mixed_phase_magnitude_batch[i, :, :, :] = mixed_phase_magnitude
return ({"samples": mixed_phase_magnitude_batch}, )
class LatentPhaseMagnitudePower:
    """Raise each channel's FFT phase and magnitude to per-channel exponents.

    Structurally identical to the offset variant, but the weights are applied
    with ** instead of +.
    """
    @classmethod
    def INPUT_TYPES(cls):
        def fspec():
            return ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.001})
        channels = ("luminosity", "cyan_red", "lime_purple", "pattern_structure")
        required = {"latent_0_batch": ("LATENT",)}
        for prefix in ("phase", "magnitude"):
            for chan in channels:
                required["{}_{}".format(prefix, chan)] = fspec()
        required["latent_0_normal"]   = ("BOOLEAN", {"default": False})
        required["latent_out_normal"] = ("BOOLEAN", {"default": False})
        optional = {"{}_{}s".format(prefix, chan): ("SIGMAS", )
                    for prefix in ("phase", "magnitude") for chan in channels}
        return {"required": required, "optional": optional}
    RETURN_TYPES = ("LATENT",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/latents"
    @staticmethod
    def latent_repeat(latent, batch_size):
        """Tile a (1,C,H,W) latent into a batch of `batch_size` identical copies."""
        _, c, h, w = latent.shape
        out = torch.zeros((batch_size, c, h, w), dtype=latent.dtype, layout=latent.layout, device=latent.device)
        out[:] = latent  # broadcast the single latent across the batch dimension
        return out
    @staticmethod
    def mix_latent_phase_magnitude(latent_0,
                                   phase_luminosity,
                                   phase_cyan_red,
                                   phase_lime_purple,
                                   phase_pattern_structure,
                                   magnitude_luminosity,
                                   magnitude_cyan_red,
                                   magnitude_lime_purple,
                                   magnitude_pattern_structure
                                   ):
        """Exponentiate each channel's FFT phase and magnitude, then reconstruct."""
        orig_dtype = latent_0.dtype
        # fp64 keeps the FFT round-trip accurate (fp32 FFT introduces visible error)
        work = latent_0.double()
        spectrum  = torch.fft.fft2(work)
        phase     = torch.angle(spectrum)
        magnitude = torch.abs(spectrum)
        phase_exps     = (phase_luminosity, phase_cyan_red, phase_lime_purple, phase_pattern_structure)
        magnitude_exps = (magnitude_luminosity, magnitude_cyan_red, magnitude_lime_purple, magnitude_pattern_structure)
        new_phase     = torch.zeros_like(work)
        new_magnitude = torch.zeros_like(work)
        for ch in range(4):
            new_phase[:, ch]     = phase[:, ch]     ** phase_exps[ch]
            new_magnitude[:, ch] = magnitude[:, ch] ** magnitude_exps[ch]
        # rebuild a complex spectrum from the exponentiated polar form, then invert
        recombined = new_magnitude * torch.exp(1j * new_phase)
        return torch.fft.ifft2(recombined).real.to(orig_dtype)
    def main(self,
             latent_0_batch, latent_0_normal, latent_out_normal,
             phase_luminosity, phase_cyan_red, phase_lime_purple, phase_pattern_structure,
             magnitude_luminosity, magnitude_cyan_red, magnitude_lime_purple, magnitude_pattern_structure,
             phase_luminositys=None, phase_cyan_reds=None, phase_lime_purples=None, phase_pattern_structures=None,
             magnitude_luminositys=None, magnitude_cyan_reds=None, magnitude_lime_purples=None, magnitude_pattern_structures=None
             ):
        """Apply per-image FFT phase/magnitude exponents across a latent batch."""
        x = latent_0_batch["samples"].double()
        batch_size = x.shape[0]
        # expand each scalar weight into a per-batch-item schedule (a connected SIGMAS input overrides)
        schedules = [
            initialize_or_scale(phase_luminositys,            phase_luminosity,            batch_size),
            initialize_or_scale(phase_cyan_reds,              phase_cyan_red,              batch_size),
            initialize_or_scale(phase_lime_purples,           phase_lime_purple,           batch_size),
            initialize_or_scale(phase_pattern_structures,     phase_pattern_structure,     batch_size),
            initialize_or_scale(magnitude_luminositys,        magnitude_luminosity,        batch_size),
            initialize_or_scale(magnitude_cyan_reds,          magnitude_cyan_red,          batch_size),
            initialize_or_scale(magnitude_lime_purples,       magnitude_lime_purple,       batch_size),
            initialize_or_scale(magnitude_pattern_structures, magnitude_pattern_structure, batch_size),
        ]
        # output keeps the default float32 dtype, as before
        out = torch.zeros(x.shape, device=x.device)
        if latent_0_normal == True:
            # NOTE(review): latent_normalize_channels is shadowed by a same-named
            # class later in this file — verify which one is actually in scope here.
            x = latent_normalize_channels(x)
        for i in range(batch_size):
            mixed = self.mix_latent_phase_magnitude(x[i:i+1], *[s[i].item() for s in schedules])
            if latent_out_normal == True:
                mixed = latent_normalize_channels(mixed)
            out[i, :, :, :] = mixed
        return ({"samples": out}, )
class StableCascade_StageC_VAEEncode_Exact:
    """Encode an image to a Stage-C latent at an exact latent width/height."""
    def __init__(self, device="cpu"):
        self.device = device
    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "image": ("IMAGE",),
                "vae": ("VAE", ),
                "width": ("INT", {"default": 24, "min": 1, "max": 1024, "step": 1}),
                "height": ("INT", {"default": 24, "min": 1, "max": 1024, "step": 1}),
            }
        }
    RETURN_TYPES = ("LATENT",)
    RETURN_NAMES = ("stage_c",)
    FUNCTION = "generate"
    CATEGORY = "RES4LYF/vae"
    def generate(self, image, vae, width, height):
        """Resize so encoding yields exactly (width, height) latent pixels."""
        # vae.downscale_ratio is 32 for the cascade stage-C VAE
        pixel_w = width * vae.downscale_ratio
        pixel_h = height * vae.downscale_ratio
        # common_upscale expects BCHW; the IMAGE input arrives BHWC
        resized = comfy.utils.common_upscale(image.movedim(-1, 1), pixel_w, pixel_h, "lanczos", "center").movedim(1, -1)
        # keep only RGB in case an alpha channel is present
        stage_c = vae.encode(resized[:, :, :, :3])
        return ({"samples": stage_c},)
class StableCascade_StageC_VAEEncode_Exact_Tiled:
    """Tiled Stage-C VAE encode for images too large to encode in one pass."""
    def __init__(self, device="cpu"):
        self.device = device
    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "image": ("IMAGE",),
                "vae": ("VAE", ),
                "tile_size": ("INT", {"default": 512, "min": 320, "max": 4096, "step": 64}),
                "overlap": ("INT", {"default": 16, "min": 8, "max": 128, "step": 8}),
            }
        }
    RETURN_TYPES = ("LATENT",)
    RETURN_NAMES = ("stage_c",)
    FUNCTION = "generate"
    CATEGORY = "RES4LYF/vae"
    def generate(self, image, vae, tile_size, overlap):
        """Encode in overlapping tiles and feather-blend via tiled_scale_multidim."""
        # Encoding SHRINKS each spatial dim by vae.downscale_ratio (32 for stage C),
        # so the scale factor handed to tiled_scale_multidim must be the reciprocal.
        # The original passed vae.downscale_ratio directly, which made the output
        # buffer 32x too large per dim and misplaced every tile.
        upscale_amount = 1.0 / vae.downscale_ratio
        image = image.movedim(-1, 1)  # BHWC -> BCHW
        encode_fn = lambda img: vae.encode(img.to(vae.device)).to("cpu")
        # NOTE(review): tiles are (tile_size // 8) input pixels per side — confirm
        # this granularity is intended rather than tile_size itself.
        c_latent = tiled_scale_multidim(image,
                                        encode_fn,
                                        tile = (tile_size // 8, tile_size // 8),
                                        overlap = overlap,
                                        upscale_amount = upscale_amount,
                                        out_channels = 16,
                                        output_device = self.device
                                        )
        return ({"samples": c_latent,},)
@torch.inference_mode()
def tiled_scale_multidim(samples,
                         function,
                         tile = (64, 64),
                         overlap = 8,
                         upscale_amount = 4,
                         out_channels = 3,
                         output_device = "cpu",
                         pbar = None
                         ):
    """Apply `function` to overlapping tiles of `samples` and feather-blend the results.

    Args:
        samples: (B, C, *spatial) input tensor; tiles are cut along the spatial dims.
        function: callable mapping an input tile to its processed version; its
            spatial output size must be the input size times `upscale_amount`.
        tile: per-dimension tile size; len(tile) fixes the number of spatial dims.
        overlap: tile overlap in input units, blended with a linear feather ramp.
        upscale_amount: spatial scale factor applied by `function`.
        out_channels: channel count of the output tensor.
        output_device: device on which the accumulated output is allocated.
        pbar: optional progress bar exposing update(1); one tick per tile.

    Returns:
        (B, out_channels, round(spatial * upscale_amount)) tensor on `output_device`.
    """
    dims = len(tile)
    output_shape = [samples.shape[0], out_channels] + list(map(lambda a: round(a * upscale_amount), samples.shape[2:]))
    output = torch.zeros(output_shape, device=output_device)
    for b in range(samples.shape[0]):
        # step each spatial dim by (tile - overlap) so neighboring tiles overlap
        for it in itertools.product(*map(lambda a: range(0, a[0], a[1] - overlap), zip(samples.shape[2:], tile))):
            s_in = samples[b:b+1]
            upscaled = []
            for d in range(dims):
                # clamp the tile position so the final tile stays inside the input
                pos = max(0, min(s_in.shape[d + 2] - overlap, it[d]))
                l = min(tile[d], s_in.shape[d + 2] - pos)
                s_in = s_in.narrow(d + 2, pos, l)
                upscaled.append(round(pos * upscale_amount))
            ps = function(s_in).to(output_device)
            # linear feather mask: ramps from 1/feather up to 1 across the overlap band
            mask = torch.ones_like(ps)
            feather = round(overlap * upscale_amount)
            for t in range(feather):
                for d in range(2, dims + 2):
                    mask.narrow(d, t, 1).mul_((1.0 / feather) * (t + 1))
                    mask.narrow(d, mask.shape[d] - 1 - t, 1).mul_((1.0 / feather) * (t + 1))
            # accumulate the masked tile into the matching window of the output
            # (narrow returns a view, so add_ writes through to `output`)
            o = output[b:b+1]
            for d in range(dims):
                o = o.narrow(d + 2, upscaled[d], mask.shape[d + 2])
            o.add_(ps * mask)
            if pbar is not None:
                pbar.update(1)
    return output
class EmptyLatentImageCustom:
def __init__(self):
self.device = comfy.model_management.intermediate_device()
@classmethod
def INPUT_TYPES(cls):
return {
"required": {
"width": ("INT", {"default": 24, "min": 1, "max": MAX_RESOLUTION, "step": 1}),
"height": ("INT", {"default": 24, "min": 1, "max": MAX_RESOLUTION, "step": 1}),
"batch_size": ("INT", {"default": 1, "min": 1, "max": 4096}),
"channels": (['4', '16'], {"default": '4'}),
"mode": (['sdxl', 'cascade_b', 'cascade_c', 'exact'], {"default": 'default'}),
"compression": ("INT", {"default": 42, "min": 4, "max": 128, "step": 1}),
"precision": (['fp16', 'fp32', 'fp64'], {"default": 'fp32'}),
}
}
RETURN_TYPES = ("LATENT",)
FUNCTION = "generate"
CATEGORY = "RES4LYF/latents"
def generate(self,
width,
height,
batch_size,
channels,
mode,
compression,
precision
):
c = int(channels)
ratio = 1
match mode:
case "sdxl":
ratio = 8
case "cascade_b":
ratio = 4
case "cascade_c":
ratio = compression
case "exact":
ratio = 1
dtype=torch.float32
match precision:
case "fp16":
dtype=torch.float16
case "fp32":
dtype=torch.float32
case "fp64":
dtype=torch.float64
latent = torch.zeros([batch_size,
c,
height // ratio,
width // ratio],
dtype=dtype,
device=self.device)
return ({"samples":latent}, )
class EmptyLatentImage64:
    """Create an empty 4-channel SD-style latent stored in float64."""
    def __init__(self):
        self.device = comfy.model_management.intermediate_device()
    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "width": ("INT", {"default": 1024, "min": 16, "max": MAX_RESOLUTION, "step": 8}),
                "height": ("INT", {"default": 1024, "min": 16, "max": MAX_RESOLUTION, "step": 8}),
                "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096})
            }
        }
    RETURN_TYPES = ("LATENT",)
    RETURN_NAMES = ("latent",)
    FUNCTION = "generate"
    CATEGORY = "RES4LYF/latents"
    def generate(self, width, height, batch_size=1):
        """Return an all-zero (batch, 4, H//8, W//8) float64 latent."""
        shape = (batch_size, 4, height // 8, width // 8)
        return ({"samples": torch.zeros(shape, dtype=torch.float64, device=self.device)}, )
class LatentNoiseBatch_perlin:
    # Generates a batch of perlin-derived noise latents (4 channels, SD-style
    # 8x spatial downscale), then pushes the values toward a gaussian profile
    # via erfinv.
    def __init__(self):
        pass
    @classmethod
    def INPUT_TYPES(cls):
        return {"required": {
            "seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}),
            "width": ("INT", {"default": 1024, "min": 8, "max": MAX_RESOLUTION, "step": 8}),
            "height": ("INT", {"default": 1024, "min": 8, "max": MAX_RESOLUTION, "step": 8}),
            "batch_size": ("INT", {"default": 1, "min": 1, "max": 256}),
            "detail_level": ("FLOAT", {"default": 0, "min":-1, "max": 1.0, "step": 0.1}),
            },
            "optional": {
                "details": ("SIGMAS", ),
            }
        }
    RETURN_TYPES = ("LATENT",)
    RETURN_NAMES = ("latent",)
    FUNCTION = "create_noisy_latents_perlin"
    CATEGORY = "RES4LYF/noise"
    # found at https://gist.github.com/vadimkantorov/ac1b097753f217c5c11bc2ff396e0a57
    # which was ported from https://github.com/pvigier/perlin-numpy/blob/master/perlin2d.py
    def rand_perlin_2d(self, shape, res, fade = lambda t: 6*t**5 - 15*t**4 + 10*t**3):
        # Classic 2D Perlin noise on a (shape[0], shape[1]) grid using
        # res[0] x res[1] gradient cells; `fade` is the standard quintic smoothstep.
        delta = (res[0] / shape[0], res[1] / shape[1])
        d = (shape[0] // res[0], shape[1] // res[1])
        # fractional position of each pixel inside its gradient cell
        grid = torch.stack(torch.meshgrid(torch.arange(0, res[0], delta[0]), torch.arange(0, res[1], delta[1])), dim = -1) % 1
        # one random unit gradient per lattice corner
        angles = 2*math.pi*torch.rand(res[0]+1, res[1]+1)
        gradients = torch.stack((torch.cos(angles), torch.sin(angles)), dim = -1)
        # tile each cell's corner gradient across the pixels that cell covers
        tile_grads = lambda slice1, slice2: gradients[slice1[0]:slice1[1], slice2[0]:slice2[1]].repeat_interleave(d[0], 0).repeat_interleave(d[1], 1)
        dot = lambda grad, shift: (torch.stack((grid[:shape[0],:shape[1],0] + shift[0], grid[:shape[0],:shape[1], 1] + shift[1] ), dim = -1) * grad[:shape[0], :shape[1]]).sum(dim = -1)
        # dot products against the four surrounding corners, then bilinear
        # interpolation weighted by the fade curve
        n00 = dot(tile_grads([0, -1], [0, -1]), [0, 0])
        n10 = dot(tile_grads([1, None], [0, -1]), [-1, 0])
        n01 = dot(tile_grads([0, -1],[1, None]), [0, -1])
        n11 = dot(tile_grads([1, None], [1, None]), [-1,-1])
        t = fade(grid[:shape[0], :shape[1]])
        return math.sqrt(2) * torch.lerp(torch.lerp(n00, n10, t[..., 0]), torch.lerp(n01, n11, t[..., 0]), t[..., 1])
    def rand_perlin_2d_octaves(self, shape, res, octaves=1, persistence=0.5):
        # Sum `octaves` layers of Perlin noise, doubling the frequency and
        # scaling the amplitude by `persistence` each layer.
        noise = torch.zeros(shape)
        frequency = 1
        amplitude = 1
        for _ in range(octaves):
            noise += amplitude * self.rand_perlin_2d(shape, (frequency*res[0], frequency*res[1]))
            frequency *= 2
            amplitude *= persistence
        # decorrelate values by folding a large multiple into [0, 1)
        noise = torch.remainder(torch.abs(noise)*1000000,11)/11
        # noise = (torch.sin(torch.remainder(noise*1000000,83))+1)/2
        return noise
    def scale_tensor(self, x):
        # Min-max rescale to [0, 1].
        min_value = x.min()
        max_value = x.max()
        x = (x - min_value) / (max_value - min_value)
        return x
    def create_noisy_latents_perlin(self, seed, width, height, batch_size, detail_level, details=None):
        # Returns {"samples": (batch, 4, H//8, W//8)} of perlin-seeded noise.
        # erfinv * sqrt(2) maps the [0, 1) perlin values onto a gaussian profile;
        # (1 + details[i]/10) sharpens/softens the distribution per batch item.
        if details is None:
            details = torch.full((10000,), detail_level)
        else:
            details = detail_level * details
        torch.manual_seed(seed)
        noise = torch.zeros((batch_size, 4, height // 8, width // 8), dtype=torch.float32, device="cpu").cpu()
        for i in range(batch_size):
            for j in range(4):
                noise_values = self.rand_perlin_2d_octaves((height // 8, width // 8), (1,1), 1, 1)
                # NOTE(review): details is indexed per batch item i (not per channel);
                # assumes len(details) >= batch_size when a schedule is connected.
                result = (1+details[i]/10)*torch.erfinv(2 * noise_values - 1) * (2 ** 0.5)
                result = torch.clamp(result,-5,5)
                noise[i, j, :, :] = result
        return ({"samples": noise},)
class LatentNoiseBatch_gaussian_channels:
    """Build a batch of `steps` gaussian-noised copies of a latent, with
    per-step schedules and per-channel mean offsets."""
    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "latent": ("LATENT",),
                "mean": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.001}),
                "mean_luminosity": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.001}),
                "mean_cyan_red": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.001}),
                "mean_lime_purple": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.001}),
                "mean_pattern_structure": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.001}),
                "std": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.001}),
                "steps": ("INT", {"default": 0, "min": -10000, "max": 10000}),
                "seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}),
            },
            "optional": {
                "means": ("SIGMAS", ),
                "mean_luminositys": ("SIGMAS", ),
                "mean_cyan_reds": ("SIGMAS", ),
                "mean_lime_purples": ("SIGMAS", ),
                "mean_pattern_structures": ("SIGMAS", ),
                "stds": ("SIGMAS", ),
            }
        }
    RETURN_TYPES = ("LATENT",)
    RETURN_NAMES = ("latent",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/noise"
    @staticmethod
    def gaussian_noise_channels(x, mean_luminosity = -0.1, mean_cyan_red = 0.0, mean_lime_purple=0.0, mean_pattern_structure=0.0):
        """Add a scalar offset to each of the 4 channels of a (1,4,H,W) tensor."""
        x = x.squeeze(0)
        luminosity        = x[0:1] + mean_luminosity
        cyan_red          = x[1:2] + mean_cyan_red
        lime_purple       = x[2:3] + mean_lime_purple
        pattern_structure = x[3:4] + mean_pattern_structure
        x = torch.unsqueeze(torch.cat([luminosity, cyan_red, lime_purple, pattern_structure]), 0)
        return x
    def main(self, latent, steps, seed,
             mean, mean_luminosity, mean_cyan_red, mean_lime_purple, mean_pattern_structure, std,
             means=None, mean_luminositys=None, mean_cyan_reds=None, mean_lime_purples=None, mean_pattern_structures=None, stds=None):
        """Return {"samples": (steps, 4, H, W)}: input latent plus offset gaussian noise per step.

        Raises:
            ValueError: if steps == 0 and no 'means' schedule is connected
                (the step count is inferred from len(means) in that case).
        """
        if steps == 0:
            # The original crashed with TypeError (len(None)) when the optional
            # 'means' input was not connected; fail with a clear message instead.
            if means is None:
                raise ValueError("steps == 0 requires a 'means' input to infer the step count")
            steps = len(means)
        x = latent["samples"]
        b, c, h, w = x.shape
        # NOTE(review): output is hard-coded to 4 channels to match
        # gaussian_noise_channels; inputs with c != 4 are not supported.
        noise_latents = torch.zeros([steps, 4, h, w], dtype=x.dtype, layout=x.layout, device=x.device)
        noise_sampler = NOISE_GENERATOR_CLASSES.get('gaussian')(x=x, seed = seed)
        # expand each scalar into a per-step schedule (SIGMAS inputs override)
        means                   = initialize_or_scale(means                  , mean                  , steps)
        mean_luminositys        = initialize_or_scale(mean_luminositys       , mean_luminosity       , steps)
        mean_cyan_reds          = initialize_or_scale(mean_cyan_reds         , mean_cyan_red         , steps)
        mean_lime_purples       = initialize_or_scale(mean_lime_purples      , mean_lime_purple      , steps)
        mean_pattern_structures = initialize_or_scale(mean_pattern_structures, mean_pattern_structure, steps)
        stds = initialize_or_scale(stds, std, steps)
        for i in range(steps):
            noise = noise_sampler(mean=means[i].item(), std=stds[i].item())
            noise = self.gaussian_noise_channels(noise, mean_luminositys[i].item(), mean_cyan_reds[i].item(), mean_lime_purples[i].item(), mean_pattern_structures[i].item())
            noise_latents[i] = x + noise
        return ({"samples": noise_latents}, )
class LatentNoiseBatch_gaussian:
    """Build a batch of gaussian noise latents, one per step, with optional
    per-step mean/std schedules."""
    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "latent": ("LATENT",),
                "mean": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.001}),
                "std": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.001}),
                "steps": ("INT", {"default": 0, "min": -10000, "max": 10000}),
                "seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}),
            },
            "optional": {
                "means": ("SIGMAS", ),
                "stds": ("SIGMAS", ),
                "steps_": ("SIGMAS", ),
            }
        }
    RETURN_TYPES = ("LATENT",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/noise"
    def main(self, latent, mean, std, steps, seed, means=None, stds=None, steps_=None):
        """Sample `steps` gaussian noise tensors shaped like the input latent."""
        if steps_ is not None:
            steps = len(steps_)  # an attached schedule overrides the step count
        mean_schedule = initialize_or_scale(means, mean, steps)
        std_schedule  = initialize_or_scale(stds,  std,  steps)
        x = latent["samples"]
        _, c, h, w = x.shape
        sampler = NOISE_GENERATOR_CLASSES.get('gaussian')(x=x, seed = seed)
        out = torch.zeros([steps, c, h, w], dtype=x.dtype, layout=x.layout, device=x.device)
        for i in range(steps):
            out[i] = sampler(mean=mean_schedule[i].item(), std=std_schedule[i].item())
        return ({"samples": out}, )
class LatentNoiseBatch_fractal:
    """Build a batch of fractal (1/f) noise latents, one per step."""
    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "latent": ("LATENT",),
                "alpha": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.001}),
                "k_flip": ("BOOLEAN", {"default": False}),
                "steps": ("INT", {"default": 0, "min": -10000, "max": 10000}),
                "seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}),
            },
            "optional": {
                "alphas": ("SIGMAS", ),
                "ks": ("SIGMAS", ),
                "steps_": ("SIGMAS", ),
            }
        }
    RETURN_TYPES = ("LATENT",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/noise"
    def main(self,
             latent,
             alpha,
             k_flip,
             steps,
             seed = 42,
             alphas = None,
             ks = None,
             sigmas_ = None,
             steps_ = None
             ):
        """Sample `steps` fractal noise tensors shaped like the input latent."""
        if steps_ is not None:
            steps = len(steps_)  # an attached schedule overrides the step count
        alpha_schedule = initialize_or_scale(alphas, alpha, steps)
        k_sign = -1 if k_flip else 1  # the boolean flip maps to the sign of k
        k_schedule = initialize_or_scale(ks, k_sign, steps)
        x = latent["samples"]
        _, c, h, w = x.shape
        sampler = NOISE_GENERATOR_CLASSES.get('fractal')(x=x, seed = seed)
        out = torch.zeros([steps, c, h, w], dtype=x.dtype, layout=x.layout, device=x.device)
        for i in range(steps):
            out[i] = sampler(alpha=alpha_schedule[i].item(), k=k_schedule[i].item(), scale=0.1)
        return ({"samples": out}, )
class LatentBatch_channels:
    """Offset, scale, or exponentiate the four channels of each latent in a batch."""
    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "latent": ("LATENT",),
                "mode": (["offset", "multiply", "power"],),
                "luminosity": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.01}),
                "cyan_red": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.01}),
                "lime_purple": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.01}),
                "pattern_structure": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.01}),
            },
            "optional": {
                "luminositys": ("SIGMAS", ),
                "cyan_reds": ("SIGMAS", ),
                "lime_purples": ("SIGMAS", ),
                "pattern_structures": ("SIGMAS", ),
            }
        }
    RETURN_TYPES = ("LATENT",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/latents"
    @staticmethod
    def latent_channels_multiply(x, luminosity = -0.1, cyan_red = 0.0, lime_purple=0.0, pattern_structure=0.0):
        """Scale each channel slice by its weight; returns shape (1, 4, H, W)."""
        parts = [x[0:1] * luminosity, x[1:2] * cyan_red, x[2:3] * lime_purple, x[3:4] * pattern_structure]
        return torch.cat(parts).unsqueeze(0)
    @staticmethod
    def latent_channels_offset(x, luminosity = -0.1, cyan_red = 0.0, lime_purple=0.0, pattern_structure=0.0):
        """Add a per-channel offset; returns shape (1, 4, H, W)."""
        parts = [x[0:1] + luminosity, x[1:2] + cyan_red, x[2:3] + lime_purple, x[3:4] + pattern_structure]
        return torch.cat(parts).unsqueeze(0)
    @staticmethod
    def latent_channels_power(x, luminosity = -0.1, cyan_red = 0.0, lime_purple=0.0, pattern_structure=0.0):
        """Raise each channel to a per-channel exponent; returns shape (1, 4, H, W)."""
        parts = [x[0:1] ** luminosity, x[1:2] ** cyan_red, x[2:3] ** lime_purple, x[3:4] ** pattern_structure]
        return torch.cat(parts).unsqueeze(0)
    def main(self,
             latent,
             mode,
             luminosity,
             cyan_red,
             lime_purple,
             pattern_structure,
             luminositys = None,
             cyan_reds = None,
             lime_purples = None,
             pattern_structures = None):
        """Apply the chosen channel operation to every latent in the batch,
        with optional per-item SIGMAS schedules overriding the scalar weights."""
        x = latent["samples"]
        b, c, h, w = x.shape
        out = torch.zeros([b, c, h, w], dtype=x.dtype, layout=x.layout, device=x.device)
        luminositys        = initialize_or_scale(luminositys, luminosity, b)
        cyan_reds          = initialize_or_scale(cyan_reds, cyan_red, b)
        lime_purples       = initialize_or_scale(lime_purples, lime_purple, b)
        pattern_structures = initialize_or_scale(pattern_structures, pattern_structure, b)
        op = {"offset":   self.latent_channels_offset,
              "multiply": self.latent_channels_multiply,
              "power":    self.latent_channels_power}[mode]
        for i in range(b):
            out[i] = op(x[i], luminositys[i].item(), cyan_reds[i].item(), lime_purples[i].item(), pattern_structures[i].item())
        return ({"samples": out}, )
class LatentBatch_channels_16:
    """Offset, scale, or exponentiate each of the 16 channels of a latent batch.

    Refactor of the original 16-fold copy/paste: channel weights are handled
    with loops via a private helper, but the public interface (method names,
    parameter names, order, defaults) is unchanged.
    """
    @classmethod
    def INPUT_TYPES(cls):
        required = {
            "latent": ("LATENT",),
            "mode": (["offset", "multiply", "power"],),
        }
        for n in range(1, 17):
            required["chan_%d" % n] = ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.01})
        optional = {"chan_%ds" % n: ("SIGMAS", ) for n in range(1, 17)}
        return {"required": required, "optional": optional}
    RETURN_TYPES = ("LATENT",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/latents"
    @staticmethod
    def _combine_channels(x, weights, op):
        # Apply op(channel_slice, weight) per channel, restack to (1, 16, H, W).
        parts = [op(x[i:i+1], w) for i, w in enumerate(weights)]
        return torch.unsqueeze(torch.cat(parts), 0)
    @staticmethod
    def latent_channels_multiply(x, chan_1 = 0.0, chan_2 = 0.0, chan_3 = 0.0, chan_4 = 0.0, chan_5 = 0.0, chan_6 = 0.0, chan_7 = 0.0, chan_8 = 0.0, chan_9 = 0.0, chan_10 = 0.0, chan_11 = 0.0, chan_12 = 0.0, chan_13 = 0.0, chan_14 = 0.0, chan_15 = 0.0, chan_16 = 0.0):
        """Scale each channel by its weight."""
        weights = (chan_1, chan_2, chan_3, chan_4, chan_5, chan_6, chan_7, chan_8, chan_9, chan_10, chan_11, chan_12, chan_13, chan_14, chan_15, chan_16)
        return LatentBatch_channels_16._combine_channels(x, weights, lambda a, w: a * w)
    @staticmethod
    def latent_channels_offset(x, chan_1 = 0.0, chan_2 = 0.0, chan_3 = 0.0, chan_4 = 0.0, chan_5 = 0.0, chan_6 = 0.0, chan_7 = 0.0, chan_8 = 0.0, chan_9 = 0.0, chan_10 = 0.0, chan_11 = 0.0, chan_12 = 0.0, chan_13 = 0.0, chan_14 = 0.0, chan_15 = 0.0, chan_16 = 0.0):
        """Add a per-channel offset."""
        weights = (chan_1, chan_2, chan_3, chan_4, chan_5, chan_6, chan_7, chan_8, chan_9, chan_10, chan_11, chan_12, chan_13, chan_14, chan_15, chan_16)
        return LatentBatch_channels_16._combine_channels(x, weights, lambda a, w: a + w)
    @staticmethod
    def latent_channels_power(x, chan_1 = 0.0, chan_2 = 0.0, chan_3 = 0.0, chan_4 = 0.0, chan_5 = 0.0, chan_6 = 0.0, chan_7 = 0.0, chan_8 = 0.0, chan_9 = 0.0, chan_10 = 0.0, chan_11 = 0.0, chan_12 = 0.0, chan_13 = 0.0, chan_14 = 0.0, chan_15 = 0.0, chan_16 = 0.0):
        """Raise each channel to a per-channel exponent."""
        weights = (chan_1, chan_2, chan_3, chan_4, chan_5, chan_6, chan_7, chan_8, chan_9, chan_10, chan_11, chan_12, chan_13, chan_14, chan_15, chan_16)
        return LatentBatch_channels_16._combine_channels(x, weights, lambda a, w: a ** w)
    def main(self, latent, mode,
             chan_1, chan_2, chan_3, chan_4, chan_5, chan_6, chan_7, chan_8, chan_9, chan_10, chan_11, chan_12, chan_13, chan_14, chan_15, chan_16,
             chan_1s=None, chan_2s=None, chan_3s=None, chan_4s=None, chan_5s=None, chan_6s=None, chan_7s=None, chan_8s=None, chan_9s=None, chan_10s=None, chan_11s=None, chan_12s=None, chan_13s=None, chan_14s=None, chan_15s=None, chan_16s=None):
        """Apply the selected channel operation to each latent in the batch;
        the optional SIGMAS inputs give per-batch-item weight schedules."""
        x = latent["samples"]
        b, c, h, w = x.shape
        out = torch.zeros([b, c, h, w], dtype=x.dtype, layout=x.layout, device=x.device)
        defaults  = (chan_1, chan_2, chan_3, chan_4, chan_5, chan_6, chan_7, chan_8, chan_9, chan_10, chan_11, chan_12, chan_13, chan_14, chan_15, chan_16)
        schedules = (chan_1s, chan_2s, chan_3s, chan_4s, chan_5s, chan_6s, chan_7s, chan_8s, chan_9s, chan_10s, chan_11s, chan_12s, chan_13s, chan_14s, chan_15s, chan_16s)
        scaled = [initialize_or_scale(s, d, b) for s, d in zip(schedules, defaults)]
        op = {"offset":   self.latent_channels_offset,
              "multiply": self.latent_channels_multiply,
              "power":    self.latent_channels_power}[mode]
        for i in range(b):
            out[i] = op(x[i], *[t[i].item() for t in scaled])
        return ({"samples": out}, )
class latent_normalize_channels:
    """Normalize/center/standardize a latent, over the whole tensor or per channel.

    Fix: operates on a copy — the original wrote back into the input tensor
    in "channels" mode (mutating the caller's latent dict) while "full" mode
    did not; both paths now leave the input untouched.
    """
    def __init__(self):
        pass
    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "latent": ("LATENT", ),
                "mode": (["full", "channels"],),
                "operation": (["normalize", "center", "standardize"],),
            },
        }
    RETURN_TYPES = ("LATENT",)
    RETURN_NAMES = ("passthrough",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/latents"
    def main(self, latent, mode, operation):
        """Return a new latent dict with mean/std adjusted.

        mode "full": statistics over the whole tensor.
        mode "channels": statistics per (batch, channel) over the spatial dims
        (torch's default unbiased std, matching the original per-slice .std()).
        """
        x = latent["samples"].clone()  # never mutate the caller's tensor in place
        if mode == "full":
            mean, std = x.mean(), x.std()
        elif mode == "channels":
            mean = x.mean(dim=(2, 3), keepdim=True)
            std = x.std(dim=(2, 3), keepdim=True)
        else:
            return ({"samples": x},)  # unknown mode: pass through, as before
        if operation == "normalize":
            x = (x - mean) / std
        elif operation == "center":
            x = x - mean
        elif operation == "standardize":
            x = x / std
        return ({"samples": x},)
class latent_channelwise_match:
    def __init__(self):
        pass
    @classmethod
    def INPUT_TYPES(cls):
        # Presumably matches per-channel statistics of latent_target to
        # latent_source (see main below); the optional masks restrict which
        # regions contribute on each side, and extra_options is a free-form
        # option string parsed by the extra-options helpers.
        return {
            "required": {
                "model": ("MODEL",),
                "latent_target": ("LATENT", ),
                "latent_source": ("LATENT", ),
            },
            "optional": {
                "mask_target": ("MASK", ),
                "mask_source": ("MASK", ),
                "extra_options": ("STRING", {"default": "", "multiline": True}),
            }
        }
RETURN_TYPES = ("LATENT",)
RETURN_NAMES = ("latent_matched",)
FUNCTION = "main"
CATEGORY = "RES4LYF/latents"
def main(self,
model,
latent_target,
mask_target,
latent_source,
mask_source,
extra_options
):
#EO = ExtraOptions(extra_options)
dtype = latent_target['samples'].dtype
exclude_channels = get_extra_options_list(exclude_channels, -1, extra_options)
if extra_options_flag("disable_process_latent", extra_options):
x_target = latent_target['samples'].clone()
x_source = latent_source['samples'].clone()
else:
x_target = model.model.process_latent_in(latent_target['samples']).clone().to(torch.float64)
x_source = model.model.process_latent_in(latent_source['samples']).clone().to(torch.float64)
if mask_target is None:
mask_target = torch.ones_like(x_target)
else:
mask_target = mask_target.unsqueeze(1)
mask_target = mask_target.repeat(1, x_target.shape[1], 1, 1)
mask_target = F.interpolate(mask_target, size=(x_target.shape[2], x_target.shape[3]), mode='bilinear', align_corners=False)
mask_target = mask_target.to(x_target.dtype).to(x_target.device)
if mask_source is None:
mask_source = torch.ones_like(x_target)
else:
mask_source = mask_source.unsqueeze(1)
mask_source = mask_source.repeat(1, x_target.shape[1], 1, 1)
mask_source = F.interpolate(mask_source, size=(x_target.shape[2], x_target.shape[3]), mode='bilinear', align_corners=False)
mask_source = mask_source.to(x_target.dtype).to(x_target.device)
x_target_masked = x_target * ((mask_target==1)*mask_target)
x_target_masked_inv = x_target - x_target_masked
#x_source_masked = x_source * ((mask_source==1)*mask_source)
x_matched = torch.zeros_like(x_target)
for n in range(x_matched.shape[1]):
if n in exclude_channels:
x_matched[0][n] = x_target[0][n]
continue
x_target_masked_values = x_target[0][n][mask_target[0][n] == 1]
x_source_masked_values = x_source[0][n][mask_source[0][n] == 1]
x_target_masked_values_mean = x_target_masked_values.mean()
x_target_masked_values_std = x_target_masked_values.std()
x_target_masked_source_mean = x_source_masked_values.mean()
x_target_masked_source_std = x_source_masked_values.std()
x_target_mean = x_target.mean()
x_target_std = x_target.std()
x_source_mean = x_source.mean()
x_source_std = x_source.std()
#if re.search(r"\benable_std\b", extra_options) == None:
if not extra_options_flag("enable_std", extra_options):
x_target_std = x_target_masked_values_std = x_target_masked_source_std = 1
#if re.search(r"\bdisable_mean\b", extra_options):
if extra_options_flag("disable_mean", extra_options):
x_target_mean = x_target_masked_values_mean = x_target_masked_source_mean = 1
#if re.search(r"\bdisable_masks\b", extra_options):
if extra_options_flag("disable_masks", extra_options):
x_matched[0][n] = (x_target[0][n] - x_target_mean) / x_target_std
x_matched[0][n] = (x_matched[0][n] * x_source_std) + x_source_mean
else:
x_matched[0][n] = (x_target_masked[0][n] - x_target_masked_values_mean) / x_target_masked_values_std
x_matched[0][n] = (x_matched[0][n] * x_target_masked_source_std) + x_target_masked_source_mean
x_matched[0][n] = x_target_masked_inv[0][n] + x_matched[0][n] * ((mask_target[0][n]==1)*mask_target[0][n])
#if re.search(r"\bdisable_process_latent\b", extra_options) == None:
if not extra_options_flag("disable_process_latent", extra_options):
x_matched = model.model.process_latent_out(x_matched).clone()
return ({"samples": x_matched.to(dtype)}, )
================================================
FILE: nodes_misc.py
================================================
import folder_paths
import os
import random
class SetImageSize:
    """Emit a (width, height) integer pair for downstream image-size inputs."""

    RETURN_TYPES = ("INT", "INT",)
    RETURN_NAMES = ("width","height",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/images"
    DESCRIPTION = "Generate a pair of integers for image sizes."

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "width" : ("INT", {"default": 1024, "min": 1, "max": 10000}),
                "height": ("INT", {"default": 1024, "min": 1, "max": 10000}),
            },
            "optional": {},
        }

    def main(self, width, height):
        # pure passthrough of the two widget values
        return (width, height,)
class SetImageSizeWithScale:
    """Emit base width/height plus the same pair multiplied by a float scale."""

    RETURN_TYPES = ("INT", "INT", "INT", "INT",)
    RETURN_NAMES = ("width","height","width_scaled","height_scaled",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/images"
    DESCRIPTION = "Generate a pair of integers for image sizes."

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "width" : ("INT", {"default": 1024, "min": 1, "max": 10000}),
                "height": ("INT", {"default": 1024, "min": 1, "max": 10000}),
                "scale_by": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10000, "step":0.01}),
            },
            "optional": {},
        }

    def main(self, width, height, scale_by):
        # scaled values are truncated toward zero, matching int() semantics
        scaled_w = int(width * scale_by)
        scaled_h = int(height * scale_by)
        return (width, height, scaled_w, scaled_h)
class TextBox1:
    """A single multiline text widget passed straight through."""

    RETURN_TYPES = ("STRING",)
    RETURN_NAMES = ("text1",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/text"
    DESCRIPTION = "Multiline textbox."

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "text1": ("STRING", {"default": "", "multiline": True}),
            },
            "optional": {},
        }

    def main(self, text1):
        return (text1,)
class TextBox2:
    """Two multiline text widgets passed straight through."""

    RETURN_TYPES = ("STRING", "STRING",)
    RETURN_NAMES = ("text1", "text2",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/text"
    DESCRIPTION = "Multiline textbox."

    @classmethod
    def INPUT_TYPES(cls):
        text_spec = lambda: ("STRING", {"default": "", "multiline": True})
        return {
            "required": {
                "text1": text_spec(),
                "text2": text_spec(),
            },
            "optional": {},
        }

    def main(self, text1, text2,):
        return (text1, text2,)
class TextBox3:
    """Three multiline text widgets passed straight through."""

    RETURN_TYPES = ("STRING", "STRING","STRING",)
    RETURN_NAMES = ("text1", "text2", "text3",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/text"
    DESCRIPTION = "Multiline textbox."

    @classmethod
    def INPUT_TYPES(cls):
        required = {}
        for key in ("text1", "text2", "text3"):
            required[key] = ("STRING", {"default": "", "multiline": True})
        return {"required": required, "optional": {}}

    def main(self, text1, text2, text3 ):
        return (text1, text2, text3, )
class TextLoadFile:
    """Load the contents of a .txt file from the ComfyUI input directory."""

    RETURN_TYPES = ("STRING",)
    RETURN_NAMES = ("text",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/text"

    @classmethod
    def INPUT_TYPES(cls):
        input_dir = folder_paths.get_input_directory()
        txt_files = sorted(
            f for f in os.listdir(input_dir)
            if os.path.isfile(os.path.join(input_dir, f)) and f.lower().endswith('.txt')
        )
        return {
            "required": {
                "text_file": (txt_files, {"text_upload": True})
            }
        }

    def main(self, text_file):
        # resolve against the input directory at execution time, not node load time
        text_file_path = os.path.join(folder_paths.get_input_directory(), text_file)
        if not os.path.exists(text_file_path):
            print(f"Error: The file `{text_file_path}` cannot be found.")
            return ("",)
        with open(text_file_path, "r", encoding="utf-8") as f:
            return (f.read(),)
class TextShuffle:
    """Split text on a separator, shuffle the pieces deterministically by seed,
    and rejoin with the same separator."""

    RETURN_TYPES = ("STRING",)
    RETURN_NAMES = ("shuffled_text",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/text"

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "text": ("STRING", {"forceInput": True}),
                "separator": ("STRING", {"default": " ", "multiline": False}),
                "seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}),
            },
            "optional": {},
        }

    def main(self, text, separator, seed, ):
        # seeding the global RNG makes the shuffle reproducible per seed
        if seed is not None:
            random.seed(seed)
        pieces = text.split(separator)
        random.shuffle(pieces)
        return (separator.join(pieces), )
def truncate_tokens(text, truncate_to, clip, clip_type, stop_token):
    # Iteratively drop trailing words from `text` until its tokenized length
    # (in the given tokenizer stream, e.g. 'l', 'g', or 't5xxl') fits within
    # `truncate_to` tokens. The token count is measured by scanning for
    # `stop_token` (end-of-text id) and subtracting the special tokens the
    # tokenizer adds (BOS/EOS for clip l/g, EOS for t5xxl).
    if truncate_to == 0:
        return ""
    truncate_words_to = truncate_to
    total = truncate_to + 1  # force at least one pass of the loop
    tokens = {}
    while total > truncate_to:
        words = text.split()
        truncated_words = words[:truncate_words_to]
        truncated_text = " ".join(truncated_words)
        try:
            tokens[clip_type] = clip.tokenize(truncated_text)[clip_type]
        except:
            # assumes tokenize may raise for tokenizer streams the connected
            # CLIP does not provide — TODO confirm; returns "" in that case
            return ""
        if clip_type not in tokens:
            # NOTE(review): unreachable — the try either assigns this key or
            # returns above; kept for safety
            return truncated_text
        clip_end=0
        for b in range(len(tokens[clip_type])):
            for i in range(len(tokens[clip_type][b])):
                clip_end += 1
                if tokens[clip_type][b][i][0] == stop_token:
                    break
        # discount the special tokens added per stream type
        if clip_type == 'l' or clip_type == 'g':
            clip_end -= 2
        elif clip_type == 't5xxl':
            clip_end -= 1
        total = clip_end
        # NOTE(review): truncate_words_to can go negative if the text never
        # fits, making words[:truncate_words_to] behave unexpectedly — verify
        truncate_words_to -= 1
    return truncated_text
class TextShuffleAndTruncate:
    """Shuffle text on a separator, then truncate it by word count and by
    per-tokenizer token counts (clip_l, clip_g, t5)."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "text": ("STRING", {"forceInput": True}),
                "separator": ("STRING", {"default": " ", "multiline": False}),
                "truncate_words_to": ("INT", {"default": 77, "min": 1, "max": 10000}),
                "truncate_tokens_to": ("INT", {"default": 77, "min": 1, "max": 10000}),
                "seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}),
            },
            "optional": {
                "clip": ("CLIP", ),
            }
        }

    RETURN_TYPES = ("STRING","STRING","STRING","STRING","STRING",)
    RETURN_NAMES = ("shuffled_text", "text_words", "text_clip_l", "text_clip_g", "text_t5",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/text"

    def main(self, text, separator, truncate_words_to, truncate_tokens_to, seed, clip=None):
        """Returns (shuffled, word-truncated, clip_l-, clip_g-, t5-truncated) text.
        Token outputs are None when no CLIP is connected."""
        if seed is not None:
            random.seed(seed)
        parts = text.split(separator)
        random.shuffle(parts)
        shuffled_text = separator.join(parts)

        words = shuffled_text.split()
        truncated_text = " ".join(words[:truncate_words_to])

        # BUGFIX: `clip` is optional, but clip.tokenizer was previously
        # dereferenced unconditionally, raising AttributeError whenever the
        # input was left unconnected. Mirror TextTruncateTokens' behavior.
        if clip is None:
            return (shuffled_text, truncated_text, None, None, None,)

        # pile_t5xl tokenizers expose a different stream name than t5xxl
        t5_name = "t5xxl"
        if hasattr(clip.tokenizer, "clip_name"):
            t5_name = "t5xxl" if clip.tokenizer.clip_name != "pile_t5xl" else "pile_t5xl"

        # 49407 is the CLIP end-of-text token; 1 is the T5 EOS token
        text_clip_l = truncate_tokens(truncated_text, truncate_tokens_to, clip, "l", 49407)
        text_clip_g = truncate_tokens(truncated_text, truncate_tokens_to, clip, "g", 49407)
        text_t5     = truncate_tokens(truncated_text, truncate_tokens_to, clip, t5_name, 1)
        return (shuffled_text, truncated_text, text_clip_l, text_clip_g, text_t5,)
class TextTruncateTokens:
    """Truncate text by word count and by per-tokenizer token counts."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "text": ("STRING", {"forceInput": True}),
                "truncate_words_to": ("INT", {"default": 30, "min": 0, "max": 10000}),
                "truncate_clip_l_to": ("INT", {"default": 77, "min": 0, "max": 10000}),
                "truncate_clip_g_to": ("INT", {"default": 77, "min": 0, "max": 10000}),
                "truncate_t5_to": ("INT", {"default": 77, "min": 0, "max": 10000}),
            },
            "optional": {
                "clip": ("CLIP", ),
            }
        }

    RETURN_TYPES = ("STRING","STRING","STRING","STRING",)
    RETURN_NAMES = ("text_words","text_clip_l","text_clip_g","text_t5",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/text"

    def main(self, text, truncate_words_to, truncate_clip_l_to, truncate_clip_g_to, truncate_t5_to, clip=None):
        """Returns (word-truncated text, clip_l-, clip_g-, t5-truncated text);
        token outputs are None when no CLIP is connected."""
        words = text.split()
        truncated_text = " ".join(words[:truncate_words_to])

        # BUGFIX: clip.tokenizer was accessed *before* the `clip is not None`
        # guard below, so leaving the optional input unconnected raised
        # AttributeError. The t5 stream name is now resolved inside the guard.
        if clip is None:
            return (truncated_text, None, None, None,)

        # pile_t5xl tokenizers expose a different stream name than t5xxl
        t5_name = "t5xxl"
        if hasattr(clip.tokenizer, "clip_name"):
            t5_name = "t5xxl" if clip.tokenizer.clip_name != "pile_t5xl" else "pile_t5xl"

        # NOTE: clip_l/clip_g truncate the *full* text while t5 truncates the
        # word-truncated text — preserved from the original implementation.
        # 49407 is the CLIP end-of-text token; 1 is the T5 EOS token.
        text_clip_l = truncate_tokens(text, truncate_clip_l_to, clip, "l", 49407)
        text_clip_g = truncate_tokens(text, truncate_clip_g_to, clip, "g", 49407)
        text_t5     = truncate_tokens(truncated_text, truncate_t5_to, clip, t5_name, 1)
        return (truncated_text, text_clip_l, text_clip_g, text_t5,)
class TextConcatenate:
    """Join two external strings with a separator."""

    RETURN_TYPES = ("STRING",)
    RETURN_NAMES = ("text",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/text"

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {},
            "optional": {
                "text_1": ("STRING", {"multiline": False, "default": "", "forceInput": True}),
                "text_2": ("STRING", {"multiline": False, "default": "", "forceInput": True}),
                "separator": ("STRING", {"multiline": False, "default": ""}),
            },
        }

    def main(self, text_1="", text_2="", separator=""):
        joined = separator.join((text_1, text_2))
        return (joined, )
class TextBoxConcatenate:
    """Multiline textbox whose contents are joined with an external string
    input, either appended or prepended."""

    RETURN_TYPES = ("STRING",)
    RETURN_NAMES = ("text",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/text"
    DESCRIPTION = "Multiline textbox with concatenate functionality."

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "text": ("STRING", {"default": "", "multiline": True}),
            },
            "optional": {
                "text_external": ("STRING", {"multiline": False, "default": "", "forceInput": True}),
                "separator": ("STRING", {"multiline": False, "default": ""}),
                "mode": (['append_external_input', 'prepend_external_input',],),
            },
        }

    def main(self, text="", text_external="", separator="", mode="append_external_input"):
        # the mode decides which side of the separator the external text lands on
        if mode == "prepend_external_input":
            return (text_external + separator + text, )
        if mode == "append_external_input":
            return (text + separator + text_external, )
        return (text, )
class SeedGenerator:
    """Expose a seed widget and pass through both seed and seed+1."""

    RETURN_TYPES = ("INT", "INT",)
    RETURN_NAMES = ("seed", "seed+1",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/utilities"

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}),
            },
            "optional": {},
        }

    def main(self, seed,):
        return (seed, seed + 1,)
================================================
FILE: nodes_precision.py
================================================
import torch
from .helper import precision_tool
class set_precision:
    """Cast a latent to fp16/fp32/fp64, optionally switching the torch default
    dtype at the same time."""
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "latent_image": ("LATENT", ),
                "precision": (["16", "32", "64"], ),
                "set_default": ("BOOLEAN", {"default": False})
            },
        }

    RETURN_TYPES = ("LATENT",)
    RETURN_NAMES = ("passthrough",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/precision"

    def main(self,
             precision = "32",
             latent_image = None,
             set_default = False
             ):
        if precision == "16":
            target = torch.float16
        elif precision == "32":
            target = torch.float32
        elif precision == "64":
            target = torch.float64
        if set_default is True:
            # also switch the process-wide default dtype when requested
            torch.set_default_dtype(target)
        x = latent_image["samples"].to(target)
        return ({"samples": x}, )
class set_precision_universal:
    """Cast conditioning, sigmas, and/or a latent to a chosen dtype, with an
    explicit passthrough mode that returns every input untouched."""
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "precision": (["bf16", "fp16", "fp32", "fp64", "passthrough"], {"default": "fp32"}),
                "set_default": ("BOOLEAN", {"default": False})
            },
            "optional": {
                "cond_pos": ("CONDITIONING",),
                "cond_neg": ("CONDITIONING",),
                "sigmas": ("SIGMAS", ),
                "latent_image": ("LATENT", ),
            },
        }

    RETURN_TYPES = ("CONDITIONING",
                    "CONDITIONING",
                    "SIGMAS",
                    "LATENT",)
    RETURN_NAMES = ("cond_pos",
                    "cond_neg",
                    "sigmas",
                    "latent_image",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/precision"

    def main(self,
             precision = "fp32",
             cond_pos = None,
             cond_neg = None,
             sigmas = None,
             latent_image = None,
             set_default = False
             ):
        if precision == "passthrough":
            return (cond_pos, cond_neg, sigmas, latent_image, )

        dtype = {
            "bf16": torch.bfloat16,
            "fp16": torch.float16,
            "fp32": torch.float32,
            "fp64": torch.float64,
        }[precision]

        # conditioning lists are updated in place (same list object returned)
        for cond in (cond_pos, cond_neg):
            if cond is not None:
                cond[0][0] = cond[0][0].clone().to(dtype)
                cond[0][1]["pooled_output"] = cond[0][1]["pooled_output"].clone().to(dtype)
        if sigmas is not None:
            sigmas = sigmas.clone().to(dtype)
        if latent_image is not None:
            latent_image = {"samples": latent_image["samples"].clone().to(dtype)}
        if set_default is True:
            torch.set_default_dtype(dtype)
        return (cond_pos, cond_neg, sigmas, latent_image, )
class set_precision_advanced:
    """Set the global torch default dtype and the shark cast type, and emit the
    input latent in several precisions at once."""
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "latent_image": ("LATENT", ),
                "global_precision": (["64", "32", "16"], ),
                "shark_precision": (["64", "32", "16"], ),
            },
        }

    RETURN_TYPES = ("LATENT","LATENT","LATENT","LATENT","LATENT",)
    RETURN_NAMES = ("passthrough",
                    "latent_cast_to_global",
                    "latent_16",
                    "latent_32",
                    "latent_64",
                    )
    FUNCTION = "main"
    CATEGORY = "RES4LYF/precision"

    def main(self,
             global_precision = "32",
             shark_precision = "64",
             latent_image = None
             ):
        dtypes = {"16": torch.float16, "32": torch.float32, "64": torch.float64}
        labels = {"16": 'fp16', "32": 'fp32', "64": 'fp64'}

        # process-wide side effects: default dtype + shark cast type
        torch.set_default_dtype(dtypes[global_precision])
        precision_tool.set_cast_type(labels[shark_precision])

        # capture the original tensor before any in-place dict mutation below
        passthrough = latent_image["samples"]
        out16 = latent_image["samples"].to(torch.float16)
        out32 = latent_image["samples"].to(torch.float32)
        out64 = latent_image["samples"].to(torch.float64)

        target = dtypes[global_precision]
        if latent_image["samples"].dtype != target:
            # note: mutates the incoming latent dict, as the original node does
            latent_image["samples"] = latent_image["samples"].to(target)
        cast_to_global = latent_image["samples"]

        return ({"samples": passthrough},
                {"samples": cast_to_global},
                {"samples": out16},
                {"samples": out32},
                {"samples": out64}
                )
================================================
FILE: requirements.txt
================================================
opencv-python
matplotlib
pywavelets
numpy>=1.26.4
================================================
FILE: res4lyf.py
================================================
# Code adapted from https://github.com/pythongosssss/ComfyUI-Custom-Scripts
import asyncio
import os
import json
import shutil
import inspect
import aiohttp
import math
import comfy.model_sampling
import comfy.samplers
from aiohttp import web
from server import PromptServer
from tqdm import tqdm
CONFIG_FILE_NAME = "res4lyf.config.json"  # user-editable config, stored in the extension directory
DEFAULT_CONFIG_FILE_NAME = "web/js/res4lyf.default.json"  # shipped defaults merged into the user config
config = None  # lazily-populated cache used by get_extension_config()
using_RES4LYF_time_snr_shift = False  # toggled via the /reslyf/settings route and init()
original_time_snr_shift = comfy.model_sampling.time_snr_shift  # stock implementation, kept for delegation
def time_snr_shift_RES4LYF(alpha, t):
    """Drop-in replacement for comfy.model_sampling.time_snr_shift.

    When the RES4LYF "updatedTimestepScaling" setting is active, uses an
    exp(alpha)-based sigmoid variant of the shift; otherwise delegates to the
    stock implementation saved in `original_time_snr_shift`.
    """
    if using_RES4LYF_time_snr_shift and get_config_value("updatedTimestepScaling", False):
        return math.exp(alpha) / (math.exp(alpha) + (1 / t - 1) ** 1.0)
    return original_time_snr_shift(alpha, t)
display_sampler_category = False  # mirrors the "displayCategory" config setting


def get_display_sampler_category():
    """Return the current module-level display_sampler_category flag."""
    global display_sampler_category
    return display_sampler_category
@PromptServer.instance.routes.post("/reslyf/settings")
async def update_settings(request):
    """Persist a single setting posted by the front-end and apply it live.

    Expects JSON {"setting": name, "value": value}; two settings also update
    module-level state immediately.
    """
    try:
        payload = await request.json()
        setting = payload.get("setting")
        value = payload.get("value")
        if setting:
            save_config_value(setting, value)
            if setting == "updatedTimestepScaling":
                global using_RES4LYF_time_snr_shift
                using_RES4LYF_time_snr_shift = value
                if using_RES4LYF_time_snr_shift is True:
                    RESplain("Using RES4LYF time SNR shift")
                else:
                    RESplain("Disabled RES4LYF time SNR shift")
            elif setting == "displayCategory":
                global display_sampler_category
                display_sampler_category = value
                if display_sampler_category is True:
                    RESplain("Displaying sampler category", debug=True)
                else:
                    RESplain("Not displaying sampler category", debug=True)
        return web.Response(status=200)
    except Exception as e:
        # surface the failure to the front-end rather than crashing the server
        return web.Response(status=500, text=str(e))
@PromptServer.instance.routes.post("/reslyf/log")
async def log_message(request):
    """Relay a front-end log line to the server console (debug level)."""
    try:
        payload = await request.json()
        log_text = payload.get("log")
        if not log_text:
            return web.Response(status=400, text="No log text provided")
        RESplain(log_text, debug=True)
        return web.Response(status=200)
    except Exception as e:
        return web.Response(status=500, text=str(e))
original_calculate_sigmas = comfy.samplers.calculate_sigmas  # stock lookup, used as fallback


def calculate_sigmas_RES4LYF(model_sampling, scheduler_name, steps):
    """Monkey-patched comfy.samplers.calculate_sigmas adding "beta57",
    a beta scheduler with alpha=0.5, beta=0.7; everything else is delegated."""
    if scheduler_name != "beta57":
        return original_calculate_sigmas(model_sampling, scheduler_name, steps)
    return comfy.samplers.beta_scheduler(model_sampling, steps, alpha=0.5, beta=0.7)
def init(check_imports=None):
    # One-time extension startup: read config flags into module globals and
    # monkey-patch comfy's scheduler machinery so the "beta57" scheduler is
    # selectable everywhere. Always returns True.
    # NOTE(review): `check_imports` is accepted but never used here.
    RESplain("Init")
    # initialize display category
    global display_sampler_category
    display_sampler_category = get_config_value("displayCategory", False)
    if ( display_sampler_category is True ):
        RESplain("Displaying sampler category", debug=True)
    # Initialize using_RES4LYF_time_snr_shift from config (deprecated, disabled by default)
    global using_RES4LYF_time_snr_shift
    using_RES4LYF_time_snr_shift = get_config_value("updatedTimestepScaling", False)
    if using_RES4LYF_time_snr_shift:
        # replace comfy's time_snr_shift only when the deprecated flag is on
        comfy.model_sampling.time_snr_shift = time_snr_shift_RES4LYF
        RESplain("Using RES4LYF time SNR shift but this is deprecated and will be disabled at some completely unpredictable point in the future")
    # monkey patch comfy.samplers.calculate_sigmas with custom implementation
    comfy.samplers.calculate_sigmas = calculate_sigmas_RES4LYF
    # register the extra scheduler name in both places comfy consults
    if "beta57" not in comfy.samplers.SCHEDULER_NAMES:
        comfy.samplers.SCHEDULER_NAMES = comfy.samplers.SCHEDULER_NAMES + ["beta57"]
    if "beta57" not in comfy.samplers.KSampler.SCHEDULERS:
        comfy.samplers.KSampler.SCHEDULERS = comfy.samplers.KSampler.SCHEDULERS + ["beta57"]
    return True
def save_config_value(key, value):
    """Set a (possibly dotted, e.g. "a.b.c") key in the extension config and
    write the whole config back to disk."""
    cfg = get_extension_config()
    parts = key.split(".")
    node = cfg
    for part in parts[:-1]:
        # create intermediate dicts as needed while walking the dotted path
        node = node.setdefault(part, {})
    node[parts[-1]] = value
    with open(get_ext_dir(CONFIG_FILE_NAME), "w") as f:
        json.dump(cfg, f, indent=4)
def get_config_value(key, default=None, throw=False):
    # Dotted-key lookup into the extension config; returns `default` (or
    # raises KeyError when throw=True) if any intermediate segment is missing.
    # NOTE(review): this function is redefined later in this same file; the
    # later definition wins at import time, leaving this one shadowed.
    config = get_extension_config()
    keys = key.split(".")
    d = config
    for k in keys[:-1]:
        if k not in d:
            if throw:
                raise KeyError("Configuration key missing: " + key)
            else:
                return default
        d = d[k]
    # final segment uses .get, so a missing leaf never raises even with throw=True
    return d.get(keys[-1], default)
def is_debug_logging_enabled():
    """True when the "enableDebugLogs" config flag is set."""
    return get_config_value("enableDebugLogs", False)
def RESplain(*args, debug='info'):
    """Print a namespaced log line for this extension.

    `debug` may be a level string ('info', 'debug', 'warning', ...) or a bool
    (True means 'debug'). Debug lines are suppressed unless the
    "enableDebugLogs" config flag is on; debug/warning lines include the level.
    """
    if isinstance(debug, bool):
        level = 'debug' if debug else 'info'
    else:
        level = debug
    if level == 'debug' and not is_debug_logging_enabled():
        return
    if not args:
        return
    name = get_extension_config()["name"]
    message = " ".join(map(str, args))
    if level in ('debug', 'warning'):
        print(f"({name} {level}) {message}")
    else:
        print(f"({name}) {message}")
def get_ext_dir(subpath=None, mkdir=False):
    """Absolute path inside this extension's directory.

    With `subpath`, joins it onto the extension root; with mkdir=True the
    directory is created if it does not yet exist.
    """
    path = os.path.dirname(__file__)
    if subpath is not None:
        path = os.path.join(path, subpath)
    path = os.path.abspath(path)
    if mkdir and not os.path.exists(path):
        os.makedirs(path)
    return path
def merge_default_config(config, default_config):
    """Recursively fill keys missing from `config` with values from
    `default_config`; existing values win. Mutates and returns `config`."""
    for key, default_value in default_config.items():
        if key not in config:
            config[key] = default_value
        elif isinstance(default_value, dict):
            # both sides have the key and the default is a dict: merge deeper
            config[key] = merge_default_config(config.get(key, {}), default_value)
    return config
def get_extension_config(reload=False):
    # Load (and cache in the module-level `config`) the merged extension
    # config. Shipped defaults are merged into the user file, and the user
    # file is (re)written so newly-added default keys appear on disk.
    global config
    if not reload and config is not None:
        return config
    config_path = get_ext_dir(CONFIG_FILE_NAME)
    default_config_path = get_ext_dir(DEFAULT_CONFIG_FILE_NAME)
    if os.path.exists(default_config_path):
        with open(default_config_path, "r") as f:
            default_config = json.loads(f.read())
    else:
        default_config = {}
    if not os.path.exists(config_path):
        # first run: persist the defaults as the initial user config
        config = default_config
        with open(config_path, "w") as f:
            json.dump(config, f, indent=4)
    else:
        with open(config_path, "r") as f:
            config = json.loads(f.read())
        # user values win; missing keys are filled in from the defaults
        config = merge_default_config(config, default_config)
        with open(config_path, "w") as f:
            json.dump(config, f, indent=4)
    return config
def get_comfy_dir(subpath=None, mkdir=False):
    """Absolute path inside the ComfyUI installation (located via the
    PromptServer module file); optionally creates the directory."""
    base = os.path.dirname(inspect.getfile(PromptServer))
    if subpath is not None:
        base = os.path.join(base, subpath)
    base = os.path.abspath(base)
    if mkdir and not os.path.exists(base):
        os.makedirs(base)
    return base
def get_web_ext_dir():
    """Path to this extension's legacy web directory under ComfyUI's
    web/extensions/res4lyf/<name>; the parent is created if missing."""
    name = get_extension_config()["name"]
    parent = get_comfy_dir("web/extensions/res4lyf")
    if not os.path.exists(parent):
        os.makedirs(parent)
    return os.path.join(parent, name)
def link_js(src, dst):
    # Best-effort link of src -> dst: NTFS junction first on Windows, then a
    # symlink; returns True on success, False when every strategy failed.
    src = os.path.abspath(src)
    dst = os.path.abspath(dst)
    if os.name == "nt":
        try:
            import _winapi
            _winapi.CreateJunction(src, dst)
            return True
        except:
            # deliberate: junctions can fail (permissions, FS type); fall
            # through to the symlink attempt below
            pass
    try:
        os.symlink(src, dst)
        return True
    except:
        # log the failure but keep running; caller falls back to copying
        import logging
        logging.exception('')
        return False
def is_junction(path):
    """True when `path` is an NTFS junction (readlink succeeds on Windows);
    always False on non-Windows platforms."""
    if os.name == "nt":
        try:
            return bool(os.readlink(path))
        except OSError:
            pass
    return False
def install_js():
    # Make the extension's web/js assets visible to the ComfyUI front-end.
    # Modern ComfyUI serves custom-node web files itself, so any previously
    # created link/copy is removed; legacy installs get a link (or a copy as
    # a last resort).
    src_dir = get_ext_dir("web/js")
    if not os.path.exists(src_dir):
        RESplain("No JS")
        return
    should_install = should_install_js()
    if should_install:
        # NOTE(review): "warning" and True are passed as positional *args, so
        # they are printed as part of the message instead of acting as a log
        # level — confirm whether debug='warning' was intended.
        RESplain("it looks like you're running an old version of ComfyUI that requires manual setup of web files, it is recommended you update your installation.", "warning", True)
    dst_dir = get_web_ext_dir()
    linked = os.path.islink(dst_dir) or is_junction(dst_dir)
    if linked or os.path.exists(dst_dir):
        if linked:
            if should_install:
                RESplain("JS already linked")
            else:
                # modern ComfyUI: remove the stale link
                os.unlink(dst_dir)
                RESplain("JS unlinked, PromptServer will serve extension")
        elif not should_install:
            # modern ComfyUI: remove the stale copied directory
            shutil.rmtree(dst_dir)
            RESplain("JS deleted, PromptServer will serve extension")
        return
    if not should_install:
        RESplain("JS skipped, PromptServer will serve extension")
        return
    if link_js(src_dir, dst_dir):
        RESplain("JS linked")
        return
    # linking failed (e.g. no symlink permission): fall back to copying
    RESplain("Copying JS files")
    shutil.copytree(src_dir, dst_dir, dirs_exist_ok=True)
def should_install_js():
    """True only on legacy ComfyUI front-ends that cannot serve custom-node
    web files themselves (no "custom_nodes_from_web" capability)."""
    if not hasattr(PromptServer.instance, "supports"):
        return True
    return "custom_nodes_from_web" not in PromptServer.instance.supports
def get_async_loop():
    """Return the current asyncio event loop, creating and installing a new
    one when none exists for this thread."""
    try:
        return asyncio.get_event_loop()
    except:
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        return loop
def get_http_session():
    """aiohttp client session bound to the shared/current event loop."""
    return aiohttp.ClientSession(loop=get_async_loop())
async def download(url, stream, update_callback=None, session=None):
    # Stream `url` into the writable `stream` in 2 KiB chunks with a tqdm
    # progress bar. `update_callback`, when given, is awaited with the
    # completed fraction (rounded to 2 decimals) each time it changes.
    # A session created here is closed on exit; a caller-supplied one is not.
    close_session = False
    if session is None:
        close_session = True
        session = get_http_session()
    try:
        async with session.get(url) as response:
            # content-length may be absent or 0; tqdm then runs without a total
            size = int(response.headers.get('content-length', 0)) or None
            with tqdm(
                unit='B', unit_scale=True, miniters=1, desc=url.split('/')[-1], total=size,
            ) as progressbar:
                perc = 0
                async for chunk in response.content.iter_chunked(2048):
                    stream.write(chunk)
                    progressbar.update(len(chunk))
                    if update_callback is not None and progressbar.total is not None and progressbar.total != 0:
                        last = perc
                        perc = round(progressbar.n / progressbar.total, 2)
                        if perc != last:
                            last = perc
                            await update_callback(perc)
    finally:
        # only close sessions this function created itself
        if close_session and session is not None:
            await session.close()
async def download_to_file(url, destination, update_callback=None, is_ext_subpath=True, session=None):
    """Download `url` into the file `destination`.

    With is_ext_subpath=True (default), `destination` is resolved inside the
    extension directory. `update_callback` and `session` are forwarded to
    download().

    BUGFIX: the download(...) coroutine was previously not awaited, so the
    file was opened and closed with nothing ever written (and Python emitted a
    "coroutine was never awaited" warning).
    """
    if is_ext_subpath:
        destination = get_ext_dir(destination)
    with open(destination, mode='wb') as f:
        await download(url, f, update_callback, session)
def wait_for_async(async_fn, loop=None):
    """Run the zero-argument coroutine function `async_fn` to completion on
    `loop` (or the current/new event loop) and return its result."""
    results = []

    async def runner():
        results.append(await async_fn())

    if loop is None:
        try:
            loop = asyncio.get_event_loop()
        except:
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
    loop.run_until_complete(runner())
    return results[0]
def update_node_status(client_id, node, text, progress=None):
    """Send a synchronous status update for `node` to the given client (or
    the server's current client); no-op when no client can be resolved."""
    if client_id is None:
        client_id = PromptServer.instance.client_id
    if client_id is None:
        return
    payload = {"node": node, "progress": progress, "text": text}
    PromptServer.instance.send_sync("res4lyf/update_status", payload, client_id)
async def update_node_status_async(client_id, node, text, progress=None):
    """Async variant of update_node_status using the awaitable send channel."""
    if client_id is None:
        client_id = PromptServer.instance.client_id
    if client_id is None:
        return
    payload = {"node": node, "progress": progress, "text": text}
    await PromptServer.instance.send("res4lyf/update_status", payload, client_id)
def get_config_value(key, default=None, throw=False):
    # Dotted-key config lookup walking every path segment (including the
    # leaf, unlike the earlier definition of the same name in this file —
    # this later definition is the one in effect at import time).
    split = key.split(".")
    obj = get_extension_config()
    for s in split:
        if s in obj:
            obj = obj[s]
        else:
            if throw:
                raise KeyError("Configuration key missing: " + key)
            else:
                return default
    return obj
def is_inside_dir(root_dir, check_path):
    """True if `check_path` (resolved relative to `root_dir` when not already
    absolute) lies inside `root_dir`. Guards against path traversal."""
    root = os.path.abspath(root_dir)
    target = check_path
    if not os.path.isabs(target):
        target = os.path.abspath(os.path.join(root, target))
    return os.path.commonpath([target, root]) == root
def get_child_dir(root_dir, child_path, throw_if_outside=True):
    """Resolve `child_path` under `root_dir`, refusing paths that escape the
    root (raises NotADirectoryError, or returns None with throw_if_outside=False)."""
    resolved = os.path.abspath(os.path.join(root_dir, child_path))
    if is_inside_dir(root_dir, resolved):
        return resolved
    if throw_if_outside:
        raise NotADirectoryError(
            "Saving outside the target folder is not allowed.")
    return None
================================================
FILE: rk_method_beta.py
================================================
import torch
from torch import Tensor
from typing import Optional, Callable, Tuple, List, Dict, Any, Union
import comfy.model_patcher
import comfy.supported_models
import itertools
from .phi_functions import Phi
from .rk_coefficients_beta import get_implicit_sampler_name_list, get_rk_methods_beta
from ..helper import ExtraOptions
from ..latents import get_orthogonal, get_collinear, get_cosine_similarity, tile_latent, untile_latent
from ..res4lyf import RESplain
MAX_STEPS = 10000  # global step-count ceiling — presumably used to size step-indexed buffers/schedules; confirm against callers
def get_data_from_step(x: Tensor, x_next: Tensor, sigma: Tensor, sigma_next: Tensor) -> Tensor:
    """Recover the data (denoised) estimate implied by one linear step taking
    (sigma, x) to (sigma_next, x_next)."""
    step = sigma_next - sigma
    return (sigma_next * x - sigma * x_next) / step
def get_epsilon_from_step(x: Tensor, x_next: Tensor, sigma: Tensor, sigma_next: Tensor) -> Tensor:
    """Recover the epsilon (noise) estimate implied by one linear step taking
    (sigma, x) to (sigma_next, x_next)."""
    step = sigma_next - sigma
    return (x - x_next) / step
class RK_Method_Beta:
def __init__(self,
             model,
             rk_type : str,
             noise_anchor : float,
             noise_boost_normalize : bool = True,
             model_device : str = 'cuda',
             work_device : str = 'cpu',
             dtype : torch.dtype = torch.float64,
             extra_options : str = ""
             ):
    """Shared state for Runge-Kutta samplers; subclasses provide the
    exponential vs. linear step math (see RK_Method_Beta.create)."""
    self.work_device = work_device
    self.model_device = model_device
    self.dtype : torch.dtype = dtype
    self.model = model
    # the wrapped model is either a patcher (has .model) or a guider-style
    # wrapper (has .inner_model)
    if hasattr(model, "model"):
        model_sampling = model.model.model_sampling
    elif hasattr(model, "inner_model"):
        model_sampling = model.inner_model.inner_model.model_sampling
    # NOTE(review): if neither attribute exists, model_sampling is unbound
    # and the next line raises NameError — confirm all callers satisfy this.
    self.sigma_min : Tensor = model_sampling.sigma_min.to(dtype=dtype, device=work_device)
    self.sigma_max : Tensor = model_sampling.sigma_max.to(dtype=dtype, device=work_device)
    self.rk_type : str = rk_type
    self.IMPLICIT : bool = rk_type in get_implicit_sampler_name_list(nameOnly=True)
    self.EXPONENTIAL : bool = RK_Method_Beta.is_exponential(rk_type)
    self.SYNC_SUBSTEP_MEAN_CW : bool = noise_boost_normalize
    # Butcher tableau slots; populated elsewhere before stepping
    self.A : Optional[Tensor] = None
    self.B : Optional[Tensor] = None
    self.U : Optional[Tensor] = None
    self.V : Optional[Tensor] = None
    self.rows : int = 0
    self.cols : int = 0
    self.denoised : Optional[Tensor] = None
    self.uncond : Optional[Tensor] = None
    self.y0 : Optional[Tensor] = None
    self.y0_inv : Optional[Tensor] = None
    self.multistep_stages : int = 0
    self.row_offset : Optional[int] = None
    self.cfg_cw : float = 1.0
    self.extra_args : Optional[Dict[str, Any]] = None
    self.extra_options : str = extra_options
    self.EO : ExtraOptions = ExtraOptions(extra_options)
    self.reorder_tableau_indices : list[int] = self.EO("reorder_tableau_indices", [-1])
    self.LINEAR_ANCHOR_X_0 : float = noise_anchor
    # tiled-inference state consumed by model_denoised
    self.tile_sizes : Optional[List[Tuple[int,int]]] = None
    self.tile_cnt : int = 0
    self.latent_compression_ratio : int = 8
@staticmethod
def is_exponential(rk_type:str) -> bool:
if rk_type.startswith(( "res",
"dpmpp",
"ddim",
"pec",
"etdrk",
"lawson",
"abnorsett",
)):
return True
else:
return False
@staticmethod
def create(model,
           rk_type : str,
           noise_anchor : float = 1.0,
           noise_boost_normalize : bool = True,
           model_device : str = 'cuda',
           work_device : str = 'cpu',
           dtype : torch.dtype = torch.float64,
           extra_options : str = ""
           ) -> "Union[RK_Method_Exponential, RK_Method_Linear]":
    """Factory: choose the exponential or linear RK implementation based on
    the sampler name (see is_exponential)."""
    impl = RK_Method_Exponential if RK_Method_Beta.is_exponential(rk_type) else RK_Method_Linear
    return impl(model, rk_type, noise_anchor, noise_boost_normalize, model_device, work_device, dtype, extra_options)
def __call__(self):
raise NotImplementedError("This method got clownsharked!")
def model_epsilon(self, x:Tensor, sigma:Tensor, **extra_args) -> Tuple[Tensor, Tensor]:
    # Run the wrapped model once at noise level `sigma` and return both the
    # epsilon (noise) estimate and the denoised (x0) prediction, after
    # channelwise CFG post-processing.
    s_in = x.new_ones([x.shape[0]])
    denoised = self.model(x, sigma * s_in, **extra_args)
    denoised = self.calc_cfg_channelwise(denoised)
    # eps = (x - x0) / sigma, broadcast per batch element
    eps = (x - denoised) / (sigma * s_in).view(x.shape[0], 1, 1, 1) #return x0 ###################################THIS WORKS ONLY WITH THE MODEL SAMPLING PATCH
    return eps, denoised
def model_denoised(self, x:Tensor, sigma:Tensor, **extra_args) -> Tensor:
    """Run the model and return the denoised prediction, optionally tiling the latent.

    Three paths:
      1. "tile_model_calls" extra option: simple fixed-size tiling, no controlnet handling.
      2. self.tile_sizes set: tiling with the controlnet cond_hint tiled to match,
         plus optional tiling of the y0 style guides.
      3. default: a single full-latent model call.
    Channelwise CFG rescaling is applied to the result in every path.
    """
    s_in = x.new_ones([x.shape[0]])
    control_tiles = None

    y0_style_pos = self.extra_args['model_options']['transformer_options'].get("y0_style_pos")
    y0_style_neg = self.extra_args['model_options']['transformer_options'].get("y0_style_neg")
    # fix: these were initialized under misspelled names (y0_style_pos_tile /
    # sy0_style_neg_tiles) and therefore never actually initialized the names
    # read below
    y0_style_pos_tiles, y0_style_neg_tiles = None, None

    if self.EO("tile_model_calls"):
        tile_h = self.EO("tile_h", 128)
        tile_w = self.EO("tile_w", 128)
        denoised_tiles = []
        tiles, orig_shape, grid, strides = tile_latent(x, tile_size=(tile_h,tile_w))
        for i in range(tiles.shape[0]):
            tile = tiles[i].unsqueeze(0)
            denoised_tile = self.model(tile, sigma * s_in, **extra_args)
            denoised_tiles.append(denoised_tile)
        denoised_tiles = torch.cat(denoised_tiles, dim=0)
        denoised = untile_latent(denoised_tiles, orig_shape, grid, strides)

    elif self.tile_sizes is not None:
        # cycle through the configured tile sizes, one entry per model call
        tile_h_full = self.tile_sizes[self.tile_cnt % len(self.tile_sizes)][0]
        tile_w_full = self.tile_sizes[self.tile_cnt % len(self.tile_sizes)][1]
        if tile_h_full == -1:   # -1 = use the full latent height
            tile_h = x.shape[-2]
            tile_h_full = tile_h * self.latent_compression_ratio
        else:
            tile_h = tile_h_full // self.latent_compression_ratio
        if tile_w_full == -1:   # -1 = use the full latent width
            tile_w = x.shape[-1]
            tile_w_full = tile_w * self.latent_compression_ratio
        else:
            tile_w = tile_w_full // self.latent_compression_ratio

        self.tile_cnt += 1
        # swap tile orientation every full pass through tile_sizes when enabled
        if (self.tile_cnt // len(self.tile_sizes)) % 2 == 1 and self.EO("tiles_autorotate"):
            tile_h,      tile_w      = tile_w,      tile_h
            tile_h_full, tile_w_full = tile_w_full, tile_h_full

        xt_negative      = self.model.inner_model.conds.get('xt_negative', self.model.inner_model.conds.get('negative'))
        negative_control = xt_negative[0].get('control')
        if negative_control is not None and hasattr(negative_control, 'cond_hint_original'):
            negative_cond_hint_init = negative_control.cond_hint.clone() if negative_control.cond_hint is not None else None

        xt_positive      = self.model.inner_model.conds.get('xt_positive', self.model.inner_model.conds.get('positive'))
        positive_control = xt_positive[0].get('control')
        if positive_control is not None and hasattr(positive_control, 'cond_hint_original'):
            positive_cond_hint_init = positive_control.cond_hint.clone() if positive_control.cond_hint is not None else None
            # NOTE(review): shape[-1] (width) is compared against x.shape[-2]
            # (height) and vice versa -- looks transposed, but bislerp below
            # uses the same convention; confirm against
            # comfy.utils.bislerp(samples, width, height) before changing.
            if positive_control.cond_hint_original.shape[-1] != x.shape[-2] * self.latent_compression_ratio or positive_control.cond_hint_original.shape[-2] != x.shape[-1] * self.latent_compression_ratio:
                positive_control_pretile = comfy.utils.bislerp(positive_control.cond_hint_original.clone().to(torch.float16).to('cuda'), x.shape[-1] * self.latent_compression_ratio, x.shape[-2] * self.latent_compression_ratio)
                positive_control.cond_hint_original = positive_control_pretile.to(positive_control.cond_hint_original)
            positive_control_pretile = positive_control.cond_hint_original.clone().to(torch.float16).to('cuda')
            # tile the (full-resolution) control hint with the full-res tile size
            control_tiles, control_orig_shape, control_grid, control_strides = tile_latent(positive_control_pretile, tile_size=(tile_h_full,tile_w_full))

        denoised_tiles = []
        tiles, orig_shape, grid, strides = tile_latent(x, tile_size=(tile_h,tile_w))
        if y0_style_pos is not None:
            y0_style_pos_tiles, _, _, _ = tile_latent(y0_style_pos, tile_size=(tile_h,tile_w))
        if y0_style_neg is not None:
            y0_style_neg_tiles, _, _, _ = tile_latent(y0_style_neg, tile_size=(tile_h,tile_w))

        for i in range(tiles.shape[0]):
            tile = tiles[i].unsqueeze(0)
            if control_tiles is not None:
                positive_control.cond_hint = control_tiles[i].unsqueeze(0).to(positive_control.cond_hint)
                if negative_control is not None:
                    negative_control.cond_hint = control_tiles[i].unsqueeze(0).to(positive_control.cond_hint)
            if y0_style_pos is not None:
                self.extra_args['model_options']['transformer_options']['y0_style_pos'] = y0_style_pos_tiles[i].unsqueeze(0)
            if y0_style_neg is not None:
                self.extra_args['model_options']['transformer_options']['y0_style_neg'] = y0_style_neg_tiles[i].unsqueeze(0)
            denoised_tile = self.model(tile, sigma * s_in, **extra_args)
            denoised_tiles.append(denoised_tile)
        denoised_tiles = torch.cat(denoised_tiles, dim=0)
        denoised = untile_latent(denoised_tiles, orig_shape, grid, strides)

    else:
        denoised = self.model(x, sigma * s_in, **extra_args)

    # restore the controlnet hints and style guides that were tiled above
    if control_tiles is not None:
        positive_control.cond_hint = positive_cond_hint_init
        if negative_control is not None:
            negative_control.cond_hint = negative_cond_hint_init
    if y0_style_pos is not None:
        self.extra_args['model_options']['transformer_options']['y0_style_pos'] = y0_style_pos
    if y0_style_neg is not None:
        self.extra_args['model_options']['transformer_options']['y0_style_neg'] = y0_style_neg

    denoised = self.calc_cfg_channelwise(denoised)
    return denoised
def update_transformer_options(self,
                               transformer_options : Optional[dict] = None,
                               ):
    """Merge `transformer_options` into extra_args['model_options']['transformer_options'].

    Intermediate dicts are created as needed. Fix: a None argument (the declared
    default) is now a no-op instead of raising TypeError from dict.update(None).
    """
    if transformer_options is None:
        return
    self.extra_args.setdefault("model_options", {}).setdefault("transformer_options", {}).update(transformer_options)
    return
def set_coeff(self,
              rk_type : str,
              h : Tensor,
              c1 : float = 0.0,
              c2 : float = 0.5,
              c3 : float = 1.0,
              step : int = 0,
              sigmas : Optional[Tensor] = None,
              sigma_down : Optional[Tensor] = None,
              ) -> None:
    # Configure the integrator for `rk_type` at this step: fetch the Butcher
    # tableau and related arrays from get_rk_methods_beta, convert them to
    # tensors matching h's dtype/device, and cache the shape/flags used by the
    # stepping loop.
    self.rk_type     = rk_type
    self.IMPLICIT    = rk_type in get_implicit_sampler_name_list(nameOnly=True)
    self.EXPONENTIAL = RK_Method_Beta.is_exponential(rk_type)

    sigma      = sigmas[step]
    sigma_next = sigmas[step+1]

    h_prev = []   # NOTE(review): always passed empty here -- presumably a placeholder; confirm against get_rk_methods_beta
    a, b, u, v, ci, multistep_stages, hybrid_stages, FSAL = get_rk_methods_beta(rk_type,
                                                                                h,
                                                                                c1,
                                                                                c2,
                                                                                c3,
                                                                                h_prev,
                                                                                step,
                                                                                sigmas,
                                                                                sigma,
                                                                                sigma_next,
                                                                                sigma_down,
                                                                                self.extra_options,
                                                                                )
    self.multistep_stages = multistep_stages
    self.hybrid_stages    = hybrid_stages

    # A/B: stage and output weights; C: abscissas; U/V: multistep history weights (optional)
    self.A = torch.tensor(a,  dtype=h.dtype, device=h.device)
    self.B = torch.tensor(b,  dtype=h.dtype, device=h.device)
    self.C = torch.tensor(ci, dtype=h.dtype, device=h.device)

    self.U = torch.tensor(u, dtype=h.dtype, device=h.device) if u is not None else None
    self.V = torch.tensor(v, dtype=h.dtype, device=h.device) if v is not None else None

    self.rows = self.A.shape[0]
    self.cols = self.A.shape[1]

    # explicit tableaus whose first row is all zeros skip that row's model call
    self.row_offset = 1 if not self.IMPLICIT and self.A[0].sum() == 0 else 0

    # implicit methods may request a row permutation of the tableau ([-1] disables)
    if self.IMPLICIT and self.reorder_tableau_indices[0] != -1:
        self.reorder_tableau(self.reorder_tableau_indices)
def reorder_tableau(self, indices:list[int]) -> None:
    """Permute the tableau rows of A, B[0], and C by `indices`; C additionally
    gets its final abscissa duplicated at the end."""
    permuted_C = self.C[indices]
    self.A     = self.A[indices]
    self.B[0]  = self.B[0][indices]
    self.C     = torch.cat((permuted_C, permuted_C[-1:]))
    return
def update_substep(self,
                   x_0 : Tensor,
                   x_ : Tensor,
                   eps_ : Tensor,
                   eps_prev_ : Tensor,
                   row : int,
                   row_offset : int,
                   h_new : Tensor,
                   h_new_orig : Tensor,
                   lying_eps_row_factor : float = 1.0,
                   ) -> Tensor:
    """Advance one substep: write x_[target] = x_0 + h_new * (tableau-weighted eps sum).

    When `lying_eps_row_factor` != 1.0, the current row's eps (and eps_prev) is
    scaled in place; with SYNC_SUBSTEP_MEAN_CW the channelwise spatial mean of
    the un-lied result is restored afterwards. If the step size was boosted
    (h_new != h_new_orig), the mean can also be synced to the un-boosted step.
    Mutates x_/eps_/eps_prev_ in place and returns x_.
    """
    # target row index for the freshly computed substep state
    if row < self.rows - row_offset and self.multistep_stages == 0:
        row_tmp_offset = row + row_offset
    else:
        row_tmp_offset = row + 1

    # weighted eps sum BEFORE the lying factor is applied
    # (fix: was computed into an unused `zr_base` and then recomputed)
    zr_orig = self.zum(row+row_offset+self.multistep_stages, eps_, eps_prev_)
    if self.SYNC_SUBSTEP_MEAN_CW and lying_eps_row_factor != 1.0:
        x_orig_row = x_0 + h_new * zr_orig

    eps_     [row] *= lying_eps_row_factor
    eps_prev_[row] *= lying_eps_row_factor

    zr = self.zum(row+row_offset+self.multistep_stages, eps_, eps_prev_)
    x_[row_tmp_offset] = x_0 + h_new * zr

    if self.SYNC_SUBSTEP_MEAN_CW and lying_eps_row_factor != 1.0:
        # restore the channelwise spatial mean of the un-lied substep
        x_[row_tmp_offset] = x_[row_tmp_offset] - x_[row_tmp_offset].mean(dim=(-2,-1), keepdim=True) + x_orig_row.mean(dim=(-2,-1), keepdim=True)

    if (self.SYNC_SUBSTEP_MEAN_CW and h_new != h_new_orig) or self.EO("sync_mean_noise"):
        if not self.EO("disable_sync_mean_noise"):
            # match the mean to what the un-boosted step size would have produced
            x_row_down = x_0 + h_new_orig * zr
            x_[row_tmp_offset] = x_[row_tmp_offset] - x_[row_tmp_offset].mean(dim=(-2,-1), keepdim=True) + x_row_down.mean(dim=(-2,-1), keepdim=True)

    return x_
def a_k_einsum(self, row:int, k :Tensor) -> Tensor:
    """Weighted sum of stage tensors k using tableau row A[row]."""
    return torch.einsum('i, i... -> ...', self.A[row], k[:self.cols])

def b_k_einsum(self, row:int, k :Tensor) -> Tensor:
    """Weighted sum of stage tensors k using output-weight row B[row]."""
    return torch.einsum('i, i... -> ...', self.B[row], k[:self.cols])

def u_k_einsum(self, row:int, k_prev:Tensor) -> Tensor:
    """Weighted sum of previous-step stages using U[row]; 0 when U or history is absent."""
    if self.U is None or k_prev is None:
        return 0
    return torch.einsum('i, i... -> ...', self.U[row], k_prev[:self.cols])

def v_k_einsum(self, row:int, k_prev:Tensor) -> Tensor:
    """Weighted sum of previous-step stages using V[row]; 0 when V or history is absent."""
    if self.V is None or k_prev is None:
        return 0
    return torch.einsum('i, i... -> ...', self.V[row], k_prev[:self.cols])

def zum(self, row:int, k:Tensor, k_prev:Tensor=None,) -> Tensor:
    """Tableau-weighted stage sum. Rows < self.rows use A/U (internal stages);
    rows >= self.rows index into B/V (output weights)."""
    if row < self.rows:
        return self.a_k_einsum(row, k) + self.u_k_einsum(row, k_prev)
    out_row = row - self.rows
    return self.b_k_einsum(out_row, k) + self.v_k_einsum(out_row, k_prev)

def zum_tableau(self, k:Tensor, k_prev:Tensor=None,) -> Tensor:
    """All internal-stage sums at once: (A @ k) plus (U @ k_prev) when available."""
    a_k_sum = torch.einsum('ij, j... -> i...', self.A, k[:self.cols])
    if self.U is not None and k_prev is not None:
        return a_k_sum + torch.einsum('ij, j... -> i...', self.U, k_prev[:self.cols])
    return a_k_sum
def init_cfg_channelwise(self, x:Tensor, cfg_cw:float=1.0, **extra_args) -> Dict[str, Any]:
    """Prepare channelwise-CFG state.

    For cfg_cw != 1.0, installs a post-CFG hook that captures the unconditional
    prediction into self.uncond for later use by calc_cfg_channelwise.
    Returns the (possibly modified) extra_args.
    """
    self.uncond = [torch.full_like(x, 0.0)]
    self.cfg_cw = cfg_cw
    if cfg_cw == 1.0:
        return extra_args

    def post_cfg_function(args):
        # stash the uncond prediction; pass the CFG result through unchanged
        self.uncond[0] = args["uncond_denoised"]
        return args["denoised"]

    model_options = extra_args.get("model_options", {}).copy()
    extra_args["model_options"] = comfy.model_patcher.set_model_options_post_cfg_function(model_options, post_cfg_function, disable_cfg1_optimization=True)
    return extra_args
def calc_cfg_channelwise(self, denoised:Tensor) -> Tensor:
    """Rescale the CFG delta per channel by each channel's relative magnitude.

    For each (batch, channel), the ratio ||cond - uncond|| / avg scales the CFG
    delta, weighted by self.cfg_cw. Returns `denoised` untouched when cfg_cw == 1.0.

    Fix: the original recomputed the WHOLE output tensor on every loop
    iteration, so only the final channel's ratio took effect; the ratio is now
    applied to its own channel only.
    NOTE(review): avg is divided by the channel count only, so with batch > 1
    the norms of all batches accumulate into one average -- confirm intended.
    """
    if self.cfg_cw == 1.0:
        return denoised

    avg = 0
    for b, c in itertools.product(range(denoised.shape[0]), range(denoised.shape[1])):
        avg += torch.norm(denoised[b][c] - self.uncond[0][b][c])
    avg /= denoised.shape[1]

    denoised_new = self.uncond[0].clone()
    for b, c in itertools.product(range(denoised.shape[0]), range(denoised.shape[1])):
        ratio = torch.nan_to_num(torch.norm(denoised[b][c] - self.uncond[0][b][c]) / avg, 0)
        denoised_new[b][c] = self.uncond[0][b][c] + ratio * self.cfg_cw * (denoised[b][c] - self.uncond[0][b][c])
    return denoised_new
@staticmethod
def calculate_res_2m_step(
        x_0 : Tensor,
        denoised_ : Tensor,
        sigma_down : Tensor,
        sigmas : Tensor,
        step : int,
        ) -> Tuple[Tensor, Tensor]:
    """Reconstruct a res_2m (2nd-order exponential multistep) step from the two
    most recent denoised predictions.

    Returns (x, denoised), or (None, None) while the history buffer is not yet
    populated (denoised_[2] still all zeros).
    """
    if denoised_[2].sum() == 0:
        return None, None

    sigma      = sigmas[step]
    sigma_prev = sigmas[step-1]

    # log-space step sizes (exponential-integrator parameterization)
    h_prev = -torch.log(sigma/sigma_prev)
    h      = -torch.log(sigma_down/sigma)

    c2 = (-h_prev / h).item()
    φ  = Phi(h, [0, c2], analytic_solution=True)

    b2 = φ(2)/c2
    b1 = φ(1) - b2

    eps_1 = denoised_[0] - x_0
    eps_2 = denoised_[1] - x_0

    h_a_k_sum = h * (b1 * eps_1 + b2 * eps_2)
    x        = torch.exp(-h) * x_0 + h_a_k_sum
    denoised = x_0 + (sigma / (sigma - sigma_down)) * h_a_k_sum
    return x, denoised
@staticmethod
def calculate_res_3m_step(
        x_0 : Tensor,
        denoised_ : Tensor,
        sigma_down : Tensor,
        sigmas : Tensor,
        step : int,
        ) -> Tuple[Tensor, Tensor]:
    # Reconstruct a res_3m (3rd-order exponential multistep) step from the three
    # most recent denoised predictions. Returns (x, denoised), or (None, None)
    # while the history buffer is not yet populated (denoised_[3] still zeros).
    if denoised_[3].sum() == 0:
        return None, None

    sigma       = sigmas[step]
    sigma_prev  = sigmas[step-1]
    sigma_prev2 = sigmas[step-2]

    # log-space step sizes (exponential-integrator parameterization)
    h       = -torch.log(sigma_down/sigma)
    h_prev  = -torch.log(sigma/sigma_prev)
    h_prev2 = -torch.log(sigma/sigma_prev2)

    c1 = 0
    c2 = (-h_prev  / h).item()
    c3 = (-h_prev2 / h).item()
    ci = [c1,c2,c3]
    φ = Phi(h, ci, analytic_solution=True)

    # NOTE(review): gamma uses 3*(c3**3) where analogous derivations often have
    # a quadratic term -- confirm against the coefficient derivation.
    gamma = (3*(c3**3) - 2*c3) / (c2*(2 - 3*c2))
    # NOTE(review): res_2m above calls φ(2) with no second argument; here
    # φ(2, -h) / φ(1, -h) are used -- confirm the intended Phi signature.
    b3 = (1 / (gamma * c2 + c3)) * φ(2, -h)
    b2 = gamma * b3
    b1 = φ(1, -h) - b2 - b3

    eps_3 = denoised_[2] - x_0
    eps_2 = denoised_[1] - x_0
    eps_1 = denoised_[0] - x_0

    h_a_k_sum = h * (b1 * eps_1 + b2 * eps_2 + b3 * eps_3)
    x        = torch.exp(-h) * x_0 + h_a_k_sum
    denoised = x_0 + (sigma / (sigma - sigma_down)) * h_a_k_sum
    return x, denoised
def _apply_rk_swap(self, rk_swap_type:str, NS) -> None:
    """Switch this sampler to `rk_swap_type`: swap the concrete class
    (exponential vs linear), update the implicit flag and row offsets on both
    self and the noise sampler NS, and re-point NS's h/t/sigma functions.
    (Extracted: this logic was duplicated verbatim in both swap branches.)"""
    self.rk_type = rk_swap_type
    if RK_Method_Beta.is_exponential(rk_swap_type):
        self.__class__ = RK_Method_Exponential
    else:
        self.__class__ = RK_Method_Linear
    if rk_swap_type in get_implicit_sampler_name_list(nameOnly=True):
        self.IMPLICIT   = True
        self.row_offset = 0
        NS.row_offset   = 0
    else:
        self.IMPLICIT   = False
        self.row_offset = 1
        NS.row_offset   = 1
    NS.h_fn     = self.h_fn
    NS.t_fn     = self.t_fn
    NS.sigma_fn = self.sigma_fn

def swap_rk_type_at_step_or_threshold(self,
                                      x_0 : Tensor,
                                      data_prev_ : Tensor,
                                      NS,
                                      sigmas : Tensor,
                                      step : Tensor,
                                      rk_swap_step : int,
                                      rk_swap_threshold : float,
                                      rk_swap_type : str,
                                      rk_swap_print : bool,
                                      ) -> str:
    """Possibly switch rk_type mid-run, either after a fixed step count or once
    the res_2m/res_3m reconstructions agree to within `rk_swap_threshold`.
    Returns the (possibly new) rk_type."""
    if rk_swap_type == "":
        rk_swap_type = "res_3m" if self.EXPONENTIAL else "deis_3m"

    # unconditional swap once past the configured step
    if step > rk_swap_step and self.rk_type != rk_swap_type:
        RESplain("Switching rk_type to:", rk_swap_type)
        self._apply_rk_swap(rk_swap_type, NS)

    # threshold-based swap: compare 2nd- vs 3rd-order reconstructions
    if step > 2 and sigmas[step+1] > 0 and self.rk_type != rk_swap_type and rk_swap_threshold > 0:
        x_res_2m, denoised_res_2m = self.calculate_res_2m_step(x_0, data_prev_, NS.sigma_down, sigmas, step)
        x_res_3m, denoised_res_3m = self.calculate_res_3m_step(x_0, data_prev_, NS.sigma_down, sigmas, step)
        if denoised_res_2m is not None:
            if rk_swap_print:
                RESplain("res_3m - res_2m:", torch.norm(denoised_res_3m - denoised_res_2m).item())
            if rk_swap_threshold > torch.norm(denoised_res_2m - denoised_res_3m):
                RESplain("Switching rk_type to:", rk_swap_type, "at step:", step)
                self._apply_rk_swap(rk_swap_type, NS)

    return self.rk_type
def bong_iter(self,
              x_0 : Tensor,
              x_ : Tensor,
              eps_ : Tensor,
              eps_prev_ : Tensor,
              data_ : Tensor,
              sigma : Tensor,
              s_ : Tensor,
              row : int,
              row_offset: int,
              h : Tensor,
              step : int,
              ) -> Tuple[Tensor, Tensor, Tensor]:
    # Fixed-point refinement ("bong" iteration): repeatedly re-derive x_0 from
    # the current row's state, rebuild the earlier rows from it, and recompute
    # their epsilons. Mutates x_ and eps_ in place; returns (x_0, x_, eps_).
    # NOTE(review): norm_dim is only assigned for 4D/5D inputs; other ranks
    # would raise NameError when the ch-mean lock options are enabled.
    if x_0.ndim == 4:
        norm_dim = (-2,-1)
    elif x_0.ndim == 5:
        norm_dim = (-4,-2,-1)

    # optional start/stop step gating via extra options
    if self.EO("bong_start_step", 0) > step or step > self.EO("bong_stop_step", 10000):
        return x_0, x_, eps_

    bong_iter_max_row = self.rows - row_offset
    if self.EO("bong_iter_max_row_full"):
        bong_iter_max_row = self.rows

    # snapshot channelwise spatial means so they can be restored each pass
    if self.EO("bong_iter_lock_x_0_ch_means"):
        x_0_ch_means = x_0.mean(dim=norm_dim, keepdim=True)
    if self.EO("bong_iter_lock_x_row_ch_means"):
        x_row_means = []
        for rr in range(row+row_offset):
            x_row_mean = x_[rr].mean(dim=norm_dim, keepdim=True)
            x_row_means.append(x_row_mean)

    if row < bong_iter_max_row and self.multistep_stages == 0:
        bong_strength = self.EO("bong_strength", 1.0)

        # keep originals so the result can be blended by bong_strength below
        if bong_strength != 1.0:
            x_0_tmp  = x_0.clone()
            x_tmp_   = x_.clone()
            eps_tmp_ = eps_.clone()

        # fixed 100 iterations -- no convergence check
        for i in range(100):
            # invert the substep update to recover x_0 from the current row
            x_0 = x_[row+row_offset] - h * self.zum(row+row_offset, eps_, eps_prev_)
            if self.EO("bong_iter_lock_x_0_ch_means"):
                x_0 = x_0 - x_0.mean(dim=norm_dim, keepdim=True) + x_0_ch_means
            for rr in range(row+row_offset):
                x_[rr] = x_0 + h * self.zum(rr, eps_, eps_prev_)
            if self.EO("bong_iter_lock_x_row_ch_means"):
                for rr in range(row+row_offset):
                    x_[rr] = x_[rr] - x_[rr].mean(dim=norm_dim, keepdim=True) + x_row_means[rr]
            for rr in range(row+row_offset):
                if self.EO("zonkytar"):
                    # experimental variant: swap x_0 / x_[rr] argument order
                    eps_[rr] = self.get_epsilon(x_[rr], x_0, data_[rr], sigma, s_[rr])
                else:
                    eps_[rr] = self.get_epsilon(x_0, x_[rr], data_[rr], sigma, s_[rr])

        # blend refined values with the originals by bong_strength
        if bong_strength != 1.0:
            x_0  = x_0_tmp  + bong_strength * (x_0  - x_0_tmp)
            x_   = x_tmp_   + bong_strength * (x_   - x_tmp_)
            eps_ = eps_tmp_ + bong_strength * (eps_ - eps_tmp_)

    return x_0, x_, eps_
def newton_iter(self,
                x_0 : Tensor,
                x_ : Tensor,
                eps_ : Tensor,
                eps_prev_ : Tensor,
                data_ : Tensor,
                s_ : Tensor,
                row : int,
                h : Tensor,
                sigmas : Tensor,
                step : int,
                newton_name: str,
                ) -> Tuple[Tensor, Tensor]:
    # Newton-style fixed-point iteration over the implicit stages: rebuild each
    # stage state x_[r] from the current epsilons, then refresh the epsilons
    # from the rebuilt states. Active only for implicit samplers.
    # `newton_name` ("pre", "post", "lying", ...) selects the extra-option
    # prefix and which rows participate. Returns the updated (x_, eps_).
    newton_iter_name = "newton_iter_" + newton_name

    default_anchor_x_all = False
    if newton_name == "lying":
        default_anchor_x_all = True

    # tunables, all read from extra options under the per-variant prefix
    newton_iter                 = self.EO(newton_iter_name, 100)
    newton_iter_skip_last_steps = self.EO(newton_iter_name + "_skip_last_steps", 0)
    newton_iter_mixing_rate     = self.EO(newton_iter_name + "_mixing_rate", 1.0)
    newton_iter_anchor          = self.EO(newton_iter_name + "_anchor", 0)
    newton_iter_anchor_x_all    = self.EO(newton_iter_name + "_anchor_x_all", default_anchor_x_all)
    newton_iter_type            = self.EO(newton_iter_name + "_type", "from_epsilon")
    newton_iter_sequence        = self.EO(newton_iter_name + "_sequence", "double")

    row_b_offset = 0
    if self.EO(newton_iter_name + "_include_row_b"):
        row_b_offset = 1

    # skip on final steps, at sigma_next == 0, or for explicit samplers
    if step >= len(sigmas)-1-newton_iter_skip_last_steps or sigmas[step+1] == 0 or not self.IMPLICIT:
        return x_, eps_

    sigma = sigmas[step]

    # which rows to iterate over, depending on when this is called
    start, stop = 0, self.rows+row_b_offset
    if newton_name == "pre":
        start = row
    elif newton_name == "post":
        start = row + 1

    # optionally freeze one stage's epsilon for the whole iteration
    if newton_iter_anchor >= 0:
        eps_anchor = eps_[newton_iter_anchor].clone()

    if newton_iter_anchor_x_all:
        x_orig_ = x_.clone()

    for n_iter in range(newton_iter):
        for r in range(start, stop):
            if newton_iter_anchor >= 0:
                eps_[newton_iter_anchor] = eps_anchor.clone()
            if newton_iter_anchor_x_all:
                x_ = x_orig_.clone()
            x_tmp, eps_tmp = x_[r].clone(), eps_[r].clone()

            # "double" refreshes every participating row each pass; otherwise only row r
            seq_start, seq_stop = r, r+1
            if newton_iter_sequence == "double":
                seq_start, seq_stop = start, stop

            # rebuild the stage states from the current epsilons
            for r_ in range(seq_start, seq_stop):
                x_[r_] = x_0 + h * self.zum(r_, eps_, eps_prev_)

            # refresh the epsilons from the rebuilt states
            for r_ in range(seq_start, seq_stop):
                if newton_iter_type == "from_data":
                    data_[r_] = get_data_from_step(x_0, x_[r_], sigma, s_[r_])
                    eps_ [r_] = self.get_epsilon(x_0, x_[r_], data_[r_], sigma, s_[r_])
                elif newton_iter_type == "from_step":
                    eps_ [r_] = get_epsilon_from_step(x_0, x_[r_], sigma, s_[r_])
                elif newton_iter_type == "from_alt":
                    eps_ [r_] = x_0/sigma - x_[r_]/s_[r_]
                elif newton_iter_type == "from_epsilon":
                    eps_ [r_] = self.get_epsilon(x_0, x_[r_], data_[r_], sigma, s_[r_])

                # optional epsilon projection against the other rows
                if self.EO(newton_iter_name + "_opt"):
                    opt_timing, opt_type, opt_subtype = self.EO(newton_iter_name+"_opt", [str])

                    opt_start, opt_stop = 0, self.rows+row_b_offset
                    if opt_timing == "early":
                        opt_stop  = row + 1
                    elif opt_timing == "late":
                        opt_start = row + 1

                    for r2 in range(opt_start, opt_stop):
                        if r_ != r2:
                            if opt_subtype == "a":
                                eps_a = eps_[r2]
                                eps_b = eps_[r_]
                            elif opt_subtype == "b":
                                eps_a = eps_[r_]
                                eps_b = eps_[r2]
                            if opt_type == "ortho":
                                eps_ [r_] = get_orthogonal(eps_a, eps_b)
                            elif opt_type == "collin":
                                eps_ [r_] = get_collinear (eps_a, eps_b)
                            elif opt_type == "proj":
                                eps_ [r_] = get_collinear (eps_a, eps_b) + get_orthogonal(eps_b, eps_a)

                # relax toward the previous values by the mixing rate
                x_  [r_] = x_tmp   + newton_iter_mixing_rate * (x_  [r_] - x_tmp)
                eps_[r_] = eps_tmp + newton_iter_mixing_rate * (eps_[r_] - eps_tmp)

            # a "double" pass already refreshed every row; skip the remaining r
            if newton_iter_sequence == "double":
                break

    return x_, eps_
class RK_Method_Exponential(RK_Method_Beta):
    """RK method in the exponential-integrator parameterization.

    Sigma/time maps are logarithmic (h = -log(sigma_down/sigma)) and epsilon is
    represented as (denoised - x_0) rather than the linear (x - denoised)/sigma.
    """
    def __init__(self,
                 model,
                 rk_type : str,
                 noise_anchor : float,
                 noise_boost_normalize : bool,
                 model_device : str = 'cuda',
                 work_device : str = 'cpu',
                 dtype : torch.dtype = torch.float64,
                 extra_options : str = "",
                 ):
        super().__init__(model,
                         rk_type,
                         noise_anchor,
                         noise_boost_normalize,
                         model_device  = model_device,
                         work_device   = work_device,
                         dtype         = dtype,
                         extra_options = extra_options,
                         )

    @staticmethod
    def alpha_fn(neg_h:Tensor) -> Tensor:
        # scaling factor exp(-h) for the exponential update
        return torch.exp(neg_h)

    @staticmethod
    def sigma_fn(t:Tensor) -> Tensor:
        # inverse of t_fn: sigma = exp(-t)
        return t.neg().exp()

    @staticmethod
    def t_fn(sigma:Tensor) -> Tensor:
        # t = -log(sigma)
        return sigma.log().neg()

    @staticmethod
    def h_fn(sigma_down:Tensor, sigma:Tensor) -> Tensor:
        # log-space step size
        return -torch.log(sigma_down/sigma)

    def __call__(self,
                 x : Tensor,
                 sub_sigma : Tensor,
                 x_0 : Optional[Tensor] = None,
                 sigma : Optional[Tensor] = None,
                 transformer_options : Optional[dict] = None,
                 ) -> Tuple[Tensor, Tensor]:
        """Run the model at sub_sigma and return (epsilon, denoised).

        Epsilon is blended between the substep estimate and the one anchored at
        x_0 by self.LINEAR_ANCHOR_X_0, then re-expressed in the exponential
        convention (denoised - x_0).
        """
        x_0   = x         if x_0   is None else x_0
        sigma = sub_sigma if sigma is None else sigma

        if transformer_options is not None:
            self.extra_args.setdefault("model_options", {}).setdefault("transformer_options", {}).update(transformer_options)

        denoised = self.model_denoised(x.to(self.model_device), sub_sigma.to(self.model_device), **self.extra_args).to(sigma.device)

        eps_anchored = (x_0 - denoised) / sigma
        eps_unmoored = (x   - denoised) / sub_sigma
        eps          = eps_unmoored + self.LINEAR_ANCHOR_X_0 * (eps_anchored - eps_unmoored)

        # re-derive denoised from the blended eps, then convert to exponential form
        denoised = x_0 - sigma * eps
        epsilon  = denoised - x_0
        return epsilon, denoised

    def get_epsilon(self,
                    x_0 : Tensor,
                    x : Tensor,
                    denoised : Tensor,
                    sigma : Tensor,
                    sub_sigma : Tensor,
                    ) -> Tensor:
        """Anchored/unmoored blend of epsilon, expressed as (denoised' - x_0)."""
        eps_anchored = (x_0 - denoised) / sigma
        eps_unmoored = (x   - denoised) / sub_sigma
        eps          = eps_unmoored + self.LINEAR_ANCHOR_X_0 * (eps_anchored - eps_unmoored)

        denoised = x_0 - sigma * eps
        return denoised - x_0

    def get_epsilon_anchored(self, x_0:Tensor, denoised:Tensor, sigma:Tensor) -> Tensor:
        """Exponential-form epsilon anchored at x_0 (sigma is unused in this form)."""
        return denoised - x_0

    def get_guide_epsilon(self,
                          x_0 : Tensor,
                          x : Tensor,
                          y : Tensor,
                          sigma : Tensor,
                          sigma_cur : Tensor,
                          sigma_down : Optional[Tensor] = None,
                          epsilon_scale : Optional[Tensor] = None,
                          ) -> Tensor:
        """Epsilon pulling x toward the guide y.

        Fix: sigma_down=None (the declared default) no longer raises TypeError
        on the `sigma_down > sigma` comparison; None is treated as the normal
        (non-unsampling) direction, matching RK_Method_Linear's None handling.
        """
        sigma_cur = epsilon_scale if epsilon_scale is not None else sigma_cur
        UNSAMPLING = sigma_down is not None and sigma_down > sigma

        if UNSAMPLING:
            eps_unmoored = (sigma_cur/(self.sigma_max - sigma_cur)) * (x - y)
        else:
            eps_unmoored = y - x

        if self.EO("manually_anchor_unsampler"):
            if UNSAMPLING:
                eps_anchored = (sigma    /(self.sigma_max - sigma)) * (x_0 - y)
            else:
                eps_anchored = y - x_0
            eps_guide = eps_unmoored + self.LINEAR_ANCHOR_X_0 * (eps_anchored - eps_unmoored)
        else:
            eps_guide = eps_unmoored

        return eps_guide
class RK_Method_Linear(RK_Method_Beta):
    """RK method in the linear (standard sigma) parameterization.

    Sigma/time maps are the identity (h = sigma_down - sigma) and epsilon is
    the usual (x - denoised) / sigma.
    """
    def __init__(self,
                 model,
                 rk_type : str,
                 noise_anchor : float,
                 noise_boost_normalize : bool,
                 model_device : str = 'cuda',
                 work_device : str = 'cpu',
                 dtype : torch.dtype = torch.float64,
                 extra_options : str = "",
                 ):
        super().__init__(model,
                         rk_type,
                         noise_anchor,
                         noise_boost_normalize,
                         model_device  = model_device,
                         work_device   = work_device,
                         dtype         = dtype,
                         extra_options = extra_options,
                         )

    @staticmethod
    def alpha_fn(neg_h:Tensor) -> Tensor:
        # no exponential scaling in the linear parameterization
        return torch.ones_like(neg_h)

    @staticmethod
    def sigma_fn(t:Tensor) -> Tensor:
        return t

    @staticmethod
    def t_fn(sigma:Tensor) -> Tensor:
        return sigma

    @staticmethod
    def h_fn(sigma_down:Tensor, sigma:Tensor) -> Tensor:
        # plain sigma-space step size
        return sigma_down - sigma

    def __call__(self,
                 x : Tensor,
                 sub_sigma : Tensor,
                 x_0 : Optional[Tensor] = None,
                 sigma : Optional[Tensor] = None,
                 transformer_options : Optional[dict] = None,
                 ) -> Tuple[Tensor, Tensor]:
        """Run the model at sub_sigma and return (epsilon, denoised).

        Epsilon is blended between the substep estimate and the one anchored at
        x_0 by self.LINEAR_ANCHOR_X_0.
        """
        x_0   = x         if x_0   is None else x_0
        sigma = sub_sigma if sigma is None else sigma

        if transformer_options is not None:
            self.extra_args.setdefault("model_options", {}).setdefault("transformer_options", {}).update(transformer_options)

        denoised = self.model_denoised(x.to(self.model_device), sub_sigma.to(self.model_device), **self.extra_args).to(sigma.device)

        epsilon_anchor   = (x_0 - denoised) / sigma
        epsilon_unmoored = (x   - denoised) / sub_sigma
        epsilon          = epsilon_unmoored + self.LINEAR_ANCHOR_X_0 * (epsilon_anchor - epsilon_unmoored)
        return epsilon, denoised

    def get_epsilon(self,
                    x_0 : Tensor,
                    x : Tensor,
                    denoised : Tensor,
                    sigma : Tensor,
                    sub_sigma : Tensor,
                    ) -> Tensor:
        """Anchored/unmoored blend of the linear-form epsilon."""
        eps_anchor   = (x_0 - denoised) / sigma
        eps_unmoored = (x   - denoised) / sub_sigma
        return eps_unmoored + self.LINEAR_ANCHOR_X_0 * (eps_anchor - eps_unmoored)

    def get_epsilon_anchored(self, x_0:Tensor, denoised:Tensor, sigma:Tensor) -> Tensor:
        """Linear-form epsilon anchored at x_0."""
        return (x_0 - denoised) / sigma

    def get_guide_epsilon(self,
                          x_0 : Tensor,
                          x : Tensor,
                          y : Tensor,
                          sigma : Tensor,
                          sigma_cur : Tensor,
                          sigma_down : Optional[Tensor] = None,
                          epsilon_scale : Optional[Tensor] = None,
                          ) -> Tensor:
        """Epsilon pulling x toward the guide y.

        Fix: sigma_down=None is now handled BEFORE the `sigma_down > sigma`
        comparison. Previously the comparison ran first, so the None branch was
        unreachable and a None default raised TypeError.
        """
        if sigma_down is None:
            sigma_ratio = sigma_cur.clone()
            sigma_ratio = epsilon_scale if epsilon_scale is not None else sigma_ratio
            return (x - y) / sigma_ratio

        if sigma_down > sigma:   # unsampling: scale against distance to sigma_max
            sigma_ratio = self.sigma_max - sigma_cur.clone()
        else:
            sigma_ratio = sigma_cur.clone()
        sigma_ratio = epsilon_scale if epsilon_scale is not None else sigma_ratio

        if sigma_down > sigma:
            return (y - x) / sigma_ratio
        else:
            return (x - y) / sigma_ratio
================================================
FILE: samplers_extensions.py
================================================
import torch
from torch import Tensor
import torch.nn.functional as F
from dataclasses import dataclass, asdict
from typing import Optional, Callable, Tuple, Dict, Any, Union
import copy
from nodes import MAX_RESOLUTION
from ..helper import OptionsManager, FrameWeightsManager, initialize_or_scale, get_res4lyf_scheduler_list, parse_range_string, parse_tile_sizes
from .rk_coefficients_beta import RK_SAMPLER_NAMES_BETA_FOLDERS, get_default_sampler_name, get_sampler_name_list, process_sampler_name
from .noise_classes import NOISE_GENERATOR_NAMES_SIMPLE
from .rk_noise_sampler_beta import NOISE_MODE_NAMES
from .constants import IMPLICIT_TYPE_NAMES, GUIDE_MODE_NAMES_BETA_SIMPLE, MAX_STEPS, FRAME_WEIGHTS_CONFIG_NAMES, FRAME_WEIGHTS_DYNAMICS_NAMES, FRAME_WEIGHTS_SCHEDULE_NAMES
class ClownSamplerSelector_Beta:
    """Node: select a sampler by name, resolving explicit/implicit variants."""
    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "sampler_name": (get_sampler_name_list(), {"default": get_default_sampler_name()}),
            },
            "optional": {},
        }

    RETURN_TYPES = (RK_SAMPLER_NAMES_BETA_FOLDERS,)
    RETURN_NAMES = ("sampler_name",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_options"

    def main(self,
             sampler_name = "res_2m",
             ):
        """Resolve the folder-style sampler name to the effective sampler name."""
        explicit_name, implicit_name = process_sampler_name(sampler_name)
        resolved = explicit_name if implicit_name == "use_explicit" else implicit_name
        return (resolved,)
class ClownOptions_SDE_Beta:
    """Node: collect SDE noise settings into an options dict."""
    @classmethod
    def INPUT_TYPES(cls):
        return {"required":
                    {
                    "noise_type_sde":         (NOISE_GENERATOR_NAMES_SIMPLE, {"default": "gaussian"}),
                    "noise_type_sde_substep": (NOISE_GENERATOR_NAMES_SIMPLE, {"default": "gaussian"}),
                    "noise_mode_sde":         (NOISE_MODE_NAMES,             {"default": 'hard', "tooltip": "How noise scales with the sigma schedule. Hard is the most aggressive, the others start strong and drop rapidly."}),
                    "noise_mode_sde_substep": (NOISE_MODE_NAMES,             {"default": 'hard', "tooltip": "How noise scales with the sigma schedule. Hard is the most aggressive, the others start strong and drop rapidly."}),
                    "eta":                    ("FLOAT",                      {"default": 0.5, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Calculated noise amount to be added, then removed, after each step."}),
                    "eta_substep":            ("FLOAT",                      {"default": 0.5, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Calculated noise amount to be added, then removed, after each step."}),
                    "seed":                   ("INT",                        {"default": -1, "min": -1, "max": 0xffffffffffffffff}),
                    },
                "optional":
                    {
                    "etas":         ("SIGMAS", ),
                    "etas_substep": ("SIGMAS", ),
                    "options":      ("OPTIONS", ),
                    }
                }

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_options"

    def main(self,
             noise_type_sde         = "gaussian",
             noise_type_sde_substep = "gaussian",
             noise_mode_sde         = "hard",
             noise_mode_sde_substep = "hard",
             eta                    = 0.5,
             eta_substep            = 0.5,
             seed         : int     = -1,
             etas         : Optional[Tensor] = None,
             etas_substep : Optional[Tensor] = None,
             options                = None,
             ):
        """Store SDE noise settings into `options`; any "none" selection
        disables that noise source by falling back to a default and zeroing
        the corresponding eta."""
        options = {} if options is None else options

        if noise_mode_sde == "none":
            noise_mode_sde, eta = "hard", 0.0
        if noise_mode_sde_substep == "none":
            noise_mode_sde_substep, eta_substep = "hard", 0.0
        if noise_type_sde == "none":
            noise_type_sde, eta = "gaussian", 0.0
        if noise_type_sde_substep == "none":
            noise_type_sde_substep, eta_substep = "gaussian", 0.0

        options.update({
            'noise_type_sde':         noise_type_sde,
            'noise_type_sde_substep': noise_type_sde_substep,
            'noise_mode_sde':         noise_mode_sde,
            'noise_mode_sde_substep': noise_mode_sde_substep,
            'eta':                    eta,
            'eta_substep':            eta_substep,
            'noise_seed_sde':         seed,
            'etas':                   etas,
            'etas_substep':           etas_substep,
        })
        return (options,)
class ClownOptions_StepSize_Beta:
    """Node: collect step-size overshoot settings into an options dict."""
    @classmethod
    def INPUT_TYPES(cls):
        return {"required":
                    {
                    "overshoot_mode":         (NOISE_MODE_NAMES, {"default": 'hard', "tooltip": "How step size overshoot scales with the sigma schedule. Hard is the most aggressive, the others start strong and drop rapidly."}),
                    "overshoot_mode_substep": (NOISE_MODE_NAMES, {"default": 'hard', "tooltip": "How substep size overshoot scales with the sigma schedule. Hard is the most aggressive, the others start strong and drop rapidly."}),
                    "overshoot":              ("FLOAT",          {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Boost the size of each denoising step, then rescale to match the original. Has a softening effect."}),
                    "overshoot_substep":      ("FLOAT",          {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Boost the size of each denoising substep, then rescale to match the original. Has a softening effect."}),
                    },
                "optional":
                    {
                    "options": ("OPTIONS", ),
                    }
                }

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_options"

    def main(self,
             overshoot_mode         = "hard",
             overshoot_mode_substep = "hard",
             overshoot              = 0.0,
             overshoot_substep      = 0.0,
             options                = None,
             ):
        """Store the four overshoot settings into `options` and return it."""
        options = {} if options is None else options
        options.update({
            'overshoot_mode':         overshoot_mode,
            'overshoot_mode_substep': overshoot_mode_substep,
            'overshoot':              overshoot,
            'overshoot_substep':      overshoot_substep,
        })
        return (options,)
@dataclass
class DetailBoostOptions:
    # Bundle of detail-boost / noise-scaling defaults.
    # NOTE(review): currently only referenced from commented-out code in
    # ClownOptions_DetailBoost_Beta.main -- confirm whether still used elsewhere.
    noise_scaling_weight  : float = 0.0   # strength of the noise-scaling effect (0 = off)
    noise_boost_step      : float = 0.0   # extra noise boost applied per step
    noise_boost_substep   : float = 0.0   # extra noise boost applied per substep
    noise_anchor          : float = 1.0   # blend toward x_0-anchored epsilon (1.0 = fully anchored)
    s_noise               : float = 1.0   # SDE noise multiplier per step
    s_noise_substep       : float = 1.0   # SDE noise multiplier per substep
    d_noise               : float = 1.0   # sigma-schedule downscale factor
# Where the noise-level underestimate ("detail boost") is applied. The
# "_normal" variants additionally enable normalization: main() below strips the
# "_normal" suffix and sets the normalize flag instead.
DETAIL_BOOST_METHODS = [
    'sampler',
    'sampler_normal',
    'sampler_substep',
    'sampler_substep_normal',
    'model',
    'model_alpha',
]
class ClownOptions_DetailBoost_Beta:
    """Node: configure "detail boost" noise-scaling options for the sampler."""
    @classmethod
    def INPUT_TYPES(cls):
        return {"required":
                    {
                    "weight":     ("FLOAT",               {"default": 1.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set to positive values to create a sharper, grittier, more detailed image. Set to negative values to soften and deepen the colors."}),
                    "method":     (DETAIL_BOOST_METHODS,  {"default": "model", "tooltip": "Determines whether the sampler or the model underestimates the noise level."}),
                    "mode":       (NOISE_MODE_NAMES,      {"default": 'hard', "tooltip": "Changes the steps where the effect is greatest. Most affect early steps, sinusoidal affects middle steps."}),
                    "eta":        ("FLOAT",               {"default": 0.5, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "The strength of the effect of the noise_scaling_mode. Linear ignores this parameter."}),
                    "start_step": ("INT",                 {"default": 3, "min": 0, "max": MAX_STEPS}),
                    "end_step":   ("INT",                 {"default": 10, "min": -1, "max": MAX_STEPS}),
                    },
                "optional":
                    {
                    "weights": ("SIGMAS", ),
                    "etas":    ("SIGMAS", ),
                    "options": ("OPTIONS", ),
                    }
                }

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_options"

    def main(self,
             weight                    : float = 0.0,
             method                    : str   = "sampler",
             mode                      : str   = "linear",
             eta                       : float = 0.5,
             start_step                : int   = 0,
             end_step                  : int   = -1,
             noise_scaling_cycles      : int   = 1,
             noise_boost_step          : float = 0.0,
             noise_boost_substep       : float = 0.0,
             sampler_scaling_normalize : bool  = False,
             weights                   : Optional[Tensor] = None,
             etas                      : Optional[Tensor] = None,
             options                   = None
             ):
        """Build per-step noise-scaling weight/eta schedules and store them in
        the options dict.

        The schedules are zero-padded before `start_step`, truncated at
        `end_step` (-1 = unlimited), then right-padded with zeros out to
        MAX_STEPS. Returns a 1-tuple (options,).
        """
        noise_scaling_weight     = weight
        noise_scaling_type       = method
        noise_scaling_mode       = mode
        noise_scaling_eta        = eta
        noise_scaling_start_step = start_step
        noise_scaling_end_step   = end_step
        noise_scaling_weights    = weights
        noise_scaling_etas       = etas

        options = options if options is not None else {}

        default_dtype  = torch.float64
        # fix: fall back to CPU so CPU-only machines don't crash on torch.device('cuda')
        default_device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

        # "_normal" method variants mean: strip the suffix, enable normalization
        if noise_scaling_type.endswith("_normal"):
            sampler_scaling_normalize = True
            noise_scaling_type        = noise_scaling_type[:-len("_normal")]

        if noise_scaling_end_step == -1:
            noise_scaling_end_step = MAX_STEPS

        # fix: was `== None`; use identity comparison
        if noise_scaling_weights is None:
            noise_scaling_weights = initialize_or_scale(None, noise_scaling_weight, MAX_STEPS).to(default_dtype).to(default_device)
        if noise_scaling_etas is None:
            noise_scaling_etas    = initialize_or_scale(None, noise_scaling_eta,    MAX_STEPS).to(default_dtype).to(default_device)

        # zero out the schedule before start_step by prepending zeros
        noise_scaling_prepend = torch.zeros((noise_scaling_start_step,), dtype=default_dtype, device=default_device)
        noise_scaling_weights = torch.cat((noise_scaling_prepend, noise_scaling_weights), dim=0)
        noise_scaling_etas    = torch.cat((noise_scaling_prepend, noise_scaling_etas),    dim=0)

        if noise_scaling_weights.shape[-1] > noise_scaling_end_step:
            noise_scaling_weights = noise_scaling_weights[:noise_scaling_end_step]
        if noise_scaling_etas.shape[-1] > noise_scaling_end_step:
            noise_scaling_etas    = noise_scaling_etas[:noise_scaling_end_step]

        # right-pad with zeros so downstream indexing never runs off the end
        noise_scaling_weights = F.pad(noise_scaling_weights, (0, MAX_STEPS), value=0.0)
        noise_scaling_etas    = F.pad(noise_scaling_etas,    (0, MAX_STEPS), value=0.0)

        options['noise_scaling_weight']  = noise_scaling_weight
        options['noise_scaling_type']    = noise_scaling_type
        options['noise_scaling_mode']    = noise_scaling_mode
        options['noise_scaling_eta']     = noise_scaling_eta
        options['noise_scaling_cycles']  = noise_scaling_cycles
        options['noise_scaling_weights'] = noise_scaling_weights
        options['noise_scaling_etas']    = noise_scaling_etas
        options['noise_boost_step']      = noise_boost_step
        options['noise_boost_substep']   = noise_boost_substep
        options['noise_boost_normalize'] = sampler_scaling_normalize

        return (options,)
class ClownOptions_SigmaScaling_Beta:
    """Options node: configures SDE noise strength, noise anchoring, and
    sigma-schedule scaling ("lying" sigmas) for the Clown samplers.

    The UI names "lying"/"lying_inv" map to the sampler's internal
    d_noise / d_noise_inv options.
    """

    @classmethod
    def INPUT_TYPES(cls):
        return {"required":
                    {
                    "s_noise": ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step":0.01, "tooltip": "Adds extra SDE noise. Values around 1.03-1.07 can lead to a moderate boost in detail and paint textures."}),
                    "s_noise_substep": ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step":0.01, "tooltip": "Adds extra SDE noise. Values around 1.03-1.07 can lead to a moderate boost in detail and paint textures."}),
                    "noise_anchor_sde": ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Typically set to between 1.0 and 0.0. Lower values cerate a grittier, more detailed image."}),
                    "lying": ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step":0.01, "tooltip": "Downscales the sigma schedule. Values around 0.98-0.95 can lead to a large boost in detail and paint textures."}),
                    "lying_inv": ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step":0.01, "tooltip": "Upscales the sigma schedule. Will soften the image and deepen colors. Use after d_noise to counteract desaturation."}),
                    "lying_start_step": ("INT", {"default": 0, "min": 0, "max": MAX_STEPS}),
                    "lying_inv_start_step": ("INT", {"default": 1, "min": 0, "max": MAX_STEPS}),
                    },
                "optional":
                    {
                    "s_noises": ("SIGMAS", ),
                    "s_noises_substep": ("SIGMAS", ),
                    "options": ("OPTIONS", ),
                    }
                }

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/sampler_options"

    def main(self,
            noise_anchor_sde : float = 1.0,
            s_noise : float = 1.0,
            s_noise_substep : float = 1.0,
            lying : float = 1.0,
            lying_start_step : int = 0,
            lying_inv : float = 1.0,
            lying_inv_start_step : int = 1,
            s_noises : Optional[Tensor] = None,
            s_noises_substep : Optional[Tensor] = None,
            options = None
            ):
        """Store sigma-scaling settings into the (possibly shared) options
        dict and return it as a 1-tuple.

        Fix: removed unused locals default_dtype/default_device — the
        original instantiated torch.device('cuda') without ever using it.
        """
        options = options if options is not None else {}

        options['noise_anchor']           = noise_anchor_sde
        options['s_noise']                = s_noise
        options['s_noise_substep']        = s_noise_substep
        options['d_noise']                = lying
        options['d_noise_start_step']     = lying_start_step
        options['d_noise_inv']            = lying_inv
        options['d_noise_inv_start_step'] = lying_inv_start_step
        options['s_noises']               = s_noises
        options['s_noises_substep']       = s_noises_substep

        return (options,)
class ClownOptions_Momentum_Beta:
    """Options node that configures the momentum term used by the Clown
    samplers to accelerate (or decelerate) convergence."""

    @classmethod
    def INPUT_TYPES(cls):
        required = {
            "momentum": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step":0.01, "round": False, "tooltip": "Accelerate convergence with positive values when sampling, negative values when unsampling."}),
        }
        optional = {
            "options": ("OPTIONS", ),
        }
        return {"required": required, "optional": optional}

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/sampler_options"

    def main(self, momentum=0.0, options=None):
        """Record `momentum` into the options dict (created if absent)."""
        opts = {} if options is None else options
        opts['momentum'] = momentum
        return (opts,)
class ClownOptions_ImplicitSteps_Beta:
    """Options node controlling implicit refinement: how many implicit
    steps/substeps to run and which implicit iteration type to use."""

    @classmethod
    def INPUT_TYPES(cls):
        required = {
            "implicit_type": (IMPLICIT_TYPE_NAMES, {"default": "bongmath"}),
            "implicit_type_substeps": (IMPLICIT_TYPE_NAMES, {"default": "bongmath"}),
            "implicit_steps": ("INT", {"default": 0, "min": 0, "max": 10000}),
            "implicit_substeps": ("INT", {"default": 0, "min": 0, "max": 10000}),
        }
        optional = {
            "options": ("OPTIONS", ),
        }
        return {"required": required, "optional": optional}

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/sampler_options"

    def main(self, implicit_type="bongmath", implicit_type_substeps="bongmath", implicit_steps=0, implicit_substeps=0, options=None):
        """Record the implicit-step configuration into the options dict."""
        opts = {} if options is None else options
        opts.update({
            'implicit_type':          implicit_type,
            'implicit_type_substeps': implicit_type_substeps,
            'implicit_steps':         implicit_steps,
            'implicit_substeps':      implicit_substeps,
        })
        return (opts,)
class ClownOptions_Cycles_Beta:
    """Options node for unsample/resample cycling: number of cycles, the
    unsampling eta/CFG, and an optional override sampler for unsampling."""

    @classmethod
    def INPUT_TYPES(cls):
        required = {
            "cycles" : ("FLOAT", {"default": 0.0, "min": 0.0, "max": 10000, "step":0.5, "round": 0.5}),
            "eta_decay_scale" : ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step":0.01, "tooltip": "Multiplies etas by this number after every cycle. May help drive convergence." }),
            "unsample_eta" : ("FLOAT", {"default": 0.5, "min": -10000, "max": 10000, "step":0.01}),
            "unsampler_override" : (get_sampler_name_list(), {"default": "none"}),
            "unsample_cfg" : ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step":0.01}),
        }
        optional = {
            "options": ("OPTIONS", ),
        }
        return {"required": required, "optional": optional}

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/sampler_options"

    def main(self, cycles=0, unsample_eta=0.5, eta_decay_scale=1.0, unsample_cfg=1.0, unsampler_override="none", options=None):
        """Store cycle configuration. One cycle is an unsample/resample
        pair, i.e. two "rebounds"."""
        opts = {} if options is None else options
        opts.update({
            'rebounds':        int(cycles * 2),
            'unsample_eta':    unsample_eta,
            'unsampler_name':  unsampler_override,
            'eta_decay_scale': eta_decay_scale,
            'unsample_cfg':    unsample_cfg,
        })
        return (opts,)
class SharkOptions_StartStep_Beta:
    """Options node that sets the step at which sampling begins."""

    @classmethod
    def INPUT_TYPES(cls):
        required = {
            "start_at_step": ("INT", {"default": 0, "min": -1, "max": 10000, "step":1,}),
        }
        optional = {
            "options": ("OPTIONS", ),
        }
        return {"required": required, "optional": optional}

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/sampler_options"

    def main(self, start_at_step=0, options=None):
        """Record `start_at_step` into the options dict."""
        opts = {} if options is None else options
        opts['start_at_step'] = start_at_step
        return (opts,)
class ClownOptions_Tile_Beta:
    """Options node that appends one (height, width) tile size to the
    accumulated tile list; chain several of these to build multiple tiles."""

    @classmethod
    def INPUT_TYPES(cls):
        required = {
            "tile_width" : ("INT", {"default": 1024, "min": -1, "max": 10000, "step":1,}),
            "tile_height": ("INT", {"default": 1024, "min": -1, "max": 10000, "step":1,}),
        }
        optional = {
            "options": ("OPTIONS", ),
        }
        return {"required": required, "optional": optional}

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/sampler_options"

    def main(self, tile_height=1024, tile_width=1024, options=None):
        """Append (tile_height, tile_width) to options['tile_sizes']."""
        opts = {} if options is None else options
        sizes = opts.get('tile_sizes', [])
        sizes.append((tile_height, tile_width))
        opts['tile_sizes'] = sizes
        return (opts,)
class ClownOptions_Tile_Advanced_Beta:
    """Options node that parses a multi-line "height,width" string into a
    list of tile sizes."""

    @classmethod
    def INPUT_TYPES(cls):
        return {"required":
                    {
                    "tile_sizes": ("STRING", {"default": "1024,1024", "multiline": True}),
                    },
                "optional":
                    {
                    "options": ("OPTIONS", ),
                    }
                }

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/sampler_options"

    def main(self, tile_sizes="1024,1024", options=None):
        """Parse `tile_sizes` and store the result in options['tile_sizes'].

        Fix: the original list comprehension iterated over an undefined name
        `ptile` (NameError); it now iterates over the parsed result.
        """
        options = options if options is not None else {}

        tiles_height_width = parse_tile_sizes(tile_sizes)
        options['tile_sizes'] = [(tile[-1], tile[-2]) for tile in tiles_height_width] # swap height and width to be consistent... width, height

        return (options,)
class ClownOptions_ExtraOptions_Beta:
    """Options node carrying a free-form, multiline "extra options" string
    that the samplers parse elsewhere."""

    @classmethod
    def INPUT_TYPES(cls):
        required = {
            "extra_options": ("STRING", {"default": "", "multiline": True}),
        }
        optional = {
            "options": ("OPTIONS", ),
        }
        return {"required": required, "optional": optional}

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/sampler_options"

    def main(self, extra_options="", options=None):
        """Record the raw extra-options text into the options dict."""
        opts = {} if options is None else options
        opts['extra_options'] = extra_options
        return (opts, )
class ClownOptions_Automation_Beta:
    """Options node that bundles per-step automation curves (etas, s_noises,
    epsilon scales, frame weights) into options['automation']."""

    @classmethod
    def INPUT_TYPES(cls):
        optional = {
            "etas": ("SIGMAS", ),
            "etas_substep": ("SIGMAS", ),
            "s_noises": ("SIGMAS", ),
            "s_noises_substep": ("SIGMAS", ),
            "epsilon_scales": ("SIGMAS", ),
            "frame_weights": ("SIGMAS", ),
            "options": ("OPTIONS",),
        }
        return {"required": {}, "optional": optional}

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/sampler_options"

    def main(self, etas=None, etas_substep=None, s_noises=None, s_noises_substep=None, epsilon_scales=None, frame_weights=None, options=None):
        """Collect the automation curves under options['automation']."""
        opts = {} if options is None else options

        # NOTE(review): the same frame_weights input fills both slots of the
        # manager tuple — confirm this duplication is intended.
        frame_weights_mgr = (frame_weights, frame_weights)

        opts["automation"] = {
            "etas"              : etas,
            "etas_substep"      : etas_substep,
            "s_noises"          : s_noises,
            "s_noises_substep"  : s_noises_substep,
            "epsilon_scales"    : epsilon_scales,
            "frame_weights_mgr" : frame_weights_mgr,
        }
        return (opts, )
class SharkOptions_GuideCond_Beta:
    """Options node supplying guide conditioning (positive/negative + CFG)
    stored under options['flow_cond']."""

    @classmethod
    def INPUT_TYPES(cls):
        optional = {
            "positive" : ("CONDITIONING", ),
            "negative" : ("CONDITIONING", ),
            "cfg"      : ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step":0.01}),
            "options"  : ("OPTIONS",),
        }
        return {"required": {}, "optional": optional}

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/sampler_options"

    def main(self, positive=None, negative=None, cfg=1.0, options=None):
        """Pack the guide conditioning into options['flow_cond']."""
        opts = {} if options is None else options
        opts["flow_cond"] = {
            "yt_positive" : positive,
            "yt_negative" : negative,
            "yt_cfg"      : cfg,
        }
        return (opts, )
class SharkOptions_GuideConds_Beta:
    """Options node supplying masked and unmasked guide conditioning pairs,
    stored together under options['flow_cond']."""

    @classmethod
    def INPUT_TYPES(cls):
        optional = {
            "positive_masked"   : ("CONDITIONING", ),
            "positive_unmasked" : ("CONDITIONING", ),
            "negative_masked"   : ("CONDITIONING", ),
            "negative_unmasked" : ("CONDITIONING", ),
            "cfg_masked"        : ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step":0.01}),
            "cfg_unmasked"      : ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step":0.01}),
            "options"           : ("OPTIONS",),
        }
        return {"required": {}, "optional": optional}

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/sampler_options"

    def main(self, positive_masked=None, negative_masked=None, cfg_masked=1.0, positive_unmasked=None, negative_unmasked=None, cfg_unmasked=1.0, options=None):
        """Pack both conditioning sets into options['flow_cond']; the
        unmasked set uses the yt_inv_* keys."""
        opts = {} if options is None else options
        opts["flow_cond"] = {
            "yt_positive"     : positive_masked,
            "yt_negative"     : negative_masked,
            "yt_cfg"          : cfg_masked,
            "yt_inv_positive" : positive_unmasked,
            "yt_inv_negative" : negative_unmasked,
            "yt_inv_cfg"      : cfg_unmasked,
        }
        return (opts, )
class SharkOptions_Beta:
    """Options node for initial-noise settings: generator type, stdev,
    alternate denoise, and channelwise CFG."""

    @classmethod
    def INPUT_TYPES(cls):
        required = {
            "noise_type_init": (NOISE_GENERATOR_NAMES_SIMPLE, {"default": "gaussian"}),
            "s_noise_init": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step":0.01, "round": False, }),
            "denoise_alt": ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step":0.01}),
            "channelwise_cfg": ("BOOLEAN", {"default": False}),
        }
        optional = {
            "options": ("OPTIONS", ),
        }
        return {"required": required, "optional": optional}

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/sampler_options"

    def main(self, noise_type_init="gaussian", s_noise_init=1.0, denoise_alt=1.0, channelwise_cfg=False, options=None):
        """Record the initial-noise configuration into the options dict."""
        opts = {} if options is None else options
        opts.update({
            'noise_type_init'  : noise_type_init,
            'noise_init_stdev' : s_noise_init,
            'denoise_alt'      : denoise_alt,
            'channelwise_cfg'  : channelwise_cfg,
        })
        return (opts,)
class SharkOptions_UltraCascade_Latent_Beta:
    """Options node that sets the UltraCascade latent width/height."""

    @classmethod
    def INPUT_TYPES(cls):
        required = {
            "width": ("INT", {"default": 60, "min": 1, "max": MAX_RESOLUTION, "step": 1}),
            "height": ("INT", {"default": 36, "min": 1, "max": MAX_RESOLUTION, "step": 1}),
        }
        optional = {
            "options": ("OPTIONS",),
        }
        return {"required": required, "optional": optional}

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/sampler_options"

    def main(self, width: int = 60, height: int = 36, options=None):
        """Record the latent dimensions into the options dict."""
        opts = {} if options is None else options
        opts['ultracascade_latent_width'] = width
        opts['ultracascade_latent_height'] = height
        return (opts,)
class ClownOptions_SwapSampler_Beta:
    """Options node: switch to a different sampler once the per-step error
    drops below a threshold or a given step is reached."""

    @classmethod
    def INPUT_TYPES(cls):
        required = {
            "sampler_name": (get_sampler_name_list(), {"default": get_default_sampler_name()}),
            "swap_below_err": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Swap samplers if the error per step falls below this threshold."}),
            "swap_at_step": ("INT", {"default": 30, "min": 1, "max": 10000}),
            "log_err_to_console": ("BOOLEAN", {"default": False}),
        }
        optional = {
            "options": ("OPTIONS", ),
        }
        return {"required": required, "optional": optional}

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/sampler_options"

    def main(self, sampler_name="res_3m", swap_below_err=0.0, swap_at_step=30, log_err_to_console=False, options=None):
        """Resolve the sampler name (explicit vs. implicit variant) and
        record the swap configuration."""
        explicit_name, implicit_name = process_sampler_name(sampler_name)
        resolved = explicit_name if implicit_name == "use_explicit" else implicit_name

        opts = {} if options is None else options
        opts.update({
            'rk_swap_type'      : resolved,
            'rk_swap_threshold' : swap_below_err,
            'rk_swap_step'      : swap_at_step,
            'rk_swap_print'     : log_err_to_console,
        })
        return (opts,)
class ClownOptions_SDE_Mask_Beta:
    """Options node: restrict SDE noise injection to a masked region; the
    mask is rescaled to span [min, max]."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "max": ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Clamp the max value for the mask."}),
                "min": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Clamp the min value for the mask."}),
                "invert_mask": ("BOOLEAN", {"default": False}),
            },
            "optional": {
                "mask": ("MASK", ),
                "options": ("OPTIONS", ),
            }
        }

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/sampler_options"

    def main(self,
            max = 1.0,
            min = 0.0,
            invert_mask = False,
            mask = None,
            options = None,
            ):
        """Rescale `mask` (optionally inverted) to [min, max] and store it
        as options['sde_mask'].

        Fixes: the original raised AttributeError when the optional mask
        input was omitted, and divided by zero on a uniform (constant) mask.
        """
        options = copy.deepcopy(options) if options is not None else {}

        if mask is not None:
            if invert_mask:
                mask = 1 - mask
            mask_range = mask.max() - mask.min()
            if mask_range == 0:
                # Degenerate uniform mask: map everything to the low clamp
                # instead of producing NaN/inf from a 0/0 division.
                mask = torch.full_like(mask, min)
            else:
                mask = ((mask - mask.min()) * (max - min)) / mask_range + min

        options['sde_mask'] = mask
        return (options,)
class ClownGuide_Mean_Beta:
    # Guide node: steers sampling toward the mean of a reference latent
    # ("mean" guide mode), gated by an optional mask and step range.
    @classmethod
    def INPUT_TYPES(cls):
        return {"required":
                    {
                    "weight": ("FLOAT", {"default": 0.75, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide."}),
                    "cutoff": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step":0.01, "round": False, "tooltip": "Disables the guide for the next step when the denoised image is similar to the guide. Higher values will strengthen the effect."}),
                    "weight_scheduler": (["constant"] + get_res4lyf_scheduler_list(), {"default": "beta57"},),
                    "start_step": ("INT", {"default": 0, "min": 0, "max": 10000}),
                    "end_step": ("INT", {"default": 15, "min": -1, "max": 10000}),
                    "invert_mask": ("BOOLEAN", {"default": False}),
                    },
                "optional":
                    {
                    "guide": ("LATENT", ),
                    "mask": ("MASK", ),
                    "weights": ("SIGMAS", ),
                    "guides": ("GUIDES", ),
                    }
                }

    RETURN_TYPES = ("GUIDES",)
    RETURN_NAMES = ("guides",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/sampler_extensions"

    def main(self,
            weight_scheduler = "constant",
            start_step = 0,
            end_step = 30,
            cutoff = 1.0,
            guide = None,
            weight = 0.0,
            channelwise_mode = False,  # accepted but not used by this node
            projection_mode = False,   # accepted but not used by this node
            weights = None,
            mask = None,
            invert_mask = False,       # NOTE(review): accepted but never consulted below — confirm
            guides = None,
            ):
        # Builds/extends a GUIDES dict with the *_mean family of keys.
        default_dtype = torch.float64

        # NOTE(review): the mask is inverted unconditionally, regardless of
        # invert_mask — presumably a deliberate convention; verify.
        mask = 1-mask if mask is not None else None

        # end_step == -1 means "run to the end of sampling".
        if end_step == -1:
            end_step = MAX_STEPS

        if guide is not None:
            # Prefer the sampler's saved raw latent state when the input
            # latent carries one; otherwise use the plain samples.
            raw_x = guide.get('state_info', {}).get('raw_x', None)
            if raw_x is not None:
                guide = {'samples': guide['state_info']['raw_x'].clone()}
            else:
                guide = {'samples': guide['samples'].clone()}

        # For the "constant" scheduler, synthesize a flat per-step weight
        # curve and pad it out to MAX_STEPS with zeros. Other schedulers are
        # presumably resolved downstream from weight_scheduler_mean.
        if weight_scheduler == "constant": # and weights == None:
            weights = initialize_or_scale(None, weight, end_step).to(default_dtype)
            weights = F.pad(weights, (0, MAX_STEPS), value=0.0)

        # Copy incoming guides so chained guide nodes don't mutate upstream.
        guides = copy.deepcopy(guides) if guides is not None else {}

        guides['weight_mean'] = weight
        guides['weights_mean'] = weights
        guides['guide_mean'] = guide
        guides['mask_mean'] = mask

        guides['weight_scheduler_mean'] = weight_scheduler
        guides['start_step_mean'] = start_step
        guides['end_step_mean'] = end_step
        guides['cutoff_mean'] = cutoff

        return (guides, )
class ClownGuide_Style_Beta:
    # Guide node: style transfer (AdaIN or WCT) from a reference latent,
    # applied to the positive and/or negative conditioning path.
    @classmethod
    def INPUT_TYPES(cls):
        return {"required":
                    {
                    "apply_to": (["positive", "negative"], {"default": "positive", "tooltip": "When using CFG, decides whether to apply the guide to the positive or negative conditioning."}),
                    "method": (["AdaIN", "WCT"], {"default": "WCT"}),
                    "weight": ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide by multiplying all other weights by this value."}),
                    "synweight": ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the relative strength of the guide on the opposite conditioning to what was selected: i.e., negative if positive in apply_to. Recommended to avoid CFG burn."}),
                    "weight_scheduler": (["constant"] + get_res4lyf_scheduler_list(), {"default": "constant", "tooltip": "Selecting any scheduler except constant will cause the strength to gradually decay to zero. Try beta57 vs. linear quadratic."},),
                    "start_step": ("INT", {"default": 0, "min": 0, "max": 10000}),
                    "end_step": ("INT", {"default": -1, "min": -1, "max": 10000}),
                    "invert_mask": ("BOOLEAN", {"default": False}),
                    },
                "optional":
                    {
                    "guide": ("LATENT", ),
                    "mask": ("MASK", ),
                    "weights": ("SIGMAS", ),
                    "guides": ("GUIDES", ),
                    }
                }

    RETURN_TYPES = ("GUIDES",)
    RETURN_NAMES = ("guides",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/sampler_extensions"

    DESCRIPTION = "Transfer some visual aspects of style from a guide (reference) image. If nothing about style is specified in the prompt, it may just transfer the lighting and color scheme." + \
                  "If using CFG results in burn, or a very dark/bright image in the preview followed by a bad output, try duplicating and chaining this node, so that the guide may be applied to both positive and negative conditioning." + \
                  "Currently supported models: SD1.5, SDXL, Stable Cascade, SD3.5, AuraFlow, Flux, HiDream, WAN, and LTXV."

    def main(self,
            apply_to = "all",  # signature default "all" writes both branches; the UI only offers positive/negative
            method = "WCT",
            weight = 1.0,
            synweight = 1.0,
            weight_scheduler = "constant",
            start_step = 0,
            end_step = 15,
            invert_mask = False,  # NOTE(review): accepted but never consulted below — confirm
            guide = None,
            mask = None,
            weights = None,
            guides = None,
            ):
        # Builds/extends a GUIDES dict with the *_style_pos / *_style_neg keys.
        default_dtype = torch.float64

        # NOTE(review): the mask is inverted unconditionally, regardless of
        # invert_mask — presumably a deliberate convention; verify.
        mask = 1-mask if mask is not None else None

        # end_step == -1 means "run to the end of sampling".
        if end_step == -1:
            end_step = MAX_STEPS

        if guide is not None:
            # Prefer the sampler's saved raw latent state when present.
            raw_x = guide.get('state_info', {}).get('raw_x', None)
            if raw_x is not None:
                guide = {'samples': guide['state_info']['raw_x'].clone()}
            else:
                guide = {'samples': guide['samples'].clone()}

        # Constant scheduler: flat weight curve, zeros before start_step,
        # zero-padded out to MAX_STEPS.
        if weight_scheduler == "constant": # and weights == None:
            weights = initialize_or_scale(None, weight, end_step).to(default_dtype)
            prepend = torch.zeros(start_step).to(weights)
            weights = torch.cat([prepend, weights])
            weights = F.pad(weights, (0, MAX_STEPS), value=0.0)

        # Copy incoming guides so chained guide nodes don't mutate upstream.
        guides = copy.deepcopy(guides) if guides is not None else {}

        guides['style_method'] = method

        # Same weight/guide data is written to whichever branch(es) apply_to
        # selects; "all" (the signature default) selects both.
        if apply_to in {"positive", "all"}:
            guides['weight_style_pos'] = weight
            guides['weights_style_pos'] = weights
            guides['synweight_style_pos'] = synweight
            guides['guide_style_pos'] = guide
            guides['mask_style_pos'] = mask

            guides['weight_scheduler_style_pos'] = weight_scheduler
            guides['start_step_style_pos'] = start_step
            guides['end_step_style_pos'] = end_step

        if apply_to in {"negative", "all"}:
            guides['weight_style_neg'] = weight
            guides['weights_style_neg'] = weights
            guides['synweight_style_neg'] = synweight
            guides['guide_style_neg'] = guide
            guides['mask_style_neg'] = mask

            guides['weight_scheduler_style_neg'] = weight_scheduler
            guides['start_step_style_neg'] = start_step
            guides['end_step_style_neg'] = end_step

        return (guides, )
class ClownGuide_AdaIN_MMDiT_Beta:
    """Guide node: per-block AdaIN guidance for MMDiT models, with separate
    weight lists for double and single transformer blocks."""

    @classmethod
    def INPUT_TYPES(cls):
        return {"required":
                    {
                    "weight": ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide by multiplying all other weights by this value."}),
                    "weight_scheduler": (["constant"] + get_res4lyf_scheduler_list(), {"default": "constant"},),
                    "double_blocks" : ("STRING", {"default": "", "multiline": True}),
                    "double_weights" : ("STRING", {"default": "", "multiline": True}),
                    "single_blocks" : ("STRING", {"default": "20", "multiline": True}),
                    "single_weights" : ("STRING", {"default": "0.5", "multiline": True}),
                    "start_step": ("INT", {"default": 0, "min": 0, "max": 10000}),
                    "end_step": ("INT", {"default": 15, "min": -1, "max": 10000}),
                    "invert_mask": ("BOOLEAN", {"default": False}),
                    },
                "optional":
                    {
                    "guide": ("LATENT", ),
                    "mask": ("MASK", ),
                    "weights": ("SIGMAS", ),
                    "guides": ("GUIDES", ),
                    }
                }

    RETURN_TYPES = ("GUIDES",)
    RETURN_NAMES = ("guides",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/sampler_extensions"

    @staticmethod
    def _normalize_weight_list(weights):
        # Normalize a parsed weight list: never empty, broadcast a single
        # value across all 100 block slots, and coerce ints to floats.
        if len(weights) == 0:
            weights.append(0.0)
        if len(weights) == 1:
            weights = weights * 100
        if type(weights[0]) == int:
            weights = [float(val) for val in weights]
        return weights

    @staticmethod
    def _expand_block_weights(blocks, weights):
        # Resolve a block-spec string ("all" or a parse_range_string spec)
        # plus a weight list into (block_indices, per-block weights over 100
        # slots). Blocks not listed get weight 0.0.
        if blocks == "all":
            blocks = [val for val in range(100)]
            if len(weights) == 1:
                weights = [weights[0]] * 100
        else:
            blocks = parse_range_string(blocks)
            weights_expanded = [0.0] * 100
            for b, w in zip(blocks, weights):
                weights_expanded[b] = w
            weights = weights_expanded
        return blocks, weights

    def main(self,
            weight = 1.0,
            weight_scheduler = "constant",
            double_weights = "0.1",
            single_weights = "0.0",
            double_blocks = "all",
            single_blocks = "all",
            start_step = 0,
            end_step = 15,
            invert_mask = False,
            guide = None,
            mask = None,
            weights = None,
            guides = None,
            ):
        """Build/extend a GUIDES dict with the *_adain keys. Refactor: the
        duplicated double/single block-weight expansion now goes through two
        private helpers; behavior is unchanged."""
        default_dtype = torch.float64

        # NOTE(review): the mask is inverted unconditionally, regardless of
        # invert_mask — presumably a deliberate convention; verify.
        mask = 1-mask if mask is not None else None

        double_weights = self._normalize_weight_list(parse_range_string(double_weights))
        single_weights = self._normalize_weight_list(parse_range_string(single_weights))

        double_blocks, double_weights = self._expand_block_weights(double_blocks, double_weights)
        single_blocks, single_weights = self._expand_block_weights(single_blocks, single_weights)

        # end_step == -1 means "run to the end of sampling".
        if end_step == -1:
            end_step = MAX_STEPS

        if guide is not None:
            # Prefer the sampler's saved raw latent state when present.
            raw_x = guide.get('state_info', {}).get('raw_x', None)
            if raw_x is not None:
                guide = {'samples': guide['state_info']['raw_x'].clone()}
            else:
                guide = {'samples': guide['samples'].clone()}

        # Constant scheduler: flat weight curve, zeros before start_step,
        # zero-padded out to MAX_STEPS.
        if weight_scheduler == "constant": # and weights == None:
            weights = initialize_or_scale(None, weight, end_step).to(default_dtype)
            prepend = torch.zeros(start_step).to(weights)
            weights = torch.cat([prepend, weights])
            weights = F.pad(weights, (0, MAX_STEPS), value=0.0)

        # Copy incoming guides so chained guide nodes don't mutate upstream.
        guides = copy.deepcopy(guides) if guides is not None else {}

        guides['weight_adain'] = weight
        guides['weights_adain'] = weights
        guides['blocks_adain_mmdit'] = {
            "double_weights": double_weights,
            "single_weights": single_weights,
            "double_blocks" : double_blocks,
            "single_blocks" : single_blocks,
        }
        guides['guide_adain'] = guide
        guides['mask_adain'] = mask

        guides['weight_scheduler_adain'] = weight_scheduler
        guides['start_step_adain'] = start_step
        guides['end_step_adain'] = end_step

        return (guides, )
class ClownGuide_AttnInj_MMDiT_Beta:
    # Guide node: attention injection for MMDiT models — per-block weights
    # plus relative strengths for each q/k/v (and normed q/k/v) projection
    # on the image and text streams.
    @classmethod
    def INPUT_TYPES(cls):
        return {"required":
                    {
                    "weight": ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide by multiplying all other weights by this value."}),
                    "weight_scheduler": (["constant"] + get_res4lyf_scheduler_list(), {"default": "constant"},),
                    "double_blocks" : ("STRING", {"default": "0,1,3", "multiline": True}),
                    "double_weights" : ("STRING", {"default": "1.0", "multiline": True}),
                    "single_blocks" : ("STRING", {"default": "20", "multiline": True}),
                    "single_weights" : ("STRING", {"default": "0.5", "multiline": True}),
                    "img_q": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set relative injection strength."}),
                    "img_k": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set relative injection strength."}),
                    "img_v": ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set relative injection strength."}),
                    "txt_q": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set relative injection strength."}),
                    "txt_k": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set relative injection strength."}),
                    "txt_v": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set relative injection strength."}),
                    "img_q_norm": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set relative injection strength."}),
                    "img_k_norm": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set relative injection strength."}),
                    "img_v_norm": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set relative injection strength."}),
                    "txt_q_norm": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set relative injection strength."}),
                    "txt_k_norm": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set relative injection strength."}),
                    "txt_v_norm": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set relative injection strength."}),
                    "start_step": ("INT", {"default": 0, "min": 0, "max": 10000}),
                    "end_step": ("INT", {"default": 15, "min": -1, "max": 10000}),
                    "invert_mask": ("BOOLEAN", {"default": False}),
                    },
                "optional":
                    {
                    "guide": ("LATENT", ),
                    "mask": ("MASK", ),
                    "weights": ("SIGMAS", ),
                    "guides": ("GUIDES", ),
                    }
                }

    RETURN_TYPES = ("GUIDES",)
    RETURN_NAMES = ("guides",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/sampler_extensions"

    def main(self,
            weight = 1.0,
            weight_scheduler = "constant",
            double_weights = "0.1",
            single_weights = "0.0",
            double_blocks = "all",
            single_blocks = "all",
            img_q = 0.0,
            img_k = 0.0,
            img_v = 0.0,
            txt_q = 0.0,
            txt_k = 0.0,
            txt_v = 0.0,
            img_q_norm = 0.0,
            img_k_norm = 0.0,
            img_v_norm = 0.0,
            txt_q_norm = 0.0,
            txt_k_norm = 0.0,
            txt_v_norm = 0.0,
            start_step = 0,
            end_step = 15,
            invert_mask = False,  # NOTE(review): accepted but never consulted below — confirm
            guide = None,
            mask = None,
            weights = None,
            guides = None,
            ):
        # Builds/extends a GUIDES dict with the *_attninj family of keys.
        default_dtype = torch.float64

        # NOTE(review): the mask is inverted unconditionally, regardless of
        # invert_mask — presumably a deliberate convention; verify.
        mask = 1-mask if mask is not None else None

        # Parse the block weight strings into numeric lists.
        double_weights = parse_range_string(double_weights)
        single_weights = parse_range_string(single_weights)

        # Normalize: never empty, broadcast singletons to 100 block slots,
        # and coerce ints to floats.
        if len(double_weights) == 0:
            double_weights.append(0.0)
        if len(single_weights) == 0:
            single_weights.append(0.0)

        if len(double_weights) == 1:
            double_weights = double_weights * 100
        if len(single_weights) == 1:
            single_weights = single_weights * 100

        if type(double_weights[0]) == int:
            double_weights = [float(val) for val in double_weights]
        if type(single_weights[0]) == int:
            single_weights = [float(val) for val in single_weights]

        # Resolve "all" vs. explicit block lists; unlisted blocks get 0.0.
        if double_blocks == "all":
            double_blocks = [val for val in range(100)]
            if len(double_weights) == 1:
                double_weights = [double_weights[0]] * 100
        else:
            double_blocks = parse_range_string(double_blocks)
            weights_expanded = [0.0] * 100
            for b, w in zip(double_blocks, double_weights):
                weights_expanded[b] = w
            double_weights = weights_expanded

        if single_blocks == "all":
            single_blocks = [val for val in range(100)]
            if len(single_weights) == 1:
                single_weights = [single_weights[0]] * 100
        else:
            single_blocks = parse_range_string(single_blocks)
            weights_expanded = [0.0] * 100
            for b, w in zip(single_blocks, single_weights):
                weights_expanded[b] = w
            single_weights = weights_expanded

        # end_step == -1 means "run to the end of sampling".
        if end_step == -1:
            end_step = MAX_STEPS

        if guide is not None:
            # Prefer the sampler's saved raw latent state when present.
            raw_x = guide.get('state_info', {}).get('raw_x', None)
            if raw_x is not None:
                guide = {'samples': guide['state_info']['raw_x'].clone()}
            else:
                guide = {'samples': guide['samples'].clone()}

        # Constant scheduler: flat weight curve, zeros before start_step,
        # zero-padded out to MAX_STEPS.
        if weight_scheduler == "constant": # and weights == None:
            weights = initialize_or_scale(None, weight, end_step).to(default_dtype)
            prepend = torch.zeros(start_step).to(weights)
            weights = torch.cat([prepend, weights])
            weights = F.pad(weights, (0, MAX_STEPS), value=0.0)

        # Copy incoming guides so chained guide nodes don't mutate upstream.
        guides = copy.deepcopy(guides) if guides is not None else {}

        guides['weight_attninj'] = weight
        guides['weights_attninj'] = weights
        guides['blocks_attninj_mmdit'] = {
            "double_weights": double_weights,
            "single_weights": single_weights,
            "double_blocks" : double_blocks,
            "single_blocks" : single_blocks,
        }
        guides['blocks_attninj_qkv'] = {
            "img_q": img_q,
            "img_k": img_k,
            "img_v": img_v,
            "txt_q": txt_q,
            "txt_k": txt_k,
            "txt_v": txt_v,
            "img_q_norm": img_q_norm,
            "img_k_norm": img_k_norm,
            "img_v_norm": img_v_norm,
            "txt_q_norm": txt_q_norm,
            "txt_k_norm": txt_k_norm,
            "txt_v_norm": txt_v_norm,
        }
        guides['guide_attninj'] = guide
        guides['mask_attninj'] = mask

        guides['weight_scheduler_attninj'] = weight_scheduler
        guides['start_step_attninj'] = start_step
        guides['end_step_attninj'] = end_step

        return (guides, )
class ClownGuide_Beta:
    # Single-guide convenience node: wraps ClownGuides_Beta, exposing only
    # the masked guide's controls and forwarding everything else as defaults.
    @classmethod
    def INPUT_TYPES(cls):
        return {"required":
                    {
                    "guide_mode": (GUIDE_MODE_NAMES_BETA_SIMPLE, {"default": 'epsilon', "tooltip": "Recommended: epsilon or mean/mean_std with sampler_mode = standard, and unsample/resample with sampler_mode = unsample/resample. Epsilon_dynamic_mean, etc. are only used with two latent inputs and a mask. Blend/hard_light/mean/mean_std etc. require low strengths, start with 0.01-0.02."}),
                    "channelwise_mode": ("BOOLEAN", {"default": True}),
                    "projection_mode": ("BOOLEAN", {"default": True}),
                    "weight": ("FLOAT", {"default": 0.75, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide."}),
                    "cutoff": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step":0.01, "round": False, "tooltip": "Disables the guide for the next step when the denoised image is similar to the guide. Higher values will strengthen the effect."}),
                    "weight_scheduler": (["constant"] + get_res4lyf_scheduler_list(), {"default": "beta57"},),
                    "start_step": ("INT", {"default": 0, "min": 0, "max": 10000}),
                    "end_step": ("INT", {"default": 15, "min": -1, "max": 10000}),
                    "invert_mask": ("BOOLEAN", {"default": False}),
                    },
                "optional":
                    {
                    "guide": ("LATENT", ),
                    "mask": ("MASK", ),
                    "weights": ("SIGMAS", ),
                    }
                }

    RETURN_TYPES = ("GUIDES",)
    RETURN_NAMES = ("guides",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/sampler_extensions"

    def main(self,
            weight_scheduler = "constant",
            weight_scheduler_unmasked = "constant",
            start_step = 0,
            start_step_unmasked = 0,
            end_step = 30,
            end_step_unmasked = 30,
            cutoff = 1.0,
            cutoff_unmasked = 1.0,
            guide = None,
            guide_unmasked = None,
            weight = 0.0,
            weight_unmasked = 0.0,
            guide_mode = "epsilon",
            channelwise_mode = False,
            projection_mode = False,
            weights = None,
            weights_unmasked = None,
            mask = None,
            unmask = None,
            invert_mask = False,
            ):
        # Delegates to ClownGuides_Beta, mapping this node's single-guide
        # params onto that node's *_masked parameter set.
        CG = ClownGuides_Beta()

        # NOTE(review): the mask is inverted here unconditionally AND
        # invert_mask is also forwarded to CG.main below — confirm this
        # double-handling is intended.
        mask = 1-mask if mask is not None else None

        # end_step == -1 means "run to the end of sampling".
        if end_step == -1:
            end_step = MAX_STEPS

        if guide is not None:
            raw_x = guide.get('state_info', {}).get('raw_x', None)
            # Intentionally disabled branch: raw_x recovery is switched off
            # here (unlike the other guide nodes) — do not "fix" without
            # checking upstream intent.
            if False: # raw_x is not None:
                guide = {'samples': guide['state_info']['raw_x'].clone()}
            else:
                guide = {'samples': guide['samples'].clone()}

        if guide_unmasked is not None:
            raw_x = guide_unmasked.get('state_info', {}).get('raw_x', None)
            # Same intentionally disabled branch as above.
            if False: #raw_x is not None:
                guide_unmasked = {'samples': guide_unmasked['state_info']['raw_x'].clone()}
            else:
                guide_unmasked = {'samples': guide_unmasked['samples'].clone()}

        guides, = CG.main(
            weight_scheduler_masked = weight_scheduler,
            weight_scheduler_unmasked = weight_scheduler_unmasked,
            start_step_masked = start_step,
            start_step_unmasked = start_step_unmasked,
            end_step_masked = end_step,
            end_step_unmasked = end_step_unmasked,
            cutoff_masked = cutoff,
            cutoff_unmasked = cutoff_unmasked,
            guide_masked = guide,
            guide_unmasked = guide_unmasked,
            weight_masked = weight,
            weight_unmasked = weight_unmasked,
            guide_mode = guide_mode,
            channelwise_mode = channelwise_mode,
            projection_mode = projection_mode,
            weights_masked = weights,
            weights_unmasked = weights_unmasked,
            mask = mask,
            unmask = unmask,
            invert_mask = invert_mask
            )

        return (guides, )
        #return (guides[0], )
class ClownGuides_Beta:
    """ComfyUI node: assembles a GUIDES configuration dict for masked/unmasked
    latent guidance, consumed by the RES4LYF beta samplers.

    Each half (masked / unmasked) carries its own guide latent, weight,
    weight schedule, start/end step, and cutoff.  ``main`` returns the
    config as a 1-tuple ``(guides,)``.
    """

    @classmethod
    def INPUT_TYPES(cls):
        return {"required":
                    {
                    "guide_mode": (GUIDE_MODE_NAMES_BETA_SIMPLE, {"default": 'epsilon', "tooltip": "Recommended: epsilon or mean/mean_std with sampler_mode = standard, and unsample/resample with sampler_mode = unsample/resample. Epsilon_dynamic_mean, etc. are only used with two latent inputs and a mask. Blend/hard_light/mean/mean_std etc. require low strengths, start with 0.01-0.02."}),
                    "channelwise_mode": ("BOOLEAN", {"default": True}),
                    "projection_mode": ("BOOLEAN", {"default": True}),
                    "weight_masked": ("FLOAT", {"default": 0.75, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide."}),
                    "weight_unmasked": ("FLOAT", {"default": 0.75, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide_bkg."}),
                    "cutoff_masked": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step":0.01, "round": False, "tooltip": "Disables the guide for the next step when the denoised image is similar to the guide. Higher values will strengthen the effect."}),
                    "cutoff_unmasked": ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Disables the guide for the next step when the denoised image is similar to the guide. Higher values will strengthen the effect."}),
                    "weight_scheduler_masked": (["constant"] + get_res4lyf_scheduler_list(), {"default": "beta57"},),
                    "weight_scheduler_unmasked": (["constant"] + get_res4lyf_scheduler_list(), {"default": "constant"},),
                    "start_step_masked": ("INT", {"default": 0, "min": 0, "max": 10000}),
                    "start_step_unmasked": ("INT", {"default": 0, "min": 0, "max": 10000}),
                    "end_step_masked": ("INT", {"default": 15, "min": -1, "max": 10000}),
                    "end_step_unmasked": ("INT", {"default": 15, "min": -1, "max": 10000}),
                    "invert_mask": ("BOOLEAN", {"default": False}),
                    },
                "optional":
                    {
                    "guide_masked": ("LATENT", ),
                    "guide_unmasked": ("LATENT", ),
                    "mask": ("MASK", ),
                    "weights_masked": ("SIGMAS", ),
                    "weights_unmasked": ("SIGMAS", ),
                    }
                }

    RETURN_TYPES = ("GUIDES",)
    RETURN_NAMES = ("guides",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_extensions"

    def main(self,
            weight_scheduler_masked   = "constant",
            weight_scheduler_unmasked = "constant",
            start_step_masked         = 0,
            start_step_unmasked       = 0,
            end_step_masked           = 30,
            end_step_unmasked         = 30,
            cutoff_masked             = 1.0,
            cutoff_unmasked           = 1.0,
            guide_masked              = None,
            guide_unmasked            = None,
            weight_masked             = 0.0,
            weight_unmasked           = 0.0,
            guide_mode                = "epsilon",
            channelwise_mode          = False,
            projection_mode           = False,
            weights_masked            = None,
            weights_unmasked          = None,
            mask                      = None,
            unmask                    = None,
            invert_mask               = False,
            ):
        """Assemble and return the guides configuration dict.

        Returns:
            tuple: ``(guides,)`` where guides is a plain dict with masked and
            unmasked weights/schedules/latents plus the (optionally inverted)
            masks.
        """
        default_dtype = torch.float64

        # -1 means "run to the end of sampling".
        if end_step_masked == -1:
            end_step_masked = MAX_STEPS
        if end_step_unmasked == -1:
            end_step_unmasked = MAX_STEPS

        # A missing guide disables that half of the config: reset all of its
        # parameters to inert defaults (weight 0.0 makes it a no-op).
        if guide_masked is None:
            weight_scheduler_masked = "constant"
            start_step_masked       = 0
            end_step_masked         = 30
            cutoff_masked           = 1.0
            weight_masked           = 0.0
            weights_masked          = None
            #mask = None

        if guide_unmasked is None:
            weight_scheduler_unmasked = "constant"
            start_step_unmasked       = 0
            end_step_unmasked         = 30
            cutoff_unmasked           = 1.0
            weight_unmasked           = 0.0
            weights_unmasked          = None
            #unmask = None

        # Clone the guide latents so downstream use cannot mutate the caller's
        # dicts.  (A previously disabled `if False:` path pulled 'raw_x' out of
        # state_info instead; it has been removed as dead code.)
        if guide_masked is not None:
            guide_masked = {'samples': guide_masked['samples'].clone()}
        if guide_unmasked is not None:
            guide_unmasked = {'samples': guide_unmasked['samples'].clone()}

        if invert_mask and mask is not None:
            mask = 1 - mask

        # Compose the effective guide mode from the boolean toggles.
        if projection_mode:
            guide_mode = guide_mode + "_projection"
        if channelwise_mode:
            guide_mode = guide_mode + "_cw"

        # unsample/resample have no channelwise variants.
        if guide_mode == "unsample_cw":
            guide_mode = "unsample"
        if guide_mode == "resample_cw":
            guide_mode = "resample"

        # With a constant scheduler and no explicit weights, build a flat
        # per-step weight curve and zero-pad it out to MAX_STEPS.
        if weight_scheduler_masked == "constant" and weights_masked is None:
            weights_masked = initialize_or_scale(None, weight_masked, end_step_masked).to(default_dtype)
            weights_masked = F.pad(weights_masked, (0, MAX_STEPS), value=0.0)

        if weight_scheduler_unmasked == "constant" and weights_unmasked is None:
            weights_unmasked = initialize_or_scale(None, weight_unmasked, end_step_unmasked).to(default_dtype)
            weights_unmasked = F.pad(weights_unmasked, (0, MAX_STEPS), value=0.0)

        guides = {
            "guide_mode"                : guide_mode,
            "weight_masked"             : weight_masked,
            "weight_unmasked"           : weight_unmasked,
            "weights_masked"            : weights_masked,
            "weights_unmasked"          : weights_unmasked,
            "guide_masked"              : guide_masked,
            "guide_unmasked"            : guide_unmasked,
            "mask"                      : mask,
            "unmask"                    : unmask,

            "weight_scheduler_masked"   : weight_scheduler_masked,
            "weight_scheduler_unmasked" : weight_scheduler_unmasked,
            "start_step_masked"         : start_step_masked,
            "start_step_unmasked"       : start_step_unmasked,
            "end_step_masked"           : end_step_masked,
            "end_step_unmasked"         : end_step_unmasked,
            "cutoff_masked"             : cutoff_masked,
            "cutoff_unmasked"           : cutoff_unmasked
        }
        return (guides, )
class ClownGuidesAB_Beta:
    """ComfyUI node: builds a GUIDES config from two guide latents (A/B) with
    independent masks, weights, and schedules.

    Guide A maps onto the "masked" slots of the returned config and guide B
    onto the "unmasked" slots.  If only B is supplied it is promoted to the A
    slot; if only one mask is supplied, the other is derived as its complement.
    """

    @classmethod
    def INPUT_TYPES(cls):
        return {"required":
                    {
                    "guide_mode": (GUIDE_MODE_NAMES_BETA_SIMPLE, {"default": 'epsilon', "tooltip": "Recommended: epsilon or mean/mean_std with sampler_mode = standard, and unsample/resample with sampler_mode = unsample/resample. Epsilon_dynamic_mean, etc. are only used with two latent inputs and a mask. Blend/hard_light/mean/mean_std etc. require low strengths, start with 0.01-0.02."}),
                    "channelwise_mode": ("BOOLEAN", {"default": False}),
                    "projection_mode": ("BOOLEAN", {"default": False}),
                    "weight_A": ("FLOAT", {"default": 0.75, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide."}),
                    "weight_B": ("FLOAT", {"default": 0.75, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide_bkg."}),
                    "cutoff_A": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step":0.01, "round": False, "tooltip": "Disables the guide for the next step when the denoised image is similar to the guide. Higher values will strengthen the effect."}),
                    "cutoff_B": ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Disables the guide for the next step when the denoised image is similar to the guide. Higher values will strengthen the effect."}),
                    "weight_scheduler_A": (["constant"] + get_res4lyf_scheduler_list(), {"default": "beta57"},),
                    "weight_scheduler_B": (["constant"] + get_res4lyf_scheduler_list(), {"default": "constant"},),
                    "start_step_A": ("INT", {"default": 0, "min": 0, "max": 10000}),
                    "start_step_B": ("INT", {"default": 0, "min": 0, "max": 10000}),
                    "end_step_A": ("INT", {"default": 15, "min": -1, "max": 10000}),
                    "end_step_B": ("INT", {"default": 15, "min": -1, "max": 10000}),
                    "invert_masks": ("BOOLEAN", {"default": False}),
                    },
                "optional":
                    {
                    "guide_A": ("LATENT", ),
                    "guide_B": ("LATENT", ),
                    "mask_A": ("MASK", ),
                    "mask_B": ("MASK", ),
                    "weights_A": ("SIGMAS", ),
                    "weights_B": ("SIGMAS", ),
                    }
                }

    RETURN_TYPES = ("GUIDES",)
    RETURN_NAMES = ("guides",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_extensions"

    def main(self,
            weight_scheduler_A  = "constant",
            weight_scheduler_B  = "constant",
            start_step_A        = 0,
            start_step_B        = 0,
            end_step_A          = 30,
            end_step_B          = 30,
            cutoff_A            = 1.0,
            cutoff_B            = 1.0,
            guide_A             = None,
            guide_B             = None,
            weight_A            = 0.0,
            weight_B            = 0.0,
            guide_mode          = "epsilon",
            channelwise_mode    = False,
            projection_mode     = False,
            weights_A           = None,
            weights_B           = None,
            mask_A              = None,
            mask_B              = None,
            invert_masks : bool = False,
            ):
        """Assemble the guides dict; returns a 1-tuple ``(guides,)``."""
        default_dtype = torch.float64

        # -1 selects "until the end of sampling".
        if end_step_A == -1:
            end_step_A = MAX_STEPS
        if end_step_B == -1:
            end_step_B = MAX_STEPS

        # Clone guide latents so downstream use cannot mutate the caller's
        # dicts.  (A previously disabled `if False:` path pulled 'raw_x' from
        # state_info instead; it has been removed as dead code.)
        if guide_A is not None:
            guide_A = {'samples': guide_A['samples'].clone()}
        if guide_B is not None:
            guide_B = {'samples': guide_B['samples'].clone()}

        # If only guide B was supplied, promote it (and its mask) to slot A.
        if guide_A is None:
            guide_A  = guide_B
            guide_B  = None
            mask_A   = mask_B
            mask_B   = None
            weight_B = 0.0

        if guide_B is None:
            weight_B = 0.0

        # Derive mask A as the complement of mask B when only B was given.
        if mask_A is None and mask_B is not None:
            mask_A = 1 - mask_B

        # Compose the effective guide mode from the boolean toggles.
        if projection_mode:
            guide_mode = guide_mode + "_projection"
        if channelwise_mode:
            guide_mode = guide_mode + "_cw"

        # unsample/resample have no channelwise variants.
        if guide_mode == "unsample_cw":
            guide_mode = "unsample"
        if guide_mode == "resample_cw":
            guide_mode = "resample"

        # Constant scheduler with no explicit weights: build a flat per-step
        # weight curve and zero-pad it out to MAX_STEPS.
        if weight_scheduler_A == "constant" and weights_A is None:
            weights_A = initialize_or_scale(None, weight_A, end_step_A).to(default_dtype)
            weights_A = F.pad(weights_A, (0, MAX_STEPS), value=0.0)

        if weight_scheduler_B == "constant" and weights_B is None:
            weights_B = initialize_or_scale(None, weight_B, end_step_B).to(default_dtype)
            weights_B = F.pad(weights_B, (0, MAX_STEPS), value=0.0)

        if invert_masks:
            mask_A = 1 - mask_A if mask_A is not None else None
            mask_B = 1 - mask_B if mask_B is not None else None

        # A maps onto the "masked" slots, B onto the "unmasked" slots.
        guides = {
            "guide_mode"                : guide_mode,
            "weight_masked"             : weight_A,
            "weight_unmasked"           : weight_B,
            "weights_masked"            : weights_A,
            "weights_unmasked"          : weights_B,
            "guide_masked"              : guide_A,
            "guide_unmasked"            : guide_B,
            "mask"                      : mask_A,
            "unmask"                    : mask_B,

            "weight_scheduler_masked"   : weight_scheduler_A,
            "weight_scheduler_unmasked" : weight_scheduler_B,
            "start_step_masked"         : start_step_A,
            "start_step_unmasked"       : start_step_B,
            "end_step_masked"           : end_step_A,
            "end_step_unmasked"         : end_step_B,
            "cutoff_masked"             : cutoff_A,
            "cutoff_unmasked"           : cutoff_B
        }
        return (guides, )
class ClownOptions_Combine:
    """ComfyUI node: passes OPTIONS through OptionsManager and returns the
    flattened dict form (extra keyword inputs are forwarded as-is)."""

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "options": ("OPTIONS",),
            },
        }

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_options"

    def main(self, options, **kwargs):
        merged = OptionsManager(options, **kwargs)
        return (merged.as_dict(),)
class ClownOptions_Frameweights:
    """ComfyUI node: registers a per-frame weight configuration on the
    frame_weights_mgr stored inside the OPTIONS dict."""

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "config_name": (FRAME_WEIGHTS_CONFIG_NAMES, {"default": "frame_weights", "tooltip": "Apply to specific type of per-frame weights."}),
                "dynamics": (FRAME_WEIGHTS_DYNAMICS_NAMES, {"default": "ease_out", "tooltip": "The function type used for the dynamic period. constant: no change, linear: steady change, ease_out: starts fast, ease_in: starts slow"}),
                "schedule": (FRAME_WEIGHTS_SCHEDULE_NAMES, {"default": "moderate_early", "tooltip": "fast_early: fast change starts immediately, slow_late: slow change starts later"}),
                "scale": ("FLOAT", {"default": 0.5, "min": 0.0, "max": 1.0, "step": 0.01, "tooltip": "The amount of change over the course of the frame weights. 1.0 means that the guides have no influence by the end."}),
                "reverse": ("BOOLEAN", {"default": False, "tooltip": "Reverse the frame weights"}),
            },
            "optional": {
                "frame_weights": ("SIGMAS", {"tooltip": "Overrides all other settings EXCEPT reverse."}),
                "custom_string": ("STRING", {"tooltip": "Overrides all other settings EXCEPT reverse.", "multiline": True}),
                "options": ("OPTIONS",),
            },
        }

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_options"

    def main(self,
            config_name,
            dynamics,
            schedule,
            scale,
            reverse,
            frame_weights = None,
            custom_string = None,
            options       = None,
            ):
        opts = OptionsManager(options if options is not None else {})

        # Reuse the existing manager when one is already stored on the options.
        mgr = opts.get("frame_weights_mgr")
        if mgr is None:
            mgr = FrameWeightsManager()

        # Treat a whitespace-only custom string as "not provided".
        if custom_string is not None and not custom_string.strip():
            custom_string = None

        mgr.add_weight_config(
            config_name,
            dynamics      = dynamics,
            schedule      = schedule,
            scale         = scale,
            is_reversed   = reverse,
            frame_weights = frame_weights,
            custom_string = custom_string
        )

        opts.update("frame_weights_mgr", mgr)
        return (opts.as_dict(),)
class SharkOptions_GuiderInput:
    """ComfyUI node: stores a guider (GUIDER input, LATENT-style dict, or
    tensor) under the "guider" key of the OPTIONS dict."""

    @classmethod
    def INPUT_TYPES(s):
        return {"required":
                    {"guider": ("GUIDER", ),
                    },
                "optional":
                    {"options": ("OPTIONS", ),
                    }
               }

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_options"

    def main(self, guider, options=None):
        """Unwrap/detach the guider and record it on the options.

        Returns a 1-tuple ``(options_dict,)``.
        """
        options_mgr = OptionsManager(options if options is not None else {})

        # Accept a LATENT-style dict and unwrap to its tensor payload.
        if isinstance(guider, dict):
            guider = guider.get('samples', None)
        # Detach tensors and move them to CPU so the stored options stay light.
        if isinstance(guider, torch.Tensor):
            guider = guider.detach().cpu()

        # (Removed dead code: a re-check for `options_mgr is None` -- it was
        # constructed unconditionally above and could never be None here.)
        options_mgr.update("guider", guider)
        return (options_mgr.as_dict(), )
================================================
FILE: sd/attention.py
================================================
import math
import sys
import torch
import torch.nn.functional as F
from torch import nn, einsum
from einops import rearrange, repeat
from typing import Optional
import logging
from comfy.ldm.modules.diffusionmodules.util import AlphaBlender, timestep_embedding
from comfy.ldm.modules.sub_quadratic_attention import efficient_dot_product_attention
from comfy import model_management
# Optional attention backends: import each one only when the corresponding
# model_management flag / CLI switch is enabled, so missing packages do not
# break a default install.
if model_management.xformers_enabled():
    import xformers
    import xformers.ops

if model_management.sage_attention_enabled():
    try:
        from sageattention import sageattn
    except ModuleNotFoundError:
        logging.error(f"\n\nTo use the `--use-sage-attention` feature, the `sageattention` package must be installed first.\ncommand:\n\t{sys.executable} -m pip install sageattention")
        exit(-1)

if model_management.flash_attention_enabled():
    try:
        from flash_attn import flash_attn_func
    except ModuleNotFoundError:
        logging.error(f"\n\nTo use the `--use-flash-attention` feature, the `flash-attn` package must be installed first.\ncommand:\n\t{sys.executable} -m pip install flash-attn")
        exit(-1)

from comfy.cli_args import args
import comfy.ops
ops = comfy.ops.disable_weight_init  # layer ops that skip weight initialization

from ..style_transfer import apply_scattersort, apply_scattersort_spatial

# Per-dtype upcast map for GPUs that require attention upcasting; consumed by
# get_attn_precision() below.
FORCE_UPCAST_ATTENTION_DTYPE = model_management.force_upcast_attention_dtype()
def get_attn_precision(attn_precision, current_dtype):
    """Resolve the dtype attention math should run in.

    CLI flag --dont-upcast-attention disables upcasting entirely; otherwise a
    device-mandated upcast (FORCE_UPCAST_ATTENTION_DTYPE) for the current
    dtype wins over the caller's requested precision.
    """
    if args.dont_upcast_attention:
        return None

    forced = FORCE_UPCAST_ATTENTION_DTYPE
    if forced is not None and current_dtype in forced:
        return forced[current_dtype]

    return attn_precision
def exists(val):
    """Return True when ``val`` is not None (falsy values still count)."""
    return not (val is None)
def default(val, d):
    """Return ``val`` unless it is None, in which case return fallback ``d``."""
    return d if val is None else val
# feedforward
class GEGLU(nn.Module):
    """Gated-GELU projection: one Linear emits both halves, the gate half is
    passed through GELU and multiplies the value half."""

    def __init__(self, dim_in, dim_out, dtype=None, device=None, operations=ops):
        super().__init__()
        # A single linear layer produces value and gate concatenated.
        self.proj = operations.Linear(dim_in, dim_out * 2, dtype=dtype, device=device)

    def forward(self, x):
        value, gate = self.proj(x).chunk(2, dim=-1)
        return value * F.gelu(gate)
class FeedForward(nn.Module):
    """Transformer MLP block: (GEGLU or Linear+GELU) -> Dropout -> Linear.

    ``mult`` widens the hidden layer; ``glu`` selects the gated variant.
    """

    def __init__(self, dim, dim_out=None, mult=4, glu=False, dropout=0., dtype=None, device=None, operations=ops):
        super().__init__()
        inner_dim = int(dim * mult)
        dim_out = default(dim_out, dim)

        if glu:
            project_in = GEGLU(dim, inner_dim, dtype=dtype, device=device, operations=operations)
        else:
            project_in = nn.Sequential(
                operations.Linear(dim, inner_dim, dtype=dtype, device=device),
                nn.GELU()
            )

        self.net = nn.Sequential(
            project_in,
            nn.Dropout(dropout),
            operations.Linear(inner_dim, dim_out, dtype=dtype, device=device)
        )

    def forward(self, x):
        return self.net(x)
def Normalize(in_channels, dtype=None, device=None):
    """Build the standard 32-group GroupNorm used throughout this module."""
    return torch.nn.GroupNorm(
        num_groups=32,
        num_channels=in_channels,
        eps=1e-6,
        affine=True,
        dtype=dtype,
        device=device,
    )
def attention_basic(q, k, v, heads, mask=None, attn_precision=None, skip_reshape=False, skip_output_reshape=False):
    """Naive attention that materializes the full (q_len, k_len) score matrix.

    q/k/v: (b, seq, heads*dim_head), or (b, heads, seq, dim_head) when
    skip_reshape is True.  Returns (b, seq, heads*dim_head), or
    (b, heads, seq, dim_head) when skip_output_reshape is True.
    Bool masks select positions to keep; float masks are added to the logits.
    """
    attn_precision = get_attn_precision(attn_precision, q.dtype)

    if skip_reshape:
        b, _, _, dim_head = q.shape
    else:
        b, _, dim_head = q.shape
        dim_head //= heads

    scale = dim_head ** -0.5

    h = heads
    # Flatten heads into the batch dim: (b*heads, seq, dim_head).
    if skip_reshape:
        q, k, v = map(
            lambda t: t.reshape(b * heads, -1, dim_head),
            (q, k, v),
        )
    else:
        q, k, v = map(
            lambda t: t.unsqueeze(3)
            .reshape(b, -1, heads, dim_head)
            .permute(0, 2, 1, 3)
            .reshape(b * heads, -1, dim_head)
            .contiguous(),
            (q, k, v),
        )

    # force cast to fp32 to avoid overflowing
    if attn_precision == torch.float32:
        sim = einsum('b i d, b j d -> b i j', q.float(), k.float()) * scale
    else:
        sim = einsum('b i d, b j d -> b i j', q, k) * scale

    del q, k

    if exists(mask):
        if mask.dtype == torch.bool:
            mask = rearrange(mask, 'b ... -> b (...)') #TODO: check if this bool part matches pytorch attention
            max_neg_value = -torch.finfo(sim.dtype).max
            mask = repeat(mask, 'b j -> (b h) () j', h=h)
            # Masked-out (False) positions get the most negative finite value.
            sim.masked_fill_(~mask, max_neg_value)
        else:
            if len(mask.shape) == 2:
                bs = 1
            else:
                bs = mask.shape[0]
            # Broadcast the additive mask over batch/heads, flatten to (b*h, q, k).
            mask = mask.reshape(bs, -1, mask.shape[-2], mask.shape[-1]).expand(b, heads, -1, -1).reshape(-1, mask.shape[-2], mask.shape[-1])
            sim.add_(mask)

    # attention, what we cannot get enough of
    sim = sim.softmax(dim=-1)

    out = einsum('b i j, b j d -> b i d', sim.to(v.dtype), v)

    # Restore the requested output layout.
    if skip_output_reshape:
        out = (
            out.unsqueeze(0)
            .reshape(b, heads, -1, dim_head)
        )
    else:
        out = (
            out.unsqueeze(0)
            .reshape(b, heads, -1, dim_head)
            .permute(0, 2, 1, 3)
            .reshape(b, -1, heads * dim_head)
        )
    return out
def attention_sub_quad(query, key, value, heads, mask=None, attn_precision=None, skip_reshape=False, skip_output_reshape=False):
    """Memory-efficient sub-quadratic attention (chunked q/kv processing).

    Delegates the math to comfy's efficient_dot_product_attention; this
    wrapper handles head flattening, chunk-size selection based on free
    device memory, and mask broadcasting.  Layout conventions match
    attention_basic.
    """
    attn_precision = get_attn_precision(attn_precision, query.dtype)

    if skip_reshape:
        b, _, _, dim_head = query.shape
    else:
        b, _, dim_head = query.shape
        dim_head //= heads

    # Flatten heads into batch; note key is transposed to (b*h, dim_head, k_len).
    if skip_reshape:
        query = query.reshape(b * heads, -1, dim_head)
        value = value.reshape(b * heads, -1, dim_head)
        key = key.reshape(b * heads, -1, dim_head).movedim(1, 2)
    else:
        query = query.unsqueeze(3).reshape(b, -1, heads, dim_head).permute(0, 2, 1, 3).reshape(b * heads, -1, dim_head)
        value = value.unsqueeze(3).reshape(b, -1, heads, dim_head).permute(0, 2, 1, 3).reshape(b * heads, -1, dim_head)
        key = key.unsqueeze(3).reshape(b, -1, heads, dim_head).permute(0, 2, 3, 1).reshape(b * heads, dim_head, -1)

    dtype = query.dtype
    upcast_attention = attn_precision == torch.float32 and query.dtype != torch.float32
    if upcast_attention:
        bytes_per_token = torch.finfo(torch.float32).bits//8
    else:
        bytes_per_token = torch.finfo(query.dtype).bits//8
    batch_x_heads, q_tokens, _ = query.shape
    _, _, k_tokens = key.shape

    mem_free_total, _ = model_management.get_free_memory(query.device, True)

    kv_chunk_size_min = None
    kv_chunk_size = None
    query_chunk_size = None

    # Pick the largest query chunk size whose score matrix fits in free memory.
    for x in [4096, 2048, 1024, 512, 256]:
        count = mem_free_total / (batch_x_heads * bytes_per_token * x * 4.0)
        if count >= k_tokens:
            kv_chunk_size = k_tokens
            query_chunk_size = x
            break

    if query_chunk_size is None:
        query_chunk_size = 512

    if mask is not None:
        if len(mask.shape) == 2:
            bs = 1
        else:
            bs = mask.shape[0]
        # Broadcast the mask over batch/heads, flatten to (b*h, q, k).
        mask = mask.reshape(bs, -1, mask.shape[-2], mask.shape[-1]).expand(b, heads, -1, -1).reshape(-1, mask.shape[-2], mask.shape[-1])

    hidden_states = efficient_dot_product_attention(
        query,
        key,
        value,
        query_chunk_size=query_chunk_size,
        kv_chunk_size=kv_chunk_size,
        kv_chunk_size_min=kv_chunk_size_min,
        use_checkpoint=False,
        upcast_attention=upcast_attention,
        mask=mask,
    )

    hidden_states = hidden_states.to(dtype)
    # Un-flatten heads back out of the batch dimension.
    if skip_output_reshape:
        hidden_states = hidden_states.unflatten(0, (-1, heads))
    else:
        hidden_states = hidden_states.unflatten(0, (-1, heads)).transpose(1,2).flatten(start_dim=2)
    return hidden_states
def attention_split(q, k, v, heads, mask=None, attn_precision=None, skip_reshape=False, skip_output_reshape=False):
    """Attention that splits the query dimension into slices sized to fit in
    free device memory, retrying with more slices on OOM.

    Layout conventions match attention_basic.  On repeated OOM the step count
    doubles up to 64 before giving up with a RuntimeError / re-raised OOM.
    """
    attn_precision = get_attn_precision(attn_precision, q.dtype)

    if skip_reshape:
        b, _, _, dim_head = q.shape
    else:
        b, _, dim_head = q.shape
        dim_head //= heads

    scale = dim_head ** -0.5

    # Flatten heads into the batch dim: (b*heads, seq, dim_head).
    if skip_reshape:
        q, k, v = map(
            lambda t: t.reshape(b * heads, -1, dim_head),
            (q, k, v),
        )
    else:
        q, k, v = map(
            lambda t: t.unsqueeze(3)
            .reshape(b, -1, heads, dim_head)
            .permute(0, 2, 1, 3)
            .reshape(b * heads, -1, dim_head)
            .contiguous(),
            (q, k, v),
        )

    # Output accumulator, filled slice by slice.
    r1 = torch.zeros(q.shape[0], q.shape[1], v.shape[2], device=q.device, dtype=q.dtype)

    mem_free_total = model_management.get_free_memory(q.device)

    if attn_precision == torch.float32:
        element_size = 4
        upcast = True
    else:
        element_size = q.element_size()
        upcast = False

    gb = 1024 ** 3
    tensor_size = q.shape[0] * q.shape[1] * k.shape[1] * element_size
    modifier = 3
    mem_required = tensor_size * modifier
    steps = 1

    # Start with the smallest power-of-two slice count that should fit.
    if mem_required > mem_free_total:
        steps = 2**(math.ceil(math.log(mem_required / mem_free_total, 2)))
        # print(f"Expected tensor size:{tensor_size/gb:0.1f}GB, cuda free:{mem_free_cuda/gb:0.1f}GB "
        #      f"torch free:{mem_free_torch/gb:0.1f} total:{mem_free_total/gb:0.1f} steps:{steps}")

    if steps > 64:
        max_res = math.floor(math.sqrt(math.sqrt(mem_free_total / 2.5)) / 8) * 64
        raise RuntimeError(f'Not enough memory, use lower resolution (max approx. {max_res}x{max_res}). '
                           f'Need: {mem_required/64/gb:0.1f}GB free, Have:{mem_free_total/gb:0.1f}GB free')

    if mask is not None:
        if len(mask.shape) == 2:
            bs = 1
        else:
            bs = mask.shape[0]
        # Broadcast the additive mask over batch/heads, flatten to (b*h, q, k).
        mask = mask.reshape(bs, -1, mask.shape[-2], mask.shape[-1]).expand(b, heads, -1, -1).reshape(-1, mask.shape[-2], mask.shape[-1])

    # print("steps", steps, mem_required, mem_free_total, modifier, q.element_size(), tensor_size)
    first_op_done = False
    cleared_cache = False
    while True:
        try:
            slice_size = q.shape[1] // steps if (q.shape[1] % steps) == 0 else q.shape[1]
            for i in range(0, q.shape[1], slice_size):
                end = i + slice_size
                if upcast:
                    with torch.autocast(enabled=False, device_type = 'cuda'):
                        s1 = einsum('b i d, b j d -> b i j', q[:, i:end].float(), k.float()) * scale
                else:
                    s1 = einsum('b i d, b j d -> b i j', q[:, i:end], k) * scale

                if mask is not None:
                    if len(mask.shape) == 2:
                        s1 += mask[i:end]
                    else:
                        if mask.shape[1] == 1:
                            s1 += mask
                        else:
                            s1 += mask[:, i:end]

                s2 = s1.softmax(dim=-1).to(v.dtype)
                del s1
                first_op_done = True

                r1[:, i:end] = einsum('b i j, b j d -> b i d', s2, v)
                del s2
            break
        except model_management.OOM_EXCEPTION as e:
            # Only retry if we OOMed before producing any output; first empty
            # the cache once, then keep doubling the slice count up to 64.
            if first_op_done == False:
                model_management.soft_empty_cache(True)
                if cleared_cache == False:
                    cleared_cache = True
                    logging.warning("out of memory error, emptying cache and trying again")
                    continue
                steps *= 2
                if steps > 64:
                    raise e
                logging.warning("out of memory error, increasing steps and trying again {}".format(steps))
            else:
                raise e

    del q, k, v

    # Restore the requested output layout.
    if skip_output_reshape:
        r1 = (
            r1.unsqueeze(0)
            .reshape(b, heads, -1, dim_head)
        )
    else:
        r1 = (
            r1.unsqueeze(0)
            .reshape(b, heads, -1, dim_head)
            .permute(0, 2, 1, 3)
            .reshape(b, -1, heads * dim_head)
        )
    return r1
# Detect xformers versions with the known batch-size bug so attention_xformers
# can fall back to PyTorch SDPA when b*heads exceeds 65535.
BROKEN_XFORMERS = False
try:
    x_vers = xformers.__version__
    # XFormers bug confirmed on all versions from 0.0.21 to 0.0.26 (q with bs bigger than 65535 gives CUDA error)
    BROKEN_XFORMERS = x_vers.startswith("0.0.2") and not x_vers.startswith("0.0.20")
except:
    # xformers not imported/enabled: leave the flag False.
    pass
def attention_xformers(q, k, v, heads, mask=None, attn_precision=None, skip_reshape=False, skip_output_reshape=False):
    """Attention via xformers memory_efficient_attention, with a fallback to
    attention_pytorch when xformers is known-broken for this batch size or
    when tracing/scripting.  Layout conventions match attention_basic.
    """
    b = q.shape[0]
    dim_head = q.shape[-1]
    # check to make sure xformers isn't broken
    disabled_xformers = False

    if BROKEN_XFORMERS:
        if b * heads > 65535:
            disabled_xformers = True

    if not disabled_xformers:
        if torch.jit.is_tracing() or torch.jit.is_scripting():
            disabled_xformers = True

    if disabled_xformers:
        return attention_pytorch(q, k, v, heads, mask, skip_reshape=skip_reshape)

    if skip_reshape:
        # b h k d -> b k h d
        q, k, v = map(
            lambda t: t.permute(0, 2, 1, 3),
            (q, k, v),
        )
    # actually do the reshaping
    else:
        dim_head //= heads
        q, k, v = map(
            lambda t: t.reshape(b, -1, heads, dim_head),
            (q, k, v),
        )

    if mask is not None:
        # add a singleton batch dimension
        if mask.ndim == 2:
            mask = mask.unsqueeze(0)
        # add a singleton heads dimension
        if mask.ndim == 3:
            mask = mask.unsqueeze(1)
        # pad to a multiple of 8
        pad = 8 - mask.shape[-1] % 8
        # the xformers docs says that it's allowed to have a mask of shape (1, Nq, Nk)
        # but when using separated heads, the shape has to be (B, H, Nq, Nk)
        # in flux, this matrix ends up being over 1GB
        # here, we create a mask with the same batch/head size as the input mask (potentially singleton or full)
        mask_out = torch.empty([mask.shape[0], mask.shape[1], q.shape[1], mask.shape[-1] + pad], dtype=q.dtype, device=q.device)

        mask_out[..., :mask.shape[-1]] = mask
        # doesn't this remove the padding again??
        # NOTE(review): the slice below does drop the padded columns, so the
        # pad only affects mask_out's allocated stride, not the values passed
        # on -- this mirrors upstream ComfyUI; confirm before "fixing".
        mask = mask_out[..., :mask.shape[-1]]
        mask = mask.expand(b, heads, -1, -1)

    out = xformers.ops.memory_efficient_attention(q, k, v, attn_bias=mask)

    if skip_output_reshape:
        out = out.permute(0, 2, 1, 3)
    else:
        out = (
            out.reshape(b, -1, heads * dim_head)
        )

    return out
# Maximum batch size handed to torch SDPA in one call; larger batches are
# processed in chunks by attention_pytorch below.
if model_management.is_nvidia(): #pytorch 2.3 and up seem to have this issue.
    SDP_BATCH_LIMIT = 2**15
else:
    #TODO: other GPUs ?
    SDP_BATCH_LIMIT = 2**31
def attention_pytorch(q, k, v, heads, mask=None, attn_precision=None, skip_reshape=False, skip_output_reshape=False):
    """Attention via torch.nn.functional.scaled_dot_product_attention.

    Batches larger than SDP_BATCH_LIMIT are processed in chunks to work around
    a PyTorch/NVIDIA issue.  Layout conventions match attention_basic.
    """
    if skip_reshape:
        b, _, _, dim_head = q.shape
    else:
        b, _, dim_head = q.shape
        dim_head //= heads
        # (b, seq, h*d) -> (b, h, seq, d) as SDPA expects.
        q, k, v = map(
            lambda t: t.view(b, -1, heads, dim_head).transpose(1, 2),
            (q, k, v),
        )

    if mask is not None:
        # add a batch dimension if there isn't already one
        if mask.ndim == 2:
            mask = mask.unsqueeze(0)
        # add a heads dimension if there isn't already one
        if mask.ndim == 3:
            mask = mask.unsqueeze(1)

    if SDP_BATCH_LIMIT >= b:
        out = torch.nn.functional.scaled_dot_product_attention(q, k, v, attn_mask=mask, dropout_p=0.0, is_causal=False)
        if not skip_output_reshape:
            out = (
                out.transpose(1, 2).reshape(b, -1, heads * dim_head)
            )
    else:
        # Chunked path: fill the pre-allocated output batch-slice by slice.
        out = torch.empty((b, q.shape[2], heads * dim_head), dtype=q.dtype, layout=q.layout, device=q.device)
        for i in range(0, b, SDP_BATCH_LIMIT):
            m = mask
            if mask is not None:
                if mask.shape[0] > 1:
                    m = mask[i : i + SDP_BATCH_LIMIT]

            out[i : i + SDP_BATCH_LIMIT] = torch.nn.functional.scaled_dot_product_attention(
                q[i : i + SDP_BATCH_LIMIT],
                k[i : i + SDP_BATCH_LIMIT],
                v[i : i + SDP_BATCH_LIMIT],
                attn_mask=m,
                dropout_p=0.0, is_causal=False
            ).transpose(1, 2).reshape(-1, q.shape[2], heads * dim_head)
    return out
def attention_sage(q, k, v, heads, mask=None, attn_precision=None, skip_reshape=False, skip_output_reshape=False):
    """Attention via SageAttention; falls back to attention_pytorch on any
    sageattn failure.  Layout conventions match attention_basic; the
    tensor_layout string tells sageattn whether heads precede sequence.
    """
    if skip_reshape:
        b, _, _, dim_head = q.shape
        tensor_layout = "HND"
    else:
        b, _, dim_head = q.shape
        dim_head //= heads
        q, k, v = map(
            lambda t: t.view(b, -1, heads, dim_head),
            (q, k, v),
        )
        tensor_layout = "NHD"

    if mask is not None:
        # add a batch dimension if there isn't already one
        if mask.ndim == 2:
            mask = mask.unsqueeze(0)
        # add a heads dimension if there isn't already one
        if mask.ndim == 3:
            mask = mask.unsqueeze(1)

    try:
        out = sageattn(q, k, v, attn_mask=mask, is_causal=False, tensor_layout=tensor_layout)
    except Exception as e:
        logging.error("Error running sage attention: {}, using pytorch attention instead.".format(e))
        # Convert to the (b, h, seq, d) layout attention_pytorch expects.
        if tensor_layout == "NHD":
            q, k, v = map(
                lambda t: t.transpose(1, 2),
                (q, k, v),
            )
        return attention_pytorch(q, k, v, heads, mask=mask, skip_reshape=True, skip_output_reshape=skip_output_reshape)

    if tensor_layout == "HND":
        if not skip_output_reshape:
            out = (
                out.transpose(1, 2).reshape(b, -1, heads * dim_head)
            )
    else:
        if skip_output_reshape:
            out = out.transpose(1, 2)
        else:
            out = out.reshape(b, -1, heads * dim_head)
    return out
# Register flash-attn as a torch custom op (so it composes with torch.compile
# and fake tensors); on older torch without torch.library.custom_op, install a
# stub that fails loudly if ever called.
try:
    @torch.library.custom_op("flash_attention::flash_attn", mutates_args=())
    def flash_attn_wrapper(q: torch.Tensor, k: torch.Tensor, v: torch.Tensor,
                           dropout_p: float = 0.0, causal: bool = False) -> torch.Tensor:
        return flash_attn_func(q, k, v, dropout_p=dropout_p, causal=causal)

    @flash_attn_wrapper.register_fake
    def flash_attn_fake(q, k, v, dropout_p=0.0, causal=False):
        # Output shape is the same as q
        return q.new_empty(q.shape)
except AttributeError as error:
    FLASH_ATTN_ERROR = error

    def flash_attn_wrapper(q: torch.Tensor, k: torch.Tensor, v: torch.Tensor,
                           dropout_p: float = 0.0, causal: bool = False) -> torch.Tensor:
        assert False, f"Could not define flash_attn_wrapper: {FLASH_ATTN_ERROR}"
def attention_flash(q, k, v, heads, mask=None, attn_precision=None, skip_reshape=False, skip_output_reshape=False):
    """Attention via flash-attn, falling back to torch SDPA on failure or when
    a mask is present (flash path asserts mask is None).  Layout conventions
    match attention_basic.
    """
    if skip_reshape:
        b, _, _, dim_head = q.shape
    else:
        b, _, dim_head = q.shape
        dim_head //= heads
        # (b, seq, h*d) -> (b, h, seq, d)
        q, k, v = map(
            lambda t: t.view(b, -1, heads, dim_head).transpose(1, 2),
            (q, k, v),
        )

    if mask is not None:
        # add a batch dimension if there isn't already one
        if mask.ndim == 2:
            mask = mask.unsqueeze(0)
        # add a heads dimension if there isn't already one
        if mask.ndim == 3:
            mask = mask.unsqueeze(1)

    try:
        # flash_attn expects (b, seq, h, d); masks are unsupported here, so the
        # assert routes masked calls to the SDPA fallback below.
        assert mask is None
        out = flash_attn_wrapper(
            q.transpose(1, 2),
            k.transpose(1, 2),
            v.transpose(1, 2),
            dropout_p=0.0,
            causal=False,
        ).transpose(1, 2)
    except Exception as e:
        logging.warning(f"Flash Attention failed, using default SDPA: {e}")
        out = torch.nn.functional.scaled_dot_product_attention(q, k, v, attn_mask=mask, dropout_p=0.0, is_causal=False)
    if not skip_output_reshape:
        out = (
            out.transpose(1, 2).reshape(b, -1, heads * dim_head)
        )
    return out
# Select the default attention backend at import time based on ComfyUI's
# model_management flags / CLI arguments, mirroring upstream behavior.
optimized_attention = attention_basic

if model_management.sage_attention_enabled():
    logging.info("Using sage attention")
    optimized_attention = attention_sage
elif model_management.xformers_enabled():
    logging.info("Using xformers attention")
    optimized_attention = attention_xformers
elif model_management.flash_attention_enabled():
    logging.info("Using Flash Attention")
    optimized_attention = attention_flash
elif model_management.pytorch_attention_enabled():
    logging.info("Using pytorch attention")
    optimized_attention = attention_pytorch
else:
    if args.use_split_cross_attention:
        logging.info("Using split optimization for attention")
        optimized_attention = attention_split
    else:
        logging.info("Using sub quadratic optimization for attention, if you have memory or speed issues try using: --use-split-cross-attention")
        optimized_attention = attention_sub_quad

# Masked attention currently routes to the same backend.
optimized_attention_masked = optimized_attention
def optimized_attention_for_device(device, mask=False, small_input=False):
    """Pick the attention implementation best suited to the device/workload."""
    if small_input:
        # Small inputs: dispatch overhead dominates, so prefer the simple
        # implementations (pytorch SDPA is probably slightly faster here).
        if model_management.pytorch_attention_enabled():
            return attention_pytorch
        return attention_basic

    if device == torch.device("cpu"):
        return attention_sub_quad

    return optimized_attention_masked if mask else optimized_attention
class ReCrossAttention(nn.Module):
    """Cross/self-attention layer with style-transfer hook points.

    Matches ComfyUI's CrossAttention except that a ``style_block`` callable is
    applied after each of the q/k/v projections and the attention output --
    presumably the hook used by ..style_transfer to rewrite activations;
    confirm against callers.
    """
    def __init__(self, query_dim, context_dim=None, heads=8, dim_head=64, dropout=0., attn_precision=None, dtype=None, device=None, operations=ops):
        super().__init__()
        inner_dim = dim_head * heads
        # Self-attention when no context_dim is supplied.
        context_dim = default(context_dim, query_dim)

        self.attn_precision = attn_precision
        self.heads = heads
        self.dim_head = dim_head

        self.to_q = operations.Linear(query_dim, inner_dim, bias=False, dtype=dtype, device=device)
        self.to_k = operations.Linear(context_dim, inner_dim, bias=False, dtype=dtype, device=device)
        self.to_v = operations.Linear(context_dim, inner_dim, bias=False, dtype=dtype, device=device)

        self.to_out = nn.Sequential(operations.Linear(inner_dim, query_dim, dtype=dtype, device=device), nn.Dropout(dropout))

    def forward(self, x, context=None, value=None, mask=None, style_block=None):
        # NOTE(review): style_block is called unconditionally below, so the
        # default of None will raise if no hook is passed -- confirm that all
        # callers always supply one.
        q = self.to_q(x)
        q = style_block(q, "q_proj")

        #SELF_ATTN = True if context is None else False
        context = default(context, x)   # if context is None, return x

        k = self.to_k(context)
        k = style_block(k, "k_proj")

        # An explicit `value` overrides the context for the V projection.
        if value is not None:
            v = self.to_v(value)
            del value
        else:
            v = self.to_v(context)
        v = style_block(v, "v_proj")

        if mask is None:
            out = optimized_attention(q, k, v, self.heads, attn_precision=self.attn_precision)
        else:
            #if SELF_ATTN and mask.shape[-2] != q.shape[-2]:
            #    mask = F.interpolate(mask[None, None].float(), size=(q.shape[-2], q.shape[-2]), mode='nearest')[0,0].to(mask)
            #elif mask.shape[-2] != q.shape[-2]:   # cross attn
            #    mask = F.interpolate(mask[None, None].float(), size=(q.shape[-2], mask.shape[-1]), mode='nearest')[0,0].to(mask)
            # Masked path always uses the pytorch SDPA implementation.
            out = attention_pytorch(q, k, v, self.heads, mask=mask)
            #out = optimized_attention_masked(q, k, v, self.heads, mask, attn_precision=self.attn_precision)
        out = style_block(out, "out")
        return self.to_out(out)
class ReBasicTransformerBlock(nn.Module):
    """Transformer block (self-attn -> cross-attn -> feed-forward) with style hooks.

    Variant of comfy's BasicTransformerBlock whose attentions are
    ReCrossAttention, whose intermediate activations are routed through a
    ``style_block`` callable, and which can pull resolution-matched region
    masks (self_mask/cross_mask) out of ``transformer_options``.
    """

    def __init__(self, dim, n_heads, d_head, dropout=0., context_dim=None, gated_ff=True, checkpoint=True, ff_in=False, inner_dim=None,
                 disable_self_attn=False, disable_temporal_crossattention=False, switch_temporal_ca_to_sa=False, attn_precision=None, dtype=None, device=None, operations=ops):
        # NOTE: `checkpoint` is accepted for interface parity with the comfy
        # block but is not referenced anywhere in this class body.
        super().__init__()

        self.ff_in = ff_in or inner_dim is not None
        if inner_dim is None:
            inner_dim = dim

        # Residual adds are only shape-valid when inner and outer widths match.
        self.is_res = inner_dim == dim
        self.attn_precision = attn_precision

        if self.ff_in:
            self.norm_in = operations.LayerNorm(dim, dtype=dtype, device=device)
            self.ff_in = FeedForward(dim, dim_out=inner_dim, dropout=dropout, glu=gated_ff, dtype=dtype, device=device, operations=operations)

        self.disable_self_attn = disable_self_attn
        self.attn1 = ReCrossAttention(query_dim=inner_dim, heads=n_heads, dim_head=d_head, dropout=dropout,
                                      context_dim=context_dim if self.disable_self_attn else None, attn_precision=self.attn_precision, dtype=dtype, device=device, operations=operations)  # is a self-attention if not self.disable_self_attn
        self.ff = FeedForward(inner_dim, dim_out=dim, dropout=dropout, glu=gated_ff, dtype=dtype, device=device, operations=operations)

        if disable_temporal_crossattention:
            if switch_temporal_ca_to_sa:
                raise ValueError
            else:
                self.attn2 = None
        else:
            context_dim_attn2 = None
            if not switch_temporal_ca_to_sa:
                context_dim_attn2 = context_dim

            self.attn2 = ReCrossAttention(query_dim=inner_dim, context_dim=context_dim_attn2,
                                          heads=n_heads, dim_head=d_head, dropout=dropout, attn_precision=self.attn_precision, dtype=dtype, device=device, operations=operations)  # is self-attn if context is none
            self.norm2 = operations.LayerNorm(inner_dim, dtype=dtype, device=device)

        self.norm1 = operations.LayerNorm(inner_dim, dtype=dtype, device=device)
        self.norm3 = operations.LayerNorm(inner_dim, dtype=dtype, device=device)
        self.n_heads = n_heads
        self.d_head = d_head
        self.switch_temporal_ca_to_sa = switch_temporal_ca_to_sa

    def forward(self, x, context=None, transformer_options={}, style_block=None):
        """Apply self-attn, cross-attn, and feed-forward with patch hooks.

        :param x: token tensor, shape (b, tokens, inner_dim).
        :param context: cross-attention conditioning tokens.
        :param transformer_options: comfy-style options dict; consumed keys
            include "block", "block_index", "patches", "patches_replace" and
            the self/cross region masks at several resolutions.
        :param style_block: callable hook applied after each sub-stage.
        """
        extra_options = {}
        block = transformer_options.get("block", None)
        block_index = transformer_options.get("block_index", 0)
        transformer_patches = {}
        transformer_patches_replace = {}

        # Select the mask pair whose token count matches x; the *_up / *_down /
        # *_down2 variants are presumably masks pre-scaled for the other UNet
        # resolutions — TODO confirm against the code that populates them.
        self_mask = transformer_options.get('self_mask')
        cross_mask = transformer_options.get('cross_mask')
        if self_mask is not None and cross_mask is not None:
            if self_mask.shape[-2] == x.shape[-2]:
                pass
            elif self_mask.shape[-2] < x.shape[-2]:
                self_mask = transformer_options.get('self_mask_up')
                cross_mask = transformer_options.get('cross_mask_up')
            else:
                self_mask = transformer_options.get('self_mask_down')
                cross_mask = transformer_options.get('cross_mask_down')
                if self_mask.shape[-2] > x.shape[-2]:
                    self_mask = transformer_options.get('self_mask_down2')
                    cross_mask = transformer_options.get('cross_mask_down2')

        # Split transformer_options into patch dicts and plain extra options.
        for k in transformer_options:
            if k == "patches":
                transformer_patches = transformer_options[k]
            elif k == "patches_replace":
                transformer_patches_replace = transformer_options[k]
            else:
                extra_options[k] = transformer_options[k]

        extra_options["n_heads"] = self.n_heads
        extra_options["dim_head"] = self.d_head
        extra_options["attn_precision"] = self.attn_precision

        if self.ff_in:  # never true for sdxl?
            x_skip = x
            x = self.ff_in(self.norm_in(x))
            if self.is_res:
                x += x_skip

        n = self.norm1(x)
        n = style_block(n, "norm1")

        # attn1 context: external conditioning only when self-attn is disabled.
        if self.disable_self_attn:
            context_attn1 = context
        else:
            context_attn1 = None
        value_attn1 = None

        # "attn1_patch" hooks may rewrite (q, k, v) before attention runs.
        if "attn1_patch" in transformer_patches:
            patch = transformer_patches["attn1_patch"]
            if context_attn1 is None:
                context_attn1 = n
            value_attn1 = context_attn1
            for p in patch:
                n, context_attn1, value_attn1 = p(n, context_attn1, value_attn1, extra_options)

        if block is not None:
            transformer_block = (block[0], block[1], block_index)
        else:
            transformer_block = None
        attn1_replace_patch = transformer_patches_replace.get("attn1", {})
        block_attn1 = transformer_block
        if block_attn1 not in attn1_replace_patch:
            block_attn1 = block
        if block_attn1 in attn1_replace_patch:
            # Replace patch: the patch receives already-projected q/k/v and
            # produces the attention output itself.
            if context_attn1 is None:
                context_attn1 = n
                value_attn1 = n
            n = self.attn1.to_q(n)
            context_attn1 = self.attn1.to_k(context_attn1)
            value_attn1 = self.attn1.to_v(value_attn1)
            n = attn1_replace_patch[block_attn1](n, context_attn1, value_attn1, extra_options)
            n = self.attn1.to_out(n)
        else:
            n = self.attn1(n, context=context_attn1, value=value_attn1, mask=self_mask, style_block=style_block.ATTN1)  # self attention #####
            n = style_block(n, "self_attn")

        if "attn1_output_patch" in transformer_patches:
            patch = transformer_patches["attn1_output_patch"]
            for p in patch:
                n = p(n, extra_options)

        x += n  ###########
        x = style_block(x, "self_attn_res")

        if "middle_patch" in transformer_patches:
            patch = transformer_patches["middle_patch"]
            for p in patch:
                x = p(x, extra_options)

        if self.attn2 is not None:
            n = self.norm2(x)
            n = style_block(n, "norm2")
            if self.switch_temporal_ca_to_sa:
                context_attn2 = n
            else:
                context_attn2 = context
            value_attn2 = None
            if "attn2_patch" in transformer_patches:
                patch = transformer_patches["attn2_patch"]
                value_attn2 = context_attn2
                for p in patch:
                    n, context_attn2, value_attn2 = p(n, context_attn2, value_attn2, extra_options)

            attn2_replace_patch = transformer_patches_replace.get("attn2", {})
            block_attn2 = transformer_block
            if block_attn2 not in attn2_replace_patch:
                block_attn2 = block
            if block_attn2 in attn2_replace_patch:
                if value_attn2 is None:
                    value_attn2 = context_attn2
                n = self.attn2.to_q(n)
                context_attn2 = self.attn2.to_k(context_attn2)
                value_attn2 = self.attn2.to_v(value_attn2)
                n = attn2_replace_patch[block_attn2](n, context_attn2, value_attn2, extra_options)
                n = self.attn2.to_out(n)
            else:
                n = self.attn2(n, context=context_attn2, value=value_attn2, mask=cross_mask, style_block=style_block.ATTN2)  # real cross attention ##### b (h w) c
                n = style_block(n, "cross_attn")

            if "attn2_output_patch" in transformer_patches:
                patch = transformer_patches["attn2_output_patch"]
                for p in patch:
                    n = p(n, extra_options)

            x += n  ###########
            x = style_block(x, "cross_attn_res")

        if self.is_res:  # always true with sdxl?
            x_skip = x
        if not self.is_res:
            pass

        x = self.norm3(x)
        x = style_block(x, "norm3")
        x = self.ff(x)
        x = style_block(x, "ff")

        if self.is_res:
            x += x_skip
        x = style_block(x, "ff_res")

        return x
class ReSpatialTransformer(nn.Module):
    """
    Transformer block for image-like data.
    First, project the input (aka embedding)
    and reshape to b, t, d.
    Then apply standard transformer action.
    Finally, reshape to image
    NEW: use_linear for more efficiency instead of the 1x1 convs

    Style-hook variant of comfy's SpatialTransformer: every stage's activation
    is routed through `style_block`, and the inner blocks are
    ReBasicTransformerBlock (which consume region masks from
    transformer_options).
    """
    def __init__(self, in_channels, n_heads, d_head,
                 depth=1, dropout=0., context_dim=None,
                 disable_self_attn=False, use_linear=False,
                 use_checkpoint=True, attn_precision=None, dtype=None, device=None, operations=ops):
        super().__init__()
        # A single context_dim applies uniformly to every depth level.
        if exists(context_dim) and not isinstance(context_dim, list):
            context_dim = [context_dim] * depth
        self.in_channels = in_channels
        inner_dim = n_heads * d_head
        self.norm = operations.GroupNorm(num_groups=32, num_channels=in_channels, eps=1e-6, affine=True, dtype=dtype, device=device)
        if not use_linear:
            self.proj_in = operations.Conv2d(in_channels,
                                             inner_dim,
                                             kernel_size=1,
                                             stride=1,
                                             padding=0, dtype=dtype, device=device)
        else:
            self.proj_in = operations.Linear(in_channels, inner_dim, dtype=dtype, device=device)

        self.transformer_blocks = nn.ModuleList(
            [ReBasicTransformerBlock(inner_dim, n_heads, d_head, dropout=dropout, context_dim=context_dim[d],
                                     disable_self_attn=disable_self_attn, checkpoint=use_checkpoint, attn_precision=attn_precision, dtype=dtype, device=device, operations=operations)
                for d in range(depth)]
        )
        if not use_linear:
            self.proj_out = operations.Conv2d(inner_dim, in_channels,
                                              kernel_size=1,
                                              stride=1,
                                              padding=0, dtype=dtype, device=device)
        else:
            # NOTE(review): (in_channels, inner_dim) argument order mirrors the
            # upstream SD/comfy code; it presumably relies on
            # inner_dim == in_channels in practice — confirm against upstream.
            self.proj_out = operations.Linear(in_channels, inner_dim, dtype=dtype, device=device)
        self.use_linear = use_linear

    def forward(self, x, context=None, style_block=None, transformer_options={}):
        # note: if no context is given, cross-attention defaults to self-attention
        if not isinstance(context, list):
            context = [context] * len(self.transformer_blocks)
        b, c, h, w = x.shape
        transformer_options["activations_shape"] = list(x.shape)
        x_in = x
        x = self.norm(x)
        x = style_block(x, "spatial_norm_in")
        if not self.use_linear:
            # Conv projection runs in NCHW, before flattening to tokens.
            x = self.proj_in(x)
            x = style_block(x, "spatial_proj_in")
        # NCHW -> N(HW)C token layout.
        x = x.movedim(1, 3).flatten(1, 2).contiguous()
        if self.use_linear:
            # Linear projection runs on the flattened token layout.
            x = self.proj_in(x)
            x = style_block(x, "spatial_proj_in")
        for i, block in enumerate(self.transformer_blocks):
            transformer_options["block_index"] = i
            x = block(x, context=context[i], style_block=style_block.TFMR, transformer_options=transformer_options)
            x = style_block(x, "spatial_transformer_block")
        x = style_block(x, "spatial_transformer")
        if self.use_linear:
            x = self.proj_out(x)
        # N(HW)C -> NCHW.
        x = x.reshape(x.shape[0], h, w, x.shape[-1]).movedim(3, 1).contiguous()
        if not self.use_linear:
            x = self.proj_out(x)
        x = style_block(x, "spatial_proj_out")
        x = x + x_in  # residual around the whole transformer stack
        x = style_block(x, "spatial_res")
        return x
class SpatialVideoTransformer(ReSpatialTransformer):
    """Spatial transformer with a parallel temporal ("time mix") stack.

    Each spatial transformer block is paired with a BasicTransformerBlock that
    attends across frames; the spatial and temporal results are blended by an
    AlphaBlender. Inherits the Re* spatial path, but this forward() does not
    route through style_block hooks (the openaimodel dispatcher marks this
    class UNUSED — presumably only kept for video checkpoints; confirm).
    """
    def __init__(
        self,
        in_channels,
        n_heads,
        d_head,
        depth=1,
        dropout=0.0,
        use_linear=False,
        context_dim=None,
        use_spatial_context=False,
        timesteps=None,
        merge_strategy: str = "fixed",
        merge_factor: float = 0.5,
        time_context_dim=None,
        ff_in=False,
        checkpoint=False,
        time_depth=1,
        disable_self_attn=False,
        disable_temporal_crossattention=False,
        max_time_embed_period: int = 10000,
        attn_precision=None,
        dtype=None, device=None, operations=ops
    ):
        super().__init__(
            in_channels,
            n_heads,
            d_head,
            depth=depth,
            dropout=dropout,
            use_checkpoint=checkpoint,
            context_dim=context_dim,
            use_linear=use_linear,
            disable_self_attn=disable_self_attn,
            attn_precision=attn_precision,
            dtype=dtype, device=device, operations=operations
        )
        self.time_depth = time_depth
        self.depth = depth
        self.max_time_embed_period = max_time_embed_period

        # Temporal attention reuses the spatial head geometry.
        time_mix_d_head = d_head
        n_time_mix_heads = n_heads

        time_mix_inner_dim = int(time_mix_d_head * n_time_mix_heads)

        inner_dim = n_heads * d_head
        if use_spatial_context:
            time_context_dim = context_dim

        # One temporal block per spatial block, matched one-to-one below.
        self.time_stack = nn.ModuleList(
            [
                BasicTransformerBlock(
                    inner_dim,
                    n_time_mix_heads,
                    time_mix_d_head,
                    dropout=dropout,
                    context_dim=time_context_dim,
                    # timesteps=timesteps,
                    checkpoint=checkpoint,
                    ff_in=ff_in,
                    inner_dim=time_mix_inner_dim,
                    disable_self_attn=disable_self_attn,
                    disable_temporal_crossattention=disable_temporal_crossattention,
                    attn_precision=attn_precision,
                    dtype=dtype, device=device, operations=operations
                )
                for _ in range(self.depth)
            ]
        )

        assert len(self.time_stack) == len(self.transformer_blocks)

        self.use_spatial_context = use_spatial_context
        self.in_channels = in_channels

        # MLP that embeds the per-frame index into channel space.
        time_embed_dim = self.in_channels * 4
        self.time_pos_embed = nn.Sequential(
            operations.Linear(self.in_channels, time_embed_dim, dtype=dtype, device=device),
            nn.SiLU(),
            operations.Linear(time_embed_dim, self.in_channels, dtype=dtype, device=device),
        )

        self.time_mixer = AlphaBlender(
            alpha=merge_factor, merge_strategy=merge_strategy
        )

    def forward(
        self,
        x: torch.Tensor,
        context: Optional[torch.Tensor] = None,
        time_context: Optional[torch.Tensor] = None,
        timesteps: Optional[int] = None,
        image_only_indicator: Optional[torch.Tensor] = None,
        transformer_options={}
    ) -> torch.Tensor:
        """Apply paired spatial/temporal attention and blend the results.

        :param x: (b*t, c, h, w) frame batch; `timesteps` is t (frames per clip).
        :param context: spatial conditioning tokens.
        :param time_context: temporal conditioning; derived from `context`
            when use_spatial_context is set.
        :param image_only_indicator: passed to the AlphaBlender to disable
            temporal mixing for still images.
        """
        _, _, h, w = x.shape
        transformer_options["activations_shape"] = list(x.shape)
        x_in = x
        spatial_context = None
        if exists(context):
            spatial_context = context

        if self.use_spatial_context:
            assert (
                context.ndim == 3
            ), f"n dims of spatial context should be 3 but are {context.ndim}"

            if time_context is None:
                time_context = context
            # One context per clip (first frame), broadcast to every pixel.
            time_context_first_timestep = time_context[::timesteps]
            time_context = repeat(
                time_context_first_timestep, "b ... -> (b n) ...", n=h * w
            )
        elif time_context is not None and not self.use_spatial_context:
            time_context = repeat(time_context, "b ... -> (b n) ...", n=h * w)
            if time_context.ndim == 2:
                time_context = rearrange(time_context, "b c -> b 1 c")

        x = self.norm(x)
        if not self.use_linear:
            x = self.proj_in(x)
        x = rearrange(x, "b c h w -> b (h w) c")
        if self.use_linear:
            x = self.proj_in(x)

        # Sinusoidal embedding of the frame index within each clip.
        num_frames = torch.arange(timesteps, device=x.device)
        num_frames = repeat(num_frames, "t -> b t", b=x.shape[0] // timesteps)
        num_frames = rearrange(num_frames, "b t -> (b t)")
        t_emb = timestep_embedding(num_frames, self.in_channels, repeat_only=False, max_period=self.max_time_embed_period).to(x.dtype)
        emb = self.time_pos_embed(t_emb)
        emb = emb[:, None, :]

        for it_, (block, mix_block) in enumerate(
            zip(self.transformer_blocks, self.time_stack)
        ):
            transformer_options["block_index"] = it_
            x = block(
                x,
                context=spatial_context,
                transformer_options=transformer_options,
            )

            x_mix = x
            x_mix = x_mix + emb

            B, S, C = x_mix.shape
            # Fold space into the batch so the mix block attends across frames.
            x_mix = rearrange(x_mix, "(b t) s c -> (b s) t c", t=timesteps)
            x_mix = mix_block(x_mix, context=time_context)  # TODO: transformer_options
            x_mix = rearrange(
                x_mix, "(b s) t c -> (b t) s c", s=S, b=B // timesteps, c=C, t=timesteps
            )

            x = self.time_mixer(x_spatial=x, x_temporal=x_mix, image_only_indicator=image_only_indicator)

        if self.use_linear:
            x = self.proj_out(x)
        x = rearrange(x, "b (h w) c -> b c h w", h=h, w=w)
        if not self.use_linear:
            x = self.proj_out(x)
        out = x + x_in
        return out
================================================
FILE: sd/openaimodel.py
================================================
from abc import abstractmethod
import torch
import torch as th
import torch.nn as nn
import torch.nn.functional as F
from einops import rearrange
import logging
import copy
from ..helper import ExtraOptions
from comfy.ldm.modules.diffusionmodules.util import (
checkpoint,
avg_pool_nd,
timestep_embedding,
AlphaBlender,
)
from comfy.ldm.modules.attention import SpatialTransformer, SpatialVideoTransformer, default
from .attention import ReSpatialTransformer, ReBasicTransformerBlock
from comfy.ldm.util import exists
import comfy.patcher_extension
import comfy.ops
ops = comfy.ops.disable_weight_init
from comfy.ldm.modules.diffusionmodules.openaimodel import TimestepBlock, TimestepEmbedSequential, Upsample, Downsample, ResBlock, VideoResBlock
from ..latents import slerp_tensor, interpolate_spd, tile_latent, untile_latent, gaussian_blur_2d, median_blur_2d
from ..style_transfer import apply_scattersort_masked, apply_scattersort_tiled, adain_seq_inplace, adain_patchwise_row_batch_med, adain_patchwise_row_batch, apply_scattersort, apply_scattersort_spatial, StyleMMDiT_Model, StyleUNet_Model
#This is needed because accelerate makes a copy of transformer_options which breaks "transformer_index"
def forward_timestep_embed(ts, x, emb, context=None, transformer_options={}, output_shape=None, time_context=None, num_video_frames=None, image_only_indicator=None, style_block=None):
    """Run `x` through each layer of a TimestepEmbedSequential, dispatching on type.

    ResBlocks get the timestep embedding, spatial transformers get the
    conditioning context and style hooks, resamplers get shape info, and any
    unrecognized layer is offered to "forward_timestep_embed_patch" handlers
    before being called plainly. Check order matters: VideoResBlock must be
    tested before TimestepBlock, since it is itself a TimestepBlock subclass.
    """
    for module in ts:
        if isinstance(module, VideoResBlock):  # UNUSED
            x = module(x, emb, num_video_frames, image_only_indicator)
            continue
        if isinstance(module, TimestepBlock):  # ResBlock(TimestepBlock)
            x = module(x, emb, style_block.res_block)
            x = style_block(x, "res")
            continue
        if isinstance(module, SpatialVideoTransformer):  # UNUSED
            x = module(x, context, time_context, num_video_frames, image_only_indicator, transformer_options)
            if "transformer_index" in transformer_options:
                transformer_options["transformer_index"] += 1
            continue
        if isinstance(module, ReSpatialTransformer):  # USED
            x = module(x, context, style_block.spatial_block, transformer_options,)
            x = style_block(x, "spatial")
            if "transformer_index" in transformer_options:
                transformer_options["transformer_index"] += 1
            continue
        if isinstance(module, Upsample):
            x = module(x, output_shape=output_shape)
            x = style_block(x, "resample")
            continue
        if isinstance(module, Downsample):
            x = module(x)
            x = style_block(x, "resample")
            continue

        # Unknown layer: let registered patches claim it first.
        handled = False
        for class_type, handler in transformer_options.get("patches", {}).get("forward_timestep_embed_patch", []):
            if isinstance(module, class_type):
                x = handler(module, x, emb, context, transformer_options, output_shape, time_context, num_video_frames, image_only_indicator)
                handled = True
                break
        if handled:
            continue
        x = module(x)
    return x
class ReResBlock(TimestepBlock):
    """
    A residual block that can optionally change the number of channels.
    :param channels: the number of input channels.
    :param emb_channels: the number of timestep embedding channels.
    :param dropout: the rate of dropout.
    :param out_channels: if specified, the number of out channels.
    :param use_conv: if True and out_channels is specified, use a spatial
        convolution instead of a smaller 1x1 convolution to change the
        channels in the skip connection.
    :param dims: determines if the signal is 1D, 2D, or 3D.
    :param use_checkpoint: if True, use gradient checkpointing on this module.
    :param up: if True, use this block for upsampling.
    :param down: if True, use this block for downsampling.

    Re-implementation of comfy's ResBlock whose `_forward` is unrolled
    layer-by-layer so a `style_block` hook can observe/rewrite every
    intermediate activation.
    """

    def __init__(
        self,
        channels,
        emb_channels,
        dropout,
        out_channels=None,
        use_conv=False,
        use_scale_shift_norm=False,
        dims=2,
        use_checkpoint=False,
        up=False,
        down=False,
        kernel_size=3,
        exchange_temb_dims=False,
        skip_t_emb=False,
        dtype=None,
        device=None,
        operations=ops
    ):
        super().__init__()
        self.channels = channels
        self.emb_channels = emb_channels
        self.dropout = dropout
        self.out_channels = out_channels or channels
        self.use_conv = use_conv
        self.use_checkpoint = use_checkpoint
        self.use_scale_shift_norm = use_scale_shift_norm
        self.exchange_temb_dims = exchange_temb_dims

        # "same" padding for either a uniform or a per-dimension kernel size.
        if isinstance(kernel_size, list):
            padding = [k // 2 for k in kernel_size]
        else:
            padding = kernel_size // 2

        # in_layers: GroupNorm -> SiLU -> conv (indexed individually in _forward).
        self.in_layers = nn.Sequential(
            operations.GroupNorm(32, channels, dtype=dtype, device=device),
            nn.SiLU(),
            operations.conv_nd(dims, channels, self.out_channels, kernel_size, padding=padding, dtype=dtype, device=device),
        )

        self.updown = up or down

        if up:
            self.h_upd = Upsample(channels, False, dims, dtype=dtype, device=device)
            self.x_upd = Upsample(channels, False, dims, dtype=dtype, device=device)
        elif down:
            self.h_upd = Downsample(channels, False, dims, dtype=dtype, device=device)
            self.x_upd = Downsample(channels, False, dims, dtype=dtype, device=device)
        else:
            self.h_upd = self.x_upd = nn.Identity()

        self.skip_t_emb = skip_t_emb
        if self.skip_t_emb:
            self.emb_layers = None
            self.exchange_temb_dims = False
        else:
            # emb_layers: SiLU -> Linear; doubled width when scale/shift norm
            # splits the embedding into (scale, shift).
            self.emb_layers = nn.Sequential(
                nn.SiLU(),
                operations.Linear(
                    emb_channels,
                    2 * self.out_channels if use_scale_shift_norm else self.out_channels, dtype=dtype, device=device
                ),
            )
        # out_layers: GroupNorm -> SiLU -> Dropout -> conv.
        self.out_layers = nn.Sequential(
            operations.GroupNorm(32, self.out_channels, dtype=dtype, device=device),
            nn.SiLU(),
            nn.Dropout(p=dropout),
            operations.conv_nd(dims, self.out_channels, self.out_channels, kernel_size, padding=padding, dtype=dtype, device=device)
            ,
        )

        # Skip connection: identity when widths match, else 3x3 or 1x1 conv.
        if self.out_channels == channels:
            self.skip_connection = nn.Identity()
        elif use_conv:
            self.skip_connection = operations.conv_nd(
                dims, channels, self.out_channels, kernel_size, padding=padding, dtype=dtype, device=device
            )
        else:
            self.skip_connection = operations.conv_nd(dims, channels, self.out_channels, 1, dtype=dtype, device=device)

    def forward(self, x, emb, style_block=None):
        """
        Apply the block to a Tensor, conditioned on a timestep embedding.
        :param x: an [N x C x ...] Tensor of features.
        :param emb: an [N x emb_channels] Tensor of timestep embeddings.
        :param style_block: per-stage activation hook forwarded to _forward.
        :return: an [N x C x ...] Tensor of outputs.
        """
        return checkpoint(
            self._forward, (x, emb, style_block), self.parameters(), self.use_checkpoint
        )

    def _forward(self, x, emb, style_block=None):
        # Unrolled version of comfy's ResBlock._forward: each sub-layer is
        # invoked by index so style_block can tap every intermediate.
        #if self.updown: # not used with sdxl?
        #    in_rest, in_conv = self.in_layers[:-1], self.in_layers[-1]
        #    h = in_rest(x)
        #    h = self.h_upd(h)
        #    x = self.x_upd(x)
        #    h = in_conv(h)
        #else:
        #    h = self.in_layers(x)

        h = self.in_layers[0](x)
        h = style_block(h, "in_norm")
        h = self.in_layers[1](h)
        h = style_block(h, "in_silu")
        h = self.in_layers[2](h)
        h = style_block(h, "in_conv")

        emb_out = None
        if not self.skip_t_emb:
            #emb_out = self.emb_layers(emb).type(h.dtype)
            emb_out = self.emb_layers[0](emb).type(h.dtype)
            emb_out = style_block(emb_out, "emb_silu")
            emb_out = self.emb_layers[1](emb_out)
            emb_out = style_block(emb_out, "emb_linear")
            # Right-pad with singleton dims so the embedding broadcasts over
            # the spatial axes of h.
            while len(emb_out.shape) < len(h.shape):
                emb_out = emb_out[..., None]

        if self.use_scale_shift_norm:  # not used with sdxl?
            out_norm, out_rest = self.out_layers[0], self.out_layers[1:]
            h = out_norm(h)
            if emb_out is not None:
                scale, shift = th.chunk(emb_out, 2, dim=1)
                h *= (1 + scale)
                h += shift
            h = out_rest(h)
        else:
            if emb_out is not None:
                if self.exchange_temb_dims:
                    emb_out = emb_out.movedim(1, 2)
                h = h + emb_out
                h = style_block(h, "emb_res")
            #h = self.out_layers(h)
            h = self.out_layers[0](h)
            h = style_block(h, "out_norm")
            h = self.out_layers[1](h)
            h = style_block(h, "out_silu")
            # Index [3] deliberately skips out_layers[2] (Dropout) — NOTE(review):
            # this means dropout is never applied on this path; presumably
            # intentional for inference-only use — confirm.
            h = self.out_layers[3](h)
            h = style_block(h, "out_conv")

        res_out = self.skip_connection(x) + h
        res_out = style_block(res_out, "residual")
        return res_out
        #return self.skip_connection(x) + h
class Timestep(nn.Module):
    """Wraps `timestep_embedding` as a module producing `dim`-wide embeddings."""

    def __init__(self, dim):
        super().__init__()
        # Width of the sinusoidal embedding produced by forward().
        self.dim = dim

    def forward(self, t):
        emb = timestep_embedding(t, self.dim)
        return emb
def apply_control(h, control, name):
    """Add the next queued ControlNet residual for `name` onto `h`, if any.

    :param h: current hidden-state tensor; updated in place via `+=` when a
        residual is applied.
    :param control: optional dict mapping block names (e.g. "input", "middle",
        "output") to lists of residual tensors; the residual for this call is
        popped from the end of the matching list.
    :param name: which control stack to draw from.
    :return: `h` with the residual added, or unchanged when no residual is
        queued or the shapes are incompatible (a warning is logged).
    """
    if control is None:
        return h
    stack = control.get(name)
    if not stack:
        return h
    ctrl = stack.pop()
    if ctrl is None:
        return h
    try:
        h += ctrl
    # Fix: was a bare `except:`, which also swallowed KeyboardInterrupt and
    # SystemExit; best-effort behavior is preserved for real runtime errors.
    except Exception:
        logging.warning("warning control could not be applied {} {}".format(h.shape, ctrl.shape))
    return h
class ReUNetModel(nn.Module):
"""
The full UNet model with attention and timestep embedding.
:param in_channels: channels in the input Tensor.
:param model_channels: base channel count for the model.
:param out_channels: channels in the output Tensor.
:param num_res_blocks: number of residual blocks per downsample.
:param dropout: the dropout probability.
:param channel_mult: channel multiplier for each level of the UNet.
:param conv_resample: if True, use learned convolutions for upsampling and
downsampling.
:param dims: determines if the signal is 1D, 2D, or 3D.
:param num_classes: if specified (as an int), then this model will be
class-conditional with `num_classes` classes.
:param use_checkpoint: use gradient checkpointing to reduce memory usage.
:param num_heads: the number of attention heads in each attention layer.
:param num_heads_channels: if specified, ignore num_heads and instead use
a fixed channel width per attention head.
:param num_heads_upsample: works with num_heads to set a different number
of heads for upsampling. Deprecated.
:param use_scale_shift_norm: use a FiLM-like conditioning mechanism.
:param resblock_updown: use residual blocks for up/downsampling.
:param use_new_attention_order: use a different attention pattern for potentially
increased efficiency.
"""
def __init__(
self,
image_size,
in_channels,
model_channels,
out_channels,
num_res_blocks,
dropout = 0,
channel_mult = (1, 2, 4, 8),
conv_resample = True,
dims = 2,
num_classes = None,
use_checkpoint = False,
dtype = th.float32,
num_heads = -1,
num_head_channels = -1,
num_heads_upsample = -1,
use_scale_shift_norm = False,
resblock_updown = False,
use_new_attention_order = False,
use_spatial_transformer = False, # custom transformer support
transformer_depth = 1, # custom transformer support
context_dim = None, # custom transformer support
n_embed = None, # custom support for prediction of discrete ids into codebook of first stage vq model
legacy = True,
disable_self_attentions = None,
num_attention_blocks = None,
disable_middle_self_attn = False,
use_linear_in_transformer = False,
adm_in_channels = None,
transformer_depth_middle = None,
transformer_depth_output = None,
use_temporal_resblock = False,
use_temporal_attention = False,
time_context_dim = None,
extra_ff_mix_layer = False,
use_spatial_context = False,
merge_strategy = None,
merge_factor = 0.0,
video_kernel_size = None,
disable_temporal_crossattention = False,
max_ddpm_temb_period = 10000,
attn_precision = None,
device = None,
operations = ops,
):
super().__init__()
if context_dim is not None:
assert use_spatial_transformer, 'Fool!! You forgot to use the spatial transformer for your cross-attention conditioning...'
# from omegaconf.listconfig import ListConfig
# if type(context_dim) == ListConfig:
# context_dim = list(context_dim)
if num_heads_upsample == -1:
num_heads_upsample = num_heads
if num_heads == -1:
assert num_head_channels != -1, 'Either num_heads or num_head_channels has to be set'
if num_head_channels == -1:
assert num_heads != -1, 'Either num_heads or num_head_channels has to be set'
self.in_channels = in_channels
self.model_channels = model_channels
self.out_channels = out_channels
if isinstance(num_res_blocks, int):
self.num_res_blocks = len(channel_mult) * [num_res_blocks]
else:
if len(num_res_blocks) != len(channel_mult):
raise ValueError("provide num_res_blocks either as an int (globally constant) or "
"as a list/tuple (per-level) with the same length as channel_mult")
self.num_res_blocks = num_res_blocks
if disable_self_attentions is not None:
# should be a list of booleans, indicating whether to disable self-attention in TransformerBlocks or not
assert len(disable_self_attentions) == len(channel_mult)
if num_attention_blocks is not None:
assert len(num_attention_blocks) == len(self.num_res_blocks)
transformer_depth = transformer_depth[:]
transformer_depth_output = transformer_depth_output[:]
self.dropout = dropout
self.channel_mult = channel_mult
self.conv_resample = conv_resample
self.num_classes = num_classes
self.use_checkpoint = use_checkpoint
self.dtype = dtype
self.num_heads = num_heads
self.num_head_channels = num_head_channels
self.num_heads_upsample = num_heads_upsample
self.use_temporal_resblocks = use_temporal_resblock
self.predict_codebook_ids = n_embed is not None
self.default_num_video_frames = None
time_embed_dim = model_channels * 4
self.time_embed = nn.Sequential(
operations.Linear(model_channels, time_embed_dim, dtype=self.dtype, device=device),
nn.SiLU(),
operations.Linear(time_embed_dim, time_embed_dim, dtype=self.dtype, device=device),
)
if self.num_classes is not None:
if isinstance(self.num_classes, int):
self.label_emb = nn.Embedding(num_classes, time_embed_dim, dtype=self.dtype, device=device)
elif self.num_classes == "continuous":
logging.debug("setting up linear c_adm embedding layer")
self.label_emb = nn.Linear(1, time_embed_dim)
elif self.num_classes == "sequential":
assert adm_in_channels is not None
self.label_emb = nn.Sequential(
nn.Sequential(
operations.Linear(adm_in_channels, time_embed_dim, dtype=self.dtype, device=device),
nn.SiLU(),
operations.Linear(time_embed_dim, time_embed_dim, dtype=self.dtype, device=device),
)
)
else:
raise ValueError()
self.input_blocks = nn.ModuleList(
[
TimestepEmbedSequential(
operations.conv_nd(dims, in_channels, model_channels, 3, padding=1, dtype=self.dtype, device=device)
)
]
)
self._feature_size = model_channels
input_block_chans = [model_channels]
ch = model_channels
ds = 1
def get_attention_layer(
ch,
num_heads,
dim_head,
depth=1,
context_dim=None,
use_checkpoint=False,
disable_self_attn=False,
):
if use_temporal_attention:
return SpatialVideoTransformer(
ch,
num_heads,
dim_head,
depth = depth,
context_dim = context_dim,
time_context_dim = time_context_dim,
dropout = dropout,
ff_in = extra_ff_mix_layer,
use_spatial_context = use_spatial_context,
merge_strategy = merge_strategy,
merge_factor = merge_factor,
checkpoint = use_checkpoint,
use_linear = use_linear_in_transformer,
disable_self_attn = disable_self_attn,
disable_temporal_crossattention = disable_temporal_crossattention,
max_time_embed_period = max_ddpm_temb_period,
attn_precision = attn_precision,
dtype=self.dtype, device=device, operations=operations,
)
else:
return SpatialTransformer(
ch, num_heads, dim_head, depth=depth, context_dim=context_dim,
disable_self_attn=disable_self_attn, use_linear=use_linear_in_transformer,
use_checkpoint=use_checkpoint, attn_precision=attn_precision, dtype=self.dtype, device=device, operations=operations
)
def get_resblock(
merge_factor,
merge_strategy,
video_kernel_size,
ch,
time_embed_dim,
dropout,
out_channels,
dims,
use_checkpoint,
use_scale_shift_norm,
down = False,
up = False,
dtype = None,
device = None,
operations = ops
):
if self.use_temporal_resblocks:
return VideoResBlock(
merge_factor = merge_factor,
merge_strategy = merge_strategy,
video_kernel_size = video_kernel_size,
channels = ch,
emb_channels = time_embed_dim,
dropout = dropout,
out_channels = out_channels,
dims = dims,
use_checkpoint = use_checkpoint,
use_scale_shift_norm = use_scale_shift_norm,
down = down,
up = up,
dtype=dtype, device=device, operations=operations,
)
else:
return ResBlock(
channels = ch,
emb_channels = time_embed_dim,
dropout = dropout,
out_channels = out_channels,
use_checkpoint = use_checkpoint,
dims = dims,
use_scale_shift_norm = use_scale_shift_norm,
down = down,
up = up,
dtype=dtype, device=device, operations=operations,
)
for level, mult in enumerate(channel_mult):
for nr in range(self.num_res_blocks[level]):
layers = [
get_resblock(
merge_factor = merge_factor,
merge_strategy = merge_strategy,
video_kernel_size = video_kernel_size,
ch = ch,
time_embed_dim = time_embed_dim,
dropout = dropout,
out_channels = mult * model_channels,
dims = dims,
use_checkpoint = use_checkpoint,
use_scale_shift_norm = use_scale_shift_norm,
dtype=self.dtype, device=device, operations=operations,
)
]
ch = mult * model_channels
num_transformers = transformer_depth.pop(0)
if num_transformers > 0:
if num_head_channels == -1:
dim_head = ch // num_heads
else:
num_heads = ch // num_head_channels
dim_head = num_head_channels
if legacy:
#num_heads = 1
dim_head = ch // num_heads if use_spatial_transformer else num_head_channels
if exists(disable_self_attentions):
disabled_sa = disable_self_attentions[level]
else:
disabled_sa = False
if not exists(num_attention_blocks) or nr < num_attention_blocks[level]:
layers.append(get_attention_layer(
ch, num_heads, dim_head, depth=num_transformers, context_dim=context_dim,
disable_self_attn=disabled_sa, use_checkpoint=use_checkpoint)
)
self.input_blocks.append(TimestepEmbedSequential(*layers))
self._feature_size += ch
input_block_chans.append(ch)
if level != len(channel_mult) - 1:
out_ch = ch
self.input_blocks.append(
TimestepEmbedSequential(
get_resblock(
merge_factor = merge_factor,
merge_strategy = merge_strategy,
video_kernel_size = video_kernel_size,
ch = ch,
time_embed_dim = time_embed_dim,
dropout = dropout,
out_channels = out_ch,
dims = dims,
use_checkpoint = use_checkpoint,
use_scale_shift_norm = use_scale_shift_norm,
down = True,
dtype=self.dtype, device=device, operations=operations,
)
if resblock_updown
else Downsample(ch, conv_resample, dims=dims, out_channels=out_ch, dtype=self.dtype, device=device, operations=operations)
)
)
ch = out_ch
input_block_chans.append(ch)
ds *= 2
self._feature_size += ch
if num_head_channels == -1:
dim_head = ch // num_heads
else:
num_heads = ch // num_head_channels
dim_head = num_head_channels
if legacy:
#num_heads = 1
dim_head = ch // num_heads if use_spatial_transformer else num_head_channels
mid_block = [
get_resblock(
merge_factor = merge_factor,
merge_strategy = merge_strategy,
video_kernel_size = video_kernel_size,
ch = ch,
time_embed_dim = time_embed_dim,
dropout = dropout,
out_channels = None,
dims = dims,
use_checkpoint = use_checkpoint,
use_scale_shift_norm = use_scale_shift_norm,
dtype=self.dtype, device=device, operations=operations,
)]
self.middle_block = None
if transformer_depth_middle >= -1:
if transformer_depth_middle >= 0:
mid_block += [get_attention_layer( # always uses a self-attn
ch, num_heads, dim_head, depth=transformer_depth_middle, context_dim=context_dim,
disable_self_attn=disable_middle_self_attn, use_checkpoint=use_checkpoint
),
get_resblock(
merge_factor = merge_factor,
merge_strategy = merge_strategy,
video_kernel_size = video_kernel_size,
ch = ch,
time_embed_dim = time_embed_dim,
dropout = dropout,
out_channels = None,
dims = dims,
use_checkpoint = use_checkpoint,
use_scale_shift_norm = use_scale_shift_norm,
dtype=self.dtype, device=device, operations=operations,
)]
self.middle_block = TimestepEmbedSequential(*mid_block)
self._feature_size += ch
self.output_blocks = nn.ModuleList([])
for level, mult in list(enumerate(channel_mult))[::-1]:
for i in range(self.num_res_blocks[level] + 1):
ich = input_block_chans.pop()
layers = [
get_resblock(
merge_factor = merge_factor,
merge_strategy = merge_strategy,
video_kernel_size = video_kernel_size,
ch = ch + ich,
time_embed_dim = time_embed_dim,
dropout = dropout,
out_channels = model_channels * mult,
dims = dims,
use_checkpoint = use_checkpoint,
use_scale_shift_norm = use_scale_shift_norm,
dtype=self.dtype, device=device, operations=operations,
)
]
ch = model_channels * mult
num_transformers = transformer_depth_output.pop()
if num_transformers > 0:
if num_head_channels == -1:
dim_head = ch // num_heads
else:
num_heads = ch // num_head_channels
dim_head = num_head_channels
if legacy:
#num_heads = 1
dim_head = ch // num_heads if use_spatial_transformer else num_head_channels
if exists(disable_self_attentions):
disabled_sa = disable_self_attentions[level]
else:
disabled_sa = False
if not exists(num_attention_blocks) or i < num_attention_blocks[level]:
layers.append(
get_attention_layer(
ch, num_heads, dim_head, depth=num_transformers, context_dim=context_dim,
disable_self_attn=disabled_sa, use_checkpoint=use_checkpoint
)
)
if level and i == self.num_res_blocks[level]:
out_ch = ch
layers.append(
get_resblock(
merge_factor = merge_factor,
merge_strategy = merge_strategy,
video_kernel_size = video_kernel_size,
ch = ch,
time_embed_dim = time_embed_dim,
dropout = dropout,
out_channels = out_ch,
dims = dims,
use_checkpoint = use_checkpoint,
use_scale_shift_norm = use_scale_shift_norm,
up = True,
dtype=self.dtype, device=device, operations=operations,
)
if resblock_updown
else Upsample(ch, conv_resample, dims=dims, out_channels=out_ch, dtype=self.dtype, device=device, operations=operations)
)
ds //= 2
self.output_blocks.append(TimestepEmbedSequential(*layers))
self._feature_size += ch
self.out = nn.Sequential(
operations.GroupNorm(32, ch, dtype=self.dtype, device=device),
nn.SiLU(),
operations.conv_nd(dims, model_channels, out_channels, 3, padding=1, dtype=self.dtype, device=device),
)
if self.predict_codebook_ids:
self.id_predictor = nn.Sequential(
operations.GroupNorm(32, ch, dtype=self.dtype, device=device),
operations.conv_nd(dims, model_channels, n_embed, 1, dtype=self.dtype, device=device),
#nn.LogSoftmax(dim=1) # change to cross_entropy and produce non-normalized logits
)
def forward(self, x, timesteps=None, context=None, y=None, control=None, transformer_options={}, **kwargs):
    """Public entry point for the diffusion UNet.

    Delegates to :meth:`_forward`, but first wraps it with any
    DIFFUSION_MODEL wrappers registered via ComfyUI's patcher-extension
    mechanism (read out of ``transformer_options``), so external patches
    can intercept the model call.
    """
    wrappers = comfy.patcher_extension.get_all_wrappers(
        comfy.patcher_extension.WrappersMP.DIFFUSION_MODEL, transformer_options
    )
    executor = comfy.patcher_extension.WrapperExecutor.new_class_executor(
        self._forward, self, wrappers
    )
    return executor.execute(x, timesteps, context, y, control, transformer_options, **kwargs)
def _forward(self, x, timesteps=None, context=None, y=None, control=None, transformer_options={}, **kwargs):
    """
    Apply the model to an input batch.

    RES4LYF-extended UNet forward pass: on top of the stock input/middle/output
    block traversal this adds regional attention masking (AttnMask/RegContext),
    style guidance (scattersort / AdaIN / WCT via self.Retrojector and
    self.StyleWCT), and a two-pass "recon" noise mode driven by StyleMMDiT.

    :param x: an [N x C x ...] Tensor of inputs.
    :param timesteps: a 1-D batch of timesteps.
    :param context: conditioning plugged in via crossattn
    :param y: an [N] Tensor of labels, if class-conditional.
    :param control: optional control residuals consumed by apply_control.
    :param transformer_options: option dict; read AND written in place here
        (e.g. "block", "cross_mask", "StyleMMDiT").
        NOTE(review): the `{}` default is a shared mutable default argument;
        in practice ComfyUI always passes this dict in, but calls without it
        would leak writes across invocations — confirm before relying on it.
    :return: an [N x C x ...] Tensor of outputs (eps / noise prediction).
    """
    # Spatial extent of the latent; img_len is used for attention-slice bookkeeping.
    h_len, w_len = x.shape[-2:]
    img_len = h_len * w_len
    transformer_options["original_shape"] = list(x.shape)
    transformer_options["transformer_index"] = 0
    transformer_patches = transformer_options.get("patches", {})

    # Current sigma for this model call (moved onto x's device/dtype).
    SIGMA = transformer_options['sigmas'].to(x)  # timestep[0].unsqueeze(0) #/ 1000

    # NOTE(review): both slices are slice(None, -1), i.e. "all but last" —
    # presumably placeholders for img/txt token ranges used by StyleMMDiT; confirm.
    img_slice = slice(None, -1)  # slice(None, img_len) # for the sake of cross attn... :-1
    txt_slice = slice(None, -1)

    EO = transformer_options.get("ExtraOptions", ExtraOptions(""))
    if EO is not None:
        EO.mute = True

    # HEADS is only used by StyleMMDiT.set_len; "zero_heads" disables it.
    if EO("zero_heads"):
        HEADS = 0
    else:
        HEADS = 10  # self.input_blocks[4][1].transformer_blocks[0].attn2.heads # HEADS = 10

    StyleMMDiT = transformer_options.get('StyleMMDiT', StyleUNet_Model())
    StyleMMDiT.set_len(h_len, w_len, img_slice, txt_slice, HEADS=HEADS)
    StyleMMDiT.Retrojector = self.Retrojector if hasattr(self, "Retrojector") else None
    # Clear the option so nested/recursive calls don't re-apply the style model.
    transformer_options['StyleMMDiT'] = None

    # Optional replacement latent; rescaled from x0-space into the model's
    # noised-input space (v-pred style scaling by 1/sqrt(sigma^2+1)).
    x_tmp = transformer_options.get("x_tmp")
    if x_tmp is not None:
        x_tmp = x_tmp.clone() / ((SIGMA ** 2 + 1) ** 0.5)
        x_tmp = x_tmp.expand_as(x)  # (x.shape[0], -1, -1, -1) # .clone().to(x)

    y0_style, img_y0_style = None, None

    # Keep pristine copies; the per-cond loop below re-clones from these.
    x_orig, timesteps_orig, y_orig, context_orig = clone_inputs(x, timesteps, y, context)
    h_orig = x_orig.clone()

    # Regional conditioning strength / floor (sign-flipped: stored negated).
    weight = -1 * transformer_options.get("regional_conditioning_weight", 0.0)
    floor = -1 * transformer_options.get("regional_conditioning_floor", 0.0)
    # floor = min(floor, weight)
    mask_zero, mask_up_zero, mask_down_zero, mask_down2_zero = None, None, None, None
    txt_len = context.shape[1]  # mask_obj[0].text_len

    # Initial noise for "smart noise": either the sampler's per-row latent z_,
    # or an explicit x_init passed through transformer_options.
    z_ = transformer_options.get("z_")  # initial noise and/or image+noise from start of rk_sampler_beta()
    rk_row = transformer_options.get("row")  # for "smart noise"
    if z_ is not None:
        x_init = z_[rk_row].to(x)
    elif 'x_init' in transformer_options:
        x_init = transformer_options.get('x_init').to(x)

    # recon loop to extract exact noise pred for scattersort guide assembly
    RECON_MODE = StyleMMDiT.noise_mode == "recon"
    recon_iterations = 2 if StyleMMDiT.noise_mode == "recon" else 1
    for recon_iter in range(recon_iterations):
        y0_style = StyleMMDiT.guides
        y0_style_active = True if type(y0_style) == torch.Tensor else False

        # Pass 0 of recon mode runs the bare model to get an exact eps; pass 1
        # re-noises a "lured" denoised estimate and runs again.
        RECON_MODE = True if StyleMMDiT.noise_mode == "recon" and recon_iter == 0 else False

        ISIGMA = SIGMA
        if StyleMMDiT.noise_mode == "recon" and recon_iter == 1:
            ISIGMA = SIGMA * EO("ISIGMA_FACTOR", 1.0)
            model_sampling = transformer_options.get('model_sampling')
            timesteps_orig = model_sampling.timestep(ISIGMA).expand_as(timesteps_orig)

            x_recon = x_tmp if x_tmp is not None else x_orig
            # `eps` here is the prediction produced by recon pass 0 (assigned at
            # the bottom of this loop) — only valid because recon_iter==1 implies
            # a prior iteration ran.
            # noise_prediction = x_recon + (1-SIGMA.to(x_recon)) * eps.to(x_recon)
            noise_prediction = eps.to(x_recon)
            denoised = x_recon * ((SIGMA.to(x_recon) ** 2 + 1) ** 0.5) - SIGMA.to(x_recon) * eps.to(x_recon)
            denoised = StyleMMDiT.apply_recon_lure(denoised, y0_style.to(x_recon))  # .to(denoised)
            # Re-noise the lured denoised estimate at ISIGMA to form pass-1 input.
            new_x = (denoised + ISIGMA.to(x_recon) * noise_prediction) / ((ISIGMA.to(x_recon) ** 2 + 1) ** 0.5)
            h_orig = new_x.clone().to(x)
            x_init = noise_prediction
        elif StyleMMDiT.noise_mode == "bonanza":
            x_init = torch.randn_like(x_init)

        if y0_style_active:
            # An all-zero guide is treated as "no image": fall back to x itself.
            if y0_style.sum() == 0.0 and y0_style.std() == 0.0:
                y0_style_noised = x.clone()
            else:
                y0_style_noised = (y0_style + ISIGMA.to(y0_style) * x_init.expand_as(x).to(y0_style)) / ((ISIGMA.to(y0_style) ** 2 + 1) ** 0.5)  # x_init.expand(x.shape[0],-1,-1,-1).to(y0_style))

        out_list = []
        # One full UNet pass per cond/uncond batch entry.
        for cond_iter in range(len(transformer_options['cond_or_uncond'])):
            UNCOND = transformer_options['cond_or_uncond'][cond_iter] == 1

            # Style guides are appended to the batch (bsz_style extra rows),
            # except in recon pass 0 which runs the bare single-row batch.
            bsz_style = y0_style.shape[0] if y0_style_active else 0
            bsz = 1 if RECON_MODE else bsz_style + 1

            h, timesteps, context = clone_inputs(h_orig[cond_iter].unsqueeze(0), timesteps_orig[cond_iter].unsqueeze(0), context_orig[cond_iter].unsqueeze(0))
            y = y_orig[cond_iter].unsqueeze(0).clone() if y_orig is not None else None

            mask, mask_up, mask_down, mask_down2 = None, None, None, None

            # --- Regional conditioning: positive (cond) branch ---
            if not UNCOND and 'AttnMask' in transformer_options:  # and weight != 0:
                AttnMask = transformer_options['AttnMask']
                mask = transformer_options['AttnMask'].attn_mask.mask.to('cuda')
                mask_up = transformer_options['AttnMask'].mask_up.to('cuda')
                mask_down = transformer_options['AttnMask'].mask_down.to('cuda')
                if hasattr(transformer_options['AttnMask'], "mask_down2"):
                    mask_down2 = transformer_options['AttnMask'].mask_down2.to('cuda')
                if weight == 0:
                    # Zero weight: use the regional context but drop the masks.
                    context = transformer_options['RegContext'].context.to(context.dtype).to(context.device)
                    mask, mask_up, mask_down, mask_down2 = None, None, None, None
                else:
                    context = transformer_options['RegContext'].context.to(context.dtype).to(context.device)
                    txt_len = context.shape[1]
                    # "_zero" masks: all-ones except the text rows, used as a
                    # neutral fallback when floor/weight thresholds kick in.
                    if mask_zero is None:
                        mask_zero = torch.ones_like(mask)
                        mask_zero[:, :txt_len] = mask[:, :txt_len]
                    if mask_up_zero is None:
                        mask_up_zero = torch.ones_like(mask_up)
                        mask_up_zero[:, :txt_len] = mask_up[:, :txt_len]
                    if mask_down_zero is None:
                        mask_down_zero = torch.ones_like(mask_down)
                        mask_down_zero[:, :txt_len] = mask_down[:, :txt_len]
                    if mask_down2_zero is None and mask_down2 is not None:
                        mask_down2_zero = torch.ones_like(mask_down2)
                        mask_down2_zero[:, :txt_len] = mask_down2[:, :txt_len]

            # --- Regional conditioning: negative (uncond) branch ---
            if UNCOND and 'AttnMask_neg' in transformer_options:  # and weight != 0:
                AttnMask = transformer_options['AttnMask_neg']
                mask = transformer_options['AttnMask_neg'].attn_mask.mask.to('cuda')
                mask_up = transformer_options['AttnMask_neg'].mask_up.to('cuda')
                mask_down = transformer_options['AttnMask_neg'].mask_down.to('cuda')
                if hasattr(transformer_options['AttnMask_neg'], "mask_down2"):
                    mask_down2 = transformer_options['AttnMask_neg'].mask_down2.to('cuda')
                if weight == 0:
                    context = transformer_options['RegContext_neg'].context.to(context.dtype).to(context.device)
                    mask, mask_up, mask_down, mask_down2 = None, None, None, None
                else:
                    context = transformer_options['RegContext_neg'].context.to(context.dtype).to(context.device)
                    txt_len = context.shape[1]
                    if mask_zero is None:
                        mask_zero = torch.ones_like(mask)
                        mask_zero[:, :txt_len] = mask[:, :txt_len]
                    if mask_up_zero is None:
                        mask_up_zero = torch.ones_like(mask_up)
                        mask_up_zero[:, :txt_len] = mask_up[:, :txt_len]
                    if mask_down_zero is None:
                        mask_down_zero = torch.ones_like(mask_down)
                        mask_down_zero[:, :txt_len] = mask_down[:, :txt_len]
                    if mask_down2_zero is None and mask_down2 is not None:
                        mask_down2_zero = torch.ones_like(mask_down2)
                        mask_down2_zero[:, :txt_len] = mask_down2[:, :txt_len]

            # Uncond with only a positive AttnMask: tile the uncond context to
            # the regional context's length so mask shapes still line up.
            elif UNCOND and 'AttnMask' in transformer_options:
                AttnMask = transformer_options['AttnMask']
                mask = transformer_options['AttnMask'].attn_mask.mask.to('cuda')
                mask_up = transformer_options['AttnMask'].mask_up.to('cuda')
                mask_down = transformer_options['AttnMask'].mask_down.to('cuda')
                if hasattr(transformer_options['AttnMask'], "mask_down2"):
                    mask_down2 = transformer_options['AttnMask'].mask_down2.to('cuda')
                A = context
                B = transformer_options['RegContext'].context
                context = A.repeat(1, (B.shape[1] // A.shape[1]) + 1, 1)[:, :B.shape[1], :]

                txt_len = context.shape[1]
                if mask_zero is None:
                    mask_zero = torch.ones_like(mask)
                    mask_zero[:, :txt_len] = mask[:, :txt_len]
                if mask_up_zero is None:
                    mask_up_zero = torch.ones_like(mask_up)
                    mask_up_zero[:, :txt_len] = mask_up[:, :txt_len]
                if mask_down_zero is None:
                    mask_down_zero = torch.ones_like(mask_down)
                    mask_down_zero[:, :txt_len] = mask_down[:, :txt_len]
                if mask_down2_zero is None and mask_down2 is not None:
                    mask_down2_zero = torch.ones_like(mask_down2)
                    mask_down2_zero[:, :txt_len] = mask_down2[:, :txt_len]
                if weight == 0:  # ADDED 5/23/2025
                    mask, mask_up, mask_down, mask_down2 = None, None, None, None

            if mask is not None:
                # Boolean masks stay boolean; float-ish masks are cast to x's dtype.
                if mask is not None and not type(mask[0][0] .item()) == bool:
                    mask = mask .to(x.dtype)
                if mask_up is not None and not type(mask_up[0][0] .item()) == bool:
                    mask_up = mask_up .to(x.dtype)
                if mask_down is not None and not type(mask_down[0][0] .item()) == bool:
                    mask_down = mask_down .to(x.dtype)
                if mask_down2 is not None and not type(mask_down2[0][0] .item()) == bool:
                    mask_down2 = mask_down2 .to(x.dtype)
                if mask_zero is not None and not type(mask_zero[0][0] .item()) == bool:
                    mask_zero = mask_zero .to(x.dtype)
                if mask_up_zero is not None and not type(mask_up_zero[0][0] .item()) == bool:
                    mask_up_zero = mask_up_zero .to(x.dtype)
                if mask_down_zero is not None and not type(mask_down_zero[0][0] .item()) == bool:
                    mask_down_zero = mask_down_zero .to(x.dtype)
                if mask_down2_zero is not None and not type(mask_down2_zero[0][0].item()) == bool:
                    mask_down2_zero = mask_down2_zero.to(x.dtype)

                # Split each mask into text (cross-attn) and image (self-attn) rows.
                transformer_options['cross_mask'] = mask [:,:txt_len]
                transformer_options['self_mask'] = mask [:,txt_len:]
                transformer_options['cross_mask_up'] = mask_up [:,:txt_len]
                transformer_options['self_mask_up'] = mask_up [:,txt_len:]
                transformer_options['cross_mask_down'] = mask_down [:,:txt_len]
                transformer_options['self_mask_down'] = mask_down [:,txt_len:]
                transformer_options['cross_mask_down2'] = mask_down2[:,:txt_len] if mask_down2 is not None else None
                transformer_options['self_mask_down2'] = mask_down2[:,txt_len:] if mask_down2 is not None else None

            # h = x
            # Append style guide rows to the batch (skipped in recon pass 0).
            if y0_style_active and not RECON_MODE:
                if mask is None:
                    context, y, _ = StyleMMDiT.apply_style_conditioning(
                        UNCOND = UNCOND,
                        base_context = context,
                        base_y = y,
                        base_llama3 = None,
                    )
                else:
                    context = context.repeat(bsz_style + 1, 1, 1)
                    y = y.repeat(bsz_style + 1, 1) if y is not None else None
                h = torch.cat([h, y0_style_noised[cond_iter:cond_iter+1]], dim=0).to(h)

            # NOTE(review): if transformer_depth_middle < -1 in __init__,
            # self.middle_block is None and len(None) raises here — presumably
            # that configuration is never used with this forward; confirm.
            total_layers = len(self.input_blocks) + len(self.middle_block) + len(self.output_blocks)

            num_video_frames = kwargs.get("num_video_frames", self.default_num_video_frames)
            image_only_indicator = kwargs.get("image_only_indicator", None)
            time_context = kwargs.get("time_context", None)

            assert (y is not None) == (
                self.num_classes is not None
            ), "must specify y if and only if the model is class-conditional"
            hs, hs_adain = [], []
            t_emb = timestep_embedding(timesteps, self.model_channels, repeat_only=False).to(x.dtype)
            emb = self.time_embed(t_emb)

            if "emb_patch" in transformer_patches:
                patch = transformer_patches["emb_patch"]
                for p in patch:
                    emb = p(emb, self.model_channels, transformer_options)

            if self.num_classes is not None:
                assert y.shape[0] == h.shape[0]
                emb = emb + self.label_emb(y)

            # ------------------- input (down) blocks -------------------
            # for id, module in enumerate(self.input_blocks):
            for id, (module, style_block) in enumerate(zip(self.input_blocks, StyleMMDiT.input_blocks)):
                transformer_options["block"] = ("input", id)

                if mask is not None:
                    transformer_options['cross_mask'] = mask [:,:txt_len]
                    transformer_options['self_mask'] = mask [:,txt_len:]
                    transformer_options['cross_mask_up'] = mask_up [:,:txt_len]
                    transformer_options['self_mask_up'] = mask_up [:,txt_len:]
                    transformer_options['cross_mask_down'] = mask_down [:,:txt_len]
                    transformer_options['self_mask_down'] = mask_down [:,txt_len:]
                    transformer_options['cross_mask_down2'] = mask_down2[:,:txt_len] if mask_down2 is not None else None
                    transformer_options['self_mask_down2'] = mask_down2[:,txt_len:] if mask_down2 is not None else None

                # weight/floor schedule: depending on sign and the current depth
                # fraction id/total_layers, either drop the regional masks or
                # substitute the neutral "_zero" masks.
                if weight > 0 and mask is not None and weight < id/total_layers:
                    transformer_options['cross_mask'] = None
                    transformer_options['self_mask'] = None
                elif weight < 0 and mask is not None and abs(weight) < (1 - id/total_layers):
                    transformer_options['cross_mask'] = None
                    transformer_options['self_mask'] = None
                elif floor > 0 and mask is not None and floor > id/total_layers:
                    transformer_options['cross_mask'] = mask_zero [:,:txt_len]
                    transformer_options['self_mask'] = mask_zero [:,txt_len:]
                    transformer_options['cross_mask_up'] = mask_up_zero [:,:txt_len]
                    transformer_options['self_mask_up'] = mask_up_zero [:,txt_len:]
                    transformer_options['cross_mask_down'] = mask_down_zero [:,:txt_len]
                    transformer_options['self_mask_down'] = mask_down_zero [:,txt_len:]
                    transformer_options['cross_mask_down2'] = mask_down2_zero[:,:txt_len] if mask_down2_zero is not None else None
                    transformer_options['self_mask_down2'] = mask_down2_zero[:,txt_len:] if mask_down2_zero is not None else None
                elif floor < 0 and mask is not None and abs(floor) > (1 - id/total_layers):
                    transformer_options['cross_mask'] = mask_zero [:,:txt_len]
                    transformer_options['self_mask'] = mask_zero [:,txt_len:]
                    transformer_options['cross_mask_up'] = mask_up_zero [:,:txt_len]
                    transformer_options['self_mask_up'] = mask_up_zero [:,txt_len:]
                    transformer_options['cross_mask_down'] = mask_down_zero [:,:txt_len]
                    transformer_options['self_mask_down'] = mask_down_zero [:,txt_len:]
                    transformer_options['cross_mask_down2'] = mask_down2_zero[:,:txt_len] if mask_down2_zero is not None else None
                    transformer_options['self_mask_down2'] = mask_down2_zero[:,txt_len:] if mask_down2_zero is not None else None

                h = forward_timestep_embed(module, h, emb, context, transformer_options, time_context=time_context, num_video_frames=num_video_frames, image_only_indicator=image_only_indicator, style_block=style_block)
                if id == 0:
                    # Style projection applied once, right after the stem block.
                    h = StyleMMDiT(h, "proj_in")
                h = apply_control(h, control, 'input')
                if "input_block_patch" in transformer_patches:
                    patch = transformer_patches["input_block_patch"]
                    for p in patch:
                        h = p(h, transformer_options)

                hs.append(h)  # skip connection for the matching output block
                if "input_block_patch_after_skip" in transformer_patches:
                    patch = transformer_patches["input_block_patch_after_skip"]
                    for p in patch:
                        h = p(h, transformer_options)

            # ------------------- middle block -------------------
            transformer_options["block"] = ("middle", 0)
            if self.middle_block is not None:
                style_block = StyleMMDiT.middle_blocks[0]

                if mask is not None:
                    transformer_options['cross_mask'] = mask [:,:txt_len]
                    transformer_options['self_mask'] = mask [:,txt_len:]
                    transformer_options['cross_mask_up'] = mask_up [:,:txt_len]
                    transformer_options['self_mask_up'] = mask_up [:,txt_len:]
                    transformer_options['cross_mask_down'] = mask_down [:,:txt_len]
                    transformer_options['self_mask_down'] = mask_down [:,txt_len:]
                    transformer_options['cross_mask_down2'] = mask_down2[:,:txt_len] if mask_down2 is not None else None
                    transformer_options['self_mask_down2'] = mask_down2[:,txt_len:] if mask_down2 is not None else None

                # Same weight/floor schedule, with the middle block counted as
                # depth len(input_blocks) + 1.
                if weight > 0 and mask is not None and weight < (len(self.input_blocks) + 1)/total_layers:
                    transformer_options['cross_mask'] = None
                    transformer_options['self_mask'] = None
                elif weight < 0 and mask is not None and abs(weight) < (1 - (len(self.input_blocks) + 1)/total_layers):
                    transformer_options['cross_mask'] = None
                    transformer_options['self_mask'] = None
                elif floor > 0 and mask is not None and floor > (len(self.input_blocks) + 1)/total_layers:
                    transformer_options['cross_mask'] = mask_zero [:,:txt_len]
                    transformer_options['self_mask'] = mask_zero [:,txt_len:]
                    transformer_options['cross_mask_up'] = mask_up_zero [:,:txt_len]
                    transformer_options['self_mask_up'] = mask_up_zero [:,txt_len:]
                    transformer_options['cross_mask_down'] = mask_down_zero [:,:txt_len]
                    transformer_options['self_mask_down'] = mask_down_zero [:,txt_len:]
                    transformer_options['cross_mask_down2'] = mask_down2_zero[:,:txt_len] if mask_down2_zero is not None else None
                    transformer_options['self_mask_down2'] = mask_down2_zero[:,txt_len:] if mask_down2_zero is not None else None
                elif floor < 0 and mask is not None and abs(floor) > (1 - (len(self.input_blocks) + 1)/total_layers):
                    transformer_options['cross_mask'] = mask_zero [:,:txt_len]
                    transformer_options['self_mask'] = mask_zero [:,txt_len:]
                    transformer_options['cross_mask_up'] = mask_up_zero [:,:txt_len]
                    transformer_options['self_mask_up'] = mask_up_zero [:,txt_len:]
                    transformer_options['cross_mask_down'] = mask_down_zero [:,:txt_len]
                    transformer_options['self_mask_down'] = mask_down_zero [:,txt_len:]
                    transformer_options['cross_mask_down2'] = mask_down2_zero[:,:txt_len] if mask_down2_zero is not None else None
                    transformer_options['self_mask_down2'] = mask_down2_zero[:,txt_len:] if mask_down2_zero is not None else None

                h = forward_timestep_embed(self.middle_block, h, emb, context, transformer_options, time_context=time_context, num_video_frames=num_video_frames, image_only_indicator=image_only_indicator, style_block=style_block)
            h = apply_control(h, control, 'middle')

            # ------------------- output (up) blocks -------------------
            # for id, module in enumerate(self.output_blocks):
            for id, (module, style_block) in enumerate(zip(self.output_blocks, StyleMMDiT.output_blocks)):
                transformer_options["block"] = ("output", id)
                hsp = hs.pop()  # matching skip connection
                hsp = apply_control(hsp, control, 'output')

                if "output_block_patch" in transformer_patches:
                    patch = transformer_patches["output_block_patch"]
                    for p in patch:
                        h, hsp = p(h, hsp, transformer_options)

                h = th.cat([h, hsp], dim=1)
                del hsp
                if len(hs) > 0:
                    output_shape = hs[-1].shape
                else:
                    output_shape = None

                if mask is not None:
                    transformer_options['cross_mask'] = mask [:,:txt_len]
                    transformer_options['self_mask'] = mask [:,txt_len:]
                    transformer_options['cross_mask_up'] = mask_up [:,:txt_len]
                    transformer_options['self_mask_up'] = mask_up [:,txt_len:]
                    transformer_options['cross_mask_down'] = mask_down [:,:txt_len]
                    transformer_options['self_mask_down'] = mask_down [:,txt_len:]
                    transformer_options['cross_mask_down2'] = mask_down2[:,:txt_len] if mask_down2 is not None else None
                    transformer_options['self_mask_down2'] = mask_down2[:,txt_len:] if mask_down2 is not None else None

                # Weight/floor schedule with output depth len(input_blocks)+1+id.
                if weight > 0 and mask is not None and weight < (len(self.input_blocks) + 1 + id)/total_layers:
                    transformer_options['cross_mask'] = None
                    transformer_options['self_mask'] = None
                elif weight < 0 and mask is not None and abs(weight) < (1 - (len(self.input_blocks) + 1 + id)/total_layers):
                    transformer_options['cross_mask'] = None
                    transformer_options['self_mask'] = None
                elif floor > 0 and mask is not None and floor > (len(self.input_blocks) + 1 + id)/total_layers:
                    transformer_options['cross_mask'] = mask_zero [:,:txt_len]
                    transformer_options['self_mask'] = mask_zero [:,txt_len:]
                    transformer_options['cross_mask_up'] = mask_up_zero [:,:txt_len]
                    transformer_options['self_mask_up'] = mask_up_zero [:,txt_len:]
                    transformer_options['cross_mask_down'] = mask_down_zero [:,:txt_len]
                    transformer_options['self_mask_down'] = mask_down_zero [:,txt_len:]
                    transformer_options['cross_mask_down2'] = mask_down2_zero[:,:txt_len] if mask_down2_zero is not None else None
                    transformer_options['self_mask_down2'] = mask_down2_zero[:,txt_len:] if mask_down2_zero is not None else None
                elif floor < 0 and mask is not None and abs(floor) > (1 - (len(self.input_blocks) + 1 + id)/total_layers):
                    transformer_options['cross_mask'] = mask_zero [:,:txt_len]
                    transformer_options['self_mask'] = mask_zero [:,txt_len:]
                    transformer_options['cross_mask_up'] = mask_up_zero [:,:txt_len]
                    transformer_options['self_mask_up'] = mask_up_zero [:,txt_len:]
                    transformer_options['cross_mask_down'] = mask_down_zero [:,:txt_len]
                    transformer_options['self_mask_down'] = mask_down_zero [:,txt_len:]
                    transformer_options['cross_mask_down2'] = mask_down2_zero[:,:txt_len] if mask_down2_zero is not None else None
                    transformer_options['self_mask_down2'] = mask_down2_zero[:,txt_len:] if mask_down2_zero is not None else None

                h = forward_timestep_embed(module, h, emb, context, transformer_options, output_shape, time_context=time_context, num_video_frames=num_video_frames, image_only_indicator=image_only_indicator, style_block=style_block)
            h = h.type(x.dtype)

            if self.predict_codebook_ids:
                eps = self.id_predictor(h)
            else:
                eps = self.out(h)
            eps = StyleMMDiT(eps, "proj_out")
            # Only the first batch row is the real prediction; style-guide rows
            # appended earlier are discarded here.
            out_list.append(eps[0:1])

        eps = torch.stack(out_list, dim=0).squeeze(dim=1)

        if recon_iter == 1:
            # Convert pass-1 eps (taken at ISIGMA on new_x) back into an eps
            # expressed at the caller's SIGMA against the original input.
            denoised = new_x * ((ISIGMA ** 2 + 1) ** 0.5) - ISIGMA.to(new_x) * eps.to(new_x)
            if x_tmp is not None:
                eps = (x_tmp * ((SIGMA ** 2 + 1) ** 0.5) - denoised.to(x_tmp)) / SIGMA.to(x_tmp)
            else:
                eps = (x_orig * ((SIGMA ** 2 + 1) ** 0.5) - denoised.to(x_orig)) / SIGMA.to(x_orig)

    # ------------------- post-hoc style transfer on eps -------------------
    y0_style_pos = transformer_options.get("y0_style_pos")
    y0_style_neg = transformer_options.get("y0_style_neg")

    y0_style_pos_weight = transformer_options.get("y0_style_pos_weight", 0.0)
    y0_style_pos_synweight = transformer_options.get("y0_style_pos_synweight", 0.0)
    y0_style_pos_synweight *= y0_style_pos_weight

    y0_style_neg_weight = transformer_options.get("y0_style_neg_weight", 0.0)
    y0_style_neg_synweight = transformer_options.get("y0_style_neg_synweight", 0.0)
    y0_style_neg_synweight *= y0_style_neg_weight

    # Frequency-separation options for the AdaIN style method.
    freqsep_lowpass_method = transformer_options.get("freqsep_lowpass_method")
    freqsep_sigma = transformer_options.get("freqsep_sigma")
    freqsep_kernel_size = transformer_options.get("freqsep_kernel_size")
    freqsep_inner_kernel_size = transformer_options.get("freqsep_inner_kernel_size")
    freqsep_stride = transformer_options.get("freqsep_stride")

    freqsep_lowpass_weight = transformer_options.get("freqsep_lowpass_weight")
    freqsep_highpass_weight= transformer_options.get("freqsep_highpass_weight")
    freqsep_mask = transformer_options.get("freqsep_mask")

    dtype = eps.dtype if self.style_dtype is None else self.style_dtype

    # Work in patch-embedding resolution from here on.
    # NOTE(review): assumes self.Retrojector exists whenever this path runs;
    # earlier code guards with hasattr — confirm guides always set it up.
    h_len //= self.Retrojector.patch_size
    w_len //= self.Retrojector.patch_size

    if y0_style_pos is not None:
        y0_style_pos_weight = transformer_options.get("y0_style_pos_weight")
        y0_style_pos_synweight = transformer_options.get("y0_style_pos_synweight")
        y0_style_pos_synweight *= y0_style_pos_weight
        y0_style_pos_mask = transformer_options.get("y0_style_pos_mask")
        y0_style_pos_mask_edge = transformer_options.get("y0_style_pos_mask_edge")

        y0_style_pos = y0_style_pos.to(dtype)
        # x = x.to(dtype)
        # Recover the un-rescaled input in float64 for numerically stable
        # denoised reconstruction.
        x = x_orig.clone().to(torch.float64) * ((SIGMA ** 2 + 1) ** 0.5)
        eps = eps.to(dtype)
        eps_orig = eps.clone()

        sigma = SIGMA  # t_orig[0].to(torch.float32) / 1000
        denoised = x - sigma * eps

        # Project denoised latents and the style guide into embedding space.
        denoised_embed = self.Retrojector.embed(denoised)  # 2,4,96,168 -> 2,16128,320
        y0_adain_embed = self.Retrojector.embed(y0_style_pos)

        if transformer_options['y0_style_method'] == "scattersort":
            tile_h, tile_w = transformer_options.get('y0_style_tile_height'), transformer_options.get('y0_style_tile_width')
            pad = transformer_options.get('y0_style_tile_padding')
            if pad is not None and tile_h is not None and tile_w is not None:

                denoised_spatial = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)
                y0_adain_spatial = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)

                if EO("scattersort_median_LP"):
                    # Scattersort only the low-pass band; keep high-pass detail.
                    denoised_spatial_LP = median_blur_2d(denoised_spatial, kernel_size=EO("scattersort_median_LP",7))
                    y0_adain_spatial_LP = median_blur_2d(y0_adain_spatial, kernel_size=EO("scattersort_median_LP",7))

                    denoised_spatial_HP = denoised_spatial - denoised_spatial_LP
                    y0_adain_spatial_HP = y0_adain_spatial - y0_adain_spatial_LP

                    denoised_spatial_LP = apply_scattersort_tiled(denoised_spatial_LP, y0_adain_spatial_LP, tile_h, tile_w, pad)

                    denoised_spatial = denoised_spatial_LP + denoised_spatial_HP
                    denoised_embed = rearrange(denoised_spatial, "b c h w -> b (h w) c")
                else:
                    denoised_spatial = apply_scattersort_tiled(denoised_spatial, y0_adain_spatial, tile_h, tile_w, pad)
                    denoised_embed = rearrange(denoised_spatial, "b c h w -> b (h w) c")

            else:
                denoised_embed = apply_scattersort_masked(denoised_embed, y0_adain_embed, y0_style_pos_mask, y0_style_pos_mask_edge, h_len, w_len)

        elif transformer_options['y0_style_method'] == "AdaIN":
            if freqsep_mask is not None:
                freqsep_mask = freqsep_mask.view(1, 1, *freqsep_mask.shape[-2:]).float()
                freqsep_mask = F.interpolate(freqsep_mask.float(), size=(h_len, w_len), mode='nearest-exact')

            if hasattr(self, "adain_tile"):
                # Tiled AdaIN with an alternating half-tile offset between calls
                # (self.adain_flag toggles) to avoid fixed tile seams.
                tile_h, tile_w = self.adain_tile

                denoised_pretile = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)
                y0_adain_pretile = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)

                if self.adain_flag:
                    h_off = tile_h // 2
                    w_off = tile_w // 2
                    denoised_pretile = denoised_pretile[:,:,h_off:-h_off, w_off:-w_off]
                    self.adain_flag = False
                else:
                    h_off = 0
                    w_off = 0
                    self.adain_flag = True

                tiles, orig_shape, grid, strides = tile_latent(denoised_pretile, tile_size=(tile_h,tile_w))
                y0_tiles, orig_shape, grid, strides = tile_latent(y0_adain_pretile, tile_size=(tile_h,tile_w))

                tiles_out = []
                for i in range(tiles.shape[0]):
                    tile = tiles[i].unsqueeze(0)
                    y0_tile = y0_tiles[i].unsqueeze(0)

                    tile = rearrange(tile, "b c h w -> b (h w) c", h=tile_h, w=tile_w)
                    y0_tile = rearrange(y0_tile, "b c h w -> b (h w) c", h=tile_h, w=tile_w)

                    tile = adain_seq_inplace(tile, y0_tile)
                    tiles_out.append(rearrange(tile, "b (h w) c -> b c h w", h=tile_h, w=tile_w))

                tiles_out_tensor = torch.cat(tiles_out, dim=0)
                tiles_out_tensor = untile_latent(tiles_out_tensor, orig_shape, grid, strides)

                if h_off == 0:
                    denoised_pretile = tiles_out_tensor
                else:
                    denoised_pretile[:,:,h_off:-h_off, w_off:-w_off] = tiles_out_tensor
                denoised_embed = rearrange(denoised_pretile, "b c h w -> b (h w) c", h=h_len, w=w_len)

            elif freqsep_lowpass_method is not None and freqsep_lowpass_method.endswith("pw"):  # EO("adain_pw"):
                # Patchwise AdaIN variants (median/gaussian windows).
                denoised_spatial = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)
                y0_adain_spatial = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)

                if freqsep_lowpass_method == "median_pw":
                    denoised_spatial_new = adain_patchwise_row_batch_med(denoised_spatial.clone(), y0_adain_spatial.clone().repeat(denoised_spatial.shape[0],1,1,1), sigma=freqsep_sigma, kernel_size=freqsep_kernel_size, use_median_blur=True, lowpass_weight=freqsep_lowpass_weight, highpass_weight=freqsep_highpass_weight)
                elif freqsep_lowpass_method == "gaussian_pw":
                    denoised_spatial_new = adain_patchwise_row_batch(denoised_spatial.clone(), y0_adain_spatial.clone().repeat(denoised_spatial.shape[0],1,1,1), sigma=freqsep_sigma, kernel_size=freqsep_kernel_size)

                denoised_embed = rearrange(denoised_spatial_new, "b c h w -> b (h w) c", h=h_len, w=w_len)

            elif freqsep_lowpass_method is not None:
                # Frequency separation: take the guide's low-pass band, keep the
                # denoised estimate's high-pass band, blend by the two weights.
                denoised_spatial = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)
                y0_adain_spatial = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)

                if freqsep_lowpass_method == "median":
                    denoised_spatial_LP = median_blur_2d(denoised_spatial, kernel_size=freqsep_kernel_size)
                    y0_adain_spatial_LP = median_blur_2d(y0_adain_spatial, kernel_size=freqsep_kernel_size)
                elif freqsep_lowpass_method == "gaussian":
                    denoised_spatial_LP = gaussian_blur_2d(denoised_spatial, sigma=freqsep_sigma, kernel_size=freqsep_kernel_size)
                    y0_adain_spatial_LP = gaussian_blur_2d(y0_adain_spatial, sigma=freqsep_sigma, kernel_size=freqsep_kernel_size)

                denoised_spatial_HP = denoised_spatial - denoised_spatial_LP

                if EO("adain_fs_uhp"):
                    # Ultra-high-pass split: swap in the guide's UHP band.
                    y0_adain_spatial_HP = y0_adain_spatial - y0_adain_spatial_LP

                    denoised_spatial_ULP = gaussian_blur_2d(denoised_spatial, sigma=EO("adain_fs_uhp_sigma", 1.0), kernel_size=EO("adain_fs_uhp_kernel_size", 3))
                    y0_adain_spatial_ULP = gaussian_blur_2d(y0_adain_spatial, sigma=EO("adain_fs_uhp_sigma", 1.0), kernel_size=EO("adain_fs_uhp_kernel_size", 3))

                    denoised_spatial_UHP = denoised_spatial_HP - denoised_spatial_ULP
                    y0_adain_spatial_UHP = y0_adain_spatial_HP - y0_adain_spatial_ULP

                    # denoised_spatial_HP = y0_adain_spatial_ULP + denoised_spatial_UHP
                    denoised_spatial_HP = denoised_spatial_ULP + y0_adain_spatial_UHP

                denoised_spatial_new = freqsep_lowpass_weight * y0_adain_spatial_LP + freqsep_highpass_weight * denoised_spatial_HP
                denoised_embed = rearrange(denoised_spatial_new, "b c h w -> b (h w) c", h=h_len, w=w_len)

            else:
                denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed)
                for adain_iter in range(EO("style_iter", 0)):
                    # Round-trip through unembed/embed between AdaIN passes.
                    denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed)
                    denoised_embed = self.Retrojector.embed(self.Retrojector.unembed(denoised_embed))
                    denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed)

        elif transformer_options['y0_style_method'] == "WCT":
            # Whitening-coloring transform against the guide statistics.
            self.StyleWCT.set(y0_adain_embed)
            denoised_embed = self.StyleWCT.get(denoised_embed)

            if transformer_options.get('y0_standard_guide') is not None:
                y0_standard_guide = transformer_options.get('y0_standard_guide')

                y0_standard_guide_embed = self.Retrojector.embed(y0_standard_guide)
                f_cs = self.StyleWCT.get(y0_standard_guide_embed)
                self.y0_standard_guide = self.Retrojector.unembed(f_cs)

            if transformer_options.get('y0_inv_standard_guide') is not None:
                y0_inv_standard_guide = transformer_options.get('y0_inv_standard_guide')

                y0_inv_standard_guide_embed = self.Retrojector.embed(y0_inv_standard_guide)
                f_cs = self.StyleWCT.get(y0_inv_standard_guide_embed)
                self.y0_inv_standard_guide = self.Retrojector.unembed(f_cs)

        denoised_approx = self.Retrojector.unembed(denoised_embed)

        # Blend styled eps back toward the original by weight/synweight.
        eps = (x - denoised_approx) / sigma

        if not UNCOND:
            if eps.shape[0] == 2:
                eps[1] = eps_orig[1] + y0_style_pos_weight * (eps[1] - eps_orig[1])
                eps[0] = eps_orig[0] + y0_style_pos_synweight * (eps[0] - eps_orig[0])
            else:
                eps[0] = eps_orig[0] + y0_style_pos_weight * (eps[0] - eps_orig[0])
        elif eps.shape[0] == 1 and UNCOND:
            eps[0] = eps_orig[0] + y0_style_pos_synweight * (eps[0] - eps_orig[0])

        eps = eps.float()

    if y0_style_neg is not None:
        # Mirror of the positive-style block, applied on the uncond side.
        y0_style_neg_weight = transformer_options.get("y0_style_neg_weight")
        y0_style_neg_synweight = transformer_options.get("y0_style_neg_synweight")
        y0_style_neg_synweight *= y0_style_neg_weight
        y0_style_neg_mask = transformer_options.get("y0_style_neg_mask")
        y0_style_neg_mask_edge = transformer_options.get("y0_style_neg_mask_edge")

        y0_style_neg = y0_style_neg.to(dtype)
        # x = x.to(dtype)
        x = x_orig.clone().to(torch.float64) * ((SIGMA ** 2 + 1) ** 0.5)
        eps = eps.to(dtype)
        eps_orig = eps.clone()

        sigma = SIGMA  # t_orig[0].to(torch.float32) / 1000
        denoised = x - sigma * eps

        denoised_embed = self.Retrojector.embed(denoised)
        y0_adain_embed = self.Retrojector.embed(y0_style_neg)

        if transformer_options['y0_style_method'] == "scattersort":
            tile_h, tile_w = transformer_options.get('y0_style_tile_height'), transformer_options.get('y0_style_tile_width')
            pad = transformer_options.get('y0_style_tile_padding')
            if pad is not None and tile_h is not None and tile_w is not None:

                denoised_spatial = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)
                y0_adain_spatial = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)

                denoised_spatial = apply_scattersort_tiled(denoised_spatial, y0_adain_spatial, tile_h, tile_w, pad)

                denoised_embed = rearrange(denoised_spatial, "b c h w -> b (h w) c")

            else:
                denoised_embed = apply_scattersort_masked(denoised_embed, y0_adain_embed, y0_style_neg_mask, y0_style_neg_mask_edge, h_len, w_len)

        elif transformer_options['y0_style_method'] == "AdaIN":
            denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed)
            for adain_iter in range(EO("style_iter", 0)):
                denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed)
                denoised_embed = self.Retrojector.embed(self.Retrojector.unembed(denoised_embed))
                denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed)

        elif transformer_options['y0_style_method'] == "WCT":
            self.StyleWCT.set(y0_adain_embed)
            denoised_embed = self.StyleWCT.get(denoised_embed)

        denoised_approx = self.Retrojector.unembed(denoised_embed)

        if UNCOND:
            eps = (x - denoised_approx) / sigma
            eps[0] = eps_orig[0] + y0_style_neg_weight * (eps[0] - eps_orig[0])
            if eps.shape[0] == 2:
                eps[1] = eps_orig[1] + y0_style_neg_synweight * (eps[1] - eps_orig[1])
        elif eps.shape[0] == 1 and not UNCOND:
            eps[0] = eps_orig[0] + y0_style_neg_synweight * (eps[0] - eps_orig[0])

        eps = eps.float()

    return eps
def clone_inputs_unsafe(*args, index: int=None):
    """Clone every tensor in args; with index, clone only that batch row (re-batched).

    "Unsafe" because, unlike clone_inputs(), None entries are not tolerated.
    """
    if index is None:
        return tuple(t.clone() for t in args)
    return tuple(t[index].unsqueeze(0).clone() for t in args)
def clone_inputs(*args, index: int = None):
    """Clone every tensor in args, passing None entries through unchanged.

    With index, each cloned tensor is the selected batch row, re-batched to size 1.
    """
    if index is None:
        return tuple(None if t is None else t.clone() for t in args)
    return tuple(None if t is None else t[index].unsqueeze(0).clone() for t in args)
================================================
FILE: sd35/mmdit.py
================================================
from functools import partial
from typing import Dict, Optional, List
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import copy
from comfy.ldm.modules.attention import optimized_attention
from comfy.ldm.modules.attention import attention_pytorch #as optimized_attention
from einops import rearrange, repeat
from comfy.ldm.modules.diffusionmodules.util import timestep_embedding
import comfy.ops
import comfy.ldm.common_dit
from ..helper import ExtraOptions
from ..latents import tile_latent, untile_latent, gaussian_blur_2d, median_blur_2d
from ..style_transfer import apply_scattersort_masked, apply_scattersort_tiled, adain_seq_inplace, adain_patchwise_row_batch_med, adain_patchwise_row_batch
#from .attention import optimized_attention
#from .util import timestep_embedding
#import ops
#import common_dit
def default(x, y):
    """Return x unless it is None, in which case return the fallback y."""
    return y if x is None else x
class Mlp(nn.Module):
    """MLP as used in Vision Transformer, MLP-Mixer and related networks.

    Pipeline: fc1 -> act -> dropout -> (optional norm) -> fc2 -> dropout.
    With use_conv, the linear layers become 1x1 convolutions (per-position linear).
    """
    def __init__(
            self,
            in_features,
            hidden_features = None,
            out_features = None,
            act_layer = nn.GELU,
            norm_layer = None,
            bias = True,
            drop = 0.,
            use_conv = False,
            dtype = None,
            device = None,
            operations = None,
    ):
        super().__init__()
        # widths default to the input width when not given
        out_features = out_features or in_features
        hidden_features = hidden_features or in_features
        drop_probs = drop
        # a kernel_size=1 Conv2d is equivalent to a Linear applied per position
        linear_layer = partial(operations.Conv2d, kernel_size=1) if use_conv else operations.Linear

        self.fc1 = linear_layer(in_features, hidden_features, bias=bias, dtype=dtype, device=device)
        self.act = act_layer()
        self.drop1 = nn.Dropout(drop_probs)
        self.norm = norm_layer(hidden_features) if norm_layer is not None else nn.Identity()
        self.fc2 = linear_layer(hidden_features, out_features, bias=bias, dtype=dtype, device=device)
        self.drop2 = nn.Dropout(drop_probs)

    def forward(self, x):
        # apply the fixed layer pipeline in registration order
        for layer in (self.fc1, self.act, self.drop1, self.norm, self.fc2, self.drop2):
            x = layer(x)
        return x
class PatchEmbed(nn.Module):
    """2D (or 3D) image-to-patch embedding.

    Projects the input into patch tokens with a strided convolution.  When
    dynamic_img_pad is set, the input is padded (with padding_mode) so its
    spatial size is a multiple of the patch size before projection.
    """
    dynamic_img_pad: torch.jit.Final[bool]

    def __init__(
            self,
            img_size : Optional[int] = 224,
            patch_size : int = 16,
            in_chans : int = 3,
            embed_dim : int = 768,
            norm_layer = None,
            flatten : bool = True,
            bias : bool = True,
            strict_img_size : bool = True,
            dynamic_img_pad : bool = True,
            padding_mode ='circular',
            conv3d = False,
            dtype = None,
            device = None,
            operations = None,
    ):
        super().__init__()
        # patch_size may be a single int or an already-shaped sequence.
        # Catch only TypeError from len(): the previous bare `except:` also
        # swallowed KeyboardInterrupt/SystemExit.
        try:
            len(patch_size)
            self.patch_size = patch_size
        except TypeError:
            if conv3d:
                self.patch_size = (patch_size, patch_size, patch_size)
            else:
                self.patch_size = (patch_size, patch_size)
        self.padding_mode = padding_mode
        # flatten spatial dim and transpose to channels last, kept for bwd compat
        self.flatten = flatten
        self.strict_img_size = strict_img_size
        self.dynamic_img_pad = dynamic_img_pad

        if conv3d:
            self.proj = operations.Conv3d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size, bias=bias, dtype=dtype, device=device)
        else:
            self.proj = operations.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size, bias=bias, dtype=dtype, device=device)
        self.norm = norm_layer(embed_dim) if norm_layer else nn.Identity()

    def forward(self, x):
        if self.dynamic_img_pad:
            # pad spatial dims up to a multiple of the patch size
            x = comfy.ldm.common_dit.pad_to_patch_size(x, self.patch_size, padding_mode=self.padding_mode)
        x = self.proj(x)
        if self.flatten:
            x = x.flatten(2).transpose(1, 2)  # NCHW -> NLC
        x = self.norm(x)
        return x
def modulate(x, shift, scale):
    """adaLN modulation: x * (1 + scale) + shift, broadcast over the sequence dim.

    A None shift is treated as zero (value-identical to adding a zeros tensor).
    """
    scaled = x * (1 + scale.unsqueeze(1))
    if shift is None:
        return scaled
    return scaled + shift.unsqueeze(1)
#################################################################################
# Sine/Cosine Positional Embedding Functions #
#################################################################################
def get_2d_sincos_pos_embed(
    embed_dim,
    grid_size,
    cls_token = False,
    extra_tokens = 0,
    scaling_factor = None,
    offset = None,
):
    """
    Build a 2D sin/cos positional embedding for a square grid.

    grid_size: int of the grid height and width
    return:
    pos_embed: [grid_size*grid_size, embed_dim] or [extra_tokens+grid_size*grid_size, embed_dim]
    (with extra_tokens zero rows prepended when cls_token is set)
    """
    axis = np.arange(grid_size, dtype=np.float32)
    grid = np.stack(np.meshgrid(axis, axis), axis=0)  # here w goes first
    if scaling_factor is not None:
        grid = grid / scaling_factor
    if offset is not None:
        grid = grid - offset

    grid = grid.reshape([2, 1, grid_size, grid_size])
    pos_embed = get_2d_sincos_pos_embed_from_grid(embed_dim, grid)
    if cls_token and extra_tokens > 0:
        zeros = np.zeros([extra_tokens, embed_dim])
        pos_embed = np.concatenate([zeros, pos_embed], axis=0)
    return pos_embed
def get_2d_sincos_pos_embed_from_grid(embed_dim, grid):
    """Encode grid rows and columns with half the channels each, then concatenate."""
    assert embed_dim % 2 == 0
    half = embed_dim // 2
    # use half of dimensions to encode grid_h, the other half for grid_w
    emb_h = get_1d_sincos_pos_embed_from_grid(half, grid[0])  # (H*W, D/2)
    emb_w = get_1d_sincos_pos_embed_from_grid(half, grid[1])  # (H*W, D/2)
    return np.concatenate([emb_h, emb_w], axis=1)  # (H*W, D)
def get_1d_sincos_pos_embed_from_grid(embed_dim, pos):
    """
    embed_dim: output dimension for each position
    pos: positions to be encoded (flattened to shape (M,))
    out: (M, embed_dim) laid out as [sin | cos] halves
    """
    assert embed_dim % 2 == 0
    half = embed_dim // 2
    # frequency ladder: 1 / 10000^(i / (D/2)) for i in [0, D/2)
    freqs = 1.0 / 10000 ** (np.arange(half, dtype=np.float64) / half)  # (D/2,)
    angles = np.outer(pos.reshape(-1), freqs)  # (M, D/2), outer product
    return np.concatenate([np.sin(angles), np.cos(angles)], axis=1)  # (M, D)
def get_1d_sincos_pos_embed_from_grid_torch(embed_dim, pos, device=None, dtype=torch.float32):
    """Torch port of the 1D sin/cos embedding: returns (M, embed_dim) as [sin | cos]."""
    half = embed_dim // 2
    freqs = torch.arange(half, device=device, dtype=dtype)
    freqs = 1.0 / 10000 ** (freqs / (embed_dim / 2.0))  # (D/2,)
    angles = torch.einsum("m,d->md", pos.reshape(-1), freqs)  # (M, D/2), outer product
    return torch.cat([torch.sin(angles), torch.cos(angles)], dim=1)  # (M, D)
def get_2d_sincos_pos_embed_torch(embed_dim, w, h, val_center=7.5, val_magnitude=7.5, device=None, dtype=torch.float32):
    """2D sin/cos embedding over a coordinate range centered at val_center.

    The range on each axis is stretched by that axis's aspect ratio so the
    shorter side spans 2*val_magnitude.  Returns (H*W, embed_dim) with the
    width embedding in the first half of the channels.
    """
    small = min(h, w)
    val_h = (h / small) * val_magnitude
    val_w = (w / small) * val_magnitude
    coords_h = torch.linspace(-val_h + val_center, val_h + val_center, h, device=device, dtype=dtype)
    coords_w = torch.linspace(-val_w + val_center, val_w + val_center, w, device=device, dtype=dtype)
    grid_h, grid_w = torch.meshgrid(coords_h, coords_w, indexing='ij')
    half = embed_dim // 2
    emb_h = get_1d_sincos_pos_embed_from_grid_torch(half, grid_h, device=device, dtype=dtype)
    emb_w = get_1d_sincos_pos_embed_from_grid_torch(half, grid_w, device=device, dtype=dtype)
    return torch.cat([emb_w, emb_h], dim=1)  # (H*W, D)
#################################################################################
# Embedding Layers for Timesteps and Class Labels #
#################################################################################
class TimestepEmbedder(nn.Module):
    """
    Embeds scalar timesteps into vector representations:
    sinusoidal features of width frequency_embedding_size, then a 2-layer MLP.
    """
    def __init__(self, hidden_size, frequency_embedding_size=256, dtype=None, device=None, operations=None):
        super().__init__()
        self.mlp = nn.Sequential(
            operations.Linear(frequency_embedding_size, hidden_size, bias=True, dtype=dtype, device=device),
            nn.SiLU(),
            operations.Linear(hidden_size, hidden_size, bias=True, dtype=dtype, device=device),
        )
        self.frequency_embedding_size = frequency_embedding_size

    def forward(self, t, dtype, **kwargs):
        # sinusoidal features first, then the learned projection
        freqs = timestep_embedding(t, self.frequency_embedding_size).to(dtype)
        return self.mlp(freqs)
class VectorEmbedder(nn.Module):
    """
    Embeds a flat vector of dimension input_dim into hidden_size via a 2-layer MLP.
    """
    def __init__(self, input_dim: int, hidden_size: int, dtype=None, device=None, operations=None):
        super().__init__()
        self.mlp = nn.Sequential(
            operations.Linear(input_dim, hidden_size, bias=True, dtype=dtype, device=device),
            nn.SiLU(),
            operations.Linear(hidden_size, hidden_size, bias=True, dtype=dtype, device=device),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.mlp(x)
#################################################################################
# Core DiT Model #
#################################################################################
def split_qkv(qkv, head_dim):
    """Split a fused (B, L, 3*H*head_dim) projection into q, k, v of shape (B, L, H, head_dim)."""
    b, l = qkv.shape[0], qkv.shape[1]
    stacked = qkv.reshape(b, l, 3, -1, head_dim).movedim(2, 0)
    return stacked[0], stacked[1], stacked[2]
class SelfAttention(nn.Module):
    """Multi-head self-attention with optional per-head q/k normalization.

    With pre_only, the module only produces q/k/v (no output projection):
    the attention result is consumed elsewhere.
    """
    ATTENTION_MODES = ("xformers", "torch", "torch-hb", "math", "debug")

    def __init__(
            self,
            dim : int,
            num_heads : int = 8,
            qkv_bias : bool = False,
            qk_scale : Optional[float] = None,
            proj_drop : float = 0.0,
            attn_mode : str = "xformers",
            pre_only : bool = False,
            qk_norm : Optional[str] = None,
            rmsnorm : bool = False,
            dtype = None,
            device = None,
            operations = None,
    ):
        super().__init__()
        self.num_heads = num_heads
        self.head_dim = dim // num_heads

        self.qkv = operations.Linear(dim, dim * 3, bias=qkv_bias, dtype=dtype, device=device)
        if not pre_only:
            # output projection only exists when this module finishes attention itself
            self.proj = operations.Linear(dim, dim, dtype=dtype, device=device)
            self.proj_drop = nn.Dropout(proj_drop)
        assert attn_mode in self.ATTENTION_MODES
        self.attn_mode = attn_mode
        self.pre_only = pre_only

        # optional normalization applied per head to q and k
        if qk_norm == "rms":
            self.ln_q = RMSNorm(self.head_dim, elementwise_affine=True, eps=1.0e-6, dtype=dtype, device=device)
            self.ln_k = RMSNorm(self.head_dim, elementwise_affine=True, eps=1.0e-6, dtype=dtype, device=device)
        elif qk_norm == "ln":
            self.ln_q = operations.LayerNorm(self.head_dim, elementwise_affine=True, eps=1.0e-6, dtype=dtype, device=device)
            self.ln_k = operations.LayerNorm(self.head_dim, elementwise_affine=True, eps=1.0e-6, dtype=dtype, device=device)
        elif qk_norm is None:
            self.ln_q = nn.Identity()
            self.ln_k = nn.Identity()
        else:
            raise ValueError(qk_norm)

    def pre_attention(self, x: torch.Tensor) -> torch.Tensor:
        """Project x to (q, k, v); q and k are normalized then re-flattened to (B, L, C)."""
        batch, length = x.shape[0], x.shape[1]
        q, k, v = split_qkv(self.qkv(x), self.head_dim)
        q = self.ln_q(q).reshape(batch, length, -1)
        k = self.ln_k(k).reshape(batch, length, -1)
        return (q, k, v)

    def post_attention(self, x: torch.Tensor) -> torch.Tensor:
        assert not self.pre_only
        return self.proj_drop(self.proj(x))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        q, k, v = self.pre_attention(x)
        out = optimized_attention(q, k, v, heads=self.num_heads)
        return self.post_attention(out)
class RMSNorm(torch.nn.Module):
    def __init__(
        self, dim: int, elementwise_affine: bool = False, eps: float = 1e-6, device=None, dtype=None
    ):
        """
        RMS normalization layer.

        Args:
            dim (int): The dimension of the input tensor.
            elementwise_affine (bool): When True, a learnable per-channel scale is used.
            eps (float, optional): A small value added to the denominator for numerical stability. Default is 1e-6.
        Attributes:
            eps (float): A small value added to the denominator for numerical stability.
            weight (nn.Parameter): Learnable scaling parameter (None when not affine).
        """
        super().__init__()
        self.eps = eps
        self.learnable_scale = elementwise_affine
        if elementwise_affine:
            self.weight = nn.Parameter(torch.empty(dim, device=device, dtype=dtype))
        else:
            self.register_parameter("weight", None)

    def forward(self, x):
        # delegate to comfy's rms_norm implementation (handles weight=None)
        return comfy.ldm.common_dit.rms_norm(x, self.weight, self.eps)
class SwiGLUFeedForward(nn.Module):
    def __init__(
        self,
        dim: int,
        hidden_dim: int,
        multiple_of: int,
        ffn_dim_multiplier: Optional[float] = None,
    ):
        """
        SwiGLU feed-forward: w2(silu(w1(x)) * w3(x)).

        Args:
            dim (int): Input/output dimension.
            hidden_dim (int): Nominal hidden dimension; rescaled by 2/3,
                optionally by ffn_dim_multiplier, then rounded up to a
                multiple of multiple_of.
            multiple_of (int): Value to ensure hidden dimension is a multiple of this value.
            ffn_dim_multiplier (float, optional): Custom multiplier for hidden dimension.
        Attributes:
            w1, w3: Input projections (gate and value branches).
            w2: Output projection.
        """
        super().__init__()
        width = int(2 * hidden_dim / 3)
        # custom dim factor multiplier
        if ffn_dim_multiplier is not None:
            width = int(ffn_dim_multiplier * width)
        # round up to the nearest multiple of multiple_of
        width = multiple_of * ((width + multiple_of - 1) // multiple_of)

        self.w1 = nn.Linear(dim, width, bias=False)
        self.w2 = nn.Linear(width, dim, bias=False)
        self.w3 = nn.Linear(dim, width, bias=False)

    def forward(self, x):
        gate = nn.functional.silu(self.w1(x))
        return self.w2(gate * self.w3(x))
class DismantledBlock(nn.Module):
    """
    A DiT block with gated adaptive layer norm (adaLN) conditioning.

    The block is "dismantled" so attention can run externally:
    pre_attention()/pre_attention_x() produce qkv plus residual
    intermediates, and post_attention()/post_attention_x() consume the
    attention output.  forward() wires the halves together locally.
    """
    ATTENTION_MODES = ("xformers", "torch", "torch-hb", "math", "debug")

    def __init__(
            self,
            hidden_size : int,
            num_heads : int,
            mlp_ratio : float = 4.0,
            attn_mode : str = "xformers",
            qkv_bias : bool = False,
            pre_only : bool = False,
            rmsnorm : bool = False,
            scale_mod_only : bool = False,
            swiglu : bool = False,
            qk_norm : Optional[str] = None,
            x_block_self_attn : bool = False,
            dtype = None,
            device = None,
            operations = None,
            **block_kwargs,
    ):
        super().__init__()
        assert attn_mode in self.ATTENTION_MODES
        if not rmsnorm:
            self.norm1 = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device)
        else:
            self.norm1 = RMSNorm(hidden_size, elementwise_affine=False, eps=1e-6)
        self.attn = SelfAttention(
            dim        = hidden_size,
            num_heads  = num_heads,
            qkv_bias   = qkv_bias,
            attn_mode  = attn_mode,
            pre_only   = pre_only,
            qk_norm    = qk_norm,
            rmsnorm    = rmsnorm,
            dtype      = dtype,
            device     = device,
            operations = operations
        )
        if x_block_self_attn:
            assert not pre_only
            assert not scale_mod_only
            self.x_block_self_attn = True
            # second, independent self-attention branch (dual-attn blocks)
            self.attn2 = SelfAttention(
                dim        = hidden_size,
                num_heads  = num_heads,
                qkv_bias   = qkv_bias,
                attn_mode  = attn_mode,
                pre_only   = False,
                qk_norm    = qk_norm,
                rmsnorm    = rmsnorm,
                dtype      = dtype,
                device     = device,
                operations = operations
            )
        else:
            self.x_block_self_attn = False
        if not pre_only:
            if not rmsnorm:
                self.norm2 = operations.LayerNorm(
                    hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device
                )
            else:
                self.norm2 = RMSNorm(hidden_size, elementwise_affine=False, eps=1e-6)
        mlp_hidden_dim = int(hidden_size * mlp_ratio)
        if not pre_only:
            if not swiglu:
                self.mlp = Mlp(
                    in_features     = hidden_size,
                    hidden_features = mlp_hidden_dim,
                    act_layer       = lambda: nn.GELU(approximate = "tanh"),
                    drop            = 0,
                    dtype           = dtype,
                    device          = device,
                    operations      = operations
                )
            else:
                self.mlp = SwiGLUFeedForward(
                    dim         = hidden_size,
                    hidden_dim  = mlp_hidden_dim,
                    multiple_of = 256,
                )
        self.scale_mod_only = scale_mod_only
        # number of adaLN modulation vectors:
        # 9 for dual-attn blocks, 6/2 with shifts, 4/1 scale-only
        if x_block_self_attn:
            assert not pre_only
            assert not scale_mod_only
            n_mods = 9
        elif not scale_mod_only:
            n_mods = 6 if not pre_only else 2
        else:
            n_mods = 4 if not pre_only else 1
        self.adaLN_modulation = nn.Sequential(
            nn.SiLU(), operations.Linear(hidden_size, n_mods * hidden_size, bias=True, dtype=dtype, device=device)
        )
        self.pre_only = pre_only

    def pre_attention(self, x: torch.Tensor, c: torch.Tensor) -> torch.Tensor:
        """Modulate x with adaLN(c) and return (qkv, residual intermediates or None)."""
        if not self.pre_only:
            if not self.scale_mod_only:
                (
                    shift_msa,
                    scale_msa,
                    gate_msa,
                    shift_mlp,
                    scale_mlp,
                    gate_mlp,
                ) = self.adaLN_modulation(c).chunk(6, dim=1)
            else:
                shift_msa = None
                shift_mlp = None
                (
                    scale_msa,
                    gate_msa,
                    scale_mlp,
                    gate_mlp,
                ) = self.adaLN_modulation(
                    c
                ).chunk(4, dim=1)
            qkv = self.attn.pre_attention(modulate(self.norm1(x), shift_msa, scale_msa))
            return qkv, (
                x,
                gate_msa,
                shift_mlp,
                scale_mlp,
                gate_mlp,
            )
        else:
            # final (pre-only) block: no residual/MLP path afterwards
            if not self.scale_mod_only:
                (
                    shift_msa,
                    scale_msa,
                ) = self.adaLN_modulation(
                    c
                ).chunk(2, dim=1)
            else:
                shift_msa = None
                scale_msa = self.adaLN_modulation(c)
            qkv = self.attn.pre_attention(modulate(self.norm1(x), shift_msa, scale_msa))
            return qkv, None

    def post_attention(self, attn, x, gate_msa, shift_mlp, scale_mlp, gate_mlp):
        """Apply gated attention residual, then the gated MLP residual."""
        assert not self.pre_only
        x = x + gate_msa.unsqueeze(1) * self.attn.post_attention(attn)
        x = x + gate_mlp.unsqueeze(1) * self.mlp(
            modulate(self.norm2(x), shift_mlp, scale_mlp)
        )
        return x

    def pre_attention_x(self, x: torch.Tensor, c: torch.Tensor) -> torch.Tensor:
        """Dual-attention variant: produce qkv for both attn branches from one norm."""
        assert self.x_block_self_attn
        (
            shift_msa,
            scale_msa,
            gate_msa,
            shift_mlp,
            scale_mlp,
            gate_mlp,
            shift_msa2,
            scale_msa2,
            gate_msa2,
        ) = self.adaLN_modulation(c).chunk(9, dim=1)
        x_norm = self.norm1(x)
        qkv  = self.attn .pre_attention(modulate(x_norm, shift_msa,  scale_msa ))
        qkv2 = self.attn2.pre_attention(modulate(x_norm, shift_msa2, scale_msa2))
        return qkv, qkv2, (
            x,
            gate_msa,
            shift_mlp,
            scale_mlp,
            gate_mlp,
            gate_msa2,
        )

    def post_attention_x(self, attn, attn2, x, gate_msa, shift_mlp, scale_mlp, gate_mlp, gate_msa2):
        """Merge both gated attention residuals, then the gated MLP residual."""
        assert not self.pre_only
        attn1 = self.attn .post_attention(attn)
        attn2 = self.attn2.post_attention(attn2)
        out1 = gate_msa .unsqueeze(1) * attn1
        out2 = gate_msa2.unsqueeze(1) * attn2
        x = x + out1
        x = x + out2
        x = x + gate_mlp.unsqueeze(1) * self.mlp(
            modulate(self.norm2(x), shift_mlp, scale_mlp)
        )
        return x

    def forward(self, x: torch.Tensor, c: torch.Tensor) -> torch.Tensor:
        assert not self.pre_only
        if self.x_block_self_attn:
            qkv, qkv2, intermediates = self.pre_attention_x(x, c)
            # FIXED: optimized_attention takes `heads` and returns a single
            # tensor (see every other call in this file); the previous
            # `num_heads=` keyword with tuple unpacking could not work.
            attn = optimized_attention(
                qkv[0], qkv[1], qkv[2],
                heads=self.attn.num_heads,
            )
            attn2 = optimized_attention(
                qkv2[0], qkv2[1], qkv2[2],
                heads=self.attn2.num_heads,
            )
            return self.post_attention_x(attn, attn2, *intermediates)
        else:
            qkv, intermediates = self.pre_attention(x, c)
            attn = optimized_attention(
                qkv[0], qkv[1], qkv[2],
                heads=self.attn.num_heads,
            )
            return self.post_attention(attn, *intermediates)
def block_mixing(*args, use_checkpoint=True, **kwargs):
    """Run _block_mixing, optionally under (non-reentrant) gradient checkpointing."""
    if not use_checkpoint:
        return _block_mixing(*args, **kwargs)
    return torch.utils.checkpoint.checkpoint(
        _block_mixing, *args, use_reentrant=False, **kwargs
    )
# context_qkv = Tuple[Tensor,Tensor,Tensor] 2,154,1536 2,154,1536 2,154,24,64     x_qkv 2,4096,1536, ..., 2,4096,24,64
def _block_mixing(context, x, context_block, x_block, c, mask=None):
    # Joint attention between the context (text) stream and the x (image)
    # stream: both are projected to q/k/v, concatenated along the token dim,
    # attended together, then split back and post-processed per stream.
    context_qkv, context_intermediates = context_block.pre_attention(context, c)

    if x_block.x_block_self_attn:  # x_qkv2 = extra image-only self-attn branch
        x_qkv, x_qkv2, x_intermediates = x_block.pre_attention_x(x, c)
    else:
        x_qkv, x_intermediates = x_block.pre_attention (x, c)

    # concatenate q, k, v of both streams along the token dimension
    o = []
    for t in range(3):
        o.append(torch.cat((context_qkv[t], x_qkv[t]), dim=1))
    qkv = tuple(o)

    if mask is not None:
        # attention_pytorch is used here because it takes an explicit mask
        attn = attention_pytorch(  #1,4186,1536
            qkv[0], qkv[1], qkv[2],
            heads = x_block.attn.num_heads,
            mask  = mask    #> 0 if mask is not None else None,
        )
    else:
        attn = optimized_attention(  #1,4186,1536
            qkv[0], qkv[1], qkv[2],
            heads = x_block.attn.num_heads,
            mask  = None    #> 0 if mask is not None else None,
        )
    # split the joint attention output back into context / image parts
    context_attn, x_attn = (
        attn[:, : context_qkv[0].shape[1] ],
        attn[:,   context_qkv[0].shape[1] : ],
    )

    if not context_block.pre_only:
        context = context_block.post_attention(context_attn, *context_intermediates)
    else:
        # final block: the context stream is discarded
        context = None

    if x_block.x_block_self_attn:
        # run the second, image-only attention branch and merge both results
        attn2 = optimized_attention(  # x_qkv2 2,4096,1536
            x_qkv2[0], x_qkv2[1], x_qkv2[2],
            heads = x_block.attn2.num_heads,
        )
        x = x_block.post_attention_x(x_attn, attn2, *x_intermediates)
    else:
        x = x_block.post_attention  (x_attn,        *x_intermediates)
    return context, x
class ReJointBlock(nn.Module):
    """Small wrapper pairing a context DismantledBlock with an x DismantledBlock (serves as an fsdp unit)."""
    def __init__(self, *args, **kwargs):
        super().__init__()
        pre_only          = kwargs.pop("pre_only")
        qk_norm           = kwargs.pop("qk_norm", None)
        x_block_self_attn = kwargs.pop("x_block_self_attn", False)
        # the context block may be pre-only (last layer); the x block never is
        self.context_block = DismantledBlock(*args, pre_only=pre_only, qk_norm=qk_norm, **kwargs)
        self.x_block       = DismantledBlock(*args, pre_only=False, qk_norm=qk_norm, x_block_self_attn=x_block_self_attn, **kwargs)

    def forward(self, *args, **kwargs):
        # delegate joint attention to block_mixing with our two sub-blocks
        return block_mixing(
            *args, context_block=self.context_block, x_block=self.x_block, **kwargs
        )
class FinalLayer(nn.Module):
    """
    The final layer of DiT: adaLN modulation followed by a linear projection
    to patch outputs (or to total_out_channels when given).
    """
    def __init__(
            self,
            hidden_size : int,
            patch_size : int,
            out_channels : int,
            total_out_channels : Optional[int] = None,
            dtype = None,
            device = None,
            operations = None,
    ):
        super().__init__()
        self.norm_final = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device)
        if total_out_channels is None:
            out_dim = patch_size * patch_size * out_channels
        else:
            out_dim = total_out_channels
        self.linear = operations.Linear(hidden_size, out_dim, bias=True, dtype=dtype, device=device)
        self.adaLN_modulation = nn.Sequential(
            nn.SiLU(), operations.Linear(hidden_size, 2 * hidden_size, bias=True, dtype=dtype, device=device)
        )

    def forward(self, x: torch.Tensor, c: torch.Tensor) -> torch.Tensor:
        shift, scale = self.adaLN_modulation(c).chunk(2, dim=1)
        return self.linear(modulate(self.norm_final(x), shift, scale))
class SelfAttentionContext(nn.Module):
    """Plain multi-head self-attention over context tokens.

    Note: the dim_head argument is ignored and recomputed as dim // heads.
    """
    def __init__(self, dim, heads=8, dim_head=64, dtype=None, device=None, operations=None):
        super().__init__()
        self.heads = heads
        self.dim_head = dim // heads
        self.qkv = operations.Linear(dim, dim * 3, bias=True, dtype=dtype, device=device)
        self.proj = operations.Linear(dim, dim, dtype=dtype, device=device)

    def forward(self, x):
        q, k, v = split_qkv(self.qkv(x), self.dim_head)
        flat_q = q.reshape(q.shape[0], q.shape[1], -1)
        attn_out = optimized_attention(flat_q, k, v, heads=self.heads)
        return self.proj(attn_out)
class ContextProcessorBlock(nn.Module):
    """Pre-norm transformer block (self-attention + MLP) over context embeddings."""
    def __init__(self, context_size, dtype=None, device=None, operations=None):
        super().__init__()
        self.norm1 = operations.LayerNorm(context_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device)
        self.attn = SelfAttentionContext(context_size, dtype=dtype, device=device, operations=operations)
        self.norm2 = operations.LayerNorm(context_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device)
        self.mlp = Mlp(in_features=context_size, hidden_features=(context_size * 4), act_layer=lambda: nn.GELU(approximate="tanh"), drop=0, dtype=dtype, device=device, operations=operations)

    def forward(self, x):
        # residuals are accumulated in place, mutating the caller's tensor
        # (kept identical to the original behavior)
        x += self.attn(self.norm1(x))
        x += self.mlp (self.norm2(x))
        return x
class ContextProcessor(nn.Module):
    """A stack of ContextProcessorBlocks followed by a final LayerNorm."""
    def __init__(self, context_size, num_layers, dtype=None, device=None, operations=None):
        super().__init__()
        self.layers = torch.nn.ModuleList(
            [ContextProcessorBlock(context_size, dtype=dtype, device=device, operations=operations) for _ in range(num_layers)]
        )
        self.norm = operations.LayerNorm(context_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device)

    def forward(self, x):
        for layer in self.layers:
            x = layer(x)
        return self.norm(x)
class MMDiT(nn.Module):
"""
Diffusion model with a Transformer backbone.
"""
    def __init__(
        self,
        input_size               : int            = 32,
        patch_size               : int            = 2,
        in_channels              : int            = 4,
        depth                    : int            = 28,
        # hidden_size            : Optional[int]  = None,
        # num_heads              : Optional[int]  = None,
        mlp_ratio                : float          = 4.0,
        learn_sigma              : bool           = False,
        adm_in_channels          : Optional[int]  = None,
        context_embedder_config  : Optional[Dict] = None,
        compile_core             : bool           = False,
        use_checkpoint           : bool           = False,
        register_length          : int            = 0,
        attn_mode                : str            = "torch",
        rmsnorm                  : bool           = False,
        scale_mod_only           : bool           = False,
        swiglu                   : bool           = False,
        out_channels             : Optional[int]  = None,
        pos_embed_scaling_factor : Optional[float] = None,
        pos_embed_offset         : Optional[float] = None,
        pos_embed_max_size       : Optional[int]  = None,
        num_patches                               = None,
        qk_norm                  : Optional[str]  = None,
        qkv_bias                 : bool           = True,
        context_processor_layers                  = None,
        # NOTE(review): mutable default argument ([]) — shared across
        # instances; appears to be only stored/read here.
        x_block_self_attn        : bool           = False,
        x_block_self_attn_layers : Optional[List[int]] = [],
        context_size                              = 4096,
        num_blocks                                = None,
        final_layer                               = True,
        skip_blocks                               = False,
        dtype                                     = None, #TODO
        device                                    = None,
        operations                                = None,
    ):
        """Build the MMDiT backbone: patch/timestep/vector embedders, joint blocks, final layer.

        depth determines both hidden_size (64 * depth) and num_heads (depth),
        i.e. a fixed head size of 64.
        """
        super().__init__()
        self.dtype = dtype
        self.learn_sigma = learn_sigma
        self.in_channels = in_channels
        # learn_sigma doubles the output channels (mean + variance prediction)
        default_out_channels = in_channels * 2 if learn_sigma else in_channels
        self.out_channels = default(out_channels, default_out_channels)
        self.patch_size = patch_size
        self.pos_embed_scaling_factor = pos_embed_scaling_factor
        self.pos_embed_offset = pos_embed_offset
        self.pos_embed_max_size = pos_embed_max_size
        self.x_block_self_attn_layers = x_block_self_attn_layers

        # hidden_size = default(hidden_size, 64 * depth)
        # num_heads = default(num_heads, hidden_size // 64)

        # apply magic --> this defines a head_size of 64
        self.hidden_size = 64 * depth
        num_heads = depth
        if num_blocks is None:
            num_blocks = depth

        self.depth = depth
        self.num_heads = num_heads

        self.x_embedder = PatchEmbed(
            input_size,
            patch_size,
            in_channels,
            self.hidden_size,
            bias            = True,
            strict_img_size = self.pos_embed_max_size is None,
            dtype           = dtype,
            device          = device,
            operations      = operations
        )
        self.t_embedder = TimestepEmbedder(self.hidden_size, dtype=dtype, device=device, operations=operations)

        # optional class/vector conditioning (e.g. pooled text embedding)
        self.y_embedder = None
        if adm_in_channels is not None:
            assert isinstance(adm_in_channels, int)
            self.y_embedder = VectorEmbedder(adm_in_channels, self.hidden_size, dtype=dtype, device=device, operations=operations)

        if context_processor_layers is not None:
            self.context_processor = ContextProcessor(context_size, context_processor_layers, dtype=dtype, device=device, operations=operations)
        else:
            self.context_processor = None

        self.context_embedder = nn.Identity()
        if context_embedder_config is not None:
            if context_embedder_config["target"] == "torch.nn.Linear":
                self.context_embedder = operations.Linear(**context_embedder_config["params"], dtype=dtype, device=device)

        # optional learned register tokens prepended to the context sequence
        self.register_length = register_length
        if self.register_length > 0:
            self.register = nn.Parameter(torch.randn(1, register_length, self.hidden_size, dtype=dtype, device=device))

        # num_patches = self.x_embedder.num_patches
        # Will use fixed sin-cos embedding:
        # just use a buffer already
        if num_patches is not None:
            self.register_buffer(
                "pos_embed",
                torch.empty(1, num_patches, self.hidden_size, dtype=dtype, device=device),
            )
        else:
            # pos_embed is synthesized on the fly in cropped_pos_embed()
            self.pos_embed = None

        self.use_checkpoint = use_checkpoint
        if not skip_blocks:
            self.joint_blocks = nn.ModuleList(
                [
                    ReJointBlock(
                        self.hidden_size,
                        num_heads,
                        mlp_ratio         = mlp_ratio,
                        qkv_bias          = qkv_bias,
                        attn_mode         = attn_mode,
                        # the last block's context stream is pre-only when a
                        # final layer follows (its context output is unused)
                        pre_only          = (i == num_blocks - 1) and final_layer,
                        rmsnorm           = rmsnorm,
                        scale_mod_only    = scale_mod_only,
                        swiglu            = swiglu,
                        qk_norm           = qk_norm,
                        x_block_self_attn = (i in self.x_block_self_attn_layers) or x_block_self_attn,
                        dtype             = dtype,
                        device            = device,
                        operations        = operations,
                    )
                    for i in range(num_blocks)
                ]
            )
        if final_layer:
            self.final_layer = FinalLayer(self.hidden_size, patch_size, self.out_channels, dtype=dtype, device=device, operations=operations)

        if compile_core:
            # compile path is intentionally disabled (assert False precedes it)
            assert False
            self.forward_core_with_concat = torch.compile(self.forward_core_with_concat)
def cropped_pos_embed(self, hw, device=None):
p = self.x_embedder.patch_size[0]
h, w = hw
# patched size
h = (h + 1) // p
w = (w + 1) // p
if self.pos_embed is None:
return get_2d_sincos_pos_embed_torch(self.hidden_size, w, h, device=device)
assert self.pos_embed_max_size is not None
assert h <= self.pos_embed_max_size, (h, self.pos_embed_max_size)
assert w <= self.pos_embed_max_size, (w, self.pos_embed_max_size)
top = (self.pos_embed_max_size - h) // 2
left = (self.pos_embed_max_size - w) // 2
spatial_pos_embed = rearrange(
self.pos_embed,
"1 (h w) c -> 1 h w c",
h = self.pos_embed_max_size,
w = self.pos_embed_max_size,
)
spatial_pos_embed = spatial_pos_embed[:, top : top + h, left : left + w, :]
spatial_pos_embed = rearrange(spatial_pos_embed, "1 h w c -> 1 (h w) c")
# print(spatial_pos_embed, top, left, h, w)
# # t = get_2d_sincos_pos_embed_torch(self.hidden_size, w, h, 7.875, 7.875, device=device) #matches exactly for 1024 res
# t = get_2d_sincos_pos_embed_torch(self.hidden_size, w, h, 7.5, 7.5, device=device) #scales better
# # print(t)
# return t
return spatial_pos_embed
def unpatchify(self, x, hw=None):
"""
x: (N, T, patch_size**2 * C)
imgs: (N, H, W, C)
"""
c = self.out_channels
p = self.x_embedder.patch_size[0]
if hw is None:
h = w = int(x.shape[1] ** 0.5)
else:
h, w = hw
h = (h + 1) // p
w = (w + 1) // p
assert h * w == x.shape[1]
x = x.reshape(shape=(x.shape[0], h, w, p, p, c))
x = torch.einsum("nhwpqc->nchpwq", x)
imgs = x.reshape(shape=(x.shape[0], c, h * p, w * p))
return imgs
    def forward_core_with_concat(
        self,
        x            : torch.Tensor,
        c_mod        : torch.Tensor,
        c_mod_base   : torch.Tensor,
        context      : Optional[torch.Tensor] = None,
        context_base : Optional[torch.Tensor] = None,
        control                               = None,
        transformer_options                   = {},
    ) -> torch.Tensor:
        """Run the joint transformer blocks over (context, x) and apply the final layer.

        c_mod/context carry the (possibly regional) conditioning; the *_base
        variants are the fallbacks used when the regional weight is <= 0.
        """
        patches_replace = transformer_options.get("patches_replace", {})
        if self.register_length > 0:
            # prepend learned register tokens to the context sequence
            context = torch.cat(
                (
                    repeat(self.register, "1 ... -> b ...", b=x.shape[0]),
                    default(context, torch.Tensor([]).type_as(x)),
                ),
                1,
            )

        # regional conditioning strength; floor bounds how far the self-attn
        # region of the mask may be clamped
        weight = transformer_options['reg_cond_weight'] if 'reg_cond_weight' in transformer_options else 0.0
        floor  = transformer_options['reg_cond_floor']  if 'reg_cond_floor'  in transformer_options else 0.0
        floor  = min(floor, weight)

        if type(weight) == float or type(weight) == int:
            pass
        else:
            # tensor -> python scalar
            weight = weight.item()

        AttnMask = transformer_options.get('AttnMask')
        mask = None
        if AttnMask is not None and weight > 0:
            mask = AttnMask.get(weight=weight)   #mask_obj[0](transformer_options, weight.item())
            # boolean masks are left as-is; float masks are cast to x's dtype
            mask_type_bool = type(mask[0][0].item()) == bool if mask is not None else False
            if not mask_type_bool:
                mask = mask.to(x.dtype)
            text_len = context.shape[1]   # mask_obj[0].text_len
            # NOTE(review): floor.to(...) assumes floor is a tensor here,
            # i.e. 'reg_cond_floor' was supplied as a tensor — confirm callers.
            mask[text_len:,text_len:] = torch.clamp(mask[text_len:,text_len:], min=floor.to(mask.device))   #ORIGINAL SELF-ATTN REGION BLEED
            #reg_cond_mask = reg_cond_mask_expanded.unsqueeze(0).clone() if reg_cond_mask_expanded is not None else None

        mask_type_bool = type(mask[0][0].item()) == bool if mask is not None else False

        if weight <= 0.0:
            # regional conditioning disabled: fall back to the base conditioning
            mask    = None
            context = context_base
            c_mod   = c_mod_base

        # context is B, L', D
        # x is B, L, D
        blocks_replace = patches_replace.get("dit", {})
        blocks = len(self.joint_blocks)
        for i in range(blocks):
            # for boolean masks: once the block-depth ratio exceeds the weight,
            # convert the mask to x's dtype (disables the boolean-mask path)
            if mask_type_bool and weight < (i / (blocks-1)) and mask is not None:
                mask = mask.to(x.dtype)   # torch.ones((*mask.shape,), dtype=mask.dtype, device=mask.device)   #(mask == mask)  #set all to false
            if ("double_block", i) in blocks_replace:
                # external patch hook replaces this block's computation
                def block_wrap(args):
                    out = {}
                    out["txt"], out["img"] = self.joint_blocks[i](args["txt"], args["img"], c=args["vec"])
                    return out
                out = blocks_replace[("double_block", i)]({"img": x, "txt": context, "vec": c_mod}, {"original_block": block_wrap})
                context = out["txt"]
                x = out["img"]
            else:
                context, x = self.joint_blocks[i](
                    context,
                    x,
                    c              = c_mod,
                    use_checkpoint = self.use_checkpoint,
                    mask           = mask,
                )
            if control is not None:
                # add ControlNet residuals where provided
                control_o = control.get("output")
                if i < len(control_o):
                    add = control_o[i]
                    if add is not None:
                        x += add

        x = self.final_layer(x, c_mod)  # (N, T, patch_size ** 2 * out_channels)
        return x
    def forward(
        self,
        x : torch.Tensor,
        t : torch.Tensor,
        y : Optional[torch.Tensor] = None,
        context: Optional[torch.Tensor] = None,
        control = None,
        transformer_options = {},
    ) -> torch.Tensor:
        """
        Forward pass of DiT.
        x: (N, C, H, W) tensor of spatial inputs (images or latent representations of images)
        t: (N,) tensor of diffusion timesteps
        y: (N,) tensor of class labels
        """
        # t is in "timestep" units (sigma * 1000); recover this step's sigma.
        SIGMA = t[0].clone() / 1000
        EO = transformer_options.get("ExtraOptions", ExtraOptions(""))
        if EO is not None:
            EO.mute = True

        # Style-guide latents (positive / negative prompt sides) and their weights.
        y0_style_pos = transformer_options.get("y0_style_pos")
        y0_style_neg = transformer_options.get("y0_style_neg")

        y0_style_pos_weight = transformer_options.get("y0_style_pos_weight", 0.0)
        y0_style_pos_synweight = transformer_options.get("y0_style_pos_synweight", 0.0)
        y0_style_pos_synweight *= y0_style_pos_weight

        y0_style_neg_weight = transformer_options.get("y0_style_neg_weight", 0.0)
        y0_style_neg_synweight = transformer_options.get("y0_style_neg_synweight", 0.0)
        y0_style_neg_synweight *= y0_style_neg_weight

        # NOTE(review): these negated values appear unused below (the per-sample loop
        # re-reads the options directly) — confirm before removing.
        weight = -1 * transformer_options.get("regional_conditioning_weight", 0.0)
        floor = -1 * transformer_options.get("regional_conditioning_floor", 0.0)

        # Frequency-separation options for the AdaIN style path.
        freqsep_lowpass_method = transformer_options.get("freqsep_lowpass_method")
        freqsep_sigma = transformer_options.get("freqsep_sigma")
        freqsep_kernel_size = transformer_options.get("freqsep_kernel_size")
        freqsep_inner_kernel_size = transformer_options.get("freqsep_inner_kernel_size")
        freqsep_stride = transformer_options.get("freqsep_stride")

        freqsep_lowpass_weight = transformer_options.get("freqsep_lowpass_weight")
        freqsep_highpass_weight= transformer_options.get("freqsep_highpass_weight")
        freqsep_mask = transformer_options.get("freqsep_mask")

        x_orig = x.clone()
        y_orig = y.clone()
        h,w = x.shape[-2:]
        # Token-grid dimensions after patchification (rounded to nearest patch).
        h_len = ((h + (self.patch_size // 2)) // self.patch_size) # h_len 96
        w_len = ((w + (self.patch_size // 2)) // self.patch_size) # w_len 96

        # Run cond and uncond batch entries one at a time so each can get its own
        # (regional or base) context.
        out_list = []
        for i in range(len(transformer_options['cond_or_uncond'])):
            UNCOND = transformer_options['cond_or_uncond'][i] == 1

            x = x_orig.clone()
            y = y_orig.clone()
            context_base = context[i][None,...].clone()

            if UNCOND:
                #transformer_options['reg_cond_weight'] = -1
                #context_tmp = context[i][None,...].clone()
                transformer_options['reg_cond_weight'] = transformer_options.get("regional_conditioning_weight", 0.0) #transformer_options['regional_conditioning_weight']
                transformer_options['reg_cond_floor'] = transformer_options.get("regional_conditioning_floor", 0.0) #transformer_options['regional_conditioning_floor'] #if "regional_conditioning_floor" in transformer_options else 0.0
                transformer_options['reg_cond_mask_orig'] = transformer_options.get('regional_conditioning_mask_orig')

                AttnMask = transformer_options.get('AttnMask', None)
                RegContext = transformer_options.get('RegContext', None)
                if AttnMask is not None and transformer_options['reg_cond_weight'] > 0.0:
                    AttnMask.attn_mask_recast(x.dtype)
                    context_tmp = RegContext.get().to(context.dtype)
                    #context_tmp = 0 * context_tmp.clone()
                    # Tile the base (uncond) context to the regional context's length
                    # so shapes line up with the regional attention mask.
                    A = context[i][None,...].clone()
                    B = context_tmp
                    context_tmp = A.repeat(1, (B.shape[1] // A.shape[1]) + 1, 1)[:, :B.shape[1], :]
                else:
                    context_tmp = context[i][None,...].clone()

            elif UNCOND == False:
                transformer_options['reg_cond_weight'] = transformer_options.get("regional_conditioning_weight", 0.0) #transformer_options['regional_conditioning_weight']
                transformer_options['reg_cond_floor'] = transformer_options.get("regional_conditioning_floor", 0.0) #transformer_options['regional_conditioning_floor'] #if "regional_conditioning_floor" in transformer_options else 0.0
                transformer_options['reg_cond_mask_orig'] = transformer_options.get('regional_conditioning_mask_orig')

                AttnMask = transformer_options.get('AttnMask', None)
                RegContext = transformer_options.get('RegContext', None)
                if AttnMask is not None and transformer_options['reg_cond_weight'] > 0.0:
                    AttnMask.attn_mask_recast(x.dtype)
                    # Cond side uses the concatenated regional contexts directly.
                    context_tmp = RegContext.get().to(context.dtype)
                else:
                    context_tmp = context[i][None,...].clone()

            if context_tmp is None:
                context_tmp = context[i][None,...].clone()

            #context = context_tmp

            # --- embed regional (tmp) stream ---
            if self.context_processor is not None:
                context_tmp = self.context_processor(context_tmp)

            hw = x.shape[-2:]
            x = self.x_embedder(x) + comfy.ops.cast_to_input(self.cropped_pos_embed(hw, device=x.device), x)
            c = self.t_embedder(t, dtype=x.dtype) # (N, D) # c is like vec...
            if y is not None and self.y_embedder is not None:
                y = self.y_embedder(y_orig.clone()) # (N, D)
                c = c + y # (N, D) # vec = vec + y (y = pooled_output 1,2048)

            if context_tmp is not None:
                context_tmp = self.context_embedder(context_tmp)

            # --- embed base (fallback) stream ---
            if self.context_processor is not None:
                context_base = self.context_processor(context_base)

            #hw = x.shape[-2:]
            #x = self.x_embedder(x) + comfy.ops.cast_to_input(self.cropped_pos_embed(hw, device=x.device), x)
            c_base = self.t_embedder(t, dtype=x.dtype) # (N, D) # c is like vec...
            if y is not None and self.y_embedder is not None:
                y = self.y_embedder(y_orig.clone()) # (N, D)
                c_base = c_base + y # (N, D) # vec = vec + y (y = pooled_output 1,2048)

            if context_base is not None:
                context_base = self.context_embedder(context_base)

            x = self.forward_core_with_concat(
                x[i][None,...],
                c[i][None,...],
                c_base[i][None,...],
                context_tmp,
                context_base, #context[i][None,...].clone(),
                control,
                transformer_options,
            )

            x = self.unpatchify(x, hw=hw) # (N, out_channels, H, W)
            out_list.append(x)

        x = torch.stack(out_list, dim=0).squeeze(dim=1)
        # Crop any patch-rounding padding; the model output is treated as eps.
        eps = x[:,:,:hw[-2],:hw[-1]]

        dtype = eps.dtype if self.style_dtype is None else self.style_dtype

        # ---------- style transfer on the positive guide (Retrojector path) ----------
        if y0_style_pos is not None:
            y0_style_pos_weight = transformer_options.get("y0_style_pos_weight")
            y0_style_pos_synweight = transformer_options.get("y0_style_pos_synweight")
            y0_style_pos_synweight *= y0_style_pos_weight
            y0_style_pos_mask = transformer_options.get("y0_style_pos_mask")
            y0_style_pos_mask_edge = transformer_options.get("y0_style_pos_mask_edge")

            y0_style_pos = y0_style_pos.to(dtype)
            x = x_orig.clone().to(dtype)
            eps = eps.to(dtype)
            eps_orig = eps.clone()

            sigma = SIGMA #t_orig[0].to(torch.float32) / 1000
            # Reconstruct the denoised estimate, restyle it in embedding space,
            # then convert back to eps at the end.
            denoised = x - sigma * eps

            denoised_embed = self.Retrojector.embed(denoised)
            y0_adain_embed = self.Retrojector.embed(y0_style_pos)

            if transformer_options['y0_style_method'] == "scattersort":
                tile_h, tile_w = transformer_options.get('y0_style_tile_height'), transformer_options.get('y0_style_tile_width')
                pad = transformer_options.get('y0_style_tile_padding')
                if pad is not None and tile_h is not None and tile_w is not None:
                    denoised_spatial = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)
                    y0_adain_spatial = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)

                    if EO("scattersort_median_LP"):
                        # Scattersort only the low-pass band; keep the content's high-pass.
                        denoised_spatial_LP = median_blur_2d(denoised_spatial, kernel_size=EO("scattersort_median_LP",7))
                        y0_adain_spatial_LP = median_blur_2d(y0_adain_spatial, kernel_size=EO("scattersort_median_LP",7))

                        denoised_spatial_HP = denoised_spatial - denoised_spatial_LP
                        y0_adain_spatial_HP = y0_adain_spatial - y0_adain_spatial_LP

                        denoised_spatial_LP = apply_scattersort_tiled(denoised_spatial_LP, y0_adain_spatial_LP, tile_h, tile_w, pad)

                        denoised_spatial = denoised_spatial_LP + denoised_spatial_HP
                        denoised_embed = rearrange(denoised_spatial, "b c h w -> b (h w) c")
                    else:
                        denoised_spatial = apply_scattersort_tiled(denoised_spatial, y0_adain_spatial, tile_h, tile_w, pad)
                        denoised_embed = rearrange(denoised_spatial, "b c h w -> b (h w) c")
                else:
                    denoised_embed = apply_scattersort_masked(denoised_embed, y0_adain_embed, y0_style_pos_mask, y0_style_pos_mask_edge, h_len, w_len)

            elif transformer_options['y0_style_method'] == "AdaIN":
                if freqsep_mask is not None:
                    freqsep_mask = freqsep_mask.view(1, 1, *freqsep_mask.shape[-2:]).float()
                    freqsep_mask = F.interpolate(freqsep_mask.float(), size=(h_len, w_len), mode='nearest-exact')

                if hasattr(self, "adain_tile"):
                    # Tiled AdaIN with alternating half-tile offsets between calls
                    # (self.adain_flag toggles each invocation).
                    tile_h, tile_w = self.adain_tile

                    denoised_pretile = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)
                    y0_adain_pretile = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)

                    if self.adain_flag:
                        h_off = tile_h // 2
                        w_off = tile_w // 2
                        denoised_pretile = denoised_pretile[:,:,h_off:-h_off, w_off:-w_off]
                        self.adain_flag = False
                    else:
                        h_off = 0
                        w_off = 0
                        self.adain_flag = True

                    tiles, orig_shape, grid, strides = tile_latent(denoised_pretile, tile_size=(tile_h,tile_w))
                    y0_tiles, orig_shape, grid, strides = tile_latent(y0_adain_pretile, tile_size=(tile_h,tile_w))

                    tiles_out = []
                    for i in range(tiles.shape[0]):
                        tile = tiles[i].unsqueeze(0)
                        y0_tile = y0_tiles[i].unsqueeze(0)

                        tile = rearrange(tile, "b c h w -> b (h w) c", h=tile_h, w=tile_w)
                        y0_tile = rearrange(y0_tile, "b c h w -> b (h w) c", h=tile_h, w=tile_w)

                        tile = adain_seq_inplace(tile, y0_tile)
                        tiles_out.append(rearrange(tile, "b (h w) c -> b c h w", h=tile_h, w=tile_w))

                    tiles_out_tensor = torch.cat(tiles_out, dim=0)
                    tiles_out_tensor = untile_latent(tiles_out_tensor, orig_shape, grid, strides)

                    if h_off == 0:
                        denoised_pretile = tiles_out_tensor
                    else:
                        denoised_pretile[:,:,h_off:-h_off, w_off:-w_off] = tiles_out_tensor
                    denoised_embed = rearrange(denoised_pretile, "b c h w -> b (h w) c", h=h_len, w=w_len)

                elif freqsep_lowpass_method is not None and freqsep_lowpass_method.endswith("pw"): #EO("adain_pw"):
                    # Patchwise AdaIN variants (median/gaussian).
                    denoised_spatial = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)
                    y0_adain_spatial = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)

                    if freqsep_lowpass_method == "median_pw":
                        denoised_spatial_new = adain_patchwise_row_batch_med(denoised_spatial.clone(), y0_adain_spatial.clone().repeat(denoised_spatial.shape[0],1,1,1), sigma=freqsep_sigma, kernel_size=freqsep_kernel_size, use_median_blur=True, lowpass_weight=freqsep_lowpass_weight, highpass_weight=freqsep_highpass_weight)
                    elif freqsep_lowpass_method == "gaussian_pw":
                        denoised_spatial_new = adain_patchwise_row_batch(denoised_spatial.clone(), y0_adain_spatial.clone().repeat(denoised_spatial.shape[0],1,1,1), sigma=freqsep_sigma, kernel_size=freqsep_kernel_size)

                    denoised_embed = rearrange(denoised_spatial_new, "b c h w -> b (h w) c", h=h_len, w=w_len)

                elif freqsep_lowpass_method is not None:
                    # Frequency-separated AdaIN: style low-pass + content high-pass.
                    denoised_spatial = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)
                    y0_adain_spatial = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)

                    if freqsep_lowpass_method == "median":
                        denoised_spatial_LP = median_blur_2d(denoised_spatial, kernel_size=freqsep_kernel_size)
                        y0_adain_spatial_LP = median_blur_2d(y0_adain_spatial, kernel_size=freqsep_kernel_size)
                    elif freqsep_lowpass_method == "gaussian":
                        denoised_spatial_LP = gaussian_blur_2d(denoised_spatial, sigma=freqsep_sigma, kernel_size=freqsep_kernel_size)
                        y0_adain_spatial_LP = gaussian_blur_2d(y0_adain_spatial, sigma=freqsep_sigma, kernel_size=freqsep_kernel_size)

                    denoised_spatial_HP = denoised_spatial - denoised_spatial_LP

                    if EO("adain_fs_uhp"):
                        # Ultra-high-pass split: swap in the style's UHP band.
                        y0_adain_spatial_HP = y0_adain_spatial - y0_adain_spatial_LP

                        denoised_spatial_ULP = gaussian_blur_2d(denoised_spatial, sigma=EO("adain_fs_uhp_sigma", 1.0), kernel_size=EO("adain_fs_uhp_kernel_size", 3))
                        y0_adain_spatial_ULP = gaussian_blur_2d(y0_adain_spatial, sigma=EO("adain_fs_uhp_sigma", 1.0), kernel_size=EO("adain_fs_uhp_kernel_size", 3))

                        denoised_spatial_UHP = denoised_spatial_HP - denoised_spatial_ULP
                        y0_adain_spatial_UHP = y0_adain_spatial_HP - y0_adain_spatial_ULP

                        #denoised_spatial_HP = y0_adain_spatial_ULP + denoised_spatial_UHP
                        denoised_spatial_HP = denoised_spatial_ULP + y0_adain_spatial_UHP

                    denoised_spatial_new = freqsep_lowpass_weight * y0_adain_spatial_LP + freqsep_highpass_weight * denoised_spatial_HP
                    denoised_embed = rearrange(denoised_spatial_new, "b c h w -> b (h w) c", h=h_len, w=w_len)

                else:
                    denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed)
                    for adain_iter in range(EO("style_iter", 0)):
                        # Optional extra AdaIN passes, round-tripped through the projector.
                        denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed)
                        denoised_embed = self.Retrojector.embed(self.Retrojector.unembed(denoised_embed))
                        denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed)

            elif transformer_options['y0_style_method'] == "WCT":
                self.StyleWCT.set(y0_adain_embed)
                denoised_embed = self.StyleWCT.get(denoised_embed)

                if transformer_options.get('y0_standard_guide') is not None:
                    y0_standard_guide = transformer_options.get('y0_standard_guide')

                    y0_standard_guide_embed = self.Retrojector.embed(y0_standard_guide)
                    f_cs = self.StyleWCT.get(y0_standard_guide_embed)
                    self.y0_standard_guide = self.Retrojector.unembed(f_cs)

                if transformer_options.get('y0_inv_standard_guide') is not None:
                    y0_inv_standard_guide = transformer_options.get('y0_inv_standard_guide')

                    y0_inv_standard_guide_embed = self.Retrojector.embed(y0_inv_standard_guide)
                    f_cs = self.StyleWCT.get(y0_inv_standard_guide_embed)
                    self.y0_inv_standard_guide = self.Retrojector.unembed(f_cs)

            denoised_approx = self.Retrojector.unembed(denoised_embed)

            eps = (x - denoised_approx) / sigma

            # Blend restyled eps with the original, per-batch-entry.
            # NOTE(review): UNCOND holds its value from the LAST loop iteration above.
            if not UNCOND:
                if eps.shape[0] == 2:
                    eps[1] = eps_orig[1] + y0_style_pos_weight * (eps[1] - eps_orig[1])
                    eps[0] = eps_orig[0] + y0_style_pos_synweight * (eps[0] - eps_orig[0])
                else:
                    eps[0] = eps_orig[0] + y0_style_pos_weight * (eps[0] - eps_orig[0])
            elif eps.shape[0] == 1 and UNCOND:
                eps[0] = eps_orig[0] + y0_style_pos_synweight * (eps[0] - eps_orig[0])

            eps = eps.float()

        # ---------- style transfer on the negative guide (Retrojector path) ----------
        if y0_style_neg is not None:
            y0_style_neg_weight = transformer_options.get("y0_style_neg_weight")
            y0_style_neg_synweight = transformer_options.get("y0_style_neg_synweight")
            y0_style_neg_synweight *= y0_style_neg_weight
            y0_style_neg_mask = transformer_options.get("y0_style_neg_mask")
            y0_style_neg_mask_edge = transformer_options.get("y0_style_neg_mask_edge")

            y0_style_neg = y0_style_neg.to(dtype)
            x = x_orig.clone().to(dtype)
            eps = eps.to(dtype)
            eps_orig = eps.clone()

            sigma = SIGMA #t_orig[0].to(torch.float32) / 1000
            denoised = x - sigma * eps

            denoised_embed = self.Retrojector.embed(denoised)
            y0_adain_embed = self.Retrojector.embed(y0_style_neg)

            if transformer_options['y0_style_method'] == "scattersort":
                tile_h, tile_w = transformer_options.get('y0_style_tile_height'), transformer_options.get('y0_style_tile_width')
                pad = transformer_options.get('y0_style_tile_padding')
                if pad is not None and tile_h is not None and tile_w is not None:
                    denoised_spatial = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)
                    y0_adain_spatial = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len)

                    denoised_spatial = apply_scattersort_tiled(denoised_spatial, y0_adain_spatial, tile_h, tile_w, pad)

                    denoised_embed = rearrange(denoised_spatial, "b c h w -> b (h w) c")
                else:
                    denoised_embed = apply_scattersort_masked(denoised_embed, y0_adain_embed, y0_style_neg_mask, y0_style_neg_mask_edge, h_len, w_len)

            elif transformer_options['y0_style_method'] == "AdaIN":
                denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed)
                for adain_iter in range(EO("style_iter", 0)):
                    denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed)
                    denoised_embed = self.Retrojector.embed(self.Retrojector.unembed(denoised_embed))
                    denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed)

            elif transformer_options['y0_style_method'] == "WCT":
                self.StyleWCT.set(y0_adain_embed)
                denoised_embed = self.StyleWCT.get(denoised_embed)

            denoised_approx = self.Retrojector.unembed(denoised_embed)

            if UNCOND:
                eps = (x - denoised_approx) / sigma
                eps[0] = eps_orig[0] + y0_style_neg_weight * (eps[0] - eps_orig[0])
                if eps.shape[0] == 2:
                    eps[1] = eps_orig[1] + y0_style_neg_synweight * (eps[1] - eps_orig[1])
            elif eps.shape[0] == 1 and not UNCOND:
                eps[0] = eps_orig[0] + y0_style_neg_synweight * (eps[0] - eps_orig[0])

            eps = eps.float()

        return eps

        # ------------------------------------------------------------------
        # NOTE(review): everything below is UNREACHABLE legacy code — the
        # function always returns at the `return eps` above.  It is an older
        # implementation of the style paths using a pinv of x_embedder.proj
        # instead of the Retrojector.  Kept verbatim; candidate for deletion.
        # ------------------------------------------------------------------
        dtype = eps.dtype if self.style_dtype is None else self.style_dtype
        pinv_dtype = torch.float32 if dtype != torch.float64 else dtype
        W_inv = None

        #if eps.shape[0] == 2 or (eps.shape[0] == 1 and not UNCOND):
        if y0_style_pos is not None:
            y0_style_pos_weight = transformer_options.get("y0_style_pos_weight")
            y0_style_pos_synweight = transformer_options.get("y0_style_pos_synweight")
            y0_style_pos_synweight *= y0_style_pos_weight

            y0_style_pos = y0_style_pos.to(torch.float64)
            x = x_orig.to(torch.float64)
            eps = eps.to(torch.float64)
            eps_orig = eps.clone()

            sigma = SIGMA# t_orig[0].to(torch.float64) / 1000
            denoised = x - sigma * eps

            hw = denoised.shape[-2:]

            features = 1536# denoised_embed.shape[-1] # should be 1536
            W_conv = self.x_embedder.proj.weight.to(torch.float64) # [1536, 16, 2, 2]
            W_flat = W_conv.view(features, -1).to(torch.float64) # [1536, 64]
            W_pinv = torch.linalg.pinv(W_flat) # [64, 1536]

            x_embedder64 = copy.deepcopy(self.x_embedder.proj).to(denoised)

            #y = self.x_embedder.proj(denoised.to(torch.float16)).float()
            y = x_embedder64(denoised)

            B, C_out, H_out, W_out = y.shape
            y_flat = y.view(B, C_out, -1) # [B, 1536, N]
            y_flat = y_flat.permute(0, 2, 1) # [B, N, 1536]
            bias = self.x_embedder.proj.bias.to(torch.float64) # [1536]
            denoised_embed = y_flat - bias.view(1, 1, -1)

            #y = self.x_embedder.proj(y0_style_pos.to(torch.float16)).float()
            y = x_embedder64(y0_style_pos)

            B, C_out, H_out, W_out = y.shape
            y_flat = y.view(B, C_out, -1) # [B, 1536, N]
            y_flat = y_flat.permute(0, 2, 1) # [B, N , 1536]
            bias = self.x_embedder.proj.bias.to(torch.float64) # [1536]
            y0_adain_embed = y_flat - bias.view(1, 1, -1)

            #denoised_embed = adain_seq(denoised_embed, y0_adain_embed)
            if transformer_options['y0_style_method'] == "AdaIN":
                denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed)
                """for adain_iter in range(EO("style_iter", 0)):
                    denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed)
                    denoised_embed = (denoised_embed - b) @ torch.linalg.pinv(W.to(pinv_dtype)).T.to(dtype)   # not going to work! needs
                    denoised_embed = F.linear(denoised_embed         .to(W), W, b).to(img)
                    denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed)"""
            elif transformer_options['y0_style_method'] == "WCT":
                # Whitening/coloring transform: cache the style's coloring matrix,
                # then whiten each content embedding and recolor with the style stats.
                if self.y0_adain_embed is None or self.y0_adain_embed.shape != y0_adain_embed.shape or torch.norm(self.y0_adain_embed - y0_adain_embed) > 0:
                    self.y0_adain_embed = y0_adain_embed

                    f_s = y0_adain_embed[0].clone()
                    self.mu_s = f_s.mean(dim=0, keepdim=True)
                    f_s_centered = f_s - self.mu_s

                    cov = (f_s_centered.T.double() @ f_s_centered.double()) / (f_s_centered.size(0) - 1)

                    S_eig, U_eig = torch.linalg.eigh(cov + 1e-5 * torch.eye(cov.size(0), dtype=cov.dtype, device=cov.device))
                    S_eig_sqrt = S_eig.clamp(min=0).sqrt() # eigenvalues -> singular values

                    whiten = U_eig @ torch.diag(S_eig_sqrt) @ U_eig.T
                    self.y0_color = whiten.to(f_s_centered)

                for wct_i in range(eps.shape[0]):
                    f_c = denoised_embed[wct_i].clone()
                    mu_c = f_c.mean(dim=0, keepdim=True)
                    f_c_centered = f_c - mu_c

                    cov = (f_c_centered.T.double() @ f_c_centered.double()) / (f_c_centered.size(0) - 1)

                    S_eig, U_eig = torch.linalg.eigh(cov + 1e-5 * torch.eye(cov.size(0), dtype=cov.dtype, device=cov.device))
                    inv_sqrt_eig = S_eig.clamp(min=0).rsqrt()

                    whiten = U_eig @ torch.diag(inv_sqrt_eig) @ U_eig.T
                    whiten = whiten.to(f_c_centered)

                    f_c_whitened = f_c_centered @ whiten.T
                    f_cs = f_c_whitened @ self.y0_color.T + self.mu_s

                    denoised_embed[wct_i] = f_cs

            # Invert the patch embedding via the precomputed pseudo-inverse.
            x_patches = denoised_embed @ W_pinv.T # [B,N,64]
            x_patches = x_patches.permute(0, 2, 1) # [B,64,N]
            x_reconstructed = torch.nn.functional.fold(
                x_patches, # [B, 64, N]
                output_size=(H_out * 2, W_out * 2), # restore original input shape
                kernel_size=2,
                stride=2
            )

            denoised_approx = x_reconstructed #.view(B, 16, H_out * 2, W_out * 2)

            eps = (x - denoised_approx) / sigma

            #if eps.shape[0] == 2:
            #    eps[1] = eps_orig[1] + y0_style_pos_weight * (eps[1] - eps_orig[1])
            #    eps[0] = eps_orig[0] + y0_style_pos_synweight * (eps[0] - eps_orig[0])
            #else:
            #    eps[0] = eps_orig[0] + y0_style_pos_weight * (eps[0] - eps_orig[0])

            if not UNCOND:
                if eps.shape[0] == 2:
                    eps[1] = eps_orig[1] + y0_style_pos_weight * (eps[1] - eps_orig[1])
                    eps[0] = eps_orig[0] + y0_style_pos_synweight * (eps[0] - eps_orig[0])
                else:
                    eps[0] = eps_orig[0] + y0_style_pos_weight * (eps[0] - eps_orig[0])
            elif eps.shape[0] == 1 and UNCOND:
                eps[0] = eps_orig[0] + y0_style_pos_synweight * (eps[0] - eps_orig[0])

            eps = eps.float()

        #if eps.shape[0] == 2 or (eps.shape[0] == 1 and UNCOND):
        if y0_style_neg is not None:
            y0_style_neg_weight = transformer_options.get("y0_style_neg_weight")
            y0_style_neg_synweight = transformer_options.get("y0_style_neg_synweight")
            y0_style_neg_synweight *= y0_style_neg_weight

            y0_style_neg = y0_style_neg.to(torch.float64)
            x = x_orig.to(torch.float64)
            eps = eps.to(torch.float64)
            eps_orig = eps.clone()

            sigma = SIGMA# t_orig[0].to(torch.float64) / 1000
            denoised = x - sigma * eps

            hw = denoised.shape[-2:]

            features = 1536# denoised_embed.shape[-1] # should be 1536
            W_conv = self.x_embedder.proj.weight.float() # [1536, 16, 2, 2]
            W_flat = W_conv.view(features, -1).float() # [1536, 64]
            W_pinv = torch.linalg.pinv(W_flat) # [64, 1536]

            y = self.x_embedder.proj(denoised.to(torch.float16)).float()

            B, C_out, H_out, W_out = y.shape
            y_flat = y.view(B, C_out, -1) # [B, 1536, N]
            y_flat = y_flat.permute(0, 2, 1) # [B, N, 1536]
            bias = self.x_embedder.proj.bias.float() # [1536]
            denoised_embed = y_flat - bias.view(1, 1, -1)

            y = self.x_embedder.proj(y0_style_neg.to(torch.float16)).float()

            B, C_out, H_out, W_out = y.shape
            y_flat = y.view(B, C_out, -1) # [B, 1536, N]
            y_flat = y_flat.permute(0, 2, 1) # [B, N , 1536]
            bias = self.x_embedder.proj.bias.float() # [1536]
            y0_adain_embed = y_flat - bias.view(1, 1, -1)

            #denoised_embed = adain_seq(denoised_embed, y0_adain_embed)
            if transformer_options['y0_style_method'] == "AdaIN":
                denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed)
                """for adain_iter in range(EO("style_iter", 0)):
                    denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed)
                    denoised_embed = (denoised_embed - b) @ torch.linalg.pinv(W.to(pinv_dtype)).T.to(dtype)
                    denoised_embed = F.linear(denoised_embed         .to(W), W, b).to(img)
                    denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed)"""
            elif transformer_options['y0_style_method'] == "WCT":
                if self.y0_adain_embed is None or self.y0_adain_embed.shape != y0_adain_embed.shape or torch.norm(self.y0_adain_embed - y0_adain_embed) > 0:
                    self.y0_adain_embed = y0_adain_embed

                    f_s = y0_adain_embed[0].clone()
                    self.mu_s = f_s.mean(dim=0, keepdim=True)
                    f_s_centered = f_s - self.mu_s

                    cov = (f_s_centered.T.double() @ f_s_centered.double()) / (f_s_centered.size(0) - 1)

                    S_eig, U_eig = torch.linalg.eigh(cov + 1e-5 * torch.eye(cov.size(0), dtype=cov.dtype, device=cov.device))
                    S_eig_sqrt = S_eig.clamp(min=0).sqrt() # eigenvalues -> singular values

                    whiten = U_eig @ torch.diag(S_eig_sqrt) @ U_eig.T
                    self.y0_color = whiten.to(f_s_centered)

                for wct_i in range(eps.shape[0]):
                    f_c = denoised_embed[wct_i].clone()
                    mu_c = f_c.mean(dim=0, keepdim=True)
                    f_c_centered = f_c - mu_c

                    cov = (f_c_centered.T.double() @ f_c_centered.double()) / (f_c_centered.size(0) - 1)

                    S_eig, U_eig = torch.linalg.eigh(cov + 1e-5 * torch.eye(cov.size(0), dtype=cov.dtype, device=cov.device))
                    inv_sqrt_eig = S_eig.clamp(min=0).rsqrt()

                    whiten = U_eig @ torch.diag(inv_sqrt_eig) @ U_eig.T
                    whiten = whiten.to(f_c_centered)

                    f_c_whitened = f_c_centered @ whiten.T
                    f_cs = f_c_whitened @ self.y0_color.T + self.mu_s

                    denoised_embed[wct_i] = f_cs

            x_patches = denoised_embed @ W_pinv.T # [B,N,64]
            x_patches = x_patches.permute(0, 2, 1) # [B,64,N]
            x_reconstructed = torch.nn.functional.fold(
                x_patches, # [B, 64, N]
                output_size=(H_out * 2, W_out * 2), # restore original input shape
                kernel_size=2,
                stride=2
            )

            denoised_approx = x_reconstructed #.view(B, 16, H_out * 2, W_out * 2)

            #eps = (x - denoised_approx) / sigma
            #eps[0] = eps_orig[0] + y0_style_neg_weight * (eps[0] - eps_orig[0])
            #if eps.shape[0] == 2:
            #    eps[1] = eps_orig[1] + y0_style_neg_synweight * (eps[1] - eps_orig[1])

            if UNCOND:
                eps = (x - denoised_approx) / sigma
                eps[0] = eps_orig[0] + y0_style_neg_weight * (eps[0] - eps_orig[0])
                if eps.shape[0] == 2:
                    eps[1] = eps_orig[1] + y0_style_neg_synweight * (eps[1] - eps_orig[1])
            elif eps.shape[0] == 1 and not UNCOND:
                eps[0] = eps_orig[0] + y0_style_neg_synweight * (eps[0] - eps_orig[0])

            eps = eps.float()

        return eps
class ReOpenAISignatureMMDITWrapper(MMDiT):
    """Adapter exposing MMDiT.forward under the (x, timesteps, ...) signature ComfyUI uses."""

    def forward(
        self,
        x : torch.Tensor,
        timesteps : torch.Tensor,
        context : Optional[torch.Tensor] = None,
        y : Optional[torch.Tensor] = None,
        control = None,
        transformer_options = {},
        **kwargs,
    ) -> torch.Tensor:
        # Extra kwargs passed by the caller are intentionally dropped.
        forwarded = dict(
            context             = context,
            y                   = y,
            control             = control,
            transformer_options = transformer_options,
        )
        return super().forward(x, timesteps, **forwarded)
def adain_seq_inplace(content: torch.Tensor, style: torch.Tensor, eps: float = 1e-7) -> torch.Tensor:
    """
    Adaptive instance normalization along dim 1, performed in place on `content`.

    `content` is rewritten so its per-sample mean/std over dim 1 match those of
    `style`; the mutated `content` tensor itself is returned.
    """
    mu_content = content.mean(dim=1, keepdim=True)
    sd_content = content.std(dim=1, keepdim=True) + eps
    mu_style = style.mean(dim=1, keepdim=True)
    sd_style = style.std(dim=1, keepdim=True) + eps

    # Normalize to zero-mean/unit-std, then restamp with the style statistics,
    # writing the result back into `content`'s storage.
    restyled = (content - mu_content) / sd_content * sd_style + mu_style
    content.copy_(restyled)
    return content
def adain_seq(content: torch.Tensor, style: torch.Tensor, eps: float = 1e-7) -> torch.Tensor:
    """Out-of-place AdaIN over dim 1: return `content` re-normalized to `style`'s mean/std."""
    mu_c = content.mean(1, keepdim=True)
    sd_c = content.std(1, keepdim=True) + eps
    mu_s = style.mean(1, keepdim=True)
    sd_s = style.std(1, keepdim=True) + eps
    normalized = (content - mu_c) / sd_c
    return normalized * sd_s + mu_s
================================================
FILE: sigmas.py
================================================
import torch
import numpy as np
from math import *
import builtins
from scipy.interpolate import CubicSpline
from scipy import special, stats
import torch.nn.functional as F
import torch.nn as nn
import torch.optim as optim
import math
from comfy.k_diffusion.sampling import get_sigmas_polyexponential, get_sigmas_karras
import comfy.samplers
from torch import Tensor, nn
from typing import Optional, Callable, Tuple, Dict, Any, Union, TYPE_CHECKING, TypeVar
from .res4lyf import RESplain
from .helper import get_res4lyf_scheduler_list
def rescale_linear(input, input_min, input_max, output_min, output_max):
    """Linearly map `input` from [input_min, input_max] onto [output_min, output_max].

    Works elementwise on tensors/ndarrays as well as on python scalars.  Values
    outside the input range are extrapolated, not clamped.  Raises
    ZeroDivisionError (or produces inf/nan for tensors) when input_min == input_max.
    """
    # Fixed: removed stray trailing semicolon (un-Pythonic).
    output = ((input - input_min) / (input_max - input_min)) * (output_max - output_min) + output_min
    return output
class set_precision_sigmas:
    """ComfyUI node: cast a SIGMAS tensor to float16/32/64, optionally also
    making that precision the torch default dtype."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", ),
                "precision": (["16", "32", "64"], ),
                "set_default": ("BOOLEAN", {"default": False})
            },
        }

    RETURN_TYPES = ("SIGMAS",)
    RETURN_NAMES = ("passthrough",)
    CATEGORY = "RES4LYF/precision"

    FUNCTION = "main"

    def main(self, precision="32", sigmas=None, set_default=False):
        # Table-driven dispatch instead of match/case; unknown precision strings
        # leave sigmas untouched, matching the original fall-through behavior.
        dtype_by_precision = {
            "16": torch.float16,
            "32": torch.float32,
            "64": torch.float64,
        }
        target_dtype = dtype_by_precision.get(precision)
        if target_dtype is not None:
            if set_default is True:
                torch.set_default_dtype(target_dtype)
            sigmas = sigmas.to(target_dtype)
        return (sigmas, )
class SimpleInterpolator(nn.Module):
    """Tiny MLP (1 -> 16 -> 32 -> 1) used to fit a sigma schedule as a
    function of normalized position in [0, 1]."""

    def __init__(self):
        super().__init__()
        hidden = [
            nn.Linear(1, 16),
            nn.ReLU(),
            nn.Linear(16, 32),
            nn.ReLU(),
            nn.Linear(32, 1),
        ]
        self.net = nn.Sequential(*hidden)

    def forward(self, x):
        # x: (N, 1) positions -> (N, 1) predicted sigmas
        return self.net(x)
def train_interpolator(model, sigma_schedule, steps, epochs=5000, lr=0.01):
    """Fit `model` to map normalized positions in [0, 1] to `sigma_schedule`.

    Fixes a defect in the original: the passed-in `model` was ignored and a
    fresh SimpleInterpolator was always trained instead.  The argument is now
    honored; a new SimpleInterpolator is created only when `model` is None.

    Args:
        model:          nn.Module mapping (N, 1) -> (N, 1), or None for a fresh
                        SimpleInterpolator.
        sigma_schedule: 1-D tensor of target sigma values (len == steps).
        steps:          number of training positions, evenly spaced over [0, 1].
        epochs:         full-batch Adam updates to run.
        lr:             Adam learning rate.

    Returns:
        The trained model (the same object passed in, when not None).
    """
    # Training needs autograd, so explicitly leave inference mode
    # (ComfyUI executes node code under torch.inference_mode()).
    with torch.inference_mode(False):
        if model is None:
            model = SimpleInterpolator()

        # Clone so targets are ordinary (non-inference) tensors.
        sigma_schedule = sigma_schedule.clone()
        criterion = nn.MSELoss()
        optimizer = optim.Adam(model.parameters(), lr=lr)

        x_train = torch.linspace(0, 1, steps=steps).unsqueeze(1)
        y_train = sigma_schedule.unsqueeze(1)

        model.train()
        for _ in range(epochs):
            optimizer.zero_grad()
            outputs = model(x_train)
            loss = criterion(outputs, y_train)
            loss.backward()
            optimizer.step()

    return model
def interpolate_sigma_schedule_model(sigma_schedule, target_steps):
    """Resample `sigma_schedule` to `target_steps` points by fitting a small
    MLP to the schedule and evaluating it on a denser/sparser grid."""
    schedule = sigma_schedule.float().detach()
    net = SimpleInterpolator()

    # Fit the network to the original schedule.
    fitted = train_interpolator(net, schedule, len(schedule))

    # Query positions for the resampled schedule.
    query = torch.linspace(0, 1, target_steps).unsqueeze(1)

    # Evaluate without gradients.
    fitted.eval()
    with torch.no_grad():
        resampled = fitted(query).squeeze()
    return resampled
class sigmas_interpolate:
def __init__(self):
pass
@classmethod
def INPUT_TYPES(s):
return {
"required": {
"sigmas_in": ("SIGMAS", {"forceInput": True}),
"output_length": ("INT", {"default": 0, "min": 0,"max": 10000,"step": 1}),
"mode": (["linear", "nearest", "polynomial", "exponential", "power", "model"],),
"order": ("INT", {"default": 8, "min": 1,"max": 64,"step": 1}),
"rescale_after": ("BOOLEAN", {"default": True, "tooltip": "Rescale the output to the original min/max range after interpolation."}),
}
}
FUNCTION = "main"
RETURN_TYPES = ("SIGMAS",)
RETURN_NAMES = ("sigmas",)
CATEGORY = "RES4LYF/sigmas"
DESCRIPTION = "Interpolate the sigmas schedule to a new length clamping the start and end values."
def interpolate_sigma_schedule_poly(self, sigma_schedule, target_steps):
order = self.order
sigma_schedule_np = sigma_schedule.cpu().numpy()
# orig steps (assuming even spacing)
original_steps = np.linspace(0, 1, len(sigma_schedule_np))
# fit polynomial of the given order
coefficients = np.polyfit(original_steps, sigma_schedule_np, deg=order)
# generate new steps where we want to interpolate the data
target_steps_np = np.linspace(0, 1, target_steps)
# eval polynomial at new steps
interpolated_sigma_np = np.polyval(coefficients, target_steps_np)
interpolated_sigma = torch.tensor(interpolated_sigma_np, device=sigma_schedule.device, dtype=sigma_schedule.dtype)
return interpolated_sigma
def interpolate_sigma_schedule_constrained(self, sigma_schedule, target_steps):
sigma_schedule_np = sigma_schedule.cpu().numpy()
# orig steps
original_steps = np.linspace(0, 1, len(sigma_schedule_np))
# target steps for interpolation
target_steps_np = np.linspace(0, 1, target_steps)
# fit cubic spline with fixed start and end values
cs = CubicSpline(original_steps, sigma_schedule_np, bc_type=((1, 0.0), (1, 0.0)))
# eval spline at the target steps
interpolated_sigma_np = cs(target_steps_np)
interpolated_sigma = torch.tensor(interpolated_sigma_np, device=sigma_schedule.device, dtype=sigma_schedule.dtype)
return interpolated_sigma
def interpolate_sigma_schedule_exp(self, sigma_schedule, target_steps):
# transform to log space
log_sigma_schedule = torch.log(sigma_schedule)
# define the original and target step ranges
original_steps = torch.linspace(0, 1, steps=len(sigma_schedule))
target_steps = torch.linspace(0, 1, steps=target_steps)
# interpolate in log space
interpolated_log_sigma = F.interpolate(
log_sigma_schedule.unsqueeze(0).unsqueeze(0), # Add fake batch and channel dimensions
size=target_steps.shape[0],
mode='linear',
align_corners=True
).squeeze()
# transform back to exponential space
interpolated_sigma_schedule = torch.exp(interpolated_log_sigma)
return interpolated_sigma_schedule
def interpolate_sigma_schedule_power(self, sigma_schedule, target_steps):
sigma_schedule_np = sigma_schedule.cpu().numpy()
original_steps = np.linspace(1, len(sigma_schedule_np), len(sigma_schedule_np))
# power regression using a log-log transformation
log_x = np.log(original_steps)
log_y = np.log(sigma_schedule_np)
# linear regression on log-log data
coefficients = np.polyfit(log_x, log_y, deg=1) # degree 1 for linear fit in log-log space
a = np.exp(coefficients[1]) # a = "b" = intercept (exp because of the log transform)
b = coefficients[0] # b = "m" = slope
target_steps_np = np.linspace(1, len(sigma_schedule_np), target_steps)
# power law prediction: y = a * x^b
interpolated_sigma_np = a * (target_steps_np ** b)
interpolated_sigma = torch.tensor(interpolated_sigma_np, device=sigma_schedule.device, dtype=sigma_schedule.dtype)
return interpolated_sigma
def interpolate_sigma_schedule_linear(self, sigma_schedule, target_steps):
    """Plain linear resampling of the schedule to `target_steps` points."""
    batched = sigma_schedule[None, None, :]
    resampled = F.interpolate(batched, target_steps, mode='linear')
    return resampled[0, 0]
def interpolate_sigma_schedule_nearest(self, sigma_schedule, target_steps):
    """Nearest-neighbor resampling of the schedule to `target_steps` points."""
    batched = sigma_schedule[None, None, :]
    resampled = F.interpolate(batched, target_steps, mode='nearest')
    return resampled[0, 0]
def interpolate_nearest_neighbor(self, sigma_schedule, target_steps):
    """Pick schedule entries for `target_steps` evenly spaced positions on
    [0, 1] via searchsorted (right-neighbor selection, clamped in range)."""
    source_grid = torch.linspace(0, 1, steps=len(sigma_schedule))
    query_grid = torch.linspace(0, 1, steps=target_steps)
    # searchsorted yields the insertion index for each query position;
    # clamp so the last position stays a valid index
    picked = torch.searchsorted(source_grid, query_grid)
    picked = torch.clamp(picked, 0, len(sigma_schedule) - 1)
    return sigma_schedule[picked]
def main(self, sigmas_in, output_length, mode, order, rescale_after=True):
    """Resample `sigmas_in` to `output_length` steps using the chosen `mode`,
    optionally min-max rescaling back to the original start/end range.

    Fixes: the original used two independent `if` statements for the
    "linear"/"nearest" branches (correct only by accident, since the modes
    are mutually exclusive) and raised UnboundLocalError on unknown modes;
    the dispatch is now a single elif chain that raises ValueError instead.
    """
    self.order = order
    sigmas_in = sigmas_in.clone().to(sigmas_in.dtype)
    start = sigmas_in[0]
    end = sigmas_in[-1]
    if mode == "linear":
        interpolate = self.interpolate_sigma_schedule_linear
    elif mode == "nearest":
        interpolate = self.interpolate_nearest_neighbor
    elif mode == "polynomial":
        interpolate = self.interpolate_sigma_schedule_poly
    elif mode == "exponential":
        interpolate = self.interpolate_sigma_schedule_exp
    elif mode == "power":
        interpolate = self.interpolate_sigma_schedule_power
    elif mode == "model":
        with torch.inference_mode(False):
            # module-level helper defined elsewhere in this file
            interpolate = interpolate_sigma_schedule_model
    else:
        raise ValueError(f"Unknown interpolation mode: {mode}")
    sigmas_interp = interpolate(sigmas_in, output_length)
    if rescale_after:
        # min-max rescale the result back onto the original [end, start] range
        sigmas_interp = ((sigmas_interp - sigmas_interp.min()) * (start - end)) / (sigmas_interp.max() - sigmas_interp.min()) + end
    return (sigmas_interp,)
class sigmas_noise_inversion:
    # flip sigmas for unsampling, and pad both fwd/rev directions with null bytes to disable noise scaling, etc from the model.
    # will cause model to return epsilon prediction instead of calculated denoised latent image.
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS","SIGMAS",)
    RETURN_NAMES = ("sigmas_fwd","sigmas_rev",)
    CATEGORY = "RES4LYF/sigmas"

    DESCRIPTION = "For use with unsampling. Connect sigmas_fwd to the unsampling (first) node, and sigmas_rev to the sampling (second) node."

    def main(self, sigmas):
        """Build the forward (reversed, zero-padded) and reverse
        (zero-wrapped) schedules used for noise inversion."""
        sigmas = sigmas.clone().to(sigmas.dtype)
        zero_pad = torch.tensor([0.0], device=sigmas.device, dtype=sigmas.dtype)
        forward = torch.cat([torch.flip(sigmas, dims=[0]), zero_pad])
        reverse = torch.cat([zero_pad, sigmas, zero_pad])
        return (forward, reverse,)
def compute_sigma_next_variance_floor(sigma):
    """Largest permissible next sigma for variance-locked SDE stepping:
    the positive root of sigma_next^2 + sigma_next = sigma."""
    return (torch.sqrt(1 + 4 * sigma) - 1) / 2
class sigmas_variance_floor:
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"
    DESCRIPTION = ("Process a sigma schedule so that any steps that are too large for variance-locked SDE sampling are replaced with the maximum permissible value."
                   "Will be very difficult to approach sigma = 0 due to the nature of the math, as steps become very small much below approximately sigma = 0.15 to 0.2.")

    def main(self, sigmas):
        """Clamp each step so sigma[i+1] never falls below the variance floor
        implied by sigma[i] (plus a small 0.001 margin); zeros are left alone."""
        dtype = sigmas.dtype
        sigmas = sigmas.clone().to(sigmas.dtype)
        for i in range(len(sigmas) - 1):
            # positive root of sigma_next^2 + sigma_next = sigma[i]
            floor_next = (-1 + torch.sqrt(1 + 4 * sigmas[i])) / 2
            if sigmas[i+1] < floor_next and sigmas[i+1] > 0.0:
                print("swapped i+1 with sigma_next+0.001: ", sigmas[i+1], floor_next + 0.001)
                sigmas[i+1] = floor_next + 0.001
        return (sigmas.to(dtype),)
class sigmas_from_text:
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "text": ("STRING", {"default": "", "multiline": True}),
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    RETURN_NAMES = ("sigmas",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, text):
        """Parse whitespace- and/or comma-separated numbers into a sigma tensor."""
        tokens = text.replace(",", " ").split()
        values = [float(token) for token in tokens]
        return (torch.tensor(values),)
class sigmas_concatenate:
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas_1": ("SIGMAS", {"forceInput": True}),
                "sigmas_2": ("SIGMAS", {"forceInput": True}),
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas_1, sigmas_2):
        """Join two schedules end-to-end; the second is cast to match the first."""
        joined = torch.cat((sigmas_1, sigmas_2.to(sigmas_1)))
        return (joined,)
class sigmas_truncate:
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "sigmas_until": ("INT", {"default": 10, "min": 0,"max": 1000,"step": 1}),
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas, sigmas_until):
        """Keep only the first `sigmas_until` entries of the schedule."""
        return (sigmas.clone()[:sigmas_until],)
class sigmas_start:
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "sigmas_until": ("INT", {"default": 10, "min": 0,"max": 1000,"step": 1}),
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas, sigmas_until):
        """Drop the first `sigmas_until` entries of the schedule."""
        return (sigmas.clone()[sigmas_until:],)
class sigmas_split:
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "sigmas_start": ("INT", {"default": 0, "min": 0,"max": 1000,"step": 1}),
                "sigmas_end": ("INT", {"default": 1000, "min": 0,"max": 1000,"step": 1}),
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas, sigmas_start, sigmas_end):
        """Return the slice [sigmas_start:sigmas_end] of the schedule.

        Fix: the original had an unreachable second return (dead code after
        the first return, computing the same slice); it has been removed.
        """
        sigmas = sigmas.clone()
        return (sigmas[sigmas_start:sigmas_end],)
class sigmas_pad:
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "value": ("FLOAT", {"default": 0.0, "min": -10000,"max": 10000,"step": 0.01})
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas, value):
        """Append a single `value` to the end of the schedule."""
        tail = torch.tensor([value], dtype=sigmas.dtype)
        return (torch.cat((sigmas.clone(), tail)),)
class sigmas_unpad:
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas):
        """Drop the final entry of the schedule (the inverse of sigmas_pad)."""
        return (sigmas.clone()[:-1],)
class sigmas_set_floor:
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "floor": ("FLOAT", {"default": 0.0291675, "min": -10000,"max": 10000,"step": 0.01}),
                "new_floor": ("FLOAT", {"default": 0.0291675, "min": -10000,"max": 10000,"step": 0.01})
            }
        }

    RETURN_TYPES = ("SIGMAS",)
    FUNCTION = "set_floor"
    CATEGORY = "RES4LYF/sigmas"

    def set_floor(self, sigmas, floor, new_floor):
        """Replace every value at or below `floor` with `new_floor`."""
        out = sigmas.clone()
        out[out <= floor] = new_floor
        return (out,)
class sigmas_delete_below_floor:
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "floor": ("FLOAT", {"default": 0.0291675, "min": -10000,"max": 10000,"step": 0.01})
            }
        }

    RETURN_TYPES = ("SIGMAS",)
    FUNCTION = "delete_below_floor"
    CATEGORY = "RES4LYF/sigmas"

    def delete_below_floor(self, sigmas, floor):
        """Drop every value strictly below `floor`."""
        out = sigmas.clone()
        return (out[out >= floor],)
class sigmas_delete_value:
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "value": ("FLOAT", {"default": 0.0, "min": -1000,"max": 1000,"step": 0.01})
            }
        }

    RETURN_TYPES = ("SIGMAS",)
    FUNCTION = "delete_value"
    CATEGORY = "RES4LYF/sigmas"

    def delete_value(self, sigmas, value):
        """Drop every entry exactly equal to `value`."""
        keep = sigmas != value
        return (sigmas[keep],)
class sigmas_delete_consecutive_duplicates:
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas_1": ("SIGMAS", {"forceInput": True})
            }
        }

    RETURN_TYPES = ("SIGMAS",)
    FUNCTION = "delete_consecutive_duplicates"
    CATEGORY = "RES4LYF/sigmas"

    def delete_consecutive_duplicates(self, sigmas_1):
        """Collapse runs of equal consecutive values, keeping the last of each run."""
        keep = torch.cat((sigmas_1[:-1] != sigmas_1[1:], torch.tensor([True])))
        return (sigmas_1[keep],)
class sigmas_cleanup:
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "sigmin": ("FLOAT", {"default": 0.0291675, "min": 0,"max": 1000,"step": 0.01})
            }
        }

    RETURN_TYPES = ("SIGMAS",)
    FUNCTION = "cleanup"
    CATEGORY = "RES4LYF/sigmas"

    def cleanup(self, sigmas, sigmin):
        """Drop values below `sigmin`, collapse consecutive duplicates, and
        append a trailing zero."""
        kept = sigmas[sigmas >= sigmin]
        keep_mask = torch.cat((kept[:-1] != kept[1:], torch.tensor([True])))
        deduped = kept[keep_mask]
        return (torch.cat((deduped, torch.tensor([0]))),)
class sigmas_mult:
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "multiplier": ("FLOAT", {"default": 1, "min": -10000,"max": 10000,"step": 0.01})
            },
            "optional": {
                "sigmas2": ("SIGMAS", {"forceInput": False})
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas, multiplier, sigmas2=None):
        """Scale the schedule by `multiplier`, optionally multiplying
        elementwise by a second schedule first."""
        if sigmas2 is None:
            return (sigmas * multiplier,)
        return (sigmas * sigmas2 * multiplier,)
class sigmas_modulus:
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "divisor": ("FLOAT", {"default": 1, "min": -1000,"max": 1000,"step": 0.01})
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas, divisor):
        """Elementwise remainder of the schedule modulo `divisor`."""
        remainder = sigmas % divisor
        return (remainder,)
class sigmas_quotient:
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "divisor": ("FLOAT", {"default": 1, "min": -1000,"max": 1000,"step": 0.01})
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas, divisor):
        """Elementwise floor division of the schedule by `divisor`."""
        quotient = sigmas // divisor
        return (quotient,)
class sigmas_add:
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "addend": ("FLOAT", {"default": 1, "min": -1000,"max": 1000,"step": 0.01})
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas, addend):
        """Add the scalar `addend` to every entry of the schedule."""
        shifted = sigmas + addend
        return (shifted,)
class sigmas_power:
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "power": ("FLOAT", {"default": 1, "min": -100,"max": 100,"step": 0.01})
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas, power):
        """Raise every entry of the schedule to the scalar `power`."""
        raised = sigmas ** power
        return (raised,)
class sigmas_abs:
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True})
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas):
        """Elementwise absolute value of the schedule."""
        return (torch.abs(sigmas),)
class sigmas2_mult:
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas_1": ("SIGMAS", {"forceInput": True}),
                "sigmas_2": ("SIGMAS", {"forceInput": True}),
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas_1, sigmas_2):
        """Elementwise product of two schedules."""
        product = sigmas_1 * sigmas_2
        return (product,)
class sigmas2_add:
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas_1": ("SIGMAS", {"forceInput": True}),
                "sigmas_2": ("SIGMAS", {"forceInput": True}),
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas_1, sigmas_2):
        """Elementwise sum of two schedules."""
        total = sigmas_1 + sigmas_2
        return (total,)
class sigmas_rescale:
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "start": ("FLOAT", {"default": 1.0, "min": -10000,"max": 10000,"step": 0.01}),
                "end": ("FLOAT", {"default": 0.0, "min": -10000,"max": 10000,"step": 0.01}),
                "sigmas": ("SIGMAS", ),
            },
            "optional": {
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    RETURN_NAMES = ("sigmas_rescaled",)
    CATEGORY = "RES4LYF/sigmas"

    DESCRIPTION = ("Can be used to set denoise. Results are generally better than with the approach used by KSampler and most nodes with denoise values "
                   "(which slice the sigmas schedule according to step count, not the noise level). Will also flip the sigma schedule if the start and end values are reversed."
                   )

    def main(self, start=0, end=-1, sigmas=None):
        """Min-max rescale the schedule so it spans from `start` to `end`."""
        lo = sigmas.min()
        hi = sigmas.max()
        rescaled = (sigmas - lo) * (start - end) / (hi - lo) + end
        return (rescaled,)
class sigmas_count:
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", ),
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("INT",)
    RETURN_NAMES = ("count",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas=None):
        """Return the number of entries in the schedule."""
        return (len(sigmas),)
class sigmas_math1:
    # Node: evaluate a user-supplied math expression (f1) elementwise over up
    # to three sigma schedules (a, b, c), a step-index ramp (s), and three
    # scalar constants broadcast as tensors (x, y, z).
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "start": ("INT", {"default": 0, "min": 0,"max": 10000,"step": 1}),
                "stop": ("INT", {"default": 0, "min": 0,"max": 10000,"step": 1}),
                "trim": ("INT", {"default": 0, "min": -10000,"max": 0,"step": 1}),
                "x": ("FLOAT", {"default": 1, "min": -10000,"max": 10000,"step": 0.01}),
                "y": ("FLOAT", {"default": 1, "min": -10000,"max": 10000,"step": 0.01}),
                "z": ("FLOAT", {"default": 1, "min": -10000,"max": 10000,"step": 0.01}),
                "f1": ("STRING", {"default": "s", "multiline": True}),
                "rescale" : ("BOOLEAN", {"default": False}),
                "max1": ("FLOAT", {"default": 14.614642, "min": -10000,"max": 10000,"step": 0.01}),
                "min1": ("FLOAT", {"default": 0.0291675, "min": -10000,"max": 10000,"step": 0.01}),
            },
            "optional": {
                "a": ("SIGMAS", {"forceInput": False}),
                "b": ("SIGMAS", {"forceInput": False}),
                "c": ("SIGMAS", {"forceInput": False}),
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, start=0, stop=0, trim=0, a=None, b=None, c=None, x=1.0, y=1.0, z=1.0, f1="s", rescale=False, min1=1.0, max1=1.0):
        # stop == 0 means "use the length of the shortest connected schedule"
        # (0 when no schedules are connected)
        if stop == 0:
            t_lens = [len(tensor) for tensor in [a, b, c] if tensor is not None]
            t_len = stop = min(t_lens) if t_lens else 0
        else:
            stop = stop + 1  # make the user-facing `stop` inclusive
            t_len = stop - start
        # trim is constrained to <= 0 by INPUT_TYPES; it shortens the working
        # range from the end
        stop = stop + trim
        t_len = t_len + trim
        t_a = t_b = t_c = None
        if a is not None:
            t_a = a[start:stop]
        if b is not None:
            t_b = b[start:stop]
        if c is not None:
            t_c = c[start:stop]
        # s = step-index ramp; x/y/z = scalars broadcast to full-length tensors
        t_s = torch.arange(0.0, t_len)
        t_x = torch.full((t_len,), x)
        t_y = torch.full((t_len,), y)
        t_z = torch.full((t_len,), z)
        # NOTE(review): eval of user-supplied text. __builtins__ is stripped,
        # but np and torch are fully exposed, so this is convenience
        # sandboxing only — never feed it untrusted input.
        eval_namespace = {"__builtins__": None, "round": builtins.round, "np": np, "a": t_a, "b": t_b, "c": t_c, "x": t_x, "y": t_y, "z": t_z, "s": t_s, "torch": torch}
        eval_namespace.update(np.__dict__)
        s_out_1 = eval(f1, eval_namespace)
        if rescale == True:
            # min-max rescale the result into [min1, max1]
            s_out_1 = ((s_out_1 - min(s_out_1)) * (max1 - min1)) / (max(s_out_1) - min(s_out_1)) + min1
        return (s_out_1,)
class sigmas_math3:
    # Three-expression variant of sigmas_math1: evaluates f1, f2, f3 over the
    # same shared namespace (schedules a/b/c, ramp s, scalars x/y/z) and
    # returns three schedules.
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "start": ("INT", {"default": 0, "min": 0,"max": 10000,"step": 1}),
                "stop": ("INT", {"default": 0, "min": 0,"max": 10000,"step": 1}),
                "trim": ("INT", {"default": 0, "min": -10000,"max": 0,"step": 1}),
            },
            "optional": {
                "a": ("SIGMAS", {"forceInput": False}),
                "b": ("SIGMAS", {"forceInput": False}),
                "c": ("SIGMAS", {"forceInput": False}),
                "x": ("FLOAT", {"default": 1, "min": -10000,"max": 10000,"step": 0.01}),
                "y": ("FLOAT", {"default": 1, "min": -10000,"max": 10000,"step": 0.01}),
                "z": ("FLOAT", {"default": 1, "min": -10000,"max": 10000,"step": 0.01}),
                "f1": ("STRING", {"default": "s", "multiline": True}),
                "rescale1" : ("BOOLEAN", {"default": False}),
                "max1": ("FLOAT", {"default": 14.614642, "min": -10000,"max": 10000,"step": 0.01}),
                "min1": ("FLOAT", {"default": 0.0291675, "min": -10000,"max": 10000,"step": 0.01}),
                "f2": ("STRING", {"default": "s", "multiline": True}),
                "rescale2" : ("BOOLEAN", {"default": False}),
                "max2": ("FLOAT", {"default": 14.614642, "min": -10000,"max": 10000,"step": 0.01}),
                "min2": ("FLOAT", {"default": 0.0291675, "min": -10000,"max": 10000,"step": 0.01}),
                "f3": ("STRING", {"default": "s", "multiline": True}),
                "rescale3" : ("BOOLEAN", {"default": False}),
                "max3": ("FLOAT", {"default": 14.614642, "min": -10000,"max": 10000,"step": 0.01}),
                "min3": ("FLOAT", {"default": 0.0291675, "min": -10000,"max": 10000,"step": 0.01}),
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS","SIGMAS","SIGMAS")
    CATEGORY = "RES4LYF/sigmas"

    def main(self, start=0, stop=0, trim=0, a=None, b=None, c=None, x=1.0, y=1.0, z=1.0, f1="s", f2="s", f3="s", rescale1=False, rescale2=False, rescale3=False, min1=1.0, max1=1.0, min2=1.0, max2=1.0, min3=1.0, max3=1.0):
        # stop == 0 means "use the length of the shortest connected schedule"
        if stop == 0:
            t_lens = [len(tensor) for tensor in [a, b, c] if tensor is not None]
            t_len = stop = min(t_lens) if t_lens else 0
        else:
            stop = stop + 1  # make the user-facing `stop` inclusive
            t_len = stop - start
        # trim (<= 0) shortens the working range from the end
        stop = stop + trim
        t_len = t_len + trim
        t_a = t_b = t_c = None
        if a is not None:
            t_a = a[start:stop]
        if b is not None:
            t_b = b[start:stop]
        if c is not None:
            t_c = c[start:stop]
        # s = step-index ramp; x/y/z = scalars broadcast to full-length tensors
        t_s = torch.arange(0.0, t_len)
        t_x = torch.full((t_len,), x)
        t_y = torch.full((t_len,), y)
        t_z = torch.full((t_len,), z)
        # NOTE(review): eval of user-supplied text with np/torch exposed —
        # convenience sandboxing only, never feed untrusted input.
        eval_namespace = {"__builtins__": None, "np": np, "a": t_a, "b": t_b, "c": t_c, "x": t_x, "y": t_y, "z": t_z, "s": t_s, "torch": torch}
        eval_namespace.update(np.__dict__)
        # all three expressions see the same (unmodified) namespace
        s_out_1 = eval(f1, eval_namespace)
        s_out_2 = eval(f2, eval_namespace)
        s_out_3 = eval(f3, eval_namespace)
        # independent optional min-max rescales for each output
        if rescale1 == True:
            s_out_1 = ((s_out_1 - min(s_out_1)) * (max1 - min1)) / (max(s_out_1) - min(s_out_1)) + min1
        if rescale2 == True:
            s_out_2 = ((s_out_2 - min(s_out_2)) * (max2 - min2)) / (max(s_out_2) - min(s_out_2)) + min2
        if rescale3 == True:
            s_out_3 = ((s_out_3 - min(s_out_3)) * (max3 - min3)) / (max(s_out_3) - min(s_out_3)) + min3
        return s_out_1, s_out_2, s_out_3
class sigmas_iteration_karras:
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "steps_up": ("INT", {"default": 30, "min": 0,"max": 10000,"step": 1}),
                "steps_down": ("INT", {"default": 30, "min": 0,"max": 10000,"step": 1}),
                "rho_up": ("FLOAT", {"default": 3, "min": -10000,"max": 10000,"step": 0.01}),
                "rho_down": ("FLOAT", {"default": 4, "min": -10000,"max": 10000,"step": 0.01}),
                "s_min_start": ("FLOAT", {"default":0.0291675, "min": -10000,"max": 10000,"step": 0.01}),
                "s_max": ("FLOAT", {"default": 2, "min": -10000,"max": 10000,"step": 0.01}),
                "s_min_end": ("FLOAT", {"default": 0.0291675, "min": -10000,"max": 10000,"step": 0.01}),
            },
            "optional": {
                "momentums": ("SIGMAS", {"forceInput": False}),
                "sigmas": ("SIGMAS", {"forceInput": False}),
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS","SIGMAS")
    RETURN_NAMES = ("momentums","sigmas")
    CATEGORY = "RES4LYF/schedulers"

    def main(self, steps_up, steps_down, rho_up, rho_down, s_min_start, s_max, s_min_end, sigmas=None, momentums=None):
        """Build an up-then-down Karras schedule (momentums carry the ramp
        direction as a sign), appending to any schedules already connected."""
        ramp_up = get_sigmas_karras(steps_up, s_min_start, s_max, rho_up)[:-1]
        ramp_down = get_sigmas_karras(steps_down, s_min_end, s_max, rho_down)[:-1]
        # karras schedules descend; flip the first so the combined curve rises
        ramp_up = torch.flip(ramp_up, dims=[0])
        sigmas_new = torch.cat((ramp_up, ramp_down), dim=0)
        momentums_new = torch.cat((ramp_up, -1 * ramp_down), dim=0)
        sigmas = sigmas_new if sigmas is None else torch.cat([sigmas, sigmas_new])
        momentums = momentums_new if momentums is None else torch.cat([momentums, momentums_new])
        return (momentums, sigmas)
class sigmas_iteration_polyexp:
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "steps_up": ("INT", {"default": 30, "min": 0,"max": 10000,"step": 1}),
                "steps_down": ("INT", {"default": 30, "min": 0,"max": 10000,"step": 1}),
                "rho_up": ("FLOAT", {"default": 0.6, "min": -10000,"max": 10000,"step": 0.01}),
                "rho_down": ("FLOAT", {"default": 0.8, "min": -10000,"max": 10000,"step": 0.01}),
                "s_min_start": ("FLOAT", {"default":0.0291675, "min": -10000,"max": 10000,"step": 0.01}),
                "s_max": ("FLOAT", {"default": 2, "min": -10000,"max": 10000,"step": 0.01}),
                "s_min_end": ("FLOAT", {"default": 0.0291675, "min": -10000,"max": 10000,"step": 0.01}),
            },
            "optional": {
                "momentums": ("SIGMAS", {"forceInput": False}),
                "sigmas": ("SIGMAS", {"forceInput": False}),
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS","SIGMAS")
    RETURN_NAMES = ("momentums","sigmas")
    CATEGORY = "RES4LYF/schedulers"

    def main(self, steps_up, steps_down, rho_up, rho_down, s_min_start, s_max, s_min_end, sigmas=None, momentums=None):
        """Build an up-then-down polyexponential schedule (momentums carry the
        ramp direction as a sign), appending to any schedules already connected."""
        ramp_up = get_sigmas_polyexponential(steps_up, s_min_start, s_max, rho_up)[:-1]
        ramp_down = get_sigmas_polyexponential(steps_down, s_min_end, s_max, rho_down)[:-1]
        # polyexponential schedules descend; flip the first so the curve rises
        ramp_up = torch.flip(ramp_up, dims=[0])
        sigmas_new = torch.cat((ramp_up, ramp_down), dim=0)
        momentums_new = torch.cat((ramp_up, -1 * ramp_down), dim=0)
        sigmas = sigmas_new if sigmas is None else torch.cat([sigmas, sigmas_new])
        momentums = momentums_new if momentums is None else torch.cat([momentums, momentums_new])
        return (momentums, sigmas)
class tan_scheduler:
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "steps": ("INT", {"default": 20, "min": 0,"max": 100000,"step": 1}),
                "offset": ("FLOAT", {"default": 20, "min": 0,"max": 100000,"step": 0.1}),
                "slope": ("FLOAT", {"default": 20, "min": -100000,"max": 100000,"step": 0.1}),
                "start": ("FLOAT", {"default": 20, "min": -100000,"max": 100000,"step": 0.1}),
                "end": ("FLOAT", {"default": 20, "min": -100000,"max": 100000,"step": 0.1}),
                "sgm" : ("BOOLEAN", {"default": False}),
                "pad" : ("BOOLEAN", {"default": False}),
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/schedulers"

    def main(self, steps, slope, offset, start, end, sgm, pad):
        """Arctan-shaped schedule from `start` to `end` over `steps` steps;
        sgm evaluates one extra point then drops the last, pad appends a 0."""
        # normalized arctan curve in (0, 1), centered on `offset`
        curve = lambda t: ((2/pi) * atan(-slope * (t - offset)) + 1) / 2
        smax = curve(0)
        smin = curve(steps - 1)  # computed before the sgm step bump, as before
        srange = smax - smin
        sscale = start - end
        if sgm:
            steps += 1
        sigmas = [(curve(t) - smin) * (1/srange) * sscale + end for t in range(steps)]
        if sgm:
            sigmas = sigmas[:-1]
        if pad:
            sigmas = sigmas + [0]
        return (torch.tensor(sigmas),)
class tan_scheduler_2stage:
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "steps": ("INT", {"default": 40, "min": 0,"max": 100000,"step": 1}),
                "midpoint": ("INT", {"default": 20, "min": 0,"max": 100000,"step": 1}),
                "pivot_1": ("INT", {"default": 10, "min": 0,"max": 100000,"step": 1}),
                "pivot_2": ("INT", {"default": 30, "min": 0,"max": 100000,"step": 1}),
                "slope_1": ("FLOAT", {"default": 1, "min": -100000,"max": 100000,"step": 0.1}),
                "slope_2": ("FLOAT", {"default": 1, "min": -100000,"max": 100000,"step": 0.1}),
                "start": ("FLOAT", {"default": 1.0, "min": -100000,"max": 100000,"step": 0.1}),
                "middle": ("FLOAT", {"default": 0.5, "min": -100000,"max": 100000,"step": 0.1}),
                "end": ("FLOAT", {"default": 0.0, "min": -100000,"max": 100000,"step": 0.1}),
                "pad" : ("BOOLEAN", {"default": False}),
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    RETURN_NAMES = ("sigmas",)
    CATEGORY = "RES4LYF/schedulers"

    def get_tan_sigmas(self, steps, slope, pivot, start, end):
        """One arctan segment running from `start` to `end` over `steps` steps."""
        curve = lambda t: ((2/pi) * atan(-slope * (t - pivot)) + 1) / 2
        smin = curve(steps - 1)
        span = curve(0) - smin
        scale = start - end
        return [(curve(t) - smin) * (1/span) * scale + end for t in range(steps)]

    def main(self, steps, midpoint, start, middle, end, pivot_1, pivot_2, slope_1, slope_2, pad):
        """Two arctan segments joined at `midpoint`: start→middle, middle→end."""
        steps += 2
        stage_2_len = steps - midpoint
        stage_1_len = steps - stage_2_len
        # drop the final point of stage 1 so `middle` is not duplicated
        first = self.get_tan_sigmas(stage_1_len, slope_1, pivot_1, start, middle)[:-1]
        second = self.get_tan_sigmas(stage_2_len, slope_2, pivot_2 - stage_1_len, middle, end)
        if pad:
            second = second + [0]
        return (torch.tensor(first + second),)
class tan_scheduler_2stage_simple:
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "steps": ("INT", {"default": 40, "min": 0,"max": 100000,"step": 1}),
                "pivot_1": ("FLOAT", {"default": 1, "min": -100000,"max": 100000,"step": 0.01}),
                "pivot_2": ("FLOAT", {"default": 1, "min": -100000,"max": 100000,"step": 0.01}),
                "slope_1": ("FLOAT", {"default": 1, "min": -100000,"max": 100000,"step": 0.01}),
                "slope_2": ("FLOAT", {"default": 1, "min": -100000,"max": 100000,"step": 0.01}),
                "start": ("FLOAT", {"default": 1.0, "min": -100000,"max": 100000,"step": 0.01}),
                "middle": ("FLOAT", {"default": 0.5, "min": -100000,"max": 100000,"step": 0.01}),
                "end": ("FLOAT", {"default": 0.0, "min": -100000,"max": 100000,"step": 0.01}),
                "pad" : ("BOOLEAN", {"default": False}),
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    RETURN_NAMES = ("sigmas",)
    CATEGORY = "RES4LYF/schedulers"

    def get_tan_sigmas(self, steps, slope, pivot, start, end):
        """One arctan segment running from `start` to `end` over `steps` steps."""
        curve = lambda t: ((2/pi) * atan(-slope * (t - pivot)) + 1) / 2
        smin = curve(steps - 1)
        span = curve(0) - smin
        scale = start - end
        return [(curve(t) - smin) * (1/span) * scale + end for t in range(steps)]

    def main(self, steps, start=1.0, middle=0.5, end=0.0, pivot_1=0.6, pivot_2=0.6, slope_1=0.2, slope_2=0.2, pad=False, model_sampling=None):
        """Two-stage arctan schedule; pivots are given as fractions of the
        total step count, and slopes are normalized to a 40-step baseline."""
        steps += 2
        midpoint = int((steps * pivot_1 + steps * pivot_2) / 2)
        pivot_1 = int(steps * pivot_1)
        pivot_2 = int(steps * pivot_2)
        slope_1 = slope_1 / (steps / 40)
        slope_2 = slope_2 / (steps / 40)
        stage_2_len = steps - midpoint
        stage_1_len = steps - stage_2_len
        # drop the final point of stage 1 so `middle` is not duplicated
        first = self.get_tan_sigmas(stage_1_len, slope_1, pivot_1, start, middle)[:-1]
        second = self.get_tan_sigmas(stage_2_len, slope_2, pivot_2 - stage_1_len, middle, end)
        if pad:
            second = second + [0]
        return (torch.tensor(first + second),)
class linear_quadratic_advanced:
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "model": ("MODEL",),
                "steps": ("INT", {"default": 40, "min": 0,"max": 100000,"step": 1}),
                "denoise": ("FLOAT", {"default": 1.0, "min": -100000,"max": 100000,"step": 0.01}),
                "inflection_percent": ("FLOAT", {"default": 0.5, "min": 0,"max": 1,"step": 0.01}),
                "threshold_noise": ("FLOAT", {"default": 0.025, "min": 0.001,"max": 1.000,"step": 0.001}),
            },
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    RETURN_NAMES = ("sigmas",)
    CATEGORY = "RES4LYF/schedulers"

    def main(self, steps, denoise, inflection_percent, threshold_noise, model=None):
        """Thin wrapper around get_sigmas with the linear_quadratic scheduler."""
        sigmas = get_sigmas(model, "linear_quadratic", steps, denoise, 0.0, inflection_percent, threshold_noise)
        return (sigmas, )
class constant_scheduler:
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "steps": ("INT", {"default": 40, "min": 0,"max": 100000,"step": 1}),
                "value_start": ("FLOAT", {"default": 1.0, "min": -100000,"max": 100000,"step": 0.01}),
                "value_end": ("FLOAT", {"default": 0.0, "min": -100000,"max": 100000,"step": 0.01}),
                "cutoff_percent": ("FLOAT", {"default": 1.0, "min": 0,"max": 1,"step": 0.01}),
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    RETURN_NAMES = ("sigmas",)
    CATEGORY = "RES4LYF/schedulers"

    def main(self, steps, value_start, value_end, cutoff_percent):
        """Constant schedule at `value_start`, switching to `value_end` after
        `cutoff_percent` of the steps (steps+1 values total)."""
        cutoff_step = int(round(steps * cutoff_percent)) + 1
        head = torch.ones(cutoff_step) * value_start
        tail = torch.ones(steps + 1 - cutoff_step) * value_end
        return (torch.cat((head, tail), dim=0),)
class ClownScheduler:
    # Builds a per-step curve: optional constant start padding, then a
    # scheduler-derived (or linear) segment min-max-rescaled into
    # [start_value, end_value], then constant end padding.
    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "pad_start_value": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.01}),
                "start_value": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.01}),
                "end_value": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.01}),
                "pad_end_value": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.01}),
                "scheduler": (["constant"] + get_res4lyf_scheduler_list(), {"default": "beta57"},),
                "scheduler_start_step": ("INT", {"default": 0, "min": 0, "max": 10000}),
                "scheduler_end_step": ("INT", {"default": 30, "min": -1, "max": 10000}),
                "total_steps": ("INT", {"default": 100, "min": -1, "max": 10000}),
                "flip_schedule": ("BOOLEAN", {"default": False}),
            },
            "optional": {
                "model": ("MODEL", ),
            }
        }

    RETURN_TYPES = ("SIGMAS",)
    RETURN_NAMES = ("sigmas",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/schedulers"

    def create_callback(self, **kwargs):
        # Defer schedule construction until a model becomes available: the
        # model is injected into the captured kwargs when the callback fires.
        def callback(model):
            kwargs["model"] = model
            schedule, = self.prepare_schedule(**kwargs)
            return schedule
        return callback

    def main(self,
             model = None,
             pad_start_value : float = 1.0,
             start_value : float = 0.0,
             end_value : float = 1.0,
             pad_end_value = None,
             denoise : int = 1.0,
             scheduler = None,
             scheduler_start_step : int = 0,
             scheduler_end_step : int = 30,
             total_steps : int = 60,
             flip_schedule = False,
             ) -> Tuple[Tensor]:
        # NOTE(review): when model is None a callback is created but never
        # returned or attached (see the commented-out line below), and
        # `values` is then referenced unassigned at the return — this path
        # looks unfinished; confirm before relying on it.
        if model is None:
            callback = self.create_callback(pad_start_value = pad_start_value,
                                            start_value = start_value,
                                            end_value = end_value,
                                            pad_end_value = pad_end_value,
                                            scheduler = scheduler,
                                            start_step = scheduler_start_step,
                                            end_step = scheduler_end_step,
                                            flip_schedule = flip_schedule,
                                            )
        else:
            # NOTE(review): device is hard-coded to cuda; fails on CPU-only
            # hosts — confirm whether this should follow the model's device.
            default_dtype = torch.float64
            default_device = torch.device("cuda")
            # scheduler_end_step == -1 means "run to the end of total_steps"
            if scheduler_end_step == -1:
                scheduler_total_steps = total_steps - scheduler_start_step
            else:
                scheduler_total_steps = scheduler_end_step - scheduler_start_step
            if total_steps == -1:
                total_steps = scheduler_start_step + scheduler_end_step
            end_pad_steps = total_steps - scheduler_end_step
            if scheduler != "constant":
                values = get_sigmas(model, scheduler, scheduler_total_steps, denoise).to(dtype=default_dtype, device=default_device)
                # min-max rescale the scheduler output into [end_value, start_value]
                values = ((values - values.min()) * (start_value - end_value)) / (values.max() - values.min()) + end_value
            else:
                values = torch.linspace(start_value, end_value, scheduler_total_steps, dtype=default_dtype, device=default_device)
            if flip_schedule:
                values = torch.flip(values, dims=[0])
            # constant padding on both sides of the scheduled segment
            # NOTE(review): pad_end_value defaults to None, which torch.full
            # rejects — presumably the UI default of 0.0 always applies; verify.
            prepend = torch.full((scheduler_start_step,), pad_start_value, dtype=default_dtype, device=default_device)
            postpend = torch.full((end_pad_steps,), pad_end_value, dtype=default_dtype, device=default_device)
            values = torch.cat((prepend, values, postpend), dim=0)
        #ositive[0][1]['callback_regional'] = callback
        return (values,)

    def prepare_schedule(self,
                         model = None,
                         pad_start_value : float = 1.0,
                         start_value : float = 0.0,
                         end_value : float = 1.0,
                         pad_end_value = None,
                         weight_scheduler = None,
                         start_step : int = 0,
                         end_step : int = 30,
                         flip_schedule = False,
                         ) -> Tuple[Tensor]:
        # Stub: always returns (None,) — the deferred-callback path is not
        # implemented yet.
        default_dtype = torch.float64
        default_device = torch.device("cuda")
        return (None,)
def get_sigmas_simple_exponential(model, steps):
    """'simple'-style schedule sampled from the model's sigma table, multiplied
    by a linear 1 -> 0 ramp.

    Fix: the original computed torch.exp(torch.log(ramp)), an identity
    round-trip (log(0) = -inf maps back to 0 under exp); the redundant
    transform is removed.
    """
    s = model.model_sampling
    sigs = []
    # walk the model's sigma table backwards at a fixed stride
    ss = len(s.sigmas) / steps
    for x in range(steps):
        sigs += [float(s.sigmas[-(1 + int(x * ss))])]
    sigs += [0.0]
    sigs = torch.FloatTensor(sigs)
    ramp = torch.linspace(1, 0, steps + 1)
    return sigs * ramp
# Registry mapping extra scheduler names -> generator functions; merged into
# the scheduler choices exposed elsewhere in this node pack.
extra_schedulers = {
    "simple_exponential": get_sigmas_simple_exponential
}
def get_sigmas(model, scheduler, steps, denoise, shift=0.0, lq_inflection_percent=0.5, lq_threshold_noise=0.025): #adapted from comfyui
    """Compute a sigma schedule for `model` using the named `scheduler`.

    denoise < 1.0 stretches the schedule (int(steps/denoise) total) and then
    keeps only the final steps+1 values, so sampling starts at a lower noise
    level. An optional `shift` is applied to a deep copy of model_sampling.

    Fixes: the denoise <= 0 early-exit returned a 1-tuple while every other
    path returns a bare tensor — it now returns an empty tensor; the bare
    `except:` is narrowed to `except Exception:` so KeyboardInterrupt is not
    swallowed.
    """
    total_steps = steps
    if denoise < 1.0:
        if denoise <= 0.0:
            return torch.FloatTensor([])
        total_steps = int(steps/denoise)
    # model may be a ComfyUI ModelPatcher, a raw model, or a sampler wrapper
    try:
        model_sampling = model.get_model_object("model_sampling")
    except Exception:
        if hasattr(model, "model"):
            model_sampling = model.model.model_sampling
        elif hasattr(model, "inner_model"):
            model_sampling = model.inner_model.inner_model.model_sampling
        else:
            raise Exception("get_sigmas: Could not get model_sampling")
    if shift > 1e-6:
        # deep-copy so the caller's model_sampling is not mutated
        import copy
        model_sampling = copy.deepcopy(model_sampling)
        model_sampling.set_parameters(shift=shift)
        RESplain("model_sampling shift manually set to " + str(shift), debug=True)
    if scheduler == "beta57":
        sigmas = comfy.samplers.beta_scheduler(model_sampling, total_steps, alpha=0.5, beta=0.7).cpu()
    elif scheduler == "linear_quadratic":
        linear_steps = int(total_steps * lq_inflection_percent)
        sigmas = comfy.samplers.linear_quadratic_schedule(model_sampling, total_steps, threshold_noise=lq_threshold_noise, linear_steps=linear_steps).cpu()
    else:
        sigmas = comfy.samplers.calculate_sigmas(model_sampling, scheduler, total_steps).cpu()
    # keep only the tail when denoise stretched the schedule
    sigmas = sigmas[-(steps + 1):]
    return sigmas
#/// Adam Kormendi /// Inspired by Unreal Engine Maths ///
# Sigmoid Function
class sigmas_sigmoid:
    """Reshape a sigma schedule through a sigmoid-family activation.

    The input is shifted by `offset`, amplified by `gain`, passed through the
    chosen activation, and optionally rescaled back into the original range.
    """
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "variant": (["logistic", "tanh", "softsign", "hardswish", "mish", "swish"], {"default": "logistic"}),
                "gain": ("FLOAT", {"default": 1.0, "min": 0.01, "max": 10.0, "step": 0.01}),
                "offset": ("FLOAT", {"default": 0.0, "min": -10.0, "max": 10.0, "step": 0.01}),
                "normalize_output": ("BOOLEAN", {"default": True})
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas, variant, gain, offset, normalize_output):
        # Shift first, then amplify, before applying the activation.
        scaled = gain * (sigmas + offset)

        if variant == "logistic":
            shaped = 1.0 / (1.0 + torch.exp(-scaled))
        elif variant == "tanh":
            shaped = torch.tanh(scaled)
        elif variant == "softsign":
            shaped = scaled / (1.0 + torch.abs(scaled))
        elif variant == "hardswish":
            clipped = torch.minimum(torch.maximum(scaled + 3, torch.zeros_like(scaled)), torch.tensor(6.0))
            shaped = scaled * clipped / 6.0
        elif variant == "mish":
            shaped = scaled * torch.tanh(torch.log(1.0 + torch.exp(scaled)))
        elif variant == "swish":
            shaped = scaled * torch.sigmoid(scaled)

        if normalize_output:
            # Rescale back into [min(sigmas), max(sigmas)].
            lo, hi = shaped.min(), shaped.max()
            shaped = ((shaped - lo) / (hi - lo)) * (sigmas.max() - sigmas.min()) + sigmas.min()
        return (shaped,)
# ----- Easing Function -----
class sigmas_easing:
    """Remap a sigma schedule through a classic animation easing curve.

    The schedule is (optionally) normalized to [0, 1], raised to `strength`,
    run through the chosen easing function/mode, and (optionally) rescaled
    back to the original sigma range.
    """
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "easing_type": (["sine", "quad", "cubic", "quart", "quint", "expo", "circ",
                                "back", "elastic", "bounce"], {"default": "cubic"}),
                "easing_mode": (["in", "out", "in_out"], {"default": "in_out"}),
                "normalize_input": ("BOOLEAN", {"default": True}),
                "normalize_output": ("BOOLEAN", {"default": True}),
                "strength": ("FLOAT", {"default": 1.0, "min": 0.1, "max": 10.0, "step": 0.1})
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas, easing_type, easing_mode, normalize_input, normalize_output, strength):
        """Apply the easing curve; returns a 1-tuple (SIGMAS,)."""
        # The easing formulas are only defined on [0, 1]: either normalize the
        # input into that range or clamp it.
        if normalize_input:
            t = (sigmas - sigmas.min()) / (sigmas.max() - sigmas.min())
        else:
            t = torch.clamp(sigmas, 0.0, 1.0)

        # Bias the parameter before easing (strength > 1 pulls values toward 0).
        # (A previously-unused `t_orig = t.clone()` was removed here.)
        t = t ** strength

        # Dispatch on easing mode.
        if easing_mode == "in":
            result = self._ease_in(t, easing_type)
        elif easing_mode == "out":
            result = self._ease_out(t, easing_type)
        else:  # in_out
            result = self._ease_in_out(t, easing_type)

        # Rescale eased values to the original sigma range, or to [0, 1] when
        # the input was already treated as [0, 1].
        if normalize_output:
            if normalize_input:
                result = ((result - result.min()) / (result.max() - result.min())) * (sigmas.max() - sigmas.min()) + sigmas.min()
            else:
                result = ((result - result.min()) / (result.max() - result.min()))
        return (result,)

    def _ease_in(self, t, easing_type):
        """Ease-in variants (slow start). t is expected in [0, 1]."""
        if easing_type == "sine":
            return 1 - torch.cos((t * math.pi) / 2)
        elif easing_type == "quad":
            return t * t
        elif easing_type == "cubic":
            return t * t * t
        elif easing_type == "quart":
            return t * t * t * t
        elif easing_type == "quint":
            return t * t * t * t * t
        elif easing_type == "expo":
            # Exact zero at t == 0 (2^-10 would otherwise leave a small offset).
            return torch.where(t == 0, torch.zeros_like(t), torch.pow(2, 10 * t - 10))
        elif easing_type == "circ":
            return 1 - torch.sqrt(1 - torch.pow(t, 2))
        elif easing_type == "back":
            # c1 is the standard "overshoot" constant for back easing.
            c1 = 1.70158
            c3 = c1 + 1
            return c3 * t * t * t - c1 * t * t
        elif easing_type == "elastic":
            c4 = (2 * math.pi) / 3
            # Pin the endpoints exactly; the damped sine handles the interior.
            return torch.where(
                t == 0,
                torch.zeros_like(t),
                torch.where(
                    t == 1,
                    torch.ones_like(t),
                    -torch.pow(2, 10 * t - 10) * torch.sin((t * 10 - 10.75) * c4)
                )
            )
        elif easing_type == "bounce":
            # Ease-in bounce is the mirrored ease-out bounce.
            return 1 - self._ease_out_bounce(1 - t)

    def _ease_out(self, t, easing_type):
        """Ease-out variants (slow finish). t is expected in [0, 1]."""
        if easing_type == "sine":
            return torch.sin((t * math.pi) / 2)
        elif easing_type == "quad":
            return 1 - (1 - t) * (1 - t)
        elif easing_type == "cubic":
            return 1 - torch.pow(1 - t, 3)
        elif easing_type == "quart":
            return 1 - torch.pow(1 - t, 4)
        elif easing_type == "quint":
            return 1 - torch.pow(1 - t, 5)
        elif easing_type == "expo":
            return torch.where(t == 1, torch.ones_like(t), 1 - torch.pow(2, -10 * t))
        elif easing_type == "circ":
            return torch.sqrt(1 - torch.pow(t - 1, 2))
        elif easing_type == "back":
            c1 = 1.70158
            c3 = c1 + 1
            return 1 + c3 * torch.pow(t - 1, 3) + c1 * torch.pow(t - 1, 2)
        elif easing_type == "elastic":
            c4 = (2 * math.pi) / 3
            return torch.where(
                t == 0,
                torch.zeros_like(t),
                torch.where(
                    t == 1,
                    torch.ones_like(t),
                    torch.pow(2, -10 * t) * torch.sin((t * 10 - 0.75) * c4) + 1
                )
            )
        elif easing_type == "bounce":
            return self._ease_out_bounce(t)

    def _ease_in_out(self, t, easing_type):
        """Symmetric ease-in/ease-out variants. t is expected in [0, 1]."""
        if easing_type == "sine":
            return -(torch.cos(math.pi * t) - 1) / 2
        elif easing_type == "quad":
            return torch.where(t < 0.5, 2 * t * t, 1 - torch.pow(-2 * t + 2, 2) / 2)
        elif easing_type == "cubic":
            return torch.where(t < 0.5, 4 * t * t * t, 1 - torch.pow(-2 * t + 2, 3) / 2)
        elif easing_type == "quart":
            return torch.where(t < 0.5, 8 * t * t * t * t, 1 - torch.pow(-2 * t + 2, 4) / 2)
        elif easing_type == "quint":
            return torch.where(t < 0.5, 16 * t * t * t * t * t, 1 - torch.pow(-2 * t + 2, 5) / 2)
        elif easing_type == "expo":
            return torch.where(
                t < 0.5,
                torch.pow(2, 20 * t - 10) / 2,
                (2 - torch.pow(2, -20 * t + 10)) / 2
            )
        elif easing_type == "circ":
            return torch.where(
                t < 0.5,
                (1 - torch.sqrt(1 - torch.pow(2 * t, 2))) / 2,
                (torch.sqrt(1 - torch.pow(-2 * t + 2, 2)) + 1) / 2
            )
        elif easing_type == "back":
            c1 = 1.70158
            c2 = c1 * 1.525
            return torch.where(
                t < 0.5,
                (torch.pow(2 * t, 2) * ((c2 + 1) * 2 * t - c2)) / 2,
                (torch.pow(2 * t - 2, 2) * ((c2 + 1) * (t * 2 - 2) + c2) + 2) / 2
            )
        elif easing_type == "elastic":
            c5 = (2 * math.pi) / 4.5
            return torch.where(
                t < 0.5,
                -(torch.pow(2, 20 * t - 10) * torch.sin((20 * t - 11.125) * c5)) / 2,
                (torch.pow(2, -20 * t + 10) * torch.sin((20 * t - 11.125) * c5)) / 2 + 1
            )
        elif easing_type == "bounce":
            return torch.where(
                t < 0.5,
                (1 - self._ease_out_bounce(1 - 2 * t)) / 2,
                (1 + self._ease_out_bounce(2 * t - 1)) / 2
            )

    def _ease_out_bounce(self, t):
        """Piecewise-parabolic bounce (four arcs over [0, 1])."""
        n1 = 7.5625
        d1 = 2.75
        # Masks select which parabolic arc each element falls on.
        mask1 = t < 1 / d1
        mask2 = t < 2 / d1
        mask3 = t < 2.5 / d1
        result = torch.zeros_like(t)
        result = torch.where(mask1, n1 * t * t, result)
        result = torch.where(mask2 & ~mask1, n1 * (t - 1.5 / d1) * (t - 1.5 / d1) + 0.75, result)
        result = torch.where(mask3 & ~mask2, n1 * (t - 2.25 / d1) * (t - 2.25 / d1) + 0.9375, result)
        result = torch.where(~mask3, n1 * (t - 2.625 / d1) * (t - 2.625 / d1) + 0.984375, result)
        return result
# ----- Hyperbolic Function -----
class sigmas_hyperbolic:
    """Pass a sigma schedule through a hyperbolic (or inverse-hyperbolic) function."""
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "function": (["sinh", "cosh", "tanh", "asinh", "acosh", "atanh"], {"default": "tanh"}),
                "scale": ("FLOAT", {"default": 1.0, "min": 0.01, "max": 10.0, "step": 0.01}),
                "normalize_output": ("BOOLEAN", {"default": True})
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas, function, scale, normalize_output):
        # Scale the input before applying the chosen function.
        scaled = sigmas * scale

        if function == "sinh":
            shaped = torch.sinh(scaled)
        elif function == "cosh":
            shaped = torch.cosh(scaled)
        elif function == "tanh":
            shaped = torch.tanh(scaled)
        elif function == "asinh":
            shaped = torch.asinh(scaled)
        elif function == "acosh":
            # acosh is only defined on [1, inf): clamp to stay in-domain.
            shaped = torch.acosh(torch.clamp(scaled, min=1.0))
        elif function == "atanh":
            # atanh is only defined on (-1, 1): clamp just inside the open interval.
            shaped = torch.atanh(torch.clamp(scaled, min=-0.99, max=0.99))

        if normalize_output:
            # Rescale back into [min(sigmas), max(sigmas)].
            lo, hi = shaped.min(), shaped.max()
            shaped = ((shaped - lo) / (hi - lo)) * (sigmas.max() - sigmas.min()) + sigmas.min()
        return (shaped,)
# ----- Gaussian Distribution Function -----
class sigmas_gaussian:
    """Transform a sigma schedule using Gaussian-distribution operations."""
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "mean": ("FLOAT", {"default": 0.0, "min": -10.0, "max": 10.0, "step": 0.01}),
                "std": ("FLOAT", {"default": 1.0, "min": 0.01, "max": 10.0, "step": 0.01}),
                "operation": (["pdf", "cdf", "inverse_cdf", "transform", "modulate"], {"default": "transform"}),
                "normalize_output": ("BOOLEAN", {"default": True})
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas, mean, std, operation, normalize_output):
        """Apply the chosen Gaussian operation; returns a 1-tuple (SIGMAS,)."""
        if operation == "pdf":
            # Gaussian probability density evaluated at each sigma.
            result = (1 / (std * math.sqrt(2 * math.pi))) * torch.exp(-0.5 * ((sigmas - mean) / std) ** 2)
        elif operation == "cdf":
            # Cumulative distribution function via the error function.
            result = 0.5 * (1 + torch.erf((sigmas - mean) / (std * math.sqrt(2))))
        elif operation == "inverse_cdf":
            # Quantile function. Squeeze inputs into [0.01, 0.99] first to
            # avoid the infinities of erfinv at 0 and 1.
            normalized = ((sigmas - sigmas.min()) / (sigmas.max() - sigmas.min())) * 0.98 + 0.01
            # BUGFIX: was torch.sqrt(2), which raises TypeError (torch.sqrt
            # expects a tensor); the scalar constant needs math.sqrt(2).
            result = mean + std * math.sqrt(2) * torch.erfinv(2 * normalized - 1)
        elif operation == "transform":
            # Standardize (z-score), then rescale to the requested mean/std.
            z = (sigmas - sigmas.mean()) / sigmas.std()
            result = z * std + mean
        elif operation == "modulate":
            # Modulate with a Gaussian bump centered at `mean`.
            result = sigmas * torch.exp(-0.5 * ((sigmas - mean) / std) ** 2)

        if normalize_output:
            # Rescale back into [min(sigmas), max(sigmas)].
            result = ((result - result.min()) / (result.max() - result.min())) * (sigmas.max() - sigmas.min()) + sigmas.min()
        return (result,)
# ----- Percentile Function -----
class sigmas_percentile:
    """Map a percentile band of the schedule onto a target value range."""
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "percentile_min": ("FLOAT", {"default": 5.0, "min": 0.0, "max": 49.0, "step": 0.1}),
                "percentile_max": ("FLOAT", {"default": 95.0, "min": 51.0, "max": 100.0, "step": 0.1}),
                "target_min": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.01}),
                "target_max": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.01}),
                "clip_outliers": ("BOOLEAN", {"default": True})
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas, percentile_min, percentile_max, target_min, target_max, clip_outliers):
        # Percentiles are computed in numpy, then moved back onto the
        # schedule's device/dtype.
        values_np = sigmas.cpu().numpy()
        lo = torch.tensor(np.percentile(values_np, percentile_min), device=sigmas.device, dtype=sigmas.dtype)
        hi = torch.tensor(np.percentile(values_np, percentile_max), device=sigmas.device, dtype=sigmas.dtype)

        # Linearly map [lo, hi] -> [target_min, target_max]; values outside the
        # band either get clamped first or extrapolate past the targets.
        source = torch.clamp(sigmas, lo, hi) if clip_outliers else sigmas
        result = ((source - lo) / (hi - lo)) * (target_max - target_min) + target_min
        return (result,)
# ----- Kernel Smooth Function -----
class sigmas_kernel_smooth:
    """Smooth a sigma schedule with a 1-D convolution kernel."""
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "kernel": (["gaussian", "box", "triangle", "epanechnikov", "cosine"], {"default": "gaussian"}),
                "kernel_size": ("INT", {"default": 5, "min": 3, "max": 51, "step": 2}), # Must be odd
                "sigma": ("FLOAT", {"default": 1.0, "min": 0.1, "max": 10.0, "step": 0.1}),
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas, kernel, kernel_size, sigma):
        # Round even sizes up so the kernel has a well-defined center tap.
        if kernel_size % 2 == 0:
            kernel_size += 1
        half = kernel_size // 2

        def centered_axis():
            # Integer offsets -half..half on the schedule's device/dtype.
            return torch.linspace(-half, half, kernel_size, device=sigmas.device, dtype=sigmas.dtype)

        # Build normalized kernel weights (each variant sums to 1).
        if kernel == "gaussian":
            kernel_1d = self._gaussian_kernel(kernel_size, sigma)
        elif kernel == "box":
            kernel_1d = torch.ones(kernel_size, device=sigmas.device, dtype=sigmas.dtype) / kernel_size
        elif kernel == "triangle":
            weights = 1.0 - torch.abs(centered_axis()) / half
            kernel_1d = weights / weights.sum()
        elif kernel == "epanechnikov":
            u = centered_axis() / half  # scaled to [-1, 1]
            weights = 0.75 * (1 - u**2)
            kernel_1d = weights / weights.sum()
        elif kernel == "cosine":
            u = centered_axis() / half * (math.pi / 2)  # scaled to [-pi/2, pi/2]
            weights = torch.cos(u)
            kernel_1d = weights / weights.sum()

        # Reflect-pad so the convolution returns the original length.
        padded = F.pad(sigmas.unsqueeze(0).unsqueeze(0), (half, half), mode='reflect')
        smoothed = F.conv1d(padded, kernel_1d.unsqueeze(0).unsqueeze(0))
        return (smoothed.squeeze(),)

    def _gaussian_kernel(self, kernel_size, sigma):
        # Normalized 1-D Gaussian weights.
        offsets = torch.linspace(-(kernel_size // 2), kernel_size // 2, kernel_size)
        weights = torch.exp(-offsets**2 / (2 * sigma**2))
        return weights / weights.sum()
# ----- Quantile Normalization -----
class sigmas_quantile_norm:
    """Quantile-normalize a sigma schedule onto a target distribution.

    Each sigma is replaced by the value at its fractional rank within the
    chosen target distribution (uniform, normal, exponential, logistic, or a
    user-supplied reference schedule).
    """
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "target_distribution": (["uniform", "normal", "exponential", "logistic", "custom"], {"default": "uniform"}),
                "num_quantiles": ("INT", {"default": 100, "min": 10, "max": 1000, "step": 10}),
            },
            "optional": {
                "reference_sigmas": ("SIGMAS", {"forceInput": False}),
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas, target_distribution, num_quantiles, reference_sigmas=None):
        """Map each sigma to its quantile in the target distribution; returns (SIGMAS,)."""
        sigmas_np = sigmas.cpu().numpy()
        sorted_values = np.sort(sigmas_np)

        # Fractional rank of every value, vectorized. (The previous
        # per-element loop with np.searchsorted was O(n^2).)
        ranks = np.searchsorted(sorted_values, sigmas_np, side='right') / len(sorted_values)

        # Generate the sorted target distribution to sample from.
        if target_distribution == "uniform":
            # Evenly spaced between min and max of the input.
            target_values = np.linspace(sigmas_np.min(), sigmas_np.max(), num_quantiles)
        elif target_distribution == "normal":
            # NOTE(review): unseeded np.random draws make this branch
            # nondeterministic across runs — confirm that is intended.
            target_values = np.random.normal(sigmas_np.mean(), sigmas_np.std(), num_quantiles)
            target_values.sort()
        elif target_distribution == "exponential":
            # lambda = 1/mean, guarded against a zero/negative mean.
            target_values = np.random.exponential(1/max(1e-6, sigmas_np.mean()), num_quantiles)
            target_values.sort()
        elif target_distribution == "logistic":
            target_values = np.random.logistic(0, 1, num_quantiles)
            target_values.sort()
            # Rescale the logistic draw to span the input's range.
            target_values = (target_values - target_values.min()) / (target_values.max() - target_values.min())
            target_values = target_values * (sigmas_np.max() - sigmas_np.min()) + sigmas_np.min()
        elif target_distribution == "custom" and reference_sigmas is not None:
            # Use the provided reference schedule as the target distribution.
            reference_np = reference_sigmas.cpu().numpy()
            target_values = np.sort(reference_np)
            if len(target_values) < num_quantiles:
                # Upsample by linear interpolation.
                old_indices = np.linspace(0, len(target_values)-1, len(target_values))
                new_indices = np.linspace(0, len(target_values)-1, num_quantiles)
                target_values = np.interp(new_indices, old_indices, target_values)
            else:
                # Downsample by even index selection.
                indices = np.linspace(0, len(target_values)-1, num_quantiles, dtype=int)
                target_values = target_values[indices]
        else:
            # "custom" without a reference falls back to uniform.
            target_values = np.linspace(sigmas_np.min(), sigmas_np.max(), num_quantiles)

        # Look up each rank in the target distribution's quantiles.
        result_np = np.interp(ranks, np.linspace(0, 1, len(target_values)), target_values)
        result = torch.tensor(result_np, device=sigmas.device, dtype=sigmas.dtype)
        return (result,)
# ----- Adaptive Step Function -----
class sigmas_adaptive_step:
    """Redistribute a sigma schedule so step density follows a local signal.

    A per-step size is derived from the schedule itself (gradient, curvature,
    importance, or value density), rescaled into [min_step, max_step],
    accumulated into positions spanning the original range, and optionally
    resampled to `target_steps` values.
    """
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "adaptation_type": (["gradient", "curvature", "importance", "density"], {"default": "gradient"}),
                "sensitivity": ("FLOAT", {"default": 1.0, "min": 0.1, "max": 10.0, "step": 0.1}),
                "min_step": ("FLOAT", {"default": 0.01, "min": 0.0001, "max": 1.0, "step": 0.01}),
                "max_step": ("FLOAT", {"default": 1.0, "min": 0.01, "max": 10.0, "step": 0.01}),
                "target_steps": ("INT", {"default": 0, "min": 0, "max": 1000, "step": 1}),
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas, adaptation_type, sensitivity, min_step, max_step, target_steps):
        """Compute adaptive positions (and optionally resample); returns (SIGMAS,)."""
        if len(sigmas) <= 1:
            return (sigmas,)

        # --- derive a step-size signal (length n-1); smaller where the chosen
        # signal is large, so those regions get denser sampling ---
        if adaptation_type == "gradient":
            # First difference magnitude.
            grads = torch.abs(sigmas[1:] - sigmas[:-1])
            if grads.max() > grads.min():
                norm_grads = (grads - grads.min()) / (grads.max() - grads.min())
            else:
                norm_grads = torch.ones_like(grads)
            step_sizes = 1.0 / (1.0 + norm_grads * sensitivity)
        elif adaptation_type == "curvature":
            # Second-difference magnitude (padded to length n-1).
            if len(sigmas) >= 3:
                second_diff = sigmas[2:] - 2*sigmas[1:-1] + sigmas[:-2]
                second_diff = F.pad(second_diff, (0, 1), mode='replicate')
            else:
                second_diff = torch.zeros_like(sigmas[:-1])
            abs_curve = torch.abs(second_diff)
            if abs_curve.max() > abs_curve.min():
                norm_curve = (abs_curve - abs_curve.min()) / (abs_curve.max() - abs_curve.min())
            else:
                norm_curve = torch.ones_like(abs_curve)
            step_sizes = 1.0 / (1.0 + norm_curve * sensitivity)
        elif adaptation_type == "importance":
            # Distance from the mean: extremes are treated as important.
            centered = torch.abs(sigmas - sigmas.mean())
            if centered.max() > centered.min():
                importance = (centered - centered.min()) / (centered.max() - centered.min())
            else:
                importance = torch.ones_like(centered)
            step_sizes = 1.0 / (1.0 + importance[:-1] * sensitivity)
        elif adaptation_type == "density":
            # Histogram-based density estimate; denser value regions get
            # smaller steps.
            sigma_min, sigma_max = sigmas.min(), sigmas.max()
            bins = 20
            hist = torch.histc(sigmas, bins=bins, min=sigma_min, max=sigma_max)
            hist = hist / hist.sum()
            bin_indices = torch.floor((sigmas - sigma_min) / (sigma_max - sigma_min) * (bins-1)).long()
            bin_indices = torch.clamp(bin_indices, 0, bins-1)
            densities = hist[bin_indices]
            step_sizes = 1.0 / (1.0 + densities[:-1] * sensitivity)

        # --- rescale step sizes into [min_step, max_step] ---
        if step_sizes.max() > step_sizes.min():
            step_sizes = (step_sizes - step_sizes.min()) / (step_sizes.max() - step_sizes.min())
            step_sizes = step_sizes * (max_step - min_step) + min_step
        else:
            step_sizes = torch.ones_like(step_sizes) * min_step

        # Accumulate into positions and renormalize onto [sigmas[0], sigmas[-1]].
        positions = torch.cat([torch.tensor([0.0], device=step_sizes.device), torch.cumsum(step_sizes, dim=0)])
        positions = positions / positions[-1] * (sigmas[-1] - sigmas[0]) + sigmas[0]

        if target_steps > 0:
            # Resample the adaptive positions at evenly spaced query points.
            # NOTE(review): torch.searchsorted assumes `positions` is ascending;
            # a strictly-descending sigma schedule yields descending positions
            # after the rescale above — confirm intended usage.
            new_positions = torch.linspace(sigmas[0], sigmas[-1], target_steps, device=sigmas.device)
            new_sigmas = torch.zeros_like(new_positions)
            for i, pos in enumerate(new_positions):
                # Find enclosing original positions.
                idx = torch.searchsorted(positions, pos)
                idx = torch.clamp(idx, 1, len(positions)-1)
                # Linear interpolation between the two bracketing samples.
                t = (pos - positions[idx-1]) / (positions[idx] - positions[idx-1])
                # BUGFIX: the second term previously used sigmas[idx-1] again,
                # collapsing the interpolation to a step function.
                new_sigmas[i] = sigmas[idx-1] * (1-t) + sigmas[idx] * t
            result = new_sigmas
        else:
            result = positions
        return (result,)
# ----- Chaos Function -----
class sigmas_chaos:
    """Iterate a 1-D chaotic map over a sigma schedule."""
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "system": (["logistic", "henon", "tent", "sine", "cubic"], {"default": "logistic"}),
                "parameter": ("FLOAT", {"default": 3.9, "min": 0.1, "max": 5.0, "step": 0.01}),
                "iterations": ("INT", {"default": 10, "min": 1, "max": 100, "step": 1}),
                "normalize_output": ("BOOLEAN", {"default": True}),
                "use_as_seed": ("BOOLEAN", {"default": False})
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas, system, parameter, iterations, normalize_output, use_as_seed):
        if use_as_seed:
            # Seed the map with the schedule itself, normalized to [0, 1].
            state = (sigmas - sigmas.min()) / (sigmas.max() - sigmas.min())
        else:
            # Seed each position with its fractional index for variety.
            state = torch.arange(len(sigmas), device=sigmas.device, dtype=sigmas.dtype) / len(sigmas)

        # Pick the update rule once, then iterate it.
        if system == "logistic":
            step = lambda v: parameter * v * (1 - v)          # x' = r x (1 - x)
        elif system == "henon":
            step = lambda v: 1 - parameter * v**2             # 1-D Henon variant
        elif system == "tent":
            step = lambda v: torch.where(v < 0.5, parameter * v, parameter * (1 - v))
        elif system == "sine":
            step = lambda v: parameter * torch.sin(math.pi * v)
        elif system == "cubic":
            step = lambda v: parameter * v * (1 - v**2)
        else:
            step = lambda v: v  # unknown system: leave the state untouched

        for _ in range(iterations):
            state = step(state)

        if normalize_output:
            # Rescale back into [min(sigmas), max(sigmas)].
            span = sigmas.max() - sigmas.min()
            result = ((state - state.min()) / (state.max() - state.min())) * span + sigmas.min()
        else:
            result = state
        return (result,)
# ----- Reaction Diffusion Function -----
class sigmas_reaction_diffusion:
    """Evolve a sigma schedule as a 1-D two-species reaction-diffusion system.

    The schedule seeds the activator field `a` (normalized to [0, 1]); the
    inhibitor field is its complement `b = 1 - a`. Both fields are integrated
    with explicit Euler steps using a periodic-boundary Laplacian, and the
    activator field is returned (optionally rescaled to the input range).
    """
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        # param_a / param_b are only used by fitzhugh_nagumo and brusselator;
        # gray_scott uses hard-coded feed/kill rates (see main()).
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "system": (["gray_scott", "fitzhugh_nagumo", "brusselator"], {"default": "gray_scott"}),
                "iterations": ("INT", {"default": 10, "min": 1, "max": 100, "step": 1}),
                "dt": ("FLOAT", {"default": 0.1, "min": 0.01, "max": 1.0, "step": 0.01}),
                "param_a": ("FLOAT", {"default": 0.04, "min": 0.01, "max": 0.1, "step": 0.001}),
                "param_b": ("FLOAT", {"default": 0.06, "min": 0.01, "max": 0.1, "step": 0.001}),
                "diffusion_a": ("FLOAT", {"default": 0.1, "min": 0.01, "max": 1.0, "step": 0.01}),
                "diffusion_b": ("FLOAT", {"default": 0.05, "min": 0.01, "max": 1.0, "step": 0.01}),
                "normalize_output": ("BOOLEAN", {"default": True})
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas, system, iterations, dt, param_a, param_b, diffusion_a, diffusion_b, normalize_output):
        """Run `iterations` Euler steps of the chosen system; returns (SIGMAS,)."""
        # Initialize a and b based on sigmas: a in [0, 1], b is its complement.
        a = (sigmas - sigmas.min()) / (sigmas.max() - sigmas.min())
        b = 1.0 - a

        # Pad for diffusion calculation (periodic boundary).
        a_pad = F.pad(a.unsqueeze(0).unsqueeze(0), (1, 1), mode='circular').squeeze()
        b_pad = F.pad(b.unsqueeze(0).unsqueeze(0), (1, 1), mode='circular').squeeze()

        # Simple 1D reaction-diffusion (explicit Euler integration).
        for _ in range(iterations):
            # Discrete Laplacian (diffusion term): left + right - 2*center.
            laplacian_a = a_pad[:-2] + a_pad[2:] - 2 * a
            laplacian_b = b_pad[:-2] + b_pad[2:] - 2 * b

            if system == "gray_scott":
                # Gray-Scott model for pattern formation:
                # a is "U" (activator), b is "V" (inhibitor).
                feed = 0.055  # feed rate (fixed; not exposed as a parameter)
                kill = 0.062  # kill rate (fixed; not exposed as a parameter)
                # Update equations.
                a_new = a + dt * (diffusion_a * laplacian_a - a * b**2 + feed * (1 - a))
                b_new = b + dt * (diffusion_b * laplacian_b + a * b**2 - (feed + kill) * b)
            elif system == "fitzhugh_nagumo":
                # FitzHugh-Nagumo model (simplified):
                # a is the membrane potential, b is the recovery variable.
                # Update equations.
                a_new = a + dt * (diffusion_a * laplacian_a + a - a**3 - b + param_a)
                b_new = b + dt * (diffusion_b * laplacian_b + param_b * (a - b))
            elif system == "brusselator":
                # Brusselator model: a is U, b is V.
                # Update equations.
                a_new = a + dt * (diffusion_a * laplacian_a + 1 - (param_b + 1) * a + param_a * a**2 * b)
                b_new = b + dt * (diffusion_b * laplacian_b + param_b * a - param_a * a**2 * b)

            # Commit the step and re-pad so the next Laplacian sees new state.
            a, b = a_new, b_new
            a_pad = F.pad(a.unsqueeze(0).unsqueeze(0), (1, 1), mode='circular').squeeze()
            b_pad = F.pad(b.unsqueeze(0).unsqueeze(0), (1, 1), mode='circular').squeeze()

        # Use the activator component as the result.
        result = a

        # Normalize output back into [min(sigmas), max(sigmas)] if requested.
        if normalize_output:
            result = ((result - result.min()) / (result.max() - result.min())) * (sigmas.max() - sigmas.min()) + sigmas.min()
        return (result,)
# ----- Attractor Function -----
class sigmas_attractor:
    """Evolve a sigma schedule through a 3-D chaotic attractor.

    The normalized schedule seeds the x component; shifted copies seed y and z.
    The chosen attractor's ODEs are integrated with explicit Euler steps and
    one component (or the magnitude) is returned.
    """
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "attractor": (["lorenz", "rossler", "aizawa", "chen", "thomas"], {"default": "lorenz"}),
                "iterations": ("INT", {"default": 5, "min": 1, "max": 50, "step": 1}),
                "dt": ("FLOAT", {"default": 0.01, "min": 0.001, "max": 0.1, "step": 0.001}),
                "component": (["x", "y", "z", "magnitude"], {"default": "x"}),
                "normalize_output": ("BOOLEAN", {"default": True})
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas, attractor, iterations, dt, component, normalize_output):
        """Integrate the attractor for `iterations` Euler steps; returns (SIGMAS,)."""
        # Initialize 3D state from sigmas.
        n = len(sigmas)
        # Normalize sigmas to [-1, 1], a reasonable range for the attractors.
        norm_sigmas = (sigmas - sigmas.min()) / (sigmas.max() - sigmas.min()) * 2.0 - 1.0

        # Create initial state: x is the schedule; y and z are rolled copies so
        # the three components start decorrelated.
        x = norm_sigmas
        y = torch.roll(norm_sigmas, 1)  # Shifted version for variety
        z = torch.roll(norm_sigmas, 2)  # Another shifted version

        # Classic parameter choices for each attractor.
        if attractor == "lorenz":
            sigma, rho, beta = 10.0, 28.0, 8.0/3.0
        elif attractor == "rossler":
            a, b, c = 0.2, 0.2, 5.7
        elif attractor == "aizawa":
            a, b, c, d, e, f = 0.95, 0.7, 0.6, 3.5, 0.25, 0.1
        elif attractor == "chen":
            a, b, c = 5.0, -10.0, -0.38
        elif attractor == "thomas":
            b = 0.208186

        # Run the attractor dynamics (explicit Euler).
        for _ in range(iterations):
            if attractor == "lorenz":
                # Lorenz attractor
                dx = sigma * (y - x)
                dy = x * (rho - z) - y
                dz = x * y - beta * z
            elif attractor == "rossler":
                # Rössler attractor
                dx = -y - z
                dy = x + a * y
                dz = b + z * (x - c)
            elif attractor == "aizawa":
                # Aizawa attractor
                dx = (z - b) * x - d * y
                dy = d * x + (z - b) * y
                dz = c + a * z - z**3/3 - (x**2 + y**2) * (1 + e * z) + f * z * x**3
            elif attractor == "chen":
                # Chen attractor
                dx = a * (y - x)
                dy = (c - a) * x - x * z + c * y
                dz = x * y - b * z
            elif attractor == "thomas":
                # Thomas attractor (cyclically symmetric)
                dx = -b * x + torch.sin(y)
                dy = -b * y + torch.sin(z)
                dz = -b * z + torch.sin(x)

            # Euler update of the state.
            x = x + dt * dx
            y = y + dt * dy
            z = z + dt * dz

        # Select which component of the trajectory to return.
        if component == "x":
            result = x
        elif component == "y":
            result = y
        elif component == "z":
            result = z
        elif component == "magnitude":
            result = torch.sqrt(x**2 + y**2 + z**2)

        # Normalize output back into [min(sigmas), max(sigmas)] if requested.
        if normalize_output:
            result = ((result - result.min()) / (result.max() - result.min())) * (sigmas.max() - sigmas.min()) + sigmas.min()
        return (result,)
# ----- Catmull-Rom Spline -----
class sigmas_catmull_rom:
    """Resample a sigma schedule along a Catmull-Rom spline.

    Uses the cubic Hermite form with Catmull-Rom tangents scaled by
    (1 - tension): tension = 0 gives the full Catmull-Rom curve, tension = 1
    gives zero tangents (flat segment ends). The curve always interpolates
    the original control points.
    """
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "tension": ("FLOAT", {"default": 0.5, "min": 0.0, "max": 1.0, "step": 0.01}),
                "points": ("INT", {"default": 100, "min": 5, "max": 1000, "step": 5}),
                "boundary_condition": (["repeat", "clamp", "mirror"], {"default": "clamp"})
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas, tension, points, boundary_condition):
        """Resample `sigmas` to `points` values; returns (SIGMAS,)."""
        n = len(sigmas)

        # Catmull-Rom needs at least 4 control points; otherwise fall back to
        # plain linear interpolation.
        if n < 4:
            result = torch.zeros(points, device=sigmas.device, dtype=sigmas.dtype)
            for i in range(points):
                idx = min(int(i * (n - 1) / (points - 1)), n - 2)
                alpha = (i * (n - 1) / (points - 1)) - idx
                result[i] = (1 - alpha) * sigmas[idx] + alpha * sigmas[idx + 1]
            return (result,)

        # Phantom endpoints chosen by boundary condition.
        if boundary_condition == "repeat":
            # Repeat endpoints (zero-length end tangent contribution).
            p_start = sigmas[0]
            p_end = sigmas[-1]
        elif boundary_condition == "clamp":
            # Linear extrapolation past the ends.
            p_start = 2 * sigmas[0] - sigmas[1]
            p_end = 2 * sigmas[-1] - sigmas[-2]
        elif boundary_condition == "mirror":
            # Mirror the second/second-to-last points.
            p_start = sigmas[1]
            p_end = sigmas[-2]

        # Extended control polygon: [phantom, sigmas..., phantom].
        control_points = torch.cat([p_start.unsqueeze(0), sigmas, p_end.unsqueeze(0)])

        result = torch.zeros(points, device=sigmas.device, dtype=sigmas.dtype)
        # Tangent scale: 0 = flat tangents, 1 = full Catmull-Rom.
        alpha = 1.0 - tension

        for i in range(points):
            # Global parameter in [0, n-1], split into segment index + local t.
            t = i / (points - 1) * (n - 1)
            idx = min(int(t), n - 2)
            t_local = t - idx

            # Four control points surrounding this segment.
            p0 = control_points[idx]
            p1 = control_points[idx + 1]
            p2 = control_points[idx + 2]
            p3 = control_points[idx + 3]

            # Catmull-Rom tangents at the segment ends, scaled by alpha.
            m1 = alpha * (p2 - p0) * 0.5
            m2 = alpha * (p3 - p1) * 0.5

            t2 = t_local * t_local
            t3 = t2 * t_local

            # Cubic Hermite basis. (BUGFIX: the previous hand-rolled basis
            # summed to 0.5 instead of 1 at the knots, so the curve returned
            # p1/2 at t=0 and never passed through the control points.)
            h00 = 2 * t3 - 3 * t2 + 1
            h10 = t3 - 2 * t2 + t_local
            h01 = -2 * t3 + 3 * t2
            h11 = t3 - t2
            result[i] = h00 * p1 + h10 * m1 + h01 * p2 + h11 * m2

        return (result,)
# ----- Lambert W-Function -----
class sigmas_lambert_w:
    """Apply the Lambert W function to a sigma schedule (numerical Halley solve)."""
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "branch": (["principal", "secondary"], {"default": "principal"}),
                "scale": ("FLOAT", {"default": 1.0, "min": 0.01, "max": 10.0, "step": 0.01}),
                "normalize_output": ("BOOLEAN", {"default": True}),
                "max_iterations": ("INT", {"default": 20, "min": 5, "max": 100, "step": 1})
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas, branch, scale, normalize_output, max_iterations):
        """Solve w * exp(w) = x element-wise for w; returns (SIGMAS,).

        "principal" computes W0 (inputs clamped to >= -1/e); "secondary"
        computes W-1 (inputs clamped into [-1/e, 0)).
        """
        x = sigmas * scale
        result = torch.zeros_like(x)

        # Solve scalar-by-scalar; Halley's method converges in a few
        # iterations from the initial guesses below.
        for i in range(len(x)):
            xi = x[i].item()

            if branch == "principal":
                # W0 is real only for x >= -1/e; clamp into the domain.
                if xi < -1/math.e:
                    xi = -1/math.e
                # Initial guess for W0(x).
                if xi < 0:
                    w = 0.0
                elif xi < 1:
                    # Series expansion of W0 near 0.
                    w = xi * (1 - xi * (1 - 0.5 * xi))
                else:
                    w = math.log(xi)
            else:  # secondary branch, real only on [-1/e, 0)
                if xi < -1/math.e:
                    xi = -1/math.e  # clamp to lower bound
                elif xi >= 0:
                    xi = -0.01      # clamp below the upper bound
                # Initial guess for W-1(x).
                w = math.log(-xi)

            # Halley's iteration for f(w) = w*e^w - x:
            #   w <- w - f / (e^w*(w+1) - (w+2)*f / (2*(w+1)))
            # (BUGFIX: the previous denominator was not Halley's correction
            # term and degraded convergence.)
            for _ in range(max_iterations):
                ew = math.exp(w)
                f = w * ew - xi
                if abs(f) < 1e-10:
                    break
                wpe = w + 1
                if wpe == 0.0:
                    # Branch point w = -1: denominator vanishes; nudge off it.
                    w += 1e-6
                    continue
                denom = ew * wpe - (w + 2) * f / (2 * wpe)
                if denom == 0.0:
                    break
                w_next = w - f / denom
                if abs(w_next - w) < 1e-10:
                    w = w_next
                    break
                w = w_next

            result[i] = w

        # Normalize output back into [min(sigmas), max(sigmas)] if requested.
        if normalize_output:
            result = ((result - result.min()) / (result.max() - result.min())) * (sigmas.max() - sigmas.min()) + sigmas.min()
        return (result,)
# ----- Zeta & Eta Functions -----
class sigmas_zeta_eta:
    # Transform a sigma schedule through zeta-family special functions.
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "function": (["riemann_zeta", "dirichlet_eta", "lerch_phi"], {"default": "riemann_zeta"}),
                "offset": ("FLOAT", {"default": 0.0, "min": -10.0, "max": 10.0, "step": 0.1}),
                "scale": ("FLOAT", {"default": 1.0, "min": 0.01, "max": 10.0, "step": 0.01}),
                "normalize_output": ("BOOLEAN", {"default": True}),
                "approx_terms": ("INT", {"default": 100, "min": 10, "max": 1000, "step": 10})
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas, function, offset, scale, normalize_output, approx_terms):
        """Evaluate zeta / eta / Lerch-phi at s = sigmas * scale + offset.

        Non-finite results are sanitized, and the output is optionally rescaled
        back into the original sigma range.
        """
        s = sigmas * scale + offset
        if function == "riemann_zeta":
            # Use scipy elementwise on CPU; special values handled explicitly.
            s_cpu = s.cpu().numpy()
            result_np = np.zeros_like(s_cpu)
            for i, si in enumerate(s_cpu):
                if si == 1.0:
                    # Pole of zeta: the harmonic series diverges.
                    result_np[i] = float('inf')
                elif si < 0 and si == int(si) and int(si) % 2 == 0:
                    # Trivial zeros at the negative even integers.
                    result_np[i] = 0.0
                else:
                    try:
                        result_np[i] = float(special.zeta(si))
                    except (ValueError, OverflowError):
                        if si > 1:
                            # Truncated Dirichlet series fallback for Re(s) > 1.
                            result_np[i] = sum(1.0 / np.power(n, si) for n in range(1, approx_terms + 1))
                        else:
                            # No cheap stable evaluation for Re(s) < 1 here;
                            # default problematic values to 0.
                            result_np[i] = 0.0
            result = torch.tensor(result_np, device=sigmas.device, dtype=sigmas.dtype)
        elif function == "dirichlet_eta":
            # eta(s) = sum_{n>=1} (-1)^(n+1) / n^s — alternating series, on-device.
            result = torch.zeros_like(s)
            for n in range(1, approx_terms + 1):
                result += torch.pow(n, -s) * (1 if n % 2 == 1 else -1)
        elif function == "lerch_phi":
            # Lerch transcendent Phi(z, s, a) with fixed z = 0.5, a = 1.0:
            # Phi = sum_{n>=0} z^n / (n + a)^s
            z, a = 0.5, 1.0
            result = torch.zeros_like(s)
            for n in range(approx_terms):
                # Fixed: z ** n uses plain float power; the old torch.pow(z, n)
                # with two Python scalars raises a TypeError.
                result += (z ** n) / torch.pow(n + a, s)
        # Sanitize non-finite values: +/-inf -> +/-1e10, NaN -> 0.
        # (The old sign()-based replacement mapped NaN to NaN, leaking it through.)
        result = torch.nan_to_num(result, nan=0.0, posinf=1e10, neginf=-1e10)
        # Normalize output if requested (guarded against a constant result).
        if normalize_output:
            span = result.max() - result.min()
            if span > 0:
                result = (result - result.min()) / span * (sigmas.max() - sigmas.min()) + sigmas.min()
        return (result,)
# ----- Gamma & Beta Functions -----
class sigmas_gamma_beta:
    # Transform a sigma schedule through gamma/beta-family special functions.
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "function": (["gamma", "beta", "incomplete_gamma", "incomplete_beta", "log_gamma"], {"default": "gamma"}),
                "offset": ("FLOAT", {"default": 0.0, "min": -10.0, "max": 10.0, "step": 0.1}),
                "scale": ("FLOAT", {"default": 0.1, "min": 0.01, "max": 10.0, "step": 0.01}),
                "parameter_a": ("FLOAT", {"default": 0.5, "min": 0.1, "max": 10.0, "step": 0.1}),
                "parameter_b": ("FLOAT", {"default": 0.5, "min": 0.1, "max": 10.0, "step": 0.1}),
                "normalize_output": ("BOOLEAN", {"default": True})
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas, function, offset, scale, parameter_a, parameter_b, normalize_output):
        """Evaluate the chosen special function at x = sigmas * scale + offset.

        parameter_a (and parameter_b for incomplete_beta) are the fixed
        distribution parameters; x supplies the varying argument. Non-finite
        values are sanitized, and the result is optionally rescaled back into
        the original sigma range.
        """
        x = sigmas * scale + offset
        # scipy special functions operate on CPU numpy values.
        x_np = x.cpu().numpy()
        if function == "gamma":
            # Gamma function G(x); poles at non-positive integers.
            result_np = np.zeros_like(x_np)
            for i, xi in enumerate(x_np):
                if xi <= 0 and xi == int(xi):
                    result_np[i] = float('inf')
                else:
                    try:
                        result_np[i] = float(special.gamma(xi))
                    except (ValueError, OverflowError):
                        # Overflow for large arguments: treat as +inf.
                        result_np[i] = float('inf')
        elif function == "log_gamma":
            # log(G(x)) — numerically stable for large arguments.
            result_np = np.zeros_like(x_np)
            for i, xi in enumerate(x_np):
                if xi <= 0 and xi == int(xi):
                    # Undefined at the poles of Gamma.
                    result_np[i] = float('inf')
                else:
                    try:
                        result_np[i] = float(special.gammaln(xi))
                    except (ValueError, OverflowError):
                        result_np[i] = float('inf')
        elif function == "beta":
            # Beta function B(parameter_a, x).
            result_np = np.zeros_like(x_np)
            for i, xi in enumerate(x_np):
                try:
                    result_np[i] = float(special.beta(parameter_a, xi))
                except (ValueError, OverflowError):
                    # Undefined combinations map to +inf.
                    result_np[i] = float('inf')
        elif function == "incomplete_gamma":
            # Regularized lower incomplete gamma P(parameter_a, x).
            result_np = np.zeros_like(x_np)
            for i, xi in enumerate(x_np):
                if xi < 0:
                    # Undefined for negative x.
                    result_np[i] = 0.0
                else:
                    try:
                        result_np[i] = float(special.gammainc(parameter_a, xi))
                    except (ValueError, OverflowError):
                        result_np[i] = 1.0  # P(a, x) -> 1 as x -> inf
        elif function == "incomplete_beta":
            # Regularized incomplete beta I(x; parameter_a, parameter_b).
            result_np = np.zeros_like(x_np)
            for i, xi in enumerate(x_np):
                # Clamp x into [0, 1], the function's domain.
                xi_clamped = min(max(xi, 0), 1)
                try:
                    result_np[i] = float(special.betainc(parameter_a, parameter_b, xi_clamped))
                except (ValueError, OverflowError):
                    result_np[i] = 0.5  # midpoint default on failure
        # Convert back to tensor on the input's device/dtype.
        result = torch.tensor(result_np, device=sigmas.device, dtype=sigmas.dtype)
        # Sanitize non-finite values: +/-inf -> +/-1e10, NaN -> 0.
        # (The old sign()-based replacement mapped NaN to NaN, leaking it through
        # when normalize_output was False.)
        result = torch.nan_to_num(result, nan=0.0, posinf=1e10, neginf=-1e10)
        # Normalize output if requested (everything is finite at this point).
        if normalize_output:
            span = result.max() - result.min()
            if span > 0:
                result = (result - result.min()) / span * (sigmas.max() - sigmas.min()) + sigmas.min()
        return (result,)
# ----- Sigma Lerp -----
class sigmas_lerp:
    # Linear interpolation between two sigma schedules.
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas_a": ("SIGMAS", {"forceInput": True}),
                "sigmas_b": ("SIGMAS", {"forceInput": True}),
                "t": ("FLOAT", {"default": 0.5, "min": 0.0, "max": 1.0, "step": 0.01}),
                "ensure_length": ("BOOLEAN", {"default": True})
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas_a, sigmas_b, t, ensure_length):
        """Return (1 - t) * sigmas_a + t * sigmas_b.

        When ensure_length is set and the lengths differ, the shorter schedule
        is resampled (linear interpolation) to the longer one's length.
        """
        if ensure_length and len(sigmas_a) != len(sigmas_b):
            # Resize the smaller one to match the larger one.
            # align_corners=True keeps the first/last sigma values intact when
            # resampling (the default align_corners=False shifts the endpoints).
            if len(sigmas_a) < len(sigmas_b):
                sigmas_a = torch.nn.functional.interpolate(
                    sigmas_a.unsqueeze(0).unsqueeze(0),
                    size=len(sigmas_b),
                    mode='linear',
                    align_corners=True
                ).squeeze(0).squeeze(0)
            else:
                sigmas_b = torch.nn.functional.interpolate(
                    sigmas_b.unsqueeze(0).unsqueeze(0),
                    size=len(sigmas_a),
                    mode='linear',
                    align_corners=True
                ).squeeze(0).squeeze(0)
        return ((1 - t) * sigmas_a + t * sigmas_b,)
# ----- Sigma InvLerp -----
class sigmas_invlerp:
    # Inverse lerp: map sigma values into [0, 1] relative to [min_value, max_value].
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "min_value": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.01}),
                "max_value": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.01})
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas, min_value, max_value):
        """Return (sigmas - min_value) / (max_value - min_value), clamped to [0, 1]."""
        # Degenerate range: nudge the span so the division is well-defined.
        span = max_value - min_value
        if span == 0:
            span = 1e-5
        fraction = (sigmas - min_value) / span
        return (torch.clamp(fraction, 0.0, 1.0),)
# ----- Sigma ArcSine -----
class sigmas_arcsine:
    # Apply arcsine to a sigma schedule, optionally rescaling the output range.
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "normalize_input": ("BOOLEAN", {"default": True}),
                "scale_output": ("BOOLEAN", {"default": True}),
                "out_min": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.01}),
                "out_max": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.01})
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas, normalize_input, scale_output, out_min, out_max):
        """asin(sigmas), with the input clamped into arcsine's [-1, 1] domain.

        With scale_output the [-pi/2, pi/2] result is mapped to [out_min, out_max].
        """
        # Both settings of normalize_input clamp to the valid domain.
        values = torch.asin(torch.clamp(sigmas, -1.0, 1.0))
        if not scale_output:
            return (values,)
        # Shift [-pi/2, pi/2] into [0, 1], then scale to the requested range.
        unit = (values + math.pi / 2) / math.pi
        return (unit * (out_max - out_min) + out_min,)
# ----- Sigma LinearSine -----
class sigmas_linearsine:
    # Blend a sigma schedule with a position-ramped sine wave.
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "amplitude": ("FLOAT", {"default": 0.5, "min": 0.0, "max": 10.0, "step": 0.01}),
                "frequency": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}),
                "phase": ("FLOAT", {"default": 0.0, "min": -6.28, "max": 6.28, "step": 0.01}), # -2π to 2π
                "linear_weight": ("FLOAT", {"default": 0.5, "min": 0.0, "max": 1.0, "step": 0.01})
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas, amplitude, frequency, phase, linear_weight):
        """Return linear_weight * sigmas + (1 - linear_weight) * ramp * sine."""
        # Evenly spaced positions in [0, 1] along the schedule.
        positions = torch.linspace(0, 1, len(sigmas), device=sigmas.device)
        # Sine wave evaluated at those positions.
        wave = amplitude * torch.sin(2 * math.pi * frequency * positions + phase)
        # Weight 1 keeps the original sigmas; weight 0 keeps the ramped sine.
        blended = linear_weight * sigmas + (1 - linear_weight) * (positions * wave)
        return (blended,)
# ----- Sigmas Append -----
class sigmas_append:
    # Append constant values (and optionally another schedule) to a sigma schedule.
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "value": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.01}),
                "count": ("INT", {"default": 1, "min": 1, "max": 100, "step": 1})
            },
            "optional": {
                "additional_sigmas": ("SIGMAS", {"forceInput": False})
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas, value, count, additional_sigmas=None):
        """Concatenate `count` copies of `value`, then `additional_sigmas` if given."""
        tail = torch.full((count,), value, device=sigmas.device, dtype=sigmas.dtype)
        pieces = [sigmas, tail]
        if additional_sigmas is not None:
            pieces.append(additional_sigmas)
        return (torch.cat(pieces, dim=0),)
# ----- Sigma Arccosine -----
class sigmas_arccosine:
    # Apply arccosine to a sigma schedule, optionally rescaling the output range.
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "normalize_input": ("BOOLEAN", {"default": True}),
                "scale_output": ("BOOLEAN", {"default": True}),
                "out_min": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.01}),
                "out_max": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.01})
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas, normalize_input, scale_output, out_min, out_max):
        """acos(sigmas), with the input clamped into arccosine's [-1, 1] domain.

        With scale_output the [0, pi] result is mapped to [out_min, out_max].
        """
        # Both settings of normalize_input clamp to the valid domain.
        values = torch.acos(torch.clamp(sigmas, -1.0, 1.0))
        if not scale_output:
            return (values,)
        # Map [0, pi] into [0, 1], then scale to the requested range.
        unit = values / math.pi
        return (unit * (out_max - out_min) + out_min,)
# ----- Sigma Arctangent -----
class sigmas_arctangent:
    # Apply arctangent to a sigma schedule, optionally rescaling the output range.
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "scale_output": ("BOOLEAN", {"default": True}),
                "out_min": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.01}),
                "out_max": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.01})
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas, scale_output, out_min, out_max):
        """atan(sigmas); with scale_output the (-pi/2, pi/2) range maps to [out_min, out_max]."""
        values = torch.atan(sigmas)
        if scale_output:
            # Shift (-pi/2, pi/2) into (0, 1), then scale to the requested range.
            unit = (values + math.pi / 2) / math.pi
            values = unit * (out_max - out_min) + out_min
        return (values,)
# ----- Sigma CrossProduct -----
class sigmas_crossproduct:
    # 3-D cross product of the first three entries of two schedules.
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas_a": ("SIGMAS", {"forceInput": True}),
                "sigmas_b": ("SIGMAS", {"forceInput": True}),
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas_a, sigmas_b):
        """Cross product a x b using each input's first three values (zero-padded)."""
        # Zero-pad either input that is shorter than 3 entries.
        if len(sigmas_a) < 3:
            sigmas_a = torch.nn.functional.pad(sigmas_a, (0, 3 - len(sigmas_a)))
        if len(sigmas_b) < 3:
            sigmas_b = torch.nn.functional.pad(sigmas_b, (0, 3 - len(sigmas_b)))
        a0, a1, a2 = sigmas_a[0], sigmas_a[1], sigmas_a[2]
        b0, b1, b2 = sigmas_b[0], sigmas_b[1], sigmas_b[2]
        # Standard determinant expansion of the cross product.
        cross = torch.stack([
            a1 * b2 - a2 * b1,
            a2 * b0 - a0 * b2,
            a0 * b1 - a1 * b0,
        ]).to(device=sigmas_a.device, dtype=sigmas_a.dtype)
        return (cross,)
# ----- Sigma DotProduct -----
class sigmas_dotproduct:
    # Dot product of two schedules, optionally after normalizing each to unit length.
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas_a": ("SIGMAS", {"forceInput": True}),
                "sigmas_b": ("SIGMAS", {"forceInput": True}),
                "normalize": ("BOOLEAN", {"default": False})
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas_a, sigmas_b, normalize):
        """Dot product over the first min(len(a), len(b)) entries; 1-element tensor out."""
        # Truncate both inputs to a common length.
        n = min(len(sigmas_a), len(sigmas_b))
        a = sigmas_a[:n]
        b = sigmas_b[:n]
        if normalize:
            norm_a = torch.norm(a)
            norm_b = torch.norm(b)
            # Skip normalization of zero vectors to avoid dividing by zero.
            if norm_a > 0 and norm_b > 0:
                a = a / norm_a
                b = b / norm_b
        dot = (a * b).sum()
        # Wrap the scalar into a single-element SIGMAS tensor.
        return (torch.tensor([dot], device=sigmas_a.device, dtype=sigmas_a.dtype),)
# ----- Sigma Fmod -----
class sigmas_fmod:
    # Elementwise floating-point remainder of a sigma schedule.
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "divisor": ("FLOAT", {"default": 1.0, "min": 0.0001, "max": 10000.0, "step": 0.01})
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas, divisor):
        """torch.fmod(sigmas, divisor) — sign follows the dividend."""
        # The widget minimum already prevents zero, but guard defensively.
        safe_divisor = divisor if divisor != 0 else 0.0001
        return (torch.fmod(sigmas, safe_divisor),)
# ----- Sigma Frac -----
class sigmas_frac:
    # Fractional part of each sigma (always non-negative: x - floor(x)).
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True})
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas):
        """Return x - floor(x); remainder(x, 1) is the same quantity."""
        return (torch.remainder(sigmas, 1.0),)
# ----- Sigma If -----
class sigmas_if:
    # Elementwise select between two schedules based on a condition schedule.
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "condition_sigmas": ("SIGMAS", {"forceInput": True}),
                "true_sigmas": ("SIGMAS", {"forceInput": True}),
                "false_sigmas": ("SIGMAS", {"forceInput": True}),
                "threshold": ("FLOAT", {"default": 0.5, "min": -10000.0, "max": 10000.0, "step": 0.01}),
                "comp_type": (["greater", "less", "equal", "not_equal"], {"default": "greater"})
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, condition_sigmas, true_sigmas, false_sigmas, threshold, comp_type):
        """Where condition <op> threshold holds, take true_sigmas, else false_sigmas.

        All three schedules are first resampled to the longest one's length.
        """
        target_len = max(len(condition_sigmas), len(true_sigmas), len(false_sigmas))

        def stretch(t):
            # Resample a 1-D schedule to target_len with linear interpolation.
            if len(t) == target_len:
                return t
            return torch.nn.functional.interpolate(
                t.unsqueeze(0).unsqueeze(0),
                size=target_len,
                mode='linear'
            ).squeeze(0).squeeze(0)

        condition_sigmas = stretch(condition_sigmas)
        true_sigmas = stretch(true_sigmas)
        false_sigmas = stretch(false_sigmas)

        # Build the selection mask for the requested comparison.
        if comp_type == "greater":
            mask = condition_sigmas > threshold
        elif comp_type == "less":
            mask = condition_sigmas < threshold
        elif comp_type == "equal":
            mask = torch.isclose(condition_sigmas, torch.tensor(threshold, device=condition_sigmas.device))
        elif comp_type == "not_equal":
            mask = ~torch.isclose(condition_sigmas, torch.tensor(threshold, device=condition_sigmas.device))
        return (torch.where(mask, true_sigmas, false_sigmas),)
# ----- Sigma Logarithm2 -----
class sigmas_logarithm2:
    # Base-2 logarithm of a sigma schedule, with optional signed handling of negatives.
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "handle_negative": ("BOOLEAN", {"default": True}),
                "epsilon": ("FLOAT", {"default": 1e-10, "min": 1e-15, "max": 0.1, "step": 1e-10})
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas, handle_negative, epsilon):
        """log2 with `epsilon` floor; handle_negative maps x < 0 to -log2(-x)."""
        if handle_negative:
            # log2 of the magnitude (clamped away from zero), sign restored after.
            magnitude = torch.log2(torch.clamp(sigmas.abs(), min=epsilon))
            result = torch.where(sigmas < 0, -magnitude, magnitude)
        else:
            # Plain log2; clamp avoids log(0).
            result = torch.log2(torch.clamp(sigmas, min=epsilon))
        return (result,)
# ----- Sigma SmoothStep -----
class sigmas_smoothstep:
    # Smooth Hermite easing of a sigma schedule between two edges.
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "edge0": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.01}),
                "edge1": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.01}),
                "mode": (["smoothstep", "smootherstep"], {"default": "smoothstep"})
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas, edge0, edge1, mode):
        """Apply smoothstep/smootherstep between edge0 and edge1, rescaled to that range."""
        # Guard a degenerate edge pair: (sigmas - edge0) / 0 would yield NaN/inf.
        if edge0 == edge1:
            edge1 = edge0 + 1e-5
        # Normalize into [0, 1] relative to the edges.
        t = torch.clamp((sigmas - edge0) / (edge1 - edge0), 0.0, 1.0)
        if mode == "smoothstep":
            # 3t^2 - 2t^3
            result = t * t * (3.0 - 2.0 * t)
        else:  # smootherstep
            # 6t^5 - 15t^4 + 10t^3
            result = t * t * t * (t * (t * 6.0 - 15.0) + 10.0)
        # Scale back to the original [edge0, edge1] range.
        result = result * (edge1 - edge0) + edge0
        return (result,)
# ----- Sigma SquareRoot -----
class sigmas_squareroot:
    # Square root of a sigma schedule, with optional signed handling of negatives.
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "handle_negative": ("BOOLEAN", {"default": False})
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas, handle_negative):
        """sqrt(x); handle_negative maps x < 0 to -sqrt(-x), otherwise negatives become 0."""
        if handle_negative:
            # sqrt of the magnitude, with the original sign reapplied.
            roots = torch.sqrt(sigmas.abs())
            result = torch.where(sigmas < 0, -roots, roots)
        else:
            # Clamp below at zero so negatives map to 0 rather than NaN.
            result = torch.sqrt(torch.clamp(sigmas, min=0))
        return (result,)
# ----- Sigma TimeStep -----
class sigmas_timestep:
def __init__(self):
pass
@classmethod
def INPUT_TYPES(s):
return {
"required": {
"sigmas": ("SIGMAS", {"forceInput": True}),
"dt": ("FLOAT", {"default": 0.1, "min": 0.0001, "max": 10.0, "step": 0.01}),
"scaling": (["linear", "quadratic", "sqrt", "log"], {"default": "linear"}),
"decay": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.01})
}
}
FUNCTION = "main"
RETURN_TYPES = ("SIGMAS",)
CATEGORY = "RES4LYF/sigmas"
def main(self, sigmas, dt, scaling, decay):
# Create time steps
timesteps = torch.arange(len(sigmas), device=sigmas.device, dtype=sigmas.dtype) * dt
# Apply scaling
if scaling == "quadratic":
timesteps = timesteps ** 2
elif scaling == "sqrt":
timesteps = torch.sqrt(timesteps)
elif scaling == "log":
# Add small epsilon to avoid log(0)
timesteps = torch.log(timesteps + 1e-10)
# Apply decay
if decay > 0:
decay_factor = torch.exp(-decay * timesteps)
timesteps = timesteps * decay_factor
# Normalize to match the range of sigmas
timesteps = ((timesteps - timesteps.min()) /
(timesteps.max() - timesteps.min())) * (sigmas.max() - sigmas.min()) + sigmas.min()
return (timesteps,)
class sigmas_gaussian_cdf:
    # Map a sigma schedule through the Gaussian CDF with mean mu and std sigma.
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "mu": ("FLOAT", {"default": 0.0, "min": -10.0, "max": 10.0, "step": 0.01}),
                "sigma": ("FLOAT", {"default": 1.0, "min": 0.01, "max": 10.0, "step": 0.01}),
                "normalize_output": ("BOOLEAN", {"default": True})
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas, mu, sigma, normalize_output):
        """Phi((x - mu)/sigma) via erf, optionally rescaled back to the input range."""
        # Standardize, then apply Phi(z) = (1 + erf(z / sqrt(2))) / 2.
        z = (sigmas - mu) / (sigma * math.sqrt(2))
        result = 0.5 * (1 + torch.erf(z))
        if normalize_output:
            # Min-max rescale into the original sigma range.
            span = result.max() - result.min()
            result = (result - result.min()) / span * (sigmas.max() - sigmas.min()) + sigmas.min()
        return (result,)
class sigmas_stepwise_multirate:
    # Builds a descending sigma schedule composed of several power-curve
    # segments, each covering a fraction of the steps with its own exponent.
    def __init__(self):
        pass
    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "steps": ("INT", {"default": 30, "min": 1, "max": 1000, "step": 1}),
                "rates": ("STRING", {"default": "1.0,0.5,0.25", "multiline": False}),
                "boundaries": ("STRING", {"default": "0.3,0.7", "multiline": False}),
                "start_value": ("FLOAT", {"default": 10.0, "min": 0.0, "max": 100.0, "step": 0.1}),
                "end_value": ("FLOAT", {"default": 0.01, "min": 0.0, "max": 100.0, "step": 0.01}),
                "pad_end": ("BOOLEAN", {"default": True})
            }
        }
    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"
    def main(self, steps, rates, boundaries, start_value, end_value, pad_end):
        """Build a piecewise schedule of `steps` values from start_value to end_value.

        `rates` is a comma-separated list of per-segment exponents and
        `boundaries` a comma-separated list of fractions in (0, 1) that split
        the segments. If len(boundaries) != len(rates) - 1, equal-sized
        segments are used instead. With pad_end a trailing 0.0 is appended.
        """
        # Parse rates and boundaries
        rates_list = [float(r) for r in rates.split(',')]
        if len(rates_list) < 1:
            rates_list = [1.0]
        boundaries_list = [float(b) for b in boundaries.split(',')]
        if len(boundaries_list) != len(rates_list) - 1:
            # Create equal size segments if boundaries don't match rates
            boundaries_list = [i / len(rates_list) for i in range(1, len(rates_list))]
        # Convert boundaries to step indices
        boundary_indices = [int(b * steps) for b in boundaries_list]
        # Create steps array
        result = torch.zeros(steps)
        # Fill segments with different rates
        current_idx = 0
        for i, rate in enumerate(rates_list):
            next_idx = boundary_indices[i] if i < len(boundary_indices) else steps
            segment_length = next_idx - current_idx
            # Skip empty segments (possible when boundaries collide after rounding).
            if segment_length <= 0:
                continue
            # First segment starts at start_value; later segments continue
            # from the previous segment's last value to keep the curve continuous.
            segment_start = start_value if i == 0 else result[current_idx-1]
            # Intermediate targets shrink proportionally to the boundary
            # fraction; the final segment lands exactly on end_value.
            segment_end = end_value if i == len(rates_list) - 1 else start_value * (1 - boundaries_list[i])
            # Apply rate to the segment: t**rate bends the interpolation
            # (rate < 1 front-loads the change, rate > 1 delays it).
            t = torch.linspace(0, 1, segment_length)
            segment = segment_start + (segment_end - segment_start) * (t ** rate)
            result[current_idx:next_idx] = segment
            current_idx = next_idx
        # Add padding zero at the end if requested
        if pad_end:
            result = torch.cat([result, torch.tensor([0.0])])
        return (result,)
class sigmas_harmonic_decay:
    # Descending sigma schedule shaped like a (generalized) harmonic series.
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "steps": ("INT", {"default": 30, "min": 1, "max": 1000, "step": 1}),
                "start_value": ("FLOAT", {"default": 10.0, "min": 0.0, "max": 100.0, "step": 0.1}),
                "end_value": ("FLOAT", {"default": 0.01, "min": 0.0, "max": 100.0, "step": 0.01}),
                "harmonic_offset": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 10.0, "step": 0.01}),
                "decay_rate": ("FLOAT", {"default": 1.0, "min": 0.1, "max": 10.0, "step": 0.1}),
                "pad_end": ("BOOLEAN", {"default": True})
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, steps, start_value, end_value, harmonic_offset, decay_rate, pad_end):
        """Schedule from start_value down to end_value following 1/(n+offset)^rate.

        The harmonic terms are min-max normalized, mapped onto
        [end_value, start_value], and flipped so the largest value comes first.
        """
        # Harmonic terms: 1/(n + offset)^rate for n = 1..steps.
        n = torch.arange(1, steps + 1, dtype=torch.float32)
        harmonic_values = 1.0 / torch.pow(n + harmonic_offset, decay_rate)
        # Normalize to [0, 1]; guard steps == 1 where min == max would give 0/0 NaN.
        span = harmonic_values.max() - harmonic_values.min()
        if span > 0:
            normalized = (harmonic_values - harmonic_values.min()) / span
        else:
            normalized = torch.zeros_like(harmonic_values)  # single step -> start_value
        # Scale to [end_value, start_value] and reverse (higher values first).
        result = start_value - (start_value - end_value) * normalized
        result = torch.flip(result, [0])
        # Add padding zero at the end if requested.
        if pad_end:
            result = torch.cat([result, torch.tensor([0.0])])
        return (result,)
class sigmas_adaptive_noise_floor:
    # Clamp low sigmas upward using a floor derived from local window statistics.
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "min_noise_level": ("FLOAT", {"default": 0.01, "min": 0.0, "max": 1.0, "step": 0.001}),
                "adaptation_factor": ("FLOAT", {"default": 0.5, "min": 0.0, "max": 1.0, "step": 0.01}),
                "window_size": ("INT", {"default": 3, "min": 1, "max": 10, "step": 1})
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas, min_noise_level, adaptation_factor, window_size):
        """Raise each sigma (from index window_size on) to at least
        min_noise_level + adaptation_factor * var/mean of the preceding window."""
        result = sigmas.clone()
        for i in range(window_size, len(sigmas)):
            # Local statistics over the trailing window.
            window = sigmas[i-window_size:i]
            local_mean = torch.mean(window)
            # torch.var of a single sample is NaN (unbiased estimator), which
            # silently disabled the floor for window_size == 1; treat it as 0.
            if window.numel() > 1:
                local_var = torch.var(window)
            else:
                local_var = torch.zeros((), device=sigmas.device, dtype=sigmas.dtype)
            # Higher relative variance raises the floor; epsilon avoids /0.
            adaptive_floor = min_noise_level + adaptation_factor * local_var / (local_mean + 1e-6)
            if result[i] < adaptive_floor:
                result[i] = adaptive_floor
        return (result,)
class sigmas_collatz_iteration:
    # Apply the Collatz map (n/2 if even, 3n+1 otherwise) elementwise.
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "iterations": ("INT", {"default": 3, "min": 1, "max": 20, "step": 1}),
                "scaling_factor": ("FLOAT", {"default": 0.1, "min": 0.0001, "max": 10.0, "step": 0.01}),
                "normalize_output": ("BOOLEAN", {"default": True})
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas, iterations, scaling_factor, normalize_output):
        """Scale the sigmas, iterate the Collatz rule, optionally renormalize."""
        # Fresh tensor: the input schedule stays untouched.
        result = sigmas * scaling_factor
        for _ in range(iterations):
            halved = result / 2
            tripled = 3 * result + 1
            # Exact evenness check (floats rarely hit it once scaled).
            result = torch.where(result % 2 == 0, halved, tripled)
        if normalize_output:
            # Min-max rescale back into the original sigma range.
            result = ((result - result.min()) / (result.max() - result.min())) * (sigmas.max() - sigmas.min()) + sigmas.min()
        return (result,)
class sigmas_conway_sequence:
    # Sigma schedules from combinatorial integer sequences (look-and-say,
    # audioactive, paper-folding, Thue-Morse), optionally rescaled to a range.
    def __init__(self):
        pass
    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "steps": ("INT", {"default": 20, "min": 1, "max": 50, "step": 1}),
                "sequence_type": (["look_and_say", "audioactive", "paperfolding", "thue_morse"], {"default": "look_and_say"}),
                "normalize_range": ("BOOLEAN", {"default": True}),
                "min_value": ("FLOAT", {"default": 0.01, "min": 0.0, "max": 10.0, "step": 0.01}),
                "max_value": ("FLOAT", {"default": 10.0, "min": 0.0, "max": 50.0, "step": 0.1})
            }
        }
    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"
    def main(self, steps, sequence_type, normalize_range, min_value, max_value):
        """Generate `steps` values of the chosen sequence as a float tensor.

        With normalize_range, the values are min-max mapped onto
        [min_value, max_value]; a constant sequence collapses to min_value.
        """
        if sequence_type == "look_and_say":
            # Schedule values are the term LENGTHS of the look-and-say
            # sequence, starting from "1" (1, 11, 21, 1211, ...).
            s = "1"
            lengths = [1] # Length of first term is 1
            # Generate look-and-say sequence
            for _ in range(min(steps - 1, 25)): # Limit to prevent excessive computation
                next_s = ""
                i = 0
                while i < len(s):
                    # Count the run of identical digits starting at i.
                    count = 1
                    while i + 1 < len(s) and s[i] == s[i + 1]:
                        i += 1
                        count += 1
                    # Emit "<run length><digit>".
                    next_s += str(count) + s[i]
                    i += 1
                s = next_s
                lengths.append(len(s))
            # Convert to tensor
            result = torch.tensor(lengths, dtype=torch.float32)
        elif sequence_type == "audioactive":
            # Audioactive variant: each step emits (count, digit) pairs for the
            # distinct digits of the previous term, in sorted digit order.
            # NOTE(review): this summarizes digit multiplicities rather than
            # runs, so it differs from classic look-and-say — verify intended.
            a = [1]
            for _ in range(min(steps - 1, 30)):
                b = []
                digit_count = {}
                for digit in a:
                    digit_count[digit] = digit_count.get(digit, 0) + 1
                for digit in sorted(digit_count.keys()):
                    b.append(digit_count[digit])
                    b.append(digit)
                a = b
            # The final term's entries form the schedule, truncated to `steps`.
            result = torch.tensor(a, dtype=torch.float32)
            if len(result) > steps:
                result = result[:steps]
        elif sequence_type == "paperfolding":
            # Paper folding sequence (dragon curve).
            # NOTE(review): the term (i & (i + 1)) % 2 == 0 does not match the
            # standard regular paper-folding definition — confirm intended formula.
            sequence = []
            for i in range(min(steps, 30)):
                sequence.append(1 if (i & (i + 1)) % 2 == 0 else 0)
            result = torch.tensor(sequence, dtype=torch.float32)
        elif sequence_type == "thue_morse":
            # Thue-Morse sequence: repeatedly append the bitwise complement
            # of the prefix built so far (0, 01, 0110, 01101001, ...).
            sequence = [0]
            while len(sequence) < steps:
                sequence.extend([1 - x for x in sequence])
            result = torch.tensor(sequence, dtype=torch.float32)[:steps]
        # Normalize to desired range
        if normalize_range:
            if result.max() > result.min():
                result = (result - result.min()) / (result.max() - result.min())
                result = result * (max_value - min_value) + min_value
            else:
                # Constant sequence: fall back to min_value everywhere.
                result = torch.ones_like(result) * min_value
        return (result,)
class sigmas_gilbreath_sequence:
    # Sigma schedule from a level of the Gilbreath prime-difference pyramid.
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "steps": ("INT", {"default": 30, "min": 10, "max": 100, "step": 1}),
                "levels": ("INT", {"default": 3, "min": 1, "max": 10, "step": 1}),
                "normalize_range": ("BOOLEAN", {"default": True}),
                "min_value": ("FLOAT", {"default": 0.01, "min": 0.0, "max": 10.0, "step": 0.01}),
                "max_value": ("FLOAT", {"default": 10.0, "min": 0.0, "max": 50.0, "step": 0.1})
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, steps, levels, normalize_range, min_value, max_value):
        """Take level `levels-1` of the absolute-difference pyramid over the
        first `steps` primes, pad with 1s, and optionally rescale the range."""
        def primes_up_to(limit):
            # Classic sieve of Eratosthenes.
            flags = [True] * (limit + 1)
            flags[0] = flags[1] = False
            for p in range(2, int(limit ** 0.5) + 1):
                if flags[p]:
                    for multiple in range(p * p, limit + 1, p):
                        flags[multiple] = False
            return [n for n, is_prime in enumerate(flags) if is_prime]

        # Sieve bound steps*6 comfortably yields at least `steps` primes here.
        base_primes = primes_up_to(steps * 6)[:steps]
        # Build successive rows of absolute differences (the Gilbreath pyramid).
        rows = [base_primes]
        for _ in range(1, levels):
            prev = rows[-1]
            rows.append([abs(prev[j] - prev[j + 1]) for j in range(len(prev) - 1)])
        row = rows[min(levels - 1, len(rows) - 1)]
        # Pad with 1s: by Gilbreath's conjecture the rows settle to 1.
        padded = row + [1] * max(0, steps - len(row))
        result = torch.tensor(padded[:steps], dtype=torch.float32)
        # Optionally min-max rescale onto [min_value, max_value].
        if normalize_range:
            if result.max() > result.min():
                result = (result - result.min()) / (result.max() - result.min())
                result = result * (max_value - min_value) + min_value
            else:
                result = torch.ones_like(result) * min_value
        return (result,)
class sigmas_cnf_inverse:
    # Resample a sigma schedule along a CNF-style monotone flow curve.
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "time_steps": ("INT", {"default": 20, "min": 5, "max": 100, "step": 1}),
                "flow_type": (["linear", "quadratic", "sigmoid", "exponential"], {"default": "sigmoid"}),
                "reverse": ("BOOLEAN", {"default": True})
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas, time_steps, flow_type, reverse):
        """Linearly interpolate `sigmas` at `time_steps` positions given by the flow.

        The flow maps [0, 1] onto [0, 1] (optionally reversed); the output has
        length time_steps and spans the same value range as the input.
        """
        t = torch.linspace(0, 1, time_steps, device=sigmas.device)
        # Shape of the reparameterization curve.
        if flow_type == "linear":
            flow = t
        elif flow_type == "quadratic":
            flow = t ** 2
        elif flow_type == "sigmoid":
            flow = 1 / (1 + torch.exp(-10 * (t - 0.5)))
        elif flow_type == "exponential":
            flow = torch.exp(3 * t) - 1
            flow = flow / flow.max()  # rescale onto [0, 1]
        if reverse:
            flow = 1 - flow
        # Work on [0, 1]-normalized sigmas, then scale back at the end.
        normalized = (sigmas - sigmas.min()) / (sigmas.max() - sigmas.min())
        # Fractional sample positions along the input schedule.
        positions = flow * (len(sigmas) - 1)
        lo = torch.clamp(positions.long(), max=len(sigmas) - 1)  # floor (positions >= 0)
        hi = torch.clamp(lo + 1, max=len(sigmas) - 1)
        frac = positions - lo
        # Vectorized linear interpolation between neighboring samples.
        sampled = (1 - frac) * normalized[lo] + frac * normalized[hi]
        result = sampled.to(device=sigmas.device, dtype=sigmas.dtype)
        result = result * (sigmas.max() - sigmas.min()) + sigmas.min()
        return (result,)
class sigmas_riemannian_flow:
    """Sigma schedules following geodesics of different Riemannian metrics.

    BUGFIX: the hyperbolic and spherical branches called torch.tanh/torch.sqrt
    on plain Python floats, which raises TypeError (torch unary ops require
    Tensors); scalars are now wrapped in tensors. The lorentzian branch also
    produced NaN whenever curvature * t^2 >= 1; the radicand is now clamped.
    """
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "steps": ("INT", {"default": 30, "min": 5, "max": 100, "step": 1}),
                "metric_type": (["euclidean", "hyperbolic", "spherical", "lorentzian"], {"default": "hyperbolic"}),
                "curvature": ("FLOAT", {"default": 1.0, "min": 0.1, "max": 10.0, "step": 0.1}),
                "start_value": ("FLOAT", {"default": 10.0, "min": 0.1, "max": 50.0, "step": 0.1}),
                "end_value": ("FLOAT", {"default": 0.01, "min": 0.0, "max": 10.0, "step": 0.01})
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, steps, metric_type, curvature, start_value, end_value):
        """Interpolate from start_value to end_value along the chosen geodesic."""
        t = torch.linspace(0, 1, steps)

        if metric_type == "euclidean":
            # Plain linear interpolation.
            result = start_value * (1 - t) + end_value * t
        elif metric_type == "hyperbolic":
            # Geodesic in the Poincaré disk model. Endpoint values are mapped
            # into the disk with tanh; scalars must be tensors for torch ops.
            x_start = torch.tanh(torch.tensor(start_value / 2))
            x_end   = torch.tanh(torch.tensor(end_value / 2))
            # Hyperbolic distance between the two disk points.
            # NOTE: degenerates (0/0 -> NaN) when start_value == end_value.
            d = torch.acosh(1 + 2 * ((x_start - x_end)**2) / ((1 - x_start**2) * (1 - x_end**2)))
            # Geodesic interpolation weight.
            lambda_t = torch.sinh(t * d) / torch.sinh(d)
            result = 2 * torch.atanh((1 - lambda_t) * x_start + lambda_t * x_end)
        elif metric_type == "spherical":
            # Great-circle interpolation with positive curvature K.
            K = torch.tensor(curvature)
            theta_start = start_value * torch.sqrt(K)
            theta_end   = end_value * torch.sqrt(K)
            result = torch.sin((1 - t) * theta_start + t * theta_end) / torch.sqrt(K)
        elif metric_type == "lorentzian":
            # Time-dilation-inspired modulation; clamp keeps the Lorentz factor
            # finite when curvature * t^2 >= 1 (previously NaN/inf).
            gamma = 1 / torch.sqrt((1 - curvature * t**2).clamp(min=1e-8))
            result = start_value * (1 - t) + end_value * t
            result = result * gamma

        # Keep values inside [min(start,end), max(start,end)].
        result = torch.clamp(result, min=min(start_value, end_value), max=max(start_value, end_value))
        # Flip if the curve ended up ascending while a descent was requested.
        if start_value > end_value and result[0] < result[-1]:
            result = torch.flip(result, [0])
        return (result,)
class sigmas_langevin_dynamics:
def __init__(self):
pass
@classmethod
def INPUT_TYPES(s):
return {
"required": {
"steps": ("INT", {"default": 30, "min": 5, "max": 100, "step": 1}),
"start_value": ("FLOAT", {"default": 10.0, "min": 0.1, "max": 50.0, "step": 0.1}),
"end_value": ("FLOAT", {"default": 0.01, "min": 0.0, "max": 10.0, "step": 0.01}),
"temperature": ("FLOAT", {"default": 0.5, "min": 0.01, "max": 10.0, "step": 0.01}),
"friction": ("FLOAT", {"default": 1.0, "min": 0.1, "max": 10.0, "step": 0.1}),
"seed": ("INT", {"default": 42, "min": 0, "max": 99999, "step": 1})
}
}
FUNCTION = "main"
RETURN_TYPES = ("SIGMAS",)
CATEGORY = "RES4LYF/sigmas"
def main(self, steps, start_value, end_value, temperature, friction, seed):
# Set random seed for reproducibility
torch.manual_seed(seed)
# Potential function (quadratic well centered at end_value)
def U(x):
return 0.5 * (x - end_value)**2
# Gradient of the potential
def grad_U(x):
return x - end_value
# Initialize state
x = torch.tensor([start_value], dtype=torch.float32)
v = torch.zeros(1) # Initial velocity
# Discretization parameters
dt = 1.0 / steps
sqrt_2dt = math.sqrt(2 * dt)
# Storage for trajectory
trajectory = [start_value]
# Langevin dynamics integration (velocity Verlet with Langevin thermostat)
for _ in range(steps - 1):
# Half step in velocity
v = v - dt * friction * v - dt * grad_U(x) / 2
# Full step in position
x = x + dt * v
# Random force (thermal noise)
noise = torch.randn(1) * sqrt_2dt * temperature
# Another half step in velocity with noise
v = v - dt * friction * v - dt * grad_U(x) / 2 + noise
# Store current position
trajectory.append(x.item())
# Convert to tensor
result = torch.tensor(trajectory, dtype=torch.float32)
# Ensure we reach the end value
result[-1] = end_value
return (result,)
class sigmas_persistent_homology:
    """Sigma schedule from a filtration curve modulated by a simulated
    persistence barcode (birth/death interval counting)."""
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "steps": ("INT", {"default": 30, "min": 5, "max": 100, "step": 1}),
                "start_value": ("FLOAT", {"default": 10.0, "min": 0.1, "max": 50.0, "step": 0.1}),
                "end_value": ("FLOAT", {"default": 0.01, "min": 0.0, "max": 10.0, "step": 0.01}),
                "persistence_type": (["linear", "exponential", "logarithmic", "sigmoidal"], {"default": "exponential"}),
                "birth_density": ("FLOAT", {"default": 0.3, "min": 0.0, "max": 1.0, "step": 0.01}),
                "death_density": ("FLOAT", {"default": 0.7, "min": 0.0, "max": 1.0, "step": 0.01})
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, steps, start_value, end_value, persistence_type, birth_density, death_density):
        """Build the schedule: filtration curve * barcode weighting, rescaled."""
        t = torch.linspace(0, 1, steps)

        n_birth = int(steps * birth_density)
        n_death = int(steps * death_density)

        # Base filtration function on [0,1].
        if persistence_type == "linear":
            filt = t
        elif persistence_type == "exponential":
            filt = 1 - torch.exp(-5 * t)
        elif persistence_type == "logarithmic":
            filt = torch.log(1 + 9 * t) / torch.log(torch.tensor([10.0]))
        elif persistence_type == "sigmoidal":
            filt = 1 / (1 + torch.exp(-10 * (t - 0.5)))

        # Births in the first half of the range, deaths in the second half.
        births = torch.linspace(0, steps // 2, n_birth).long()
        deaths = torch.linspace(steps // 2, steps - 1, n_death).long()

        # Barcode = number of (birth, death) intervals covering each step.
        bars = torch.zeros(steps)
        for born in births:
            for died in deaths:
                if born < died:
                    bars[born:died] += 1

        if bars.max() > 0:
            bars = bars / bars.max()

        # Modulate the filtration (70% base + up to 30% barcode weight).
        curve = filt * (0.7 + 0.3 * bars)
        # Map onto [start_value, end_value].
        curve = start_value + (end_value - start_value) * curve
        return (curve,)
class sigmas_normalizing_flows:
def __init__(self):
pass
@classmethod
def INPUT_TYPES(s):
return {
"required": {
"steps": ("INT", {"default": 30, "min": 5, "max": 100, "step": 1}),
"start_value": ("FLOAT", {"default": 10.0, "min": 0.1, "max": 50.0, "step": 0.1}),
"end_value": ("FLOAT", {"default": 0.01, "min": 0.0, "max": 10.0, "step": 0.01}),
"flow_type": (["affine", "planar", "radial", "realnvp"], {"default": "realnvp"}),
"num_transforms": ("INT", {"default": 3, "min": 1, "max": 10, "step": 1}),
"seed": ("INT", {"default": 42, "min": 0, "max": 99999, "step": 1})
}
}
FUNCTION = "main"
RETURN_TYPES = ("SIGMAS",)
CATEGORY = "RES4LYF/sigmas"
def main(self, steps, start_value, end_value, flow_type, num_transforms, seed):
# Set random seed for reproducibility
torch.manual_seed(seed)
# Create base linear schedule from start_value to end_value
base_schedule = torch.linspace(start_value, end_value, steps)
# Apply different normalizing flow transformations
if flow_type == "affine":
# Affine transformation: f(x) = a*x + b
result = base_schedule.clone()
for _ in range(num_transforms):
a = torch.rand(1) * 0.5 + 0.75 # Scale in [0.75, 1.25]
b = (torch.rand(1) - 0.5) * 0.2 # Shift in [-0.1, 0.1]
result = a * result + b
elif flow_type == "planar":
# Planar flow: f(x) = x + u * tanh(w * x + b)
result = base_schedule.clone()
for _ in range(num_transforms):
u = torch.rand(1) * 0.4 - 0.2 # in [-0.2, 0.2]
w = torch.rand(1) * 2 - 1 # in [-1, 1]
b = torch.rand(1) * 0.2 - 0.1 # in [-0.1, 0.1]
result = result + u * torch.tanh(w * result + b)
elif flow_type == "radial":
# Radial flow: f(x) = x + beta * (x - x0) / (alpha + |x - x0|)
result = base_schedule.clone()
for _ in range(num_transforms):
# Pick a random reference point within the range
idx = torch.randint(0, steps, (1,))
x0 = result[idx]
alpha = torch.rand(1) * 0.5 + 0.5 # in [0.5, 1.0]
beta = torch.rand(1) * 0.4 - 0.2 # in [-0.2, 0.2]
# Apply radial flow
diff = result - x0
r = torch.abs(diff)
result = result + beta * diff / (alpha + r)
elif flow_type == "realnvp":
# Simplified RealNVP-inspired flow with masking
result = base_schedule.clone()
for _ in range(num_transforms):
# Create alternating mask
mask = torch.zeros(steps)
mask[::2] = 1 # Mask even indices
# Generate scale and shift parameters
log_scale = torch.rand(steps) * 0.2 - 0.1 # in [-0.1, 0.1]
shift = torch.rand(steps) * 0.2 - 0.1 # in [-0.1, 0.1]
# Apply affine coupling transformation
scale = torch.exp(log_scale * mask)
masked_shift = shift * mask
# Transform
result = result * scale + masked_shift
# Rescale to ensure we maintain start_value and end_value
if result[0] != start_value or result[-1] != end_value:
result = (result - result[0]) / (result[-1] - result[0]) * (end_value - start_value) + start_value
return (result,)
class sigmas_split_value:
    """Splits a sigma schedule into (high_sigmas, low_sigmas) at a sigma value.

    BUGFIX: with bias_split_up=False, when the very first sigma is already
    below split_value the original code returned ``sigmas[split_idx - 1:]``
    with split_idx == 0, i.e. ``sigmas[-1:]`` — only the LAST sigma instead
    of the whole schedule. The start index is now clamped at 0.
    """
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS",),
                "split_value": ("FLOAT", {"default": 0.875, "min": 0.0, "max": 80085.0, "step": 0.001}),
                "bias_split_up": ("BOOLEAN", {"default": False, "tooltip": "If True, split happens above the split value, so high_sigmas includes the split point."}),
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS", "SIGMAS")
    RETURN_NAMES = ("high_sigmas", "low_sigmas")
    CATEGORY = "RES4LYF/sigmas"
    DESCRIPTION = ("Splits sigma schedule at a specific sigma value.")

    def main(self, sigmas, split_value, bias_split_up):
        """Return (high_sigmas, low_sigmas); the boundary step is shared by both halves."""
        if len(sigmas) == 0:
            return (sigmas, sigmas)

        if bias_split_up:
            # Split at the first sigma <= split_value; that sigma is the last
            # step of the high half AND the first step of the low half.
            split_idx = None
            for i, sigma in enumerate(sigmas):
                if sigma <= split_value:
                    split_idx = i
                    break
            if split_idx is None:
                # All sigmas are above split_value.
                return (sigmas, torch.tensor([], device=sigmas.device, dtype=sigmas.dtype))
            high_sigmas = sigmas[:split_idx + 1]
            low_sigmas = sigmas[split_idx:]
        else:
            # Split at the first sigma strictly below split_value; the low half
            # starts at the boundary point just above it.
            split_idx = None
            for i, sigma in enumerate(sigmas):
                if sigma < split_value:
                    split_idx = i
                    break
            if split_idx is None:
                # All sigmas are >= split_value.
                return (torch.tensor([], device=sigmas.device, dtype=sigmas.dtype), sigmas)
            high_sigmas = sigmas[:split_idx]
            # Clamp at 0 so split_idx == 0 doesn't wrap to sigmas[-1:].
            low_sigmas = sigmas[max(split_idx - 1, 0):]
        return (high_sigmas, low_sigmas)
def get_bong_tangent_sigmas(steps, slope, pivot, start, end):
    """Arctan-shaped sigma curve over `steps` points, scaled from start to end.

    The raw curve is an arctan sigmoid centered at `pivot` with steepness
    `slope`, normalized so the first point maps to `start` and the last to `end`.
    """
    def s_curve(x):
        return ((2 / pi) * atan(-slope * (x - pivot)) + 1) / 2

    top = s_curve(0)            # curve value at the first step
    bottom = s_curve(steps - 1) # curve value at the last step
    span = top - bottom
    scale = start - end

    return [(s_curve(x) - bottom) * (1 / span) * scale + end for x in range(steps)]
def bong_tangent_scheduler(model_sampling, steps, start=1.0, middle=0.5, end=0.0, pivot_1=0.6, pivot_2=0.6, slope_1=0.2, slope_2=0.2, pad=False):
    """Two-stage arctan sigma schedule: start→middle, then middle→end.

    `model_sampling` is accepted for scheduler-API compatibility but unused.
    Two extra steps are added internally; the last point of stage 1 is dropped
    so the stages join without duplicating the middle value.
    """
    steps += 2

    # Stage boundary = midpoint of the two pivot fractions (in step units).
    midpoint = int((steps * pivot_1 + steps * pivot_2) / 2)
    pivot_1 = int(steps * pivot_1)
    pivot_2 = int(steps * pivot_2)

    # Normalize slopes so steepness is comparable across step counts.
    slope_1 = slope_1 / (steps / 40)
    slope_2 = slope_2 / (steps / 40)

    len_2 = steps - midpoint
    len_1 = steps - len_2

    seg_1 = get_bong_tangent_sigmas(len_1, slope_1, pivot_1, start, middle)
    seg_2 = get_bong_tangent_sigmas(len_2, slope_2, pivot_2 - len_1, middle, end)
    seg_1 = seg_1[:-1]  # drop the join point (stage 2 starts at `middle`)

    if pad:
        seg_2 = seg_2 + [0]

    return torch.tensor(seg_1 + seg_2)
================================================
FILE: style_transfer.py
================================================
import torch
import torch.nn.functional as F
import torch.nn as nn
from torch import Tensor, FloatTensor
from typing import Optional, Callable, Tuple, Dict, List, Any, Union
import einops
from einops import rearrange
import copy
import comfy
from .latents import gaussian_blur_2d, median_blur_2d
# WIP... not yet in use...
class StyleTransfer:
def __init__(self,
style_method = "WCT",
embedder_method = None,
patch_size = 1,
pinv_dtype = torch.float64,
dtype = torch.float64,
):
self.style_method = style_method
self.embedder_method = None
self.unembedder_method = None
if embedder_method is not None:
self.set_embedder_method(embedder_method)
self.patch_size = patch_size
#if embedder_type == "conv2d":
# self.unembedder = self.invert_conv2d
self.pinv_dtype = pinv_dtype
self.dtype = dtype
self.patchify = None
self.unpatchify = None
self.orig_shape = None
self.grid_sizes = None
#self.x_embed_ndim = 0
def set_patchify_method(self, patchify_method=None):
self.patchify_method = patchify_method
def set_unpatchify_method(self, unpatchify_method=None):
self.unpatchify_method = unpatchify_method
def set_embedder_method(self, embedder_method):
self.embedder_method = copy.deepcopy(embedder_method).to(self.pinv_dtype)
self.W = self.embedder_method.weight
self.B = self.embedder_method.bias
if isinstance(embedder_method, nn.Linear):
self.unembedder_method = self.invert_linear
elif isinstance(embedder_method, nn.Conv2d):
self.unembedder_method = self.invert_conv2d
elif isinstance(embedder_method, nn.Conv3d):
self.unembedder_method = self.invert_conv3d
def set_patch_size(self, patch_size):
self.patch_size = patch_size
def unpatchify(self, x: Tensor) -> List[Tensor]:
x_arr = []
for i, img_size in enumerate(self.img_sizes): # [[64,64]] , img_sizes: List[Tuple[int, int]]
pH, pW = img_size
x_arr.append(
einops.rearrange(x[i, :pH*pW].reshape(1, pH, pW, -1), 'B H W (p1 p2 C) -> B C (H p1) (W p2)',
p1=self.patch_size, p2=self.patch_size)
)
x = torch.cat(x_arr, dim=0)
return x
def patchify(self, x: Tensor):
x = comfy.ldm.common_dit.pad_to_patch_size(x, (self.patch_size, self.patch_size))
pH, pW = x.shape[-2] // self.patch_size, x.shape[-1] // self.patch_size
self.img_sizes = [[pH, pW]] * x.shape[0]
x = einops.rearrange(x, 'B C (H p1) (W p2) -> B (H W) (p1 p2 C)', p1=self.patch_size, p2=self.patch_size)
return x
def embedder(self, x):
if isinstance(self.embedder_method, nn.Linear):
x = self.patchify(x)
self.orig_shape = x.shape
x = self.embedder_method(x)
self.grid_sizes = x.shape[2:]
#self.x_embed_ndim = x.ndim
#if x.ndim > 3:
# x = einops.rearrange(x, "B C H W -> B (H W) C")
return x
def unembedder(self, x):
#if self.x_embed_ndim > 3:
# x = einops.rearrange(x, "B (H W) C -> B C H W", W=self.orig_shape[-1])
x = self.unembedder_method(x)
return x
def invert_linear(self, x : torch.Tensor,) -> torch.Tensor:
x = x.to(self.pinv_dtype)
#x = (x - self.B.to(self.dtype)) @ torch.linalg.pinv(self.W.to(self.pinv_dtype)).T.to(self.dtype)
x = (x - self.B) @ torch.linalg.pinv(self.W).T
return x.to(self.dtype)
def invert_conv2d(self, z: torch.Tensor,) -> torch.Tensor:
z = z.to(self.pinv_dtype)
conv = self.embedder_method
B, C_in, H, W = self.orig_shape
C_out, _, kH, kW = conv.weight.shape
stride_h, stride_w = conv.stride
pad_h, pad_w = conv.padding
b = conv.bias.view(1, C_out, 1, 1).to(z)
z_nobias = z - b
W_flat = conv.weight.view(C_out, -1).to(z)
W_pinv = torch.linalg.pinv(W_flat)
Bz, Co, Hp, Wp = z_nobias.shape
z_flat = z_nobias.reshape(Bz, Co, -1)
x_patches = W_pinv @ z_flat
x_sum = F.fold(
x_patches,
output_size=(H + 2*pad_h, W + 2*pad_w),
kernel_size=(kH, kW),
stride=(stride_h, stride_w),
)
ones = torch.ones_like(x_patches)
count = F.fold(
ones,
output_size=(H + 2*pad_h, W + 2*pad_w),
kernel_size=(kH, kW),
stride=(stride_h, stride_w),
)
x_recon = x_sum / count.clamp(min=1e-6)
if pad_h > 0 or pad_w > 0:
x_recon = x_recon[..., pad_h:pad_h+H, pad_w:pad_w+W]
return x_recon.to(self.dtype)
def invert_conv3d(self, z: torch.Tensor, ) -> torch.Tensor:
z = z.to(self.pinv_dtype)
conv = self.embedder_method
grid_sizes = self.grid_sizes
B, C_in, D, H, W = self.orig_shape
pD, pH, pW = self.patch_size
sD, sH, sW = pD, pH, pW
if z.ndim == 3:
# [B, S, C_out] -> reshape to [B, C_out, D', H', W']
S = z.shape[1]
if grid_sizes is None:
Dp = D // pD
Hp = H // pH # getting actual patchified dims
Wp = W // pW
else:
Dp, Hp, Wp = grid_sizes
C_out = z.shape[2]
z = z.transpose(1, 2).reshape(B, C_out, Dp, Hp, Wp)
else:
B2, C_out, Dp, Hp, Wp = z.shape
assert B2 == B, "Batch size mismatch... ya sharked it."
b = conv.bias.view(1, C_out, 1, 1, 1) # need to kncokout bias to invert via weight
z_nobias = z - b
# 2D filter -> pinv
w3 = conv.weight # [C_out, C_in, 1, pH, pW]
w2 = w3.squeeze(2) # [C_out, C_in, pH, pW]
out_ch, in_ch, kH, kW = w2.shape
W_flat = w2.view(out_ch, -1) # [C_out, in_ch*pH*pW]
W_pinv = torch.linalg.pinv(W_flat) # [in_ch*pH*pW, C_out]
# merge depth for 2D unfold wackiness
z2 = z_nobias.permute(0,2,1,3,4).reshape(B*Dp, C_out, Hp, Wp)
# apply pinv ... get patch vectors
z_flat = z2.reshape(B*Dp, C_out, -1) # [B*Dp, C_out, L]
x_patches = W_pinv @ z_flat # [B*Dp, in_ch*pH*pW, L]
# fold -> restore spatial frames
x2 = F.fold(
x_patches,
output_size=(H, W),
kernel_size=(pH, pW),
stride=(sH, sW)
) # → [B*Dp, C_in, H, W]
# unmerge depth (de-depth charge)
x2 = x2.reshape(B, Dp, in_ch, H, W) # [B, Dp, C_in, H, W]
x_recon = x2.permute(0,2,1,3,4).contiguous() # [B, C_in, D, H, W]
return x_recon.to(self.dtype)
def adain_seq_inplace(self, content: torch.Tensor, style: torch.Tensor, eps: float = 1e-7) -> torch.Tensor:
mean_c = content.mean(1, keepdim=True)
std_c = content.std (1, keepdim=True).add_(eps)
mean_s = style.mean (1, keepdim=True)
std_s = style.std (1, keepdim=True).add_(eps)
content.sub_(mean_c).div_(std_c).mul_(std_s).add_(mean_s)
return content
class StyleWCT:
    """Whitening-coloring transform (WCT) style matcher over flattened embeddings.

    set() fits the coloring matrix and mean from a reference embedding;
    get() whitens each batch item of a content embedding and recolors it
    with the reference statistics (modifies the input in place).
    """
    def __init__(self, dtype=torch.float64, use_svd=False,):
        self.dtype          = dtype
        self.use_svd        = use_svd
        self.y0_adain_embed = None   # cached reference embedding
        self.mu_s           = None   # reference mean
        self.y0_color       = None   # coloring matrix (cov^{1/2})
        self.spatial_shape  = None

    def whiten(self, f_s_centered: torch.Tensor, set=False):
        """Return cov^{-1/2} (set=False) or cov^{1/2} (set=True) of the centered features."""
        n = f_s_centered.size(0)
        cov = (f_s_centered.T.double() @ f_s_centered.double()) / (n - 1)
        # Tikhonov regularization keeps the matrix invertible.
        reg = cov + 1e-5 * torch.eye(cov.size(0), dtype=cov.dtype, device=cov.device)

        if self.use_svd:
            U_eig, S_eig, _ = torch.linalg.svd(reg)
        else:
            S_eig, U_eig = torch.linalg.eigh(reg)

        # Eigenvalues -> singular values (sqrt) or their inverse (rsqrt).
        root = S_eig.clamp(min=0).sqrt() if set else S_eig.clamp(min=0).rsqrt()
        mat = U_eig @ torch.diag(root) @ U_eig.T
        return mat.to(f_s_centered)

    def set(self, y0_adain_embed: torch.Tensor, spatial_shape=None):
        """Fit mu_s and the coloring matrix from the first batch item of the reference."""
        changed = (self.y0_adain_embed is None
                   or self.y0_adain_embed.shape != y0_adain_embed.shape
                   or torch.norm(self.y0_adain_embed - y0_adain_embed) > 0)
        if changed:
            self.y0_adain_embed = y0_adain_embed.clone()
            if spatial_shape is not None:
                self.spatial_shape = spatial_shape
            f_s = y0_adain_embed[0]
            self.mu_s = f_s.mean(dim=0, keepdim=True)
            self.y0_color = self.whiten(f_s - self.mu_s, set=True)

    def get(self, denoised_embed: torch.Tensor):
        """Whiten each batch item, then recolor with the fitted reference stats."""
        for i in range(denoised_embed.shape[0]):
            feats = denoised_embed[i]
            mu = feats.mean(dim=0, keepdim=True)
            centered = feats - mu
            whitened = centered @ self.whiten(centered).T
            denoised_embed[i] = whitened @ self.y0_color.T + self.mu_s
        return denoised_embed
class WaveletStyleWCT(StyleWCT):
    # WCT variant fitted on (and applied to) the low-frequency Haar band of the
    # feature map instead of the raw feature sequence.
    def set(self, y0_adain_embed: torch.Tensor, h_len, w_len):
        # Refit only if the cached reference is missing or its content changed.
        if self.y0_adain_embed is None or self.y0_adain_embed.shape != y0_adain_embed.shape or torch.norm(self.y0_adain_embed - y0_adain_embed) > 0:
            self.y0_adain_embed = y0_adain_embed.clone()
            B, HW, C = y0_adain_embed.shape
            # NOTE(review): view(B, C, h_len, w_len) assumes HW == h_len * w_len
            # and a memory layout compatible with that reinterpretation — confirm
            # against callers (no transpose is performed here).
            LL, _, _, _ = haar_wavelet_decompose(y0_adain_embed.contiguous().view(B, C, h_len, w_len))
            B_LL, C_LL, H_LL, W_LL = LL.shape
            flat = LL.contiguous().view(B_LL, H_LL * W_LL, C_LL)
            f_s = flat[0] # assuming batch size 1 or using only the first
            self.mu_s = f_s.mean(dim=0, keepdim=True)
            f_s_centered = f_s - self.mu_s
            # Coloring matrix fitted on the LL band of the reference.
            self.y0_color = self.whiten(f_s_centered, set=True)

    def get(self, denoised_embed: torch.Tensor, h_len, w_len, stylize_highfreq=False):
        # Styles the LL band of each batch item with the parent WCT; optionally
        # also the high-frequency bands. Mutates denoised_embed through a
        # spatial view and returns it reshaped to [B, HW, C].
        B, HW, C = denoised_embed.shape
        denoised_embed = denoised_embed.contiguous().view(B, C, h_len, w_len)

        for i in range(B):
            x = denoised_embed[i:i+1] # [1, C, H, W]
            LL, LH, HL, HH = haar_wavelet_decompose(x)

            def process_band(band):
                # Flatten the band to [B, H*W, C], run the parent WCT, reshape back.
                Bc, Cc, Hc, Wc = band.shape
                flat = band.contiguous().view(Bc, Hc * Wc, Cc)
                styled = super(WaveletStyleWCT, self).get(flat)
                return styled.contiguous().view(Bc, Cc, Hc, Wc)

            LL_styled = process_band(LL)
            if stylize_highfreq:
                LH_styled = process_band(LH)
                HL_styled = process_band(HL)
                HH_styled = process_band(HH)
            else:
                # Leave detail bands untouched by default.
                LH_styled, HL_styled, HH_styled = LH, HL, HH

            recon = haar_wavelet_reconstruct(LL_styled, LH_styled, HL_styled, HH_styled)
            denoised_embed[i] = recon.squeeze(0)

        return denoised_embed.view(B, HW, C)
def haar_wavelet_decompose(x):
    """One-level orthonormal 2D Haar decomposition.

    Input: [B, C, H, W] with even H and W (cast to float32 if needed).
    Returns (LL, LH, HL, HH), each [B, C, H//2, W//2].
    """
    if x.dtype != torch.float32:
        x = x.float()

    _, _, H, W = x.shape
    assert H % 2 == 0 and W % 2 == 0, "Input must have even H, W"

    norm = 1 / 2**0.5

    # The four samples of each 2x2 block.
    tl = x[:, :, 0::2, 0::2]
    tr = x[:, :, 0::2, 1::2]
    bl = x[:, :, 1::2, 0::2]
    br = x[:, :, 1::2, 1::2]

    # Orthonormal Haar filters: average plus the three detail orientations.
    LL = (tl + tr + bl + br) * norm * 0.5
    LH = (tl - tr + bl - br) * norm * 0.5
    HL = (tl + tr - bl - br) * norm * 0.5
    HH = (tl - tr - bl + br) * norm * 0.5
    return LL, LH, HL, HH
def haar_wavelet_reconstruct(LL, LH, HL, HH):
    """Inverse of haar_wavelet_decompose.

    Input: four bands of shape [B, C, H, W].
    Returns the reconstructed tensor of shape [B, C, H*2, W*2].
    """
    norm = 1 / 2**0.5
    B, C, H, W = LL.shape

    out = torch.zeros(B, C, H * 2, W * 2, device=LL.device, dtype=LL.dtype)
    # Each 2x2 block is rebuilt from the four band coefficients.
    out[:, :, 0::2, 0::2] = (LL + LH + HL + HH) * norm
    out[:, :, 0::2, 1::2] = (LL - LH + HL - HH) * norm
    out[:, :, 1::2, 0::2] = (LL + LH - HL - HH) * norm
    out[:, :, 1::2, 1::2] = (LL - LH - HL + HH) * norm
    return out
"""
class StyleFeatures:
def __init__(self, dtype=torch.float64,):
self.dtype = dtype
def set(self, y0_adain_embed: torch.Tensor):
def get(self, denoised_embed: torch.Tensor):
return "Norpity McNerp"
"""
class Retrojector:
    # Wraps a projection layer (Linear or Conv2d) so images can be mapped into
    # embedding space and back out via the pseudo-inverse of the layer's weight.
    # With ENDO=True the Linear direction is swapped: embed() applies the
    # inverse and unembed() applies the forward projection.
    # NOTE(review): __init__ unconditionally calls .cuda(), so a CUDA device is
    # required — confirm this is intended for CPU-only setups.
    def __init__(self, proj=None, patch_size=2, pinv_dtype=torch.float64, dtype=torch.float64, ENDO=False):
        self.proj = proj
        self.patch_size = patch_size
        self.pinv_dtype = pinv_dtype  # precision used for pseudo-inverse math
        self.dtype = dtype
        self.LINEAR = isinstance(proj, nn.Linear)
        self.CONV2D = isinstance(proj, nn.Conv2d)
        # NOTE(review): CONV3D is flagged but no 3D inverse is built below; a
        # Conv3d proj leaves self.W_inv undefined and the bias block would
        # raise AttributeError — confirm Conv3d is unsupported here.
        self.CONV3D = isinstance(proj, nn.Conv3d)
        self.ENDO = ENDO

        self.W = proj.weight.data.to(dtype=pinv_dtype).cuda()
        if self.LINEAR:
            self.W_inv = torch.linalg.pinv(self.W.cuda())
        elif self.CONV2D:
            # Flatten the conv kernel to [C_out, C_in*kH*kW] before inverting.
            C_out, _, kH, kW = proj.weight.shape
            W_flat = proj.weight.view(C_out, -1).to(dtype=pinv_dtype)
            self.W_inv = torch.linalg.pinv(W_flat.cuda())

        if proj.bias is None:
            # Synthesize a zero bias so embed/unembed can subtract/add uniformly.
            if self.LINEAR:
                bias_size = proj.out_features
            else:
                bias_size = proj.out_channels
            self.b = torch.zeros(bias_size, dtype=pinv_dtype, device=self.W_inv.device)
        else:
            self.b = proj.bias.data.to(dtype=pinv_dtype).to(self.W_inv.device)

    def embed(self, img: torch.Tensor):
        # Cache the patch-grid dims for unembed(). NOTE(review): these are
        # computed BEFORE pad_to_patch_size, so they reflect the unpadded size;
        # presumably inputs are already patch-aligned — TODO confirm.
        self.h = img.shape[-2] // self.patch_size
        self.w = img.shape[-1] // self.patch_size

        img = comfy.ldm.common_dit.pad_to_patch_size(img, (self.patch_size, self.patch_size))

        if self.CONV2D:
            self.orig_shape = img.shape # for unembed
            img_embed = F.conv2d(
                img.to(self.W),
                weight=self.W,
                bias=self.b,
                stride=self.proj.stride,
                padding=self.proj.padding
            )
            # Flatten spatial positions into a token sequence (ph=pw=1: the conv
            # stride already did the patching).
            img_embed = rearrange(img_embed, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=1, pw=1)

        elif self.LINEAR:
            if img.ndim == 4:
                # Spatial input: patchify into tokens first.
                img = rearrange(img, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=self.patch_size, pw=self.patch_size)
            if self.ENDO:
                # ENDO: apply the inverse projection on the way "in".
                img_embed = F.linear(img.to(self.b) - self.b, self.W_inv)
            else:
                img_embed = F.linear(img.to(self.W), self.W, self.b)

        return img_embed.to(img)

    def unembed(self, img_embed: torch.Tensor):
        if self.CONV2D:
            # Tokens back to a spatial map, then invert the convolution.
            img_embed = rearrange(img_embed, "b (h w) (c ph pw) -> b c (h ph) (w pw)", h=self.h, w=self.w, ph=1, pw=1)
            img = self.invert_conv2d(img_embed)

        elif self.LINEAR:
            if self.ENDO:
                # ENDO: forward projection on the way "out".
                img = F.linear(img_embed.to(self.W), self.W, self.b)
            else:
                img = F.linear(img_embed.to(self.b) - self.b, self.W_inv)
            if img.ndim == 3:
                # Token sequence back to a spatial map.
                img = rearrange(img, "b (h w) (c ph pw) -> b c (h ph) (w pw)", h=self.h, w=self.w, ph=self.patch_size, pw=self.patch_size)

        return img.to(img_embed)

    def invert_conv2d(self, z: torch.Tensor,) -> torch.Tensor:
        # Least-squares inversion of the Conv2d projection using the cached
        # pseudo-inverse; overlapping patch contributions are averaged.
        z_dtype = z.dtype
        z = z.to(self.pinv_dtype)
        conv = self.proj

        B, C_in, H, W = self.orig_shape
        C_out, _, kH, kW = conv.weight.shape
        stride_h, stride_w = conv.stride
        pad_h, pad_w = conv.padding

        # Knock out the bias so only the weight needs inverting.
        b = conv.bias.view(1, C_out, 1, 1).to(z)
        z_nobias = z - b

        Bz, Co, Hp, Wp = z_nobias.shape
        z_flat = z_nobias.reshape(Bz, Co, -1)
        x_patches = self.W_inv @ z_flat

        # fold() sums overlapping patches; divide by the count to average.
        x_sum = F.fold(
            x_patches,
            output_size=(H + 2*pad_h, W+ 2*pad_w),
            kernel_size=(kH, kW),
            stride=(stride_h, stride_w),
        )
        ones = torch.ones_like(x_patches)
        count = F.fold(
            ones,
            output_size=(H + 2*pad_h, W + 2*pad_w),
            kernel_size=(kH, kW),
            stride=(stride_h, stride_w),
        )
        x_recon = x_sum / count.clamp(min=1e-6)

        if pad_h > 0 or pad_w > 0:
            x_recon = x_recon[..., pad_h:pad_h+H, pad_w:pad_w+W]
        return x_recon.to(z_dtype)

    def invert_patch_embedding(self, z: torch.Tensor, original_shape: torch.Size, grid_sizes: Optional[Tuple[int,int,int]] = None) -> torch.Tensor:
        # NOTE(review): this method references self.patch_embedding, which is
        # never defined on this class, and unpacks self.patch_size as a 3-tuple
        # (it is an int here). It appears copied from a model class and will
        # raise AttributeError if called — likely dead/WIP code.
        B, C_in, D, H, W = original_shape
        pD, pH, pW = self.patch_size
        sD, sH, sW = pD, pH, pW

        if z.ndim == 3:
            # [B, S, C_out] -> reshape to [B, C_out, D', H', W']
            S = z.shape[1]
            if grid_sizes is None:
                Dp = D // pD
                Hp = H // pH
                Wp = W // pW
            else:
                Dp, Hp, Wp = grid_sizes
            C_out = z.shape[2]
            z = z.transpose(1, 2).reshape(B, C_out, Dp, Hp, Wp)
        else:
            B2, C_out, Dp, Hp, Wp = z.shape
            assert B2 == B, "Batch size mismatch... ya sharked it."

        # Knock out the bias so only the weight needs inverting.
        b = self.patch_embedding.bias.view(1, C_out, 1, 1, 1)
        z_nobias = z - b

        # Collapse the (unit) temporal kernel axis and invert the 2D filter.
        w3 = self.patch_embedding.weight   # [C_out, C_in, 1, pH, pW]
        w2 = w3.squeeze(2)                 # [C_out, C_in, pH, pW]
        out_ch, in_ch, kH, kW = w2.shape
        W_flat = w2.view(out_ch, -1)             # [C_out, in_ch*pH*pW]
        W_pinv = torch.linalg.pinv(W_flat)       # [in_ch*pH*pW, C_out]

        # Merge depth into the batch so 2D fold machinery applies.
        z2 = z_nobias.permute(0,2,1,3,4).reshape(B*Dp, C_out, Hp, Wp)

        # Apply the pseudo-inverse to recover patch vectors.
        z_flat = z2.reshape(B*Dp, C_out, -1)     # [B*Dp, C_out, L]
        x_patches = W_pinv @ z_flat              # [B*Dp, in_ch*pH*pW, L]

        # Fold patch vectors back into spatial frames.
        x2 = F.fold(
            x_patches,
            output_size=(H, W),
            kernel_size=(pH, pW),
            stride=(sH, sW)
        )                                        # [B*Dp, C_in, H, W]

        # Un-merge depth back out of the batch axis.
        x2 = x2.reshape(B, Dp, in_ch, H, W)      # [B, Dp, C_in, H, W]
        x_recon = x2.permute(0,2,1,3,4).contiguous()  # [B, C_in, D, H, W]
        return x_recon
def invert_conv2d(
            conv: torch.nn.Conv2d,
            z: torch.Tensor,
            original_shape: torch.Size,
    ) -> torch.Tensor:
    """Least-squares inversion of a Conv2d output back to its input.

    conv: the convolution whose output `z` should be inverted.
    z: the conv output, [B, C_out, H', W'].
    original_shape: shape of the original conv input [B, C_in, H, W].
    Returns the reconstructed input; overlapping patch contributions are averaged.
    """
    import torch.nn.functional as F

    B, C_in, H, W = original_shape
    out_ch, _, ker_h, ker_w = conv.weight.shape
    s_h, s_w = conv.stride
    p_h, p_w = conv.padding

    # Remove the bias so only the weight needs inverting.
    if conv.bias is not None:
        bias = conv.bias.view(1, out_ch, 1, 1).to(z)
        z_centered = z - bias
    else:
        z_centered = z

    kernel_flat = conv.weight.view(out_ch, -1).to(z)
    kernel_pinv = torch.linalg.pinv(kernel_flat)

    nb, co, _, _ = z_centered.shape
    patches = kernel_pinv @ z_centered.reshape(nb, co, -1)

    padded_size = (H + 2*p_h, W + 2*p_w)
    # fold() sums overlapping patches; divide by overlap counts to average.
    total = F.fold(
        patches,
        output_size=padded_size,
        kernel_size=(ker_h, ker_w),
        stride=(s_h, s_w),
    )
    counts = F.fold(
        torch.ones_like(patches),
        output_size=padded_size,
        kernel_size=(ker_h, ker_w),
        stride=(s_h, s_w),
    )
    recon = total / counts.clamp(min=1e-6)

    # Strip the conv padding back off.
    if p_h > 0 or p_w > 0:
        recon = recon[..., p_h:p_h+H, p_w:p_w+W]
    return recon
def adain_seq_inplace(content: torch.Tensor, style: torch.Tensor, dim=1, eps: float = 1e-7) -> torch.Tensor:
    """In-place AdaIN along `dim`: re-center and re-scale `content` so its
    per-position mean/std match `style`'s. Mutates and returns `content`."""
    c_mean = content.mean(dim, keepdim=True)
    c_std  = content.std (dim, keepdim=True).add_(eps)  # eps added in place
    s_mean = style.mean  (dim, keepdim=True)
    s_std  = style.std   (dim, keepdim=True).add_(eps)

    # Normalize then re-statistic, all in place on `content`.
    content.sub_(c_mean).div_(c_std).mul_(s_std).add_(s_mean)
    return content
def adain_seq(content: torch.Tensor, style: torch.Tensor, eps: float = 1e-7) -> torch.Tensor:
    """Out-of-place AdaIN over dim 1: returns `content` re-statistic'd to `style`."""
    normalized = (content - content.mean(1, keepdim=True)) / (content.std(1, keepdim=True) + eps)
    return normalized * (style.std(1, keepdim=True) + eps) + style.mean(1, keepdim=True)
def apply_scattersort_tiled(
    denoised_spatial : torch.Tensor,
    y0_adain_spatial : torch.Tensor,
    tile_h : int,
    tile_w : int,
    pad : int,
):
    """
    Apply spatial scattersort between denoised_spatial and y0_adain_spatial
    using local tile-wise sorted value matching.
    Args:
        denoised_spatial (Tensor): (B, C, H, W) tensor.
        y0_adain_spatial (Tensor): (B, C, H, W) reference tensor.
        tile_h (int): tile height.
        tile_w (int): tile width.
        pad (int): padding size to apply around tiles.
    Returns:
        Tensor: (B, C, H, W) spatial tensor after tile-wise sort matching.
        (The previous docstring said (B, H*W, C); the value actually returned
        is spatial, not flattened.)
    Note:
        NOTE(review): H and W appear to be assumed multiples of tile_h/tile_w —
        the rearrange below pins exact tile extents and raises otherwise; TODO
        confirm against callers.
    """
    # Reflect-pad both maps so edge tiles have full context.
    denoised_padded = F.pad(denoised_spatial, (pad, pad, pad, pad), mode='reflect')
    y0_padded = F.pad(y0_adain_spatial, (pad, pad, pad, pad), mode='reflect')

    denoised_padded_out = denoised_padded.clone()
    _, _, h_len, w_len = denoised_spatial.shape

    # Walk tiles over the padded maps; each tile carries `pad` context all around.
    for ix in range(pad, h_len, tile_h):
        for jx in range(pad, w_len, tile_w):
            tile = denoised_padded[:, :, ix - pad:ix + tile_h + pad, jx - pad:jx + tile_w + pad]
            y0_tile = y0_padded[:, :, ix - pad:ix + tile_h + pad, jx - pad:jx + tile_w + pad]

            tile = rearrange(tile, "b c h w -> b c (h w)", h=tile_h + pad * 2, w=tile_w + pad * 2)
            y0_tile = rearrange(y0_tile, "b c h w -> b c (h w)", h=tile_h + pad * 2, w=tile_w + pad * 2)

            # Sorted-value matching: place the reference's sorted values at the
            # rank positions of the denoised tile.
            src_sorted, src_idx = tile.sort(dim=-1)
            ref_sorted, ref_idx = y0_tile.sort(dim=-1)

            new_tile = tile.scatter(dim=-1, index=src_idx, src=ref_sorted.expand(src_sorted.shape))
            new_tile = rearrange(new_tile, "b c (h w) -> b c h w", h=tile_h + pad * 2, w=tile_w + pad * 2)

            # Write back only the tile core; the context ring is discarded.
            denoised_padded_out[:, :, ix:ix + tile_h, jx:jx + tile_w] = (
                new_tile if pad == 0 else new_tile[:, :, pad:-pad, pad:-pad]
            )

    # Remove the outer padding before returning.
    denoised_padded_out = denoised_padded_out if pad == 0 else denoised_padded_out[:, :, pad:-pad, pad:-pad]
    return denoised_padded_out
def apply_scattersort_masked(
denoised_embed : torch.Tensor,
y0_adain_embed : torch.Tensor,
y0_style_pos_mask : torch.Tensor | None,
y0_style_pos_mask_edge : torch.Tensor | None,
h_len : int,
w_len : int
):
if y0_style_pos_mask is None:
flatmask = torch.ones((1,1,h_len,w_len)).bool().flatten().bool()
else:
flatmask = F.interpolate(y0_style_pos_mask, size=(h_len, w_len)).bool().flatten().cpu()
flatunmask = ~flatmask
if y0_style_pos_mask_edge is not None:
edgemask = F.interpolate(
y0_style_pos_mask_edge.unsqueeze(0), size=(h_len, w_len)
).bool().flatten()
flatmask = flatmask & (~edgemask)
flatunmask = flatunmask & (~edgemask)
denoised_masked = denoised_embed[:, flatmask, :].clone()
y0_adain_masked = y0_adain_embed[:, flatmask, :].clone()
src_sorted, src_idx = denoised_masked.sort(dim=-2)
ref_sorted, ref_idx = y0_adain_masked.sort(dim=-2)
denoised_embed[:, flatmask, :] = src_sorted.scatter(dim=-2, index=src_idx, src=ref_sorted.expand(src_sorted.shape))
if (flatunmask == True).any():
denoised_unmasked = denoised_embed[:, flatunmask, :].clone()
y0_adain_unmasked = y0_adain_embed[:, flatunmask, :].clone()
src_sorted, src_idx = denoised_unmasked.sort(dim=-2)
ref_sorted, ref_idx = y0_adain_unmasked.sort(dim=-2)
denoised_embed[:, flatunmask, :] = src_sorted.scatter(dim=-2, index=src_idx, src=ref_sorted.expand(src_sorted.shape))
if y0_style_pos_mask_edge is not None:
denoised_edgemasked = denoised_embed[:, edgemask, :].clone()
y0_adain_edgemasked = y0_adain_embed[:, edgemask, :].clone()
src_sorted, src_idx = denoised_edgemasked.sort(dim=-2)
ref_sorted, ref_idx = y0_adain_edgemasked.sort(dim=-2)
denoised_embed[:, edgemask, :] = src_sorted.scatter(dim=-2, index=src_idx, src=ref_sorted.expand(src_sorted.shape))
return denoised_embed
def apply_scattersort(
    denoised_embed : torch.Tensor,
    y0_adain_embed : torch.Tensor,
):
    """Global scattersort: per channel, replace each value of
    ``denoised_embed`` (along dim -2) with the value of equal rank from
    ``y0_adain_embed``.

    Mutates ``denoised_embed`` in place and returns it.
    """
    #src_sorted, src_idx = denoised_embed.cpu().sort(dim=-2)
    src_idx    = denoised_embed.argsort(dim=-2)
    ref_sorted = y0_adain_embed.sort(dim=-2)[0]
    # Fix: expand the reference to the *index* shape so that a reference
    # with batch size 1 broadcasts against a larger source batch (the
    # original `ref_sorted.expand(ref_sorted.shape)` was a no-op and
    # failed in that case); matches the expand pattern used by the
    # masked/spatial variants in this file.
    denoised_embed.scatter_(dim=-2, index=src_idx, src=ref_sorted.expand(src_idx.shape))
    return denoised_embed
def apply_scattersort_spatial(
    denoised_spatial : torch.Tensor,
    y0_adain_spatial : torch.Tensor,
):
    """Scattersort in BCHW space: per channel, re-rank the pixels of
    ``denoised_spatial`` using the sorted pixel values of
    ``y0_adain_spatial``; returns a BCHW tensor.

    Note: this definition is immediately shadowed by a second
    ``apply_scattersort_spatial`` defined right below it in this file.
    """
    B, C, H, W = denoised_spatial.shape
    # b c h w -> b (h w) c
    src_tokens = denoised_spatial.permute(0, 2, 3, 1).reshape(B, H * W, C)
    ref_tokens = y0_adain_spatial.permute(0, 2, 3, 1).reshape(B, H * W, C)

    src_sorted, src_idx = src_tokens.sort(dim=-2)
    ref_sorted, _       = ref_tokens.sort(dim=-2)
    merged = src_sorted.scatter(dim=-2, index=src_idx, src=ref_sorted.expand(src_sorted.shape))

    # b (h w) c -> b c h w
    return merged.reshape(B, H, W, C).permute(0, 3, 1, 2)
def apply_scattersort_spatial(
    x_spatial : torch.Tensor,
    y_spatial : torch.Tensor,
):
    """Scattersort in BCHW space (shadows the identically-named function
    defined just above): per channel, replace each pixel of ``x_spatial``
    with the equal-rank pixel value from ``y_spatial``."""
    B, C, H, W = x_spatial.shape
    # b c h w -> b (h w) c
    x_tok = x_spatial.permute(0, 2, 3, 1).reshape(B, H * W, C)
    y_tok = y_spatial.permute(0, 2, 3, 1).reshape(B, H * W, C)

    x_sorted, x_idx = x_tok.sort(dim=-2)
    y_sorted, _     = y_tok.sort(dim=-2)
    mixed = x_sorted.scatter(dim=-2, index=x_idx, src=y_sorted.expand(x_sorted.shape))

    # b (h w) c -> b c h w
    return mixed.reshape(B, H, W, C).permute(0, 3, 1, 2)
def apply_adain_spatial(
    x_spatial : torch.Tensor,
    y_spatial : torch.Tensor,
):
    """Spatial AdaIN: per batch and channel, shift/scale ``x_spatial`` so
    its spatial mean/std match those of ``y_spatial``.

    Bug fix: the original returned ``x_emb_adain.reshape_as(x_spatial)``,
    which reinterprets the (B, H*W, C) token layout as (B, C, H, W) in raw
    memory order and scrambles channels against pixels. We permute back to
    BCHW instead, inverting the forward flattening exactly.

    Raises AssertionError if any channel of either input has zero spatial
    variance (std uses the unbiased estimator over H*W samples).
    """
    B, C, H, W = x_spatial.shape
    # b c h w -> b (h w) c
    x_emb = x_spatial.permute(0, 2, 3, 1).reshape(B, H * W, C)
    y_emb = y_spatial.permute(0, 2, 3, 1).reshape(B, H * W, C)

    x_mean = x_emb.mean(-2, keepdim=True)
    x_std  = x_emb.std (-2, keepdim=True)
    y_mean = y_emb.mean(-2, keepdim=True)
    y_std  = y_emb.std (-2, keepdim=True)

    assert not (x_std == 0).any(), "Target tensor has no variance!"
    assert not (y_std == 0).any(), "Reference tensor has no variance!"

    x_emb_adain = (x_emb - x_mean) / x_std
    x_emb_adain = (x_emb_adain * y_std) + y_mean

    # b (h w) c -> b c h w  (NOT a raw reshape)
    return x_emb_adain.reshape(B, H, W, C).permute(0, 3, 1, 2)
def adain_patchwise(content: torch.Tensor, style: torch.Tensor, sigma: float = 1.0, kernel_size: int = None, eps: float = 1e-5) -> torch.Tensor:
# this one is really slow
B, C, H, W = content.shape
device = content.device
dtype = content.dtype
if kernel_size is None:
kernel_size = int(2 * math.ceil(3 * sigma) + 1)
if kernel_size % 2 == 0:
kernel_size += 1
pad = kernel_size // 2
coords = torch.arange(kernel_size, dtype=torch.float64, device=device) - pad
gauss = torch.exp(-0.5 * (coords / sigma) ** 2)
gauss /= gauss.sum()
kernel_2d = (gauss[:, None] * gauss[None, :]).to(dtype=dtype)
weight = kernel_2d.view(1, 1, kernel_size, kernel_size)
content_padded = F.pad(content, (pad, pad, pad, pad), mode='reflect')
style_padded = F.pad(style, (pad, pad, pad, pad), mode='reflect')
result = torch.zeros_like(content)
for i in range(H):
for j in range(W):
c_patch = content_padded[:, :, i:i + kernel_size, j:j + kernel_size]
s_patch = style_padded[:, :, i:i + kernel_size, j:j + kernel_size]
w = weight.expand_as(c_patch)
c_mean = (c_patch * w).sum(dim=(-1, -2), keepdim=True)
c_std = ((c_patch - c_mean)**2 * w).sum(dim=(-1, -2), keepdim=True).sqrt() + eps
s_mean = (s_patch * w).sum(dim=(-1, -2), keepdim=True)
s_std = ((s_patch - s_mean)**2 * w).sum(dim=(-1, -2), keepdim=True).sqrt() + eps
normed = (c_patch[:, :, pad:pad+1, pad:pad+1] - c_mean) / c_std
stylized = normed * s_std + s_mean
result[:, :, i, j] = stylized.squeeze(-1).squeeze(-1)
return result
def adain_patchwise_row_batch(content: torch.Tensor, style: torch.Tensor, sigma: float = 1.0, kernel_size: int = None, eps: float = 1e-5) -> torch.Tensor:
B, C, H, W = content.shape
device, dtype = content.device, content.dtype
if kernel_size is None:
kernel_size = int(2 * math.ceil(3 * sigma) + 1)
if kernel_size % 2 == 0:
kernel_size += 1
pad = kernel_size // 2
coords = torch.arange(kernel_size, dtype=torch.float64, device=device) - pad
gauss = torch.exp(-0.5 * (coords / sigma) ** 2)
gauss = (gauss / gauss.sum()).to(dtype)
kernel_2d = (gauss[:, None] * gauss[None, :])
weight = kernel_2d.view(1, 1, kernel_size, kernel_size)
content_padded = F.pad(content, (pad, pad, pad, pad), mode='reflect')
style_padded = F.pad(style, (pad, pad, pad, pad), mode='reflect')
result = torch.zeros_like(content)
for i in range(H):
c_row_patches = torch.stack([
content_padded[:, :, i:i+kernel_size, j:j+kernel_size]
for j in range(W)
], dim=0) # [W, B, C, k, k]
s_row_patches = torch.stack([
style_padded[:, :, i:i+kernel_size, j:j+kernel_size]
for j in range(W)
], dim=0)
w = weight.expand_as(c_row_patches[0])
c_mean = (c_row_patches * w).sum(dim=(-1, -2), keepdim=True)
c_std = ((c_row_patches - c_mean) ** 2 * w).sum(dim=(-1, -2), keepdim=True).sqrt() + eps
s_mean = (s_row_patches * w).sum(dim=(-1, -2), keepdim=True)
s_std = ((s_row_patches - s_mean) ** 2 * w).sum(dim=(-1, -2), keepdim=True).sqrt() + eps
center = kernel_size // 2
central = c_row_patches[:, :, :, center:center+1, center:center+1]
normed = (central - c_mean) / c_std
stylized = normed * s_std + s_mean
result[:, :, i, :] = stylized.squeeze(-1).squeeze(-1).permute(1, 2, 0) # [B,C,W]
return result
def adain_patchwise_row_batch_med(content: torch.Tensor, style: torch.Tensor, sigma: float = 1.0, kernel_size: int = None, eps: float = 1e-5, mask: torch.Tensor = None, use_median_blur: bool = False, lowpass_weight=1.0, highpass_weight=1.0) -> torch.Tensor:
    """Patchwise AdaIN (or median-blur style mixing) with optional mask-edge
    attenuation.

    Two modes per pixel neighborhood:
      * AdaIN (default): weighted mean/std of the content patch is matched
        to the style patch, using a Gaussian window whose width is scaled
        down near mask edges (via ``gaussian_table``).
      * ``use_median_blur``: the style patch median forms a lowpass base and
        the content center's residual over its own median is added back,
        weighted by ``lowpass_weight`` / ``highpass_weight``.

    ``mask`` (when given) softens the effect near mask boundaries: blurred
    mask values near 0.5 indicate an edge, and ``scaling`` fades the
    stylized result back toward the original center pixel there.
    Returns a tensor shaped like ``content``.
    """
    B, C, H, W = content.shape
    device, dtype = content.device, content.dtype
    if kernel_size is None:
        kernel_size = int(2 * math.ceil(3 * abs(sigma)) + 1)
    if kernel_size % 2 == 0:
        kernel_size += 1
    pad = kernel_size // 2
    content_padded = F.pad(content, (pad, pad, pad, pad), mode='reflect')
    style_padded = F.pad(style, (pad, pad, pad, pad), mode='reflect')
    result = torch.zeros_like(content)
    scaling = torch.ones((B, 1, H, W), device=device, dtype=dtype)
    sigma_scale = torch.ones((H, W), device=device, dtype=torch.float32)
    if mask is not None:
        with torch.no_grad():
            padded_mask = F.pad(mask.float(), (pad, pad, pad, pad), mode="reflect")
            blurred_mask = F.avg_pool2d(padded_mask, kernel_size=kernel_size, stride=1, padding=pad)
            blurred_mask = blurred_mask[..., pad:-pad, pad:-pad]
            # edge_proximity peaks (0.25) where the blurred mask is 0.5, i.e. at mask boundaries.
            edge_proximity = blurred_mask * (1.0 - blurred_mask)
            scaling = 1.0 - (edge_proximity / 0.25).clamp(0.0, 1.0)
            sigma_scale = scaling[0, 0]  # assuming single-channel mask broadcasted across B, C
    if not use_median_blur:
        coords = torch.arange(kernel_size, dtype=torch.float64, device=device) - pad
        # NOTE(review): base_gauss is computed but never used (the per-scale
        # kernels below are built from scratch) — apparent dead code.
        base_gauss = torch.exp(-0.5 * (coords / sigma) ** 2)
        base_gauss = (base_gauss / base_gauss.sum()).to(dtype)
        # Precompute one 2-D Gaussian per distinct edge-scaling value so the
        # inner loop is a dict lookup rather than a kernel rebuild.
        gaussian_table = {}
        for s in sigma_scale.unique():
            sig = float((sigma * s + eps).clamp(min=1e-3))
            gauss_local = torch.exp(-0.5 * (coords / sig) ** 2)
            gauss_local = (gauss_local / gauss_local.sum()).to(dtype)
            kernel_2d = gauss_local[:, None] * gauss_local[None, :]
            gaussian_table[s.item()] = kernel_2d
    for i in range(H):
        row_result = torch.zeros(B, C, W, dtype=dtype, device=device)
        for j in range(W):
            c_patch = content_padded[:, :, i:i+kernel_size, j:j+kernel_size]
            s_patch = style_padded[:, :, i:i+kernel_size, j:j+kernel_size]
            if use_median_blur:
                # Median blur with residual restoration
                unfolded_c = c_patch.reshape(B, C, -1)
                unfolded_s = s_patch.reshape(B, C, -1)
                c_median = unfolded_c.median(dim=-1, keepdim=True).values
                s_median = unfolded_s.median(dim=-1, keepdim=True).values
                center = kernel_size // 2
                central = c_patch[:, :, center, center].view(B, C, 1)
                residual = central - c_median
                stylized = lowpass_weight * s_median + residual * highpass_weight
            else:
                # Edge-aware Gaussian-weighted AdaIN on this neighborhood.
                k = gaussian_table[float(sigma_scale[i, j].item())]
                local_weight = k.view(1, 1, kernel_size, kernel_size).expand(B, C, kernel_size, kernel_size)
                c_mean = (c_patch * local_weight).sum(dim=(-1, -2), keepdim=True)
                c_std = ((c_patch - c_mean) ** 2 * local_weight).sum(dim=(-1, -2), keepdim=True).sqrt() + eps
                s_mean = (s_patch * local_weight).sum(dim=(-1, -2), keepdim=True)
                s_std = ((s_patch - s_mean) ** 2 * local_weight).sum(dim=(-1, -2), keepdim=True).sqrt() + eps
                center = kernel_size // 2
                central = c_patch[:, :, center:center+1, center:center+1]
                normed = (central - c_mean) / c_std
                stylized = normed * s_std + s_mean
            # Near mask edges (scaling < 1) fade back toward the original pixel.
            local_scaling = scaling[:, :, i, j].view(B, 1, 1)
            stylized = central * (1 - local_scaling) + stylized * local_scaling
            row_result[:, :, j] = stylized.squeeze(-1)
        result[:, :, i, :] = row_result
    return result
def weighted_mix_n(tensor_list, weight_list, dim=-1, offset=0):
    """Interleave equally-shaped tensors along ``dim`` so that tensor ``i``
    occupies approximately ``weight_list[i] / sum(weight_list)`` of the
    slots, using greedy largest-remainder slot assignment (ties go to the
    lowest index).

    ``offset`` is accepted for backward compatibility but has no effect:
    in the original it only fed an intermediate (`float_bins`/`bin_idx`)
    whose result was never used; that dead code is removed here.

    NOTE(review): the mask broadcasting below (unsqueezing leading dims)
    assumes ``dim`` addresses the last axis — confirm before calling with
    another ``dim``.
    """
    assert all(t.shape == tensor_list[0].shape for t in tensor_list)
    assert len(tensor_list) == len(weight_list)

    total_weight = sum(weight_list)
    ratios = [w / total_weight for w in weight_list]
    length = tensor_list[0].shape[dim]

    # Greedy largest-remainder assignment: each slot goes to the tensor
    # whose allocation lags its target share the most.
    counters = [0.0 for _ in ratios]
    slots = torch.empty(length, dtype=torch.long)
    for i in range(length):
        expected = [r * (i + 1) for r in ratios]
        errors = [expected[j] - counters[j] for j in range(len(ratios))]
        k = max(range(len(errors)), key=lambda j: errors[j])
        slots[i] = k
        counters[k] += 1

    # Select each tensor's values wherever its slot index matches.
    out = tensor_list[0].clone()
    for i, tensor in enumerate(tensor_list):
        mask = slots == i
        while mask.dim() < tensor.dim():
            mask = mask.unsqueeze(0)
        mask = mask.expand_as(tensor)
        out = torch.where(mask, tensor, out)
    return out
from torch import vmap
# Attribute names that hold lists of per-block Stylizer objects; used by
# Stylizer.merge_weights / set_weights_recursive to recurse into them.
BLOCK_NAMES = {"double_blocks", "single_blocks", "up_blocks", "middle_blocks", "down_blocks", "input_blocks", "output_blocks"}

# Default per-site style weights for an MMDiT transformer block
# (1.0 = fully styled site, 0.0 = untouched; *_tile are tile sizes
# for the tiled_* style methods).
DEFAULT_BLOCK_WEIGHTS_MMDIT = {
    "attn_norm"    : 0.0,
    "attn_norm_mod": 0.0,
    "attn"         : 1.0,
    "attn_gated"   : 0.0,
    "attn_res"     : 1.0,
    "ff_norm"      : 0.0,
    "ff_norm_mod"  : 0.0,
    "ff"           : 1.0,
    "ff_gated"     : 0.0,
    "ff_res"       : 1.0,
    "h_tile"       : 8,
    "w_tile"       : 8,
}

# Default weights for the sites inside an MMDiT attention unit.
DEFAULT_ATTN_WEIGHTS_MMDIT = {
    "q_proj": 0.0,
    "k_proj": 0.0,
    "v_proj": 1.0,
    "q_norm": 0.0,
    "k_norm": 0.0,
    "out"   : 1.0,
    "h_tile": 8,
    "w_tile": 8,
}

# Default weights for the model-level input/output projections.
DEFAULT_BASE_WEIGHTS_MMDIT = {
    "proj_in" : 1.0,
    "proj_out": 1.0,
    "h_tile"  : 8,
    "w_tile"  : 8,
}
class Stylizer:
    """Base class for in-model style-transfer hooks.

    Convention used throughout: batch row 0 of the activation tensor ``x``
    is the content (denoised) sample and rows >= 1 are style references;
    the style methods overwrite ``x[0:1]`` in place from the reference
    rows. Per-reference settings (``method``, ``mask``, ``apply_to``,
    ``h_tile``/``w_tile``) are parallel lists with one entry per reference.
    """
    # Class-level scratch dict shared by all instances to reuse buffers.
    buffer = {}
    # Shared whitening/coloring transform helpers (defined elsewhere in this file).
    CLS_WCT = StyleWCT()
    CLS_WCT2 = WaveletStyleWCT()

    def __init__(self, dtype=torch.float64, device=torch.device("cuda")):
        self.dtype  = dtype
        self.device = device
        self.mask     = [None]            # per-reference token mask (or None)
        self.apply_to = [""]              # per-reference target: "img", "txt", "img+txt", ...
        self.method   = ["passthrough"]   # per-reference style method *name* (looked up via getattr)
        self.h_tile = [-1]                # tile sizes for tiled_* methods (-1 = untiled)
        self.w_tile = [-1]
        self.w_len   = 0                  # latent grid geometry, set via set_len()
        self.h_len   = 0
        self.img_len = 0
        self.IMG_1ST = True
        self.HEADS   = 0                  # attention head count (0 = input never head-shaped)
        self.KONTEXT = 0                  # packed/kontext batch handling mode (0 = off)

    def set_mode(self, mode):
        # Replace the method list with a single mode name for all references.
        self.method = [mode] #[getattr(self, mode)]

    def set_weights(self, **kwargs):
        # Each known attribute is wrapped in a single-element list.
        for k, v in kwargs.items():
            if hasattr(self, k):
                setattr(self, k, [v])

    def set_weights_recursive(self, **kwargs):
        # Like set_weights, but also descends into nested Stylizer attributes
        # and into any per-block Stylizer lists named in BLOCK_NAMES.
        for name, val in kwargs.items():
            if hasattr(self, name):
                setattr(self, name, [val])
        for attr_name, attr_val in vars(self).items():
            if isinstance(attr_val, Stylizer):
                attr_val.set_weights_recursive(**kwargs)
        for list_name in BLOCK_NAMES:
            lst = getattr(self, list_name, None)
            if isinstance(lst, list):
                for element in lst:
                    if isinstance(element, Stylizer):
                        element.set_weights_recursive(**kwargs)

    def merge_weights(self, other):
        """Merge another Stylizer's settings into this one.

        Lists named in BLOCK_NAMES are merged element-wise; all other lists
        are concatenated (appending the other Stylizer's references); dicts
        are merged key-wise; nested objects are merged attribute-wise in
        place on ``self``.
        """
        def recursive_merge(a, b, path):
            if isinstance(a, list) and isinstance(b, list):
                if path in BLOCK_NAMES:
                    # Element-wise merge of per-block lists, keeping the
                    # longer list's tail.
                    out = []
                    for i in range(max(len(a), len(b))):
                        if i < len(a) and i < len(b):
                            out.append(recursive_merge(a[i], b[i], path=None))
                        elif i < len(a):
                            out.append(a[i])
                        else:
                            out.append(b[i])
                    return out
                return a + b
            if isinstance(a, dict) and isinstance(b, dict):
                merged = dict(a)
                for k, v_b in b.items():
                    if k in merged:
                        merged[k] = recursive_merge(merged[k], v_b, path=None)
                    else:
                        merged[k] = v_b
                return merged
            if hasattr(a, "__dict__") and hasattr(b, "__dict__"):
                # Merge object attributes in place on ``a``.
                for attr, val_b in vars(b).items():
                    val_a = getattr(a, attr, None)
                    if val_a is not None:
                        setattr(a, attr, recursive_merge(val_a, val_b, path=attr))
                    else:
                        setattr(a, attr, val_b)
                return a
            return b
        for attr in vars(self):
            if attr in BLOCK_NAMES:
                merged = recursive_merge(getattr(self, attr), getattr(other, attr, []), path=attr)
            elif hasattr(other, attr):
                merged = recursive_merge(getattr(self, attr), getattr(other, attr), path=attr)
            else:
                continue
            setattr(self, attr, merged)

    def set_len(self, h_len, w_len, img_slice, txt_slice, HEADS):
        # Record the latent grid geometry and the token-axis slices that
        # address the image and text segments of the sequence.
        self.h_len = h_len
        self.w_len = w_len
        self.img_slice = img_slice
        self.txt_slice = txt_slice
        self.img_len = h_len * w_len
        self.HEADS = HEADS

    @staticmethod
    def middle_slice(length, weight):
        """
        Returns a slice object that selects the middle `weight` fraction of a dimension.
        Example: weight=1.0 → full slice; weight=0.5 → middle 50%
        """
        if weight >= 1.0:
            return slice(None)
        wr = int((length * (1 - weight)) // 2)
        return slice(wr, -wr if wr > 0 else None)

    @staticmethod
    def get_outer_slice(x, weight):
        # Negative weight selects the *outer* bands (complement of middle_slice).
        if weight >= 0.0:
            return x
        length = x.shape[-2]
        wr = int((length * (1 - (-weight))) // 2)
        return torch.cat([x[...,:wr,:], x[...,-wr:,:]], dim=-2)

    @staticmethod
    def restore_outer_slice(x, x_outer, weight):
        # Inverse of get_outer_slice: write the outer bands back into x.
        if weight >= 0.0:
            return x
        length = x.shape[-2]
        wr = int((length * (1 - (-weight))) // 2)
        x[...,:wr,:] = x_outer[...,:wr,:]
        x[...,-wr:,:] = x_outer[...,-wr:,:]
        return x

    def __call__(self, x, attr):
        """Apply the configured style method(s) for weight list ``attr``.

        ``x`` may be token-shaped (B, HW, C) or head-shaped
        (B, HEADS, HW, C); head-shaped input is flattened to tokens and
        restored on return. Row 0 is modified in place from rows >= 1.
        """
        if x.shape[0] == 1 and not self.KONTEXT:
            return x  # no style references present
        weight_list = getattr(self, attr)
        weights_all_zero = all(weight == 0.0 for weight in weight_list)
        if weights_all_zero:
            return x
        #self.HEADS=24
        #x_ndim = x.ndim
        #if x_ndim == 3:
        #    B, HW, C = x.shape
        #    if x.shape[-2] != self.HEADS and self.HEADS != 0:
        #        x = x.reshape(B,self.HEADS,HW,-1)
        HEAD_DIM = x.shape[1]
        if HEAD_DIM == self.HEADS:
            # Head-shaped input: fold heads into the channel dim.
            B, HEAD_DIM, HW, C = x.shape
            x = x.reshape(B, HW, C*HEAD_DIM)
        if hasattr(self, "KONTEXT") and self.KONTEXT == 1:
            # Kontext mode 1: the single row packs two samples along tokens.
            x = x.reshape(2, x.shape[1] // 2, x.shape[2])
        txt_slice, img_slice, ktx_slice = self.txt_slice, self.img_slice, None
        if hasattr(self, "KONTEXT") and self.KONTEXT == 2:
            # Kontext mode 2: kontext tokens occupy the original img slice;
            # img/txt slices are re-derived for the doubled layout.
            ktx_slice = self.img_slice # slice(2 * self.img_slice.start, None)
            img_slice = slice(2 * self.img_slice.start, self.img_slice.start)
            txt_slice = slice(None, 2 * self.txt_slice.stop)
        weights_all_one = all(weight == 1.0 for weight in weight_list)
        methods_all_scattersort = all(name == "scattersort" for name in self.method)
        masks_all_none = all(mask is None for mask in self.mask)
        if weights_all_one and methods_all_scattersort and len(weight_list) > 1 and masks_all_none:
            # Fast path: full-strength scattersort against all references at
            # once — references are pooled, sorted, and strided to interleave.
            buf = Stylizer.buffer
            buf['src_idx'] = x[0:1].argsort(dim=-2)
            buf['ref_sorted'], buf['ref_idx'] = x[1:].reshape(1, -1, x.shape[-1]).sort(dim=-2)
            buf['src'] = buf['ref_sorted'][:,::len(weight_list)].expand_as(buf['src_idx']) # interleave_stride = len(weight_list)
            x[0:1] = x[0:1].scatter_(dim=-2, index=buf['src_idx'], src=buf['src'],)
        else:
            for i, (weight, mask) in enumerate(zip(weight_list, self.mask)):
                if mask is not None:
                    x01 = x[0:1].clone()  # snapshot for the mask-blended restore below
                slc = Stylizer.middle_slice(x.shape[-2], weight)
                #slc = slice(None)
                txt_method_name = self.method[i].removeprefix("tiled_")
                txt_method = getattr(self, txt_method_name)
                # Fall back to the untiled method when the sequence is shorter
                # than the image token count or no grid geometry is set.
                # NOTE(review): tiled_* methods do not accept ``slc``; the paths
                # below pass slc unconditionally — presumably tiled methods are
                # only reached where it is stripped; confirm.
                method_name = self.method[i].removeprefix("tiled_") if self.img_len > x.shape[-2] or self.h_len < 0 else self.method[i]
                method = getattr(self, method_name)
                apply_to = self.apply_to[i]
                if weight == 0.0:
                    continue
                else: # if weight == 1.0:
                    if weight > 0 and weight < 1:
                        x_clone = x.clone()  # for the lerp blend after styling
                    if self.img_len == x.shape[-2] or apply_to == "img+txt" or self.h_len < 0:
                        x = method(x, idx=i+1, slc=slc)
                    elif self.img_len < x.shape[-2]:
                        # Sequence contains img and txt segments: style each
                        # segment separately according to apply_to.
                        if "img" in apply_to:
                            x[...,img_slice,:] = method(x[...,img_slice,:], idx=i+1, slc=slc)
                            #if ktx_slice is not None:
                            #    x[...,ktx_slice,:] = method(x[...,ktx_slice,:], idx=i+1)
                            #x[:,:self.img_len,:] = method(x[:,:self.img_len,:], idx=i+1)
                        if "txt" in apply_to:
                            x[...,txt_slice,:] = txt_method(x[...,txt_slice,:], idx=i+1, slc=slc)
                            #x[:,self.img_len:,:] = method(x[:,self.img_len:,:], idx=i+1)
                        if not "img" in apply_to and not "txt" in apply_to:
                            pass
                    else:
                        x = method(x, idx=i+1, slc=slc)
                    if weight > 0 and weight < 1 and txt_method_name != "scattersort":
                        # Partial weight: blend styled result with the original
                        # (scattersort handles partial weight via slc instead).
                        x = torch.lerp(x_clone, x, weight)
                        #else:
                        #    x = torch.lerp(x, method(x.clone(), idx=i+1), weight)
                if mask is not None:
                    # Blend the styled img tokens against the snapshot by mask.
                    x[0:1,...,img_slice,:] = torch.lerp(x01[...,img_slice,:], x[0:1,...,img_slice,:], mask.view(1, -1, 1))
                    if ktx_slice is not None:
                        x[0:1,...,ktx_slice,:] = torch.lerp(x01[...,ktx_slice,:], x[0:1,...,ktx_slice,:], mask.view(1, -1, 1))
                    #x[0:1,:self.img_len] = torch.lerp(x01[:,:self.img_len], x[0:1,:self.img_len], mask.view(1, -1, 1))
        #if x_ndim == 3:
        #    return x.view(B,HW,C)
        if hasattr(self, "KONTEXT") and self.KONTEXT == 1:
            x = x.reshape(1, x.shape[1] * 2, x.shape[2])
        if HEAD_DIM == self.HEADS:
            return x.reshape(B, HEAD_DIM, HW, C)
        else:
            return x

    def WCT(self, x, idx=1):
        # Whitening/coloring transform via the shared StyleWCT helper.
        Stylizer.CLS_WCT.set(x[idx:idx+1])
        x[0:1] = Stylizer.CLS_WCT.get(x[0:1])
        return x

    def WCT2(self, x, idx=1):
        # Wavelet WCT variant; needs the spatial grid geometry.
        Stylizer.CLS_WCT2.set(x[idx:idx+1], self.h_len, self.w_len)
        x[0:1] = Stylizer.CLS_WCT2.get(x[0:1], self.h_len, self.w_len)
        return x

    @staticmethod
    def AdaIN_(x, y, eps: float = 1e-7) -> torch.Tensor:
        # In-place AdaIN: match x's token-axis mean/std to y's.
        mean_c = x.mean(-2, keepdim=True)
        std_c  = x.std (-2, keepdim=True).add_(eps)  # in-place add
        mean_s = y.mean (-2, keepdim=True)
        std_s  = y.std  (-2, keepdim=True).add_(eps)
        x.sub_(mean_c).div_(std_c).mul_(std_s).add_(mean_s)  # in-place chain
        return x

    def AdaIN(self, x, idx=1, eps: float = 1e-7) -> torch.Tensor:
        # AdaIN of the content row (0) against reference row ``idx``.
        mean_c = x[0:1].mean(-2, keepdim=True)
        std_c  = x[0:1].std (-2, keepdim=True).add_(eps)  # in-place add
        mean_s = x[idx:idx+1].mean (-2, keepdim=True)
        std_s  = x[idx:idx+1].std  (-2, keepdim=True).add_(eps)
        x[0:1].sub_(mean_c).div_(std_c).mul_(std_s).add_(mean_s)  # in-place chain
        return x

    def injection(self, x:torch.Tensor, idx=1) -> torch.Tensor:
        # Replace the content row outright with the reference row.
        x[0:1] = x[idx:idx+1]
        return x

    @staticmethod
    def injection_(x:torch.Tensor, y:torch.Tensor) -> torch.Tensor:
        return y

    @staticmethod
    def passthrough(x:torch.Tensor, idx=1) -> torch.Tensor:
        # No-op style method (the default).
        return x

    @staticmethod
    def decompose_magnitude_direction(x, dim=-1, eps=1e-8):
        # Split x into its L2 norm along ``dim`` and the unit-norm direction.
        magnitude = x.norm(p=2, dim=dim, keepdim=True)
        direction = x / (magnitude + eps)
        return magnitude, direction

    @staticmethod
    def scattersort_dir_(x, y, dim=-2):
        # Scattersort y's values into x's rank order, then keep x's original
        # magnitude and only adopt the scattersorted direction.
        #buf = Stylizer.buffer
        #buf['src_sorted'], buf['src_idx'] = x.sort(dim=-2)
        #buf['ref_sorted'], buf['ref_idx'] = y.sort(dim=-2)
        #mag, _ = Stylizer.decompose_magnitude_direction(buf['src_sorted'], dim)
        #_, dir = Stylizer.decompose_magnitude_direction(buf['ref_sorted'], dim)
        mag, _ = Stylizer.decompose_magnitude_direction(x.to(torch.float64), dim)
        buf = Stylizer.buffer
        buf['src_idx'] = x.argsort(dim=-2)
        buf['ref_sorted'], buf['ref_idx'] = y .sort(dim=-2)
        x.scatter_(dim=-2, index=buf['src_idx'], src=buf['ref_sorted'].expand_as(buf['src_idx']))
        _, dir = Stylizer.decompose_magnitude_direction(x.to(torch.float64), dim)
        return (mag * dir).to(x)

    @staticmethod
    def scattersort_dir2_(x, y, dim=-2):
        # Two-stage variant: magnitude/direction swap along the channel axis
        # of the sorted tensors, then again along ``dim`` before scattering.
        #buf = Stylizer.buffer
        #buf['src_sorted'], buf['src_idx'] = x.sort(dim=-2)
        #buf['ref_sorted'], buf['ref_idx'] = y.sort(dim=-2)
        #mag, _ = Stylizer.decompose_magnitude_direction(buf['src_sorted'], dim)
        #_, dir = Stylizer.decompose_magnitude_direction(buf['ref_sorted'], dim)
        buf = Stylizer.buffer
        buf['src_sorted'], buf['src_idx'] = x.sort(dim=dim)
        buf['ref_sorted'], buf['ref_idx'] = y.sort(dim=dim)
        buf['x_sub'], buf['x_sub_idx'] = buf['src_sorted'].sort(dim=-1)
        buf['y_sub'], buf['y_sub_idx'] = buf['ref_sorted'].sort(dim=-1)
        mag, _ = Stylizer.decompose_magnitude_direction(buf['x_sub'].to(torch.float64), -1)
        _, dir = Stylizer.decompose_magnitude_direction(buf['y_sub'].to(torch.float64), -1)
        buf['y_sub'] = (mag * dir).to(x)
        buf['ref_sorted'].scatter_(dim=-1, index=buf['y_sub_idx'], src=buf['y_sub'].expand_as(buf['y_sub_idx']))
        mag, _ = Stylizer.decompose_magnitude_direction(buf['src_sorted'].to(torch.float64), dim)
        _, dir = Stylizer.decompose_magnitude_direction(buf['ref_sorted'].to(torch.float64), dim)
        buf['ref_sorted'] = (mag * dir).to(x)
        x.scatter_(dim=dim, index=buf['src_idx'], src=buf['ref_sorted'].expand_as(buf['src_idx']))
        return x

    @staticmethod
    def scattersort_dir(x, idx=1):
        x[0:1] = Stylizer.scattersort_dir_(x[0:1], x[idx:idx+1])
        return x

    @staticmethod
    def scattersort_dir2(x, idx=1):
        x[0:1] = Stylizer.scattersort_dir2_(x[0:1], x[idx:idx+1])
        return x

    @staticmethod
    def scattersort_(x, y, slc=slice(None)):
        # Core scattersort: write y's sorted values into x at x's rank
        # positions, restricted to the token slice ``slc``.
        buf = Stylizer.buffer
        buf['src_idx'] = x.argsort(dim=-2)
        buf['ref_sorted'], buf['ref_idx'] = y .sort(dim=-2)
        return x.scatter_(dim=-2, index=buf['src_idx'][...,slc,:], src=buf['ref_sorted'][...,slc,:].expand_as(buf['src_idx'][...,slc,:]))

    @staticmethod
    def scattersort_double(x, y):
        # Scattersort along channels of the sorted tensors first, then along
        # tokens — a "double" rank transfer.
        buf = Stylizer.buffer
        buf['src_sorted'], buf['src_idx'] = x.sort(dim=-2)
        buf['ref_sorted'], buf['ref_idx'] = y.sort(dim=-2)
        buf['x_sub_idx'] = buf['src_sorted'].argsort(dim=-1)
        buf['y_sub'], buf['y_sub_idx'] = buf['ref_sorted'].sort(dim=-1)
        x.scatter_(dim=-1, index=buf['x_sub_idx'], src=buf['y_sub'].expand_as(buf['x_sub_idx']))
        return x.scatter_(dim=-2, index=buf['src_idx'], src=buf['ref_sorted'].expand_as(buf['src_idx']))

    def scattersort_aoeu(self, x, idx=1, slc=slice(None)):
        # Apparently an experimental alias of the generic scattersort path.
        x[0:1] = Stylizer.scattersort_(x[0:1], x[idx:idx+1], slc)
        return x

    def scattersort(self, x, idx=1, slc=slice(None)):
        if x.shape[0] != 2:
            x[0:1] = Stylizer.scattersort_(x[0:1], x[idx:idx+1], slc)
            return x
        # Fast path for exactly one reference: one batched sort covers both rows.
        buf = Stylizer.buffer
        buf['sorted'], buf['idx'] = x.sort(dim=-2)
        return x.scatter_(dim=-2, index=buf['idx'][0:1][...,slc,:], src=buf['sorted'][1:2][...,slc,:].expand_as(buf['idx'][0:1][...,slc,:]))

    def tiled_scattersort(self, x, idx=1): #, h_tile=None, w_tile=None):
        """Scattersort applied independently per spatial tile of the image
        token grid (tile sizes from self.h_tile/self.w_tile)."""
        #if HDModel.RECON_MODE:
        #    return denoised_embed
        #den   = x[0:1]      [:,:self.img_len,:].view(-1, 2560, self.h_len, self.w_len)
        #style = x[idx:idx+1][:,:self.img_len,:].view(-1, 2560, self.h_len, self.w_len)
        #h_tile = self.h_tile[idx-1] if h_tile is None else h_tile
        #w_tile = self.w_tile[idx-1] if w_tile is None else w_tile
        C = x.shape[-1]
        den   = x[0:1]      [:,self.img_slice,:].reshape(-1, C, self.h_len, self.w_len)
        style = x[idx:idx+1][:,self.img_slice,:].reshape(-1, C, self.h_len, self.w_len)
        tiles    = Stylizer.get_tiles_as_strided(den,   self.h_tile[idx-1], self.w_tile[idx-1])
        ref_tile = Stylizer.get_tiles_as_strided(style, self.h_tile[idx-1], self.w_tile[idx-1])
        # rearrange for vmap to run on (nH, nW) ( as outer axes)
        tiles_v    = tiles   .permute(2, 3, 0, 1, 4, 5)  # (nH, nW, B, C, tile_h, tile_w)
        ref_tile_v = ref_tile.permute(2, 3, 0, 1, 4, 5)  # (nH, nW, B, C, tile_h, tile_w)
        # vmap over spatial dimms (nH, nW)... num of tiles high, num tiles wide
        vmap2 = torch.vmap(torch.vmap(Stylizer.apply_scattersort_per_tile, in_dims=0), in_dims=0)
        result = vmap2(tiles_v, ref_tile_v)  # (nH, nW, B, C, tile_h, tile_w)
        # --> (B, C, nH, nW, tile_h, tile_w)
        result = result.permute(2, 3, 0, 1, 4, 5)  #( B, C, nH, nW, tile_h, tile_w)
        # in-place copy, werx if result has same shape/strides as tiles... overwrites same mem location "content" is using
        tiles.copy_(result)
        return x

    def tiled_AdaIN(self, x, idx=1):
        """AdaIN applied independently per spatial tile of the image token
        grid. NOTE(review): apply_AdaIN_per_tile normalizes over dim -2 of
        each (tile_h, tile_w) tile — i.e. over tile rows, not the whole
        tile — confirm this is intended."""
        #if HDModel.RECON_MODE:
        #    return denoised_embed
        #den   = x[0:1]      [:,:self.img_len,:].view(-1, 2560, self.h_len, self.w_len)
        #style = x[idx:idx+1][:,:self.img_len,:].view(-1, 2560, self.h_len, self.w_len)
        C = x.shape[-1]
        den   = x[0:1]      [:,self.img_slice,:].reshape(-1, C, self.h_len, self.w_len)
        style = x[idx:idx+1][:,self.img_slice,:].reshape(-1, C, self.h_len, self.w_len)
        tiles    = Stylizer.get_tiles_as_strided(den,   self.h_tile[idx-1], self.w_tile[idx-1])
        ref_tile = Stylizer.get_tiles_as_strided(style, self.h_tile[idx-1], self.w_tile[idx-1])
        # rearrange for vmap to run on (nH, nW) ( as outer axes)
        tiles_v    = tiles   .permute(2, 3, 0, 1, 4, 5)  # (nH, nW, B, C, tile_h, tile_w)
        ref_tile_v = ref_tile.permute(2, 3, 0, 1, 4, 5)  # (nH, nW, B, C, tile_h, tile_w)
        # vmap over spatial dimms (nH, nW)... num of tiles high, num tiles wide
        vmap2 = torch.vmap(torch.vmap(Stylizer.apply_AdaIN_per_tile, in_dims=0), in_dims=0)
        result = vmap2(tiles_v, ref_tile_v)  # (nH, nW, B, C, tile_h, tile_w)
        # --> (B, C, nH, nW, tile_h, tile_w)
        result = result.permute(2, 3, 0, 1, 4, 5)  #( B, C, nH, nW, tile_h, tile_w)
        # in-place copy, werx if result has same shape/strides as tiles... overwrites same mem location "content" is using
        tiles.copy_(result)
        return x

    @staticmethod
    def get_tiles_as_strided(x, tile_h, tile_w):
        # Zero-copy tiling: a strided view whose writes land in x's storage.
        # Assumes H and W are divisible by tile_h/tile_w (remainder dropped).
        B, C, H, W = x.shape
        stride = x.stride()
        nH = H // tile_h
        nW = W // tile_w
        tiles = x.as_strided(
            size=(B, C, nH, nW, tile_h, tile_w),
            stride=(stride[0], stride[1], stride[2] * tile_h, stride[3] * tile_w, stride[2], stride[3])
        )
        return tiles # shape: (B, C, nH, nW, tile_h, tile_w)

    @staticmethod
    def apply_scattersort_per_tile(tile, ref_tile):
        # Scattersort over the flattened tile pixels (per channel).
        flat     = tile    .flatten(-2, -1)
        ref_flat = ref_tile.flatten(-2, -1)
        sorted_ref, _       = ref_flat .sort(dim=-1)
        src_sorted, src_idx = flat.sort(dim=-1)
        out = flat.scatter(dim=-1, index=src_idx, src=sorted_ref)
        return out.view_as(tile)

    @staticmethod
    def apply_AdaIN_per_tile(tile, ref_tile, eps: float = 1e-7):
        # In-place AdaIN over dim -2 of the tile (see NOTE in tiled_AdaIN).
        mean_c = tile.mean(-2, keepdim=True)
        std_c  = tile.std (-2, keepdim=True).add_(eps)  # in-place add
        mean_s = ref_tile.mean (-2, keepdim=True)
        std_s  = ref_tile.std  (-2, keepdim=True).add_(eps)
        tile.sub_(mean_c).div_(std_c).mul_(std_s).add_(mean_s)  # in-place chain
        return tile
class StyleMMDiT_Attn(Stylizer):
    """Per-site style weights inside an MMDiT attention unit."""
    def __init__(self, mode):
        super().__init__()
        # One single-entry weight list per stylizable site.
        for site in ("q_proj", "k_proj", "v_proj", "q_norm", "k_norm", "out"):
            setattr(self, site, [0.0])
class StyleMMDiT_FF(Stylizer): # these hit img or joint only, never txt
    """Per-site style weights for a (SwiGLU-style) feed-forward unit."""
    def __init__(self, mode):
        super().__init__()
        for site in ("ff_1", "ff_1_silu", "ff_3", "ff_13", "ff_2"):
            setattr(self, site, [0.0])
class StyleMMDiT_MoE(Stylizer): # these hit img or joint only, never txt
    """Style weights for a mixture-of-experts FF (shared + routed experts)."""
    def __init__(self, mode):
        super().__init__()
        self.FF_SHARED   = StyleMMDiT_FF(mode)
        self.FF_SEPARATE = StyleMMDiT_FF(mode)
        # Dict preserves the attribute insertion order of the original.
        defaults = {"shared": 0.0, "gate": False, "topk_weight": 0.0,
                    "separate": 0.0, "sum": 0.0, "out": 0.0}
        for site, value in defaults.items():
            setattr(self, site, [value])
class StyleMMDiT_SubBlock(Stylizer):
    """Style weights for one transformer sub-block (attn + FF stages)."""
    def __init__(self, mode):
        super().__init__()
        self.ATTN = StyleMMDiT_Attn(mode)  # inner attention sites: qkv proj, qk norm, attn out
        for site in ("attn_norm", "attn_norm_mod", "attn", "attn_gated", "attn_res",
                     "ff_norm", "ff_norm_mod", "ff", "ff_gated", "ff_res"):
            setattr(self, site, [0.0])
        self.mask = [None]

    def set_len(self, h_len, w_len, img_slice, txt_slice, HEADS):
        # Propagate geometry to the nested attention stylizer as well.
        super().set_len(h_len, w_len, img_slice, txt_slice, HEADS)
        self.ATTN.set_len(h_len, w_len, img_slice, txt_slice, HEADS)
class StyleMMDiT_IMG_Block(StyleMMDiT_SubBlock): # img or joint
    """Sub-block for the image (or joint) stream; FF is a MoE stylizer."""
    def __init__(self, mode):
        super().__init__(mode)
        self.FF = StyleMMDiT_MoE(mode)  # MoE options for img/joint stream

    def set_len(self, h_len, w_len, img_slice, txt_slice, HEADS):
        super().set_len(h_len, w_len, img_slice, txt_slice, HEADS)
        self.FF.set_len(h_len, w_len, img_slice, txt_slice, HEADS)
class StyleMMDiT_TXT_Block(StyleMMDiT_SubBlock): # txt only
    """Sub-block for the text stream; FF is a plain (non-MoE) stylizer."""
    def __init__(self, mode):
        super().__init__(mode)
        self.FF = StyleMMDiT_FF(mode)  # plain FF for the txt stream

    def set_len(self, h_len, w_len, img_slice, txt_slice, HEADS):
        super().set_len(h_len, w_len, img_slice, txt_slice, HEADS)
        self.FF.set_len(h_len, w_len, img_slice, txt_slice, HEADS)
class StyleMMDiT_BaseBlock:
    """Pairs an img-side and a txt-side stylizer for one MMDiT block."""
    def __init__(self, mode="passthrough"):
        self.img = StyleMMDiT_IMG_Block(mode)
        self.txt = StyleMMDiT_TXT_Block(mode)
        self.mask = [None]
        self.attn_mask = [None]

    def set_len(self, h_len, w_len, img_slice, txt_slice, HEADS):
        self.h_len = h_len
        self.w_len = w_len
        self.img_len = h_len * w_len
        self.img_slice = img_slice
        self.txt_slice = txt_slice
        self.HEADS = HEADS
        self.img.set_len(h_len, w_len, img_slice, txt_slice, HEADS)
        self.txt.set_len(-1, -1, img_slice, txt_slice, HEADS)  # txt side has no spatial grid

        def _to_tokens(mask_list):
            # Resize spatial masks to the latent grid and flatten to
            # per-token weights (in place on the list).
            for i, m in enumerate(mask_list):
                if m is not None and m.ndim > 1:
                    mask_list[i] = F.interpolate(m.unsqueeze(0), size=(h_len, w_len)).flatten().to(torch.bfloat16).cuda()

        _to_tokens(self.mask)
        self.img.mask = self.mask
        _to_tokens(self.attn_mask)
        self.img.ATTN.mask = self.attn_mask
class StyleMMDiT_DoubleBlock(StyleMMDiT_BaseBlock):
    """Stylizer pair for a double (separate img/txt stream) MMDiT block."""
    def __init__(self, mode="passthrough"):
        super().__init__(mode)
        # Re-creates the txt stylizer; redundant with the base __init__ but
        # harmless, kept for behavioral parity.
        self.txt = StyleMMDiT_TXT_Block(mode)

    def set_len(self, h_len, w_len, img_slice, txt_slice, HEADS):
        super().set_len(h_len, w_len, img_slice, txt_slice, HEADS)
        # The base set_len already did this identical call; repeating is harmless.
        self.txt.set_len(-1, -1, img_slice, txt_slice, HEADS)
class StyleMMDiT_SingleBlock(StyleMMDiT_BaseBlock):
    """Stylizer pair for a single (joint-stream) MMDiT block; identical to
    the base behavior, kept as a distinct type for configuration."""
    def __init__(self, mode="passthrough"):
        super().__init__(mode)
class StyleUNet_Resample(Stylizer):
    """Style weight for a UNet up/down-sampling convolution."""
    def __init__(self, mode):
        super().__init__()
        self.conv = [0.0]  # single stylizable site: the resample conv output
class StyleUNet_Attn(Stylizer):
    """Per-site style weights for a UNet attention unit."""
    def __init__(self, mode):
        super().__init__()
        for site in ("q_proj", "k_proj", "v_proj", "out"):
            setattr(self, site, [0.0])
class StyleUNet_FF(Stylizer):
    """Per-site style weights for a UNet feed-forward (GEGLU) unit."""
    def __init__(self, mode):
        super().__init__()
        for site in ("proj", "geglu", "linear"):
            setattr(self, site, [0.0])
class StyleUNet_TransformerBlock(Stylizer):
    """Style weights for one UNet transformer block (self-attn, cross-attn, FF)."""
    def __init__(self, mode):
        super().__init__()
        self.ATTN1 = StyleUNet_Attn(mode)  # self-attention
        self.FF    = StyleUNet_FF  (mode)
        self.ATTN2 = StyleUNet_Attn(mode)  # cross-attention
        for site in ("self_attn", "ff", "cross_attn",
                     "self_attn_res", "cross_attn_res", "ff_res",
                     "norm1", "norm2", "norm3"):
            setattr(self, site, [0.0])

    def set_len(self, h_len, w_len, img_slice, txt_slice, HEADS):
        # Propagate geometry into both nested attention stylizers.
        super().set_len(h_len, w_len, img_slice, txt_slice, HEADS)
        self.ATTN1.set_len(h_len, w_len, img_slice, txt_slice, HEADS)
        self.ATTN2.set_len(h_len, w_len, img_slice, txt_slice, HEADS)
class StyleUNet_SpatialTransformer(Stylizer):
    """Style weights around a UNet SpatialTransformer (norm/proj + inner block)."""
    def __init__(self, mode):
        super().__init__()
        self.TFMR = StyleUNet_TransformerBlock(mode)
        for site in ("spatial_norm_in", "spatial_proj_in", "spatial_transformer_block",
                     "spatial_transformer", "spatial_proj_out", "spatial_res"):
            setattr(self, site, [0.0])

    def set_len(self, h_len, w_len, img_slice, txt_slice, HEADS):
        # Propagate geometry into the inner transformer-block stylizer.
        super().set_len(h_len, w_len, img_slice, txt_slice, HEADS)
        self.TFMR.set_len(h_len, w_len, img_slice, txt_slice, HEADS)
class StyleUNet_ResBlock(Stylizer):
    """Per-site style weights for a UNet residual block."""
    def __init__(self, mode):
        super().__init__()
        for site in ("in_norm", "in_silu", "in_conv",
                     "emb_silu", "emb_linear", "emb_res",
                     "out_norm", "out_silu", "out_conv", "residual"):
            setattr(self, site, [0.0])
class StyleUNet_BaseBlock(Stylizer):
    """Groups the stylizers for one UNet stage: resample conv, res block,
    and spatial transformer.

    NOTE(review): __init__ does not call super().__init__(), so Stylizer
    attributes such as ``method``/``apply_to``/``h_tile`` are never set on
    this instance — presumably safe only because the weights below default
    to 0.0 and Stylizer.__call__ returns early on all-zero weights;
    confirm before relying on non-zero weights at this level.
    """
    def __init__(self, mode="passthrough"):
        self.resample_block = StyleUNet_Resample(mode)
        self.res_block = StyleUNet_ResBlock(mode)
        self.spatial_block = StyleUNet_SpatialTransformer(mode)
        self.resample = [0.0]
        self.res = [0.0]
        self.spatial = [0.0]
        self.mask = [None]
        self.attn_mask = [None]
        self.KONTEXT = 0

    def set_len(self, h_len, w_len, img_slice, txt_slice, HEADS):
        self.h_len = h_len
        self.w_len = w_len
        self.img_len = h_len * w_len
        self.img_slice = img_slice
        self.txt_slice = txt_slice
        self.HEADS = HEADS
        self.resample_block.set_len(h_len, w_len, img_slice, txt_slice, HEADS)
        self.res_block     .set_len(h_len, w_len, img_slice, txt_slice, HEADS)
        self.spatial_block .set_len(h_len, w_len, img_slice, txt_slice, HEADS)
        # Resize spatial masks to the latent grid and flatten to per-token weights.
        for i, mask in enumerate(self.mask):
            if mask is not None and mask.ndim > 1:
                self.mask[i] = F.interpolate(mask.unsqueeze(0), size=(h_len, w_len)).flatten().to(torch.bfloat16).cuda()
        self.resample_block.mask = self.mask
        self.res_block.mask = self.mask
        self.spatial_block.mask = self.mask
        self.spatial_block.TFMR.mask = self.mask
        for i, mask in enumerate(self.attn_mask):
            if mask is not None and mask.ndim > 1:
                self.attn_mask[i] = F.interpolate(mask.unsqueeze(0), size=(h_len, w_len)).flatten().to(torch.bfloat16).cuda()
        self.spatial_block.TFMR.ATTN1.mask = self.attn_mask

    def __call__(self, x, attr):
        # UNet activations are BCHW; flatten to a token axis for
        # Stylizer.__call__ and restore the layout afterwards.
        # NOTE(review): this is a raw reshape, not a permute — the token
        # axis here interleaves C and H rather than being (h w) tokens of C
        # channels; confirm this is the intended layout.
        B, C, H, W = x.shape
        x = super().__call__(x.reshape(B, H*W, C), attr)
        return x.reshape(B,C,H,W)
class StyleUNet_InputBlock(StyleUNet_BaseBlock):
    """UNet encoder-stage stylizer; behavior identical to the base class."""
    def __init__(self, mode="passthrough"):
        super().__init__(mode)
class StyleUNet_MiddleBlock(StyleUNet_BaseBlock):
    """UNet middle-stage stylizer; behavior identical to the base class."""
    def __init__(self, mode="passthrough"):
        super().__init__(mode)
class StyleUNet_OutputBlock(StyleUNet_BaseBlock):
    """UNet decoder-stage stylizer; behavior identical to the base class."""
    def __init__(self, mode="passthrough"):
        super().__init__(mode)
class Style_Model(Stylizer):
    # Base style-transfer driver. Holds guide latents, per-layer style
    # weights/masks, style conditioning, and the data-space projection helpers
    # (Retrojector/Endojector) used for recon-lure and data-shock operations.
    def __init__(self, dtype=torch.float64, device=torch.device("cuda")):
        super().__init__(dtype, device)
        self.guides = []                  # style guide latents; normalized by init_guides()
        self.GUIDES_INITIALIZED = False
        #self.double_blocks = [StyleMMDiT_DoubleBlock() for _ in range(100)]
        #self.single_blocks = [StyleMMDiT_SingleBlock() for _ in range(100)]
        self.h_len = -1
        self.w_len = -1
        self.img_len = -1
        self.h_tile = [-1]
        self.w_tile = [-1]

        self.proj_in = [0.0]  # these are for img only! not sliced
        self.proj_out = [0.0]

        self.cond_pos = [None]            # style conditioning used when not UNCOND
        self.cond_neg = [None]            # style conditioning used when UNCOND

        self.noise_mode = "update"
        self.recon_lure = "none"          # data-space blend mode applied per batch item
        self.data_shock = "none"          # data-space blend mode applied on a step range
        self.data_shock_start_step = 0
        self.data_shock_end_step = 0

        self.Retrojector = None           # latent <-> embed projector; set externally
        self.Endojector = None
        self.IMG_1ST = True
        self.HEADS = 0
        self.KONTEXT = 0                  # nonzero enables Kontext split/merge reshapes

    def __call__(self, x, attr):
        # Apply the style method(s) named in self.method, weighted by the list
        # stored under attribute `attr`. Row 0 of x is the target; rows 1..N are
        # the style references.
        if x.shape[0] == 1 and not self.KONTEXT:
            return x
        weight_list = getattr(self, attr)

        weights_all_zero = all(weight == 0.0 for weight in weight_list)
        if weights_all_zero:
            return x

        """x_ndim = x.ndim
        if x_ndim == 4:
            B, HEAD, HW, C = x.shape
        if x_ndim == 3:
            B, HW, C = x.shape
        if x.shape[-2] != self.HEADS and self.HEADS != 0:
            x = x.reshape(B,self.HEADS,HW,-1)"""

        # Fold a (B, HEADS, HW, C) attention tensor down to (B, HW, C*HEADS)
        # so the style methods see one token axis; unfolded again at the end.
        HEAD_DIM = x.shape[1]
        if HEAD_DIM == self.HEADS:
            B, HEAD_DIM, HW, C = x.shape
            x = x.reshape(B, HW, C*HEAD_DIM)

        # Kontext: split the doubled token axis into two batch rows (target/ref).
        if self.KONTEXT == 1:
            x = x.reshape(2, x.shape[1] // 2, x.shape[2])

        weights_all_one = all(weight == 1.0 for weight in weight_list)
        methods_all_scattersort = all(name == "scattersort" for name in self.method)
        masks_all_none = all(mask is None for mask in self.mask)

        if weights_all_one and methods_all_scattersort and len(weight_list) > 1 and masks_all_none:
            # Fast path: full-strength multi-guide scattersort. Sort all guide
            # rows together, then scatter every len(weight_list)-th sorted value
            # into the target row's rank positions, reusing the shared buffer.
            buf = Stylizer.buffer
            buf['src_idx'] = x[0:1].argsort(dim=-2)
            buf['ref_sorted'], buf['ref_idx'] = x[1:].reshape(1, -1, x.shape[-1]).sort(dim=-2)
            buf['src'] = buf['ref_sorted'][:,::len(weight_list)].expand_as(buf['src_idx']) # interleave_stride = len(weight_list)
            x[0:1] = x[0:1].scatter_(dim=-2, index=buf['src_idx'], src=buf['src'],)
        else:
            # General path: apply each guide's method in turn, blending by
            # weight (lerp) and optionally by a per-token mask on row 0.
            for i, (weight, mask) in enumerate(zip(weight_list, self.mask)):
                if weight > 0 and weight < 1:
                    x_clone = x.clone()   # pre-method snapshot for the weight lerp
                if mask is not None:
                    x01 = x[0:1].clone()  # pre-method snapshot of row 0 for the mask lerp
                slc = Stylizer.middle_slice(x.shape[-2], weight)
                method = getattr(self, self.method[i])
                if weight == 0.0:
                    continue
                elif weight == 1.0:
                    x = method(x, idx=i+1)
                else:
                    x = method(x, idx=i+1, slc=slc)
                # scattersort applies partial strength via `slc`, so it is
                # excluded from the post-hoc lerp.
                if weight > 0 and weight < 1 and self.method[i] != "scattersort":
                    x = torch.lerp(x_clone, x, weight)
                #else:
                #    x = torch.lerp(x, method(x.clone(), idx=i), weight)
                if mask is not None:
                    x[0:1] = torch.lerp(x01, x[0:1], mask.view(1, -1, 1))

        #if x_ndim == 3:
        #    return x.view(B,HW,C)
        # Undo the Kontext split and the head fold, mirroring the entry reshapes.
        if self.KONTEXT == 1:
            x = x.reshape(1, x.shape[1] * 2, x.shape[2])
        if HEAD_DIM == self.HEADS:
            return x.reshape(B, HEAD_DIM, HW, C)
        else:
            return x

    def set_len(self, h_len, w_len, img_slice, txt_slice, HEADS):
        # Record latent grid geometry and flatten any still-2D masks to one
        # bf16 value per image token (the `ndim > 1` guard makes this one-shot).
        self.h_len = h_len
        self.w_len = w_len
        self.img_len = h_len * w_len
        self.img_slice = img_slice
        self.txt_slice = txt_slice
        self.HEADS = HEADS
        #for block in self.double_blocks:
        #    block.set_len(h_len, w_len, img_slice, txt_slice, HEADS)
        #for block in self.single_blocks:
        #    block.set_len(h_len, w_len, img_slice, txt_slice, HEADS)
        for i, mask in enumerate(self.mask):
            if mask is not None and mask.ndim > 1:
                self.mask[i] = F.interpolate(mask.unsqueeze(0), size=(h_len, w_len)).flatten().to(torch.bfloat16).cuda()

    def init_guides(self, model):
        # One-time normalization of self.guides: dict latents are run through
        # the model's process_latent_in, tensors are cast, anything else maps
        # to None. If any guide is unusable the whole set is dropped (None);
        # otherwise all guides are concatenated along the batch axis.
        if not self.GUIDES_INITIALIZED:
            if self.guides == []:
                self.guides = None
            elif self.guides is not None:
                for i, latent in enumerate(self.guides):
                    if type(latent) is dict:
                        latent = model.inner_model.inner_model.process_latent_in(latent['samples']).to(dtype=self.dtype, device=self.device)
                    elif type(latent) is torch.Tensor:
                        latent = latent.to(dtype=self.dtype, device=self.device)
                    else:
                        latent = None
                        #raise ValueError(f"Invalid latent type: {type(latent)}")
                    #if self.VIDEO and latent.shape[2] == 1:
                    #    latent = latent.repeat(1, 1, x.shape[2], 1, 1)
                    self.guides[i] = latent
                if any(g is None for g in self.guides):
                    self.guides = None
                    print("Style guide nonetype set for Kontext.")
                else:
                    self.guides = torch.cat(self.guides, dim=0)
        self.GUIDES_INITIALIZED = True

    def set_conditioning(self, positive, negative):
        # Store style conditioning as single-element lists (one style source).
        self.cond_pos = [positive]
        self.cond_neg = [negative]

    def apply_style_conditioning(self, UNCOND, base_context, base_y=None, base_llama3=None):
        # Stack the base conditioning with each style's conditioning along the
        # batch axis, right-padding every tensor to the longest token length so
        # they can share one batch.
        def get_max_token_lengths(style_conditioning, base_context, base_y=None, base_llama3=None):
            # Max token length across base + all style conds; -1 means "absent".
            context_max_len = base_context.shape[-2]
            llama3_max_len = base_llama3.shape[-2] if base_llama3 is not None else -1
            y_max_len = base_y.shape[-1] if base_y is not None else -1
            for style_cond in style_conditioning:
                if style_cond is None:
                    continue
                context_max_len = max(context_max_len, style_cond[0][0].shape[-2])
                if base_llama3 is not None:
                    llama3_max_len = max(llama3_max_len, style_cond[0][1]['conditioning_llama3'].shape[-2])
                if base_y is not None:
                    y_max_len = max(y_max_len, style_cond[0][1]['pooled_output'].shape[-1])
            return context_max_len, llama3_max_len, y_max_len

        def pad_to_len(x, target_len, pad_value=0.0, dim=1):
            # Right-pad to target_len; target_len < 0 disables padding.
            # NOTE(review): the pad tuple (0, 0, 0, delta) always pads the
            # second-to-last dim, regardless of `dim` — correct for dim=-2,
            # but for the dim=-1 call on `y` below it would pad the batch
            # axis instead; only triggers when pooled lengths differ. Confirm.
            if target_len < 0:
                return x
            cur_len = x.shape[dim]
            if cur_len == target_len:
                return x
            return F.pad(x, (0, 0, 0, target_len - cur_len), value=pad_value)

        style_conditioning = self.cond_pos if not UNCOND else self.cond_neg
        context_max_len, llama3_max_len, y_max_len = get_max_token_lengths(
            style_conditioning = style_conditioning,
            base_context       = base_context,
            base_y             = base_y,
            base_llama3        = base_llama3,
        )
        # Row 0 is the base conditioning; rows 1..N are overwritten per style.
        bsz_style = len(style_conditioning)
        context = base_context.repeat(bsz_style + 1, 1, 1)
        y = base_y.repeat(bsz_style + 1, 1) if base_y is not None else None
        llama3 = base_llama3.repeat(bsz_style + 1, 1, 1, 1) if base_llama3 is not None else None

        context = pad_to_len(context, context_max_len, dim=-2)
        llama3 = pad_to_len(llama3, llama3_max_len, dim=-2) if base_llama3 is not None else None
        y = pad_to_len(y, y_max_len, dim=-1) if base_y is not None else None

        for ci, style_cond in enumerate(style_conditioning):
            if style_cond is None:
                continue
            context[ci+1:ci+2] = pad_to_len(style_cond[0][0], context_max_len, dim=-2).to(context)
            if llama3 is not None:
                llama3[ci+1:ci+2] = pad_to_len(style_cond[0][1]['conditioning_llama3'], llama3_max_len, dim=-2).to(llama3)
            if y is not None:
                y[ci+1:ci+2] = pad_to_len(style_cond[0][1]['pooled_output'], y_max_len, dim=-1).to(y)

        return context, y, llama3

    def WCT_data(self, denoised_embed, y0_style_embed):
        # Whitening-coloring transform via the shared class-level WCT helper.
        Stylizer.CLS_WCT.set(y0_style_embed.to(denoised_embed))
        return Stylizer.CLS_WCT.get(denoised_embed)

    def WCT2_data(self, denoised_embed, y0_style_embed):
        # Second WCT variant via the shared class-level WCT2 helper.
        Stylizer.CLS_WCT2.set(y0_style_embed.to(denoised_embed))
        return Stylizer.CLS_WCT2.get(denoised_embed)

    def apply_to_data(self, denoised, y0_style=None, mode="none"):
        # Project denoised + style into embed space, apply the named style
        # method (row 0 = denoised, row 1 = strided style tokens), and project
        # row 0 back to latent space.
        if mode == "none":
            return denoised
        y0_style = self.guides if y0_style is None else y0_style
        y0_style_embed = self.Retrojector.embed(y0_style)
        denoised_embed = self.Retrojector.embed(denoised)
        # Collapse the style batch into one row by taking every B-th token.
        B,HW,C = y0_style_embed.shape
        embed = torch.cat([denoised_embed, y0_style_embed.view(1,B*HW,C)[:,::B,:]], dim=0)
        method = getattr(self, mode)
        if mode == "scattersort":
            slc = Stylizer.middle_slice(embed.shape[-2], self.data_shock_weight)
            embed = method(embed, slc=slc)
        else:
            embed = method(embed)
        return self.Retrojector.unembed(embed[0:1])

    def apply_recon_lure(self, denoised, y0_style):
        # Apply the recon-lure style mode to each batch item independently
        # (mutates `denoised` in place and returns it).
        if self.recon_lure == "none":
            return denoised
        for i in range(denoised.shape[0]):
            denoised[i:i+1] = self.apply_to_data(denoised[i:i+1], y0_style, self.recon_lure)
        return denoised

    def apply_data_shock(self, denoised):
        # Apply the data-shock style mode against self.datashock_ref (falls
        # back to self.guides inside apply_to_data when the attr is unset).
        # scattersort handles partial strength internally; other modes are
        # lerped by data_shock_weight.
        if self.data_shock == "none":
            return denoised
        datashock_ref = getattr(self, "datashock_ref", None)
        if self.data_shock == "scattersort":
            return self.apply_to_data(denoised, datashock_ref, self.data_shock)
        else:
            return torch.lerp(denoised, self.apply_to_data(denoised, datashock_ref, self.data_shock), torch.Tensor([self.data_shock_weight]).double().cuda())
class StyleMMDiT_Model(Style_Model):
    """Style model for MMDiT architectures: owns per-layer double- and
    single-block stylizers and fans geometry updates out to all of them."""

    def __init__(self, dtype=torch.float64, device=torch.device("cuda")):
        super().__init__(dtype, device)
        self.double_blocks = [StyleMMDiT_DoubleBlock() for _ in range(100)]
        self.single_blocks = [StyleMMDiT_SingleBlock() for _ in range(100)]

    def set_len(self, h_len, w_len, img_slice, txt_slice, HEADS):
        """Propagate latent grid geometry to the base model and every block."""
        super().set_len(h_len, w_len, img_slice, txt_slice, HEADS)
        for blk in (*self.double_blocks, *self.single_blocks):
            blk.set_len(h_len, w_len, img_slice, txt_slice, HEADS)
class StyleUNet_Model(Style_Model):
    """Style model for UNet architectures: owns input/middle/output block
    stylizers and adapts Style_Model.__call__ to (B, C, H, W) feature maps."""

    def __init__(self, dtype=torch.float64, device=torch.device("cuda")):
        super().__init__(dtype, device)
        self.input_blocks = [StyleUNet_InputBlock() for _ in range(100)]
        self.middle_blocks = [StyleUNet_MiddleBlock() for _ in range(100)]
        self.output_blocks = [StyleUNet_OutputBlock() for _ in range(100)]

    def set_len(self, h_len, w_len, img_slice, txt_slice, HEADS):
        """Propagate latent grid geometry to the base model and every block."""
        super().set_len(h_len, w_len, img_slice, txt_slice, HEADS)
        for blk in (*self.input_blocks, *self.middle_blocks, *self.output_blocks):
            blk.set_len(h_len, w_len, img_slice, txt_slice, HEADS)

    def __call__(self, x, attr):
        """Flatten (B, C, H, W) to token form, style, and restore the shape."""
        batch, chans, height, width = x.shape
        styled = super().__call__(x.reshape(batch, height * width, chans), attr)
        return styled.reshape(batch, chans, height, width)
================================================
FILE: wan/model.py
================================================
# original version: https://github.com/Wan-Video/Wan2.1/blob/main/wan/modules/model.py
# Copyright 2024-2025 The Alibaba Wan Team Authors. All rights reserved.
import math
from typing import Optional, Callable, Tuple, Dict, Any, Union
import torch
import torch.nn as nn
import torch.nn.functional as F
from einops import repeat
from comfy.ldm.modules.attention import optimized_attention, attention_pytorch
from comfy.ldm.flux.layers import EmbedND
from comfy.ldm.flux.math import apply_rope
from comfy.ldm.modules.diffusionmodules.mmdit import RMSNorm
import comfy.ldm.common_dit
import comfy.model_management
from ..latents import interpolate_spd
from ..helper import ExtraOptions
def sinusoidal_embedding_1d(dim, position):
    """Classic transformer sinusoidal positional embedding.

    Args:
        dim: embedding width; must be even (half cosine, half sine).
        position: 1-D tensor of positions (any numeric dtype; cast to float32).

    Returns:
        Float32 tensor of shape [len(position), dim], laid out as the cosine
        half followed by the sine half.
    """
    assert dim % 2 == 0
    half = dim // 2

    pos_f32 = position.type(torch.float32)
    # Geometric frequency ladder: 10000^(-i/half) for i in [0, half).
    inv_freq = torch.pow(10000, -torch.arange(half).to(pos_f32).div(half))
    angles = torch.outer(pos_f32, inv_freq)

    return torch.cat([torch.cos(angles), torch.sin(angles)], dim=1)
class ReWanRawSelfAttention(nn.Module):
    """Full (windowless, maskless) self-attention with RoPE and optional
    RMSNorm on queries/keys."""

    def __init__(self,
                 dim,
                 num_heads,
                 window_size=(-1, -1),
                 qk_norm=True,
                 eps=1e-6,
                 operation_settings={}):
        assert dim % num_heads == 0
        super().__init__()
        self.dim = dim
        self.num_heads = num_heads
        self.head_dim = dim // num_heads
        self.window_size = window_size
        self.qk_norm = qk_norm
        self.eps = eps

        ops = operation_settings.get("operations")
        dev = operation_settings.get("device")
        dt = operation_settings.get("dtype")

        # projection layers
        self.q = ops.Linear(dim, dim, device=dev, dtype=dt)
        self.k = ops.Linear(dim, dim, device=dev, dtype=dt)
        self.v = ops.Linear(dim, dim, device=dev, dtype=dt)
        self.o = ops.Linear(dim, dim, device=dev, dtype=dt)
        self.norm_q = RMSNorm(dim, eps=eps, elementwise_affine=True, device=dev, dtype=dt) if qk_norm else nn.Identity()
        self.norm_k = RMSNorm(dim, eps=eps, elementwise_affine=True, device=dev, dtype=dt) if qk_norm else nn.Identity()

    def forward(self, x, freqs, mask=None):
        r"""
        Args:
            x(Tensor): Shape [B, L, num_heads, C / num_heads]
            freqs(Tensor): Rope freqs, shape [1024, C / num_heads / 2]
        """
        b, s = x.shape[:2]
        n, d = self.num_heads, self.head_dim

        # Project, normalize q/k, and split heads for RoPE.
        q = self.norm_q(self.q(x)).view(b, s, n, d)
        k = self.norm_k(self.k(x)).view(b, s, n, d)
        v = self.v(x).view(b, s, n * d)
        q, k = apply_rope(q, k, freqs)

        # optimized_attention expects flat [B, L, C] inputs.
        out = optimized_attention(
            q.view(b, s, n * d),
            k.view(b, s, n * d),
            v,
            heads=self.num_heads,
        )
        return self.o(out)
def attention_weights(q, k):
    """Compute row-softmax attention weights in place to limit peak memory.

    Args:
        q: query tensor of shape [..., Lq, D].
        k: key tensor of shape [..., Lk, D].

    Returns:
        Tensor of shape [..., Lq, Lk] with each row summing to 1 (NaN/inf
        values clamped via nan_to_num_ as a final safety net).
    """
    # implementation of in-place softmax to reduce memory req
    scores = torch.matmul(q, k.transpose(-2, -1))
    scores.div_(math.sqrt(q.size(-1)))
    # Subtract the per-row max before exponentiating: a mathematical no-op for
    # softmax, but prevents exp() overflow to inf (and the resulting inf/inf
    # NaNs that nan_to_num_ would silently zero) on large logits.
    scores.sub_(scores.amax(dim=-1, keepdim=True))
    torch.exp(scores, out=scores)
    summed = torch.sum(scores, dim=-1, keepdim=True)
    scores /= summed
    return scores.nan_to_num_(0.0, 65504., -65504.)
class ReWanSlidingSelfAttention(nn.Module):
    # Self-attention where each video frame's queries attend only to keys/values
    # from a temporal window of nearby frames (`winderz` frames wide).
    def __init__(self,
                 dim,
                 num_heads,
                 window_size=(-1, -1),
                 qk_norm=True,
                 eps=1e-6,
                 operation_settings={}):
        assert dim % num_heads == 0
        super().__init__()
        self.dim = dim
        self.num_heads = num_heads
        self.head_dim = dim // num_heads
        self.window_size = window_size
        self.qk_norm = qk_norm
        self.eps = eps

        self.winderz = 15               # temporal window width, in frames
        self.winderz_type = "standard"  # "standard" (edge-clamped) or "circular" (wrap-around)

        # layers
        self.q = operation_settings.get("operations").Linear(dim, dim, device=operation_settings.get("device"), dtype=operation_settings.get("dtype"))
        self.k = operation_settings.get("operations").Linear(dim, dim, device=operation_settings.get("device"), dtype=operation_settings.get("dtype"))
        self.v = operation_settings.get("operations").Linear(dim, dim, device=operation_settings.get("device"), dtype=operation_settings.get("dtype"))
        self.o = operation_settings.get("operations").Linear(dim, dim, device=operation_settings.get("device"), dtype=operation_settings.get("dtype"))
        self.norm_q = RMSNorm(dim, eps=eps, elementwise_affine=True, device=operation_settings.get("device"), dtype=operation_settings.get("dtype")) if qk_norm else nn.Identity()
        self.norm_k = RMSNorm(dim, eps=eps, elementwise_affine=True, device=operation_settings.get("device"), dtype=operation_settings.get("dtype")) if qk_norm else nn.Identity()

    def forward(self, x, freqs, mask=None, grid_sizes=None):
        r"""
        Args:
            x(Tensor): Shape [B, L, num_heads, C / num_heads]
            freqs(Tensor): Rope freqs, shape [1024, C / num_heads / 2]
        """
        b, s, n, d = *x.shape[:2], self.num_heads, self.head_dim

        # query, key, value function
        def qkv_fn(x):
            q = self.norm_q(self.q(x)).view(b, s, n, d)
            k = self.norm_k(self.k(x)).view(b, s, n, d)
            v = self.v(x).view(b, s, n * d)
            return q, k, v

        q, k, v = qkv_fn(x)
        q, k = apply_rope(q, k, freqs)

        # Tokens per frame; grid_sizes presumably (frames, H, W) — TODO confirm.
        img_len = grid_sizes[1] * grid_sizes[2]
        total_frames = int(q.shape[1] // img_len)
        window_size = self.winderz
        half_window = window_size // 2

        q_ = q.view(b, s, n * d)
        k_ = k.view(b, s, n * d)

        x_list = []
        for i in range(total_frames):
            # Query slice: tokens of frame i only.
            q_start = i * img_len
            q_end = (i + 1) * img_len

            # circular frame indices for key/value window
            center = i
            #window_indices = [(center + offset) % total_frames for offset in range(-half_window, half_window + 1)]
            if self.winderz_type == "standard":
                start = max(0, center - half_window)
                end = min(total_frames, center + half_window + 1)
                # Shift window if it would be too short (keeps a full-width
                # window near the clip edges when enough frames exist).
                if end - start < window_size:
                    if start == 0:
                        end = min(total_frames, start + window_size)
                    elif end == total_frames:
                        start = max(0, end - window_size)
                window_indices = list(range(start, end))
            elif self.winderz_type == "circular":
                window_indices = [(center + offset) % total_frames for offset in range(-half_window, half_window + 1)]
            # NOTE(review): any other winderz_type leaves window_indices unset
            # and raises NameError below — confirm only these two values occur.

            # frame indices to token indices
            token_indices = []
            for frame in window_indices:
                start = frame * img_len
                token_indices.extend(range(start, start + img_len))
            token_indices = torch.tensor(token_indices, device=q.device)

            x = optimized_attention(
                q_[:, q_start:q_end, :],               # [B, img_len, C]
                k_.index_select(1, token_indices),     # [B, window_size * img_len, C]
                v.index_select(1, token_indices),
                heads=self.num_heads,
            )
            x_list.append(x)

        x = torch.cat(x_list, dim=1)
        del x_list, q, k, v, q_, k_
        x = self.o(x)
        return x
class ReWanT2VSlidingCrossAttention(ReWanSlidingSelfAttention):
    """Text-to-video cross-attention applied one frame's queries at a time.

    Every frame's queries attend to the FULL text context (the parent class's
    sliding-window key/value selection was disabled here — its index_select
    calls were commented out), so chunking over frames only bounds peak
    attention memory; the concatenated result equals a single attention over
    all queries. The dead per-frame window/token index construction has been
    removed: it cost O(frames * window * img_len) Python work per call and had
    no effect on the output.
    """

    def forward(self, x, context, context_clip=None, mask=None, grid_sizes=None):
        r"""
        Args:
            x(Tensor): Shape [B, L1, C]
            context(Tensor): Shape [B, L2, C]
            grid_sizes: (frames, H, W); H * W gives tokens per frame.

        Returns:
            Tensor of shape [B, L1, C].
        """
        # compute query, key, value
        q = self.norm_q(self.q(x))
        k = self.norm_k(self.k(context))
        v = self.v(context)

        img_len = grid_sizes[1] * grid_sizes[2]
        total_frames = int(q.shape[1] // img_len)

        x_list = []
        for i in range(total_frames):
            q_start = i * img_len
            q_end = (i + 1) * img_len
            x_list.append(optimized_attention(
                q[:, q_start:q_end, :],   # [B, img_len, C]
                k,                        # full context keys
                v,                        # full context values
                heads=self.num_heads,
            ))

        x = torch.cat(x_list, dim=1)
        del x_list, q, k, v
        x = self.o(x)
        return x
class ReWanSelfAttention(nn.Module):
    """Full self-attention with RoPE, optional q/k RMSNorm, and an optional
    attention mask (masked path uses the pytorch attention fallback)."""

    def __init__(self,
                 dim,
                 num_heads,
                 window_size=(-1, -1),
                 qk_norm=True,
                 eps=1e-6,
                 operation_settings={}):
        assert dim % num_heads == 0
        super().__init__()
        self.dim = dim
        self.num_heads = num_heads
        self.head_dim = dim // num_heads
        self.window_size = window_size
        self.qk_norm = qk_norm
        self.eps = eps

        ops = operation_settings.get("operations")
        dev = operation_settings.get("device")
        dt = operation_settings.get("dtype")

        # projection layers
        self.q = ops.Linear(dim, dim, device=dev, dtype=dt)
        self.k = ops.Linear(dim, dim, device=dev, dtype=dt)
        self.v = ops.Linear(dim, dim, device=dev, dtype=dt)
        self.o = ops.Linear(dim, dim, device=dev, dtype=dt)
        self.norm_q = RMSNorm(dim, eps=eps, elementwise_affine=True, device=dev, dtype=dt) if qk_norm else nn.Identity()
        self.norm_k = RMSNorm(dim, eps=eps, elementwise_affine=True, device=dev, dtype=dt) if qk_norm else nn.Identity()

    def forward(self, x, freqs, mask=None, grid_sizes=None):
        r"""
        Args:
            x(Tensor): Shape [B, L, num_heads, C / num_heads]
            freqs(Tensor): Rope freqs, shape [1024, C / num_heads / 2]
        """
        b, s = x.shape[:2]
        n, d = self.num_heads, self.head_dim

        # Project, normalize q/k, and split heads for RoPE.
        q = self.norm_q(self.q(x)).view(b, s, n, d)
        k = self.norm_k(self.k(x)).view(b, s, n, d)
        v = self.v(x).view(b, s, n * d)
        q, k = apply_rope(q, k, freqs)

        q_flat = q.view(b, s, n * d)
        k_flat = k.view(b, s, n * d)
        if mask is not None and mask.shape[-1] > 0:
            # Only the pytorch attention path accepts an explicit mask here.
            out = attention_pytorch(q_flat, k_flat, v, heads=self.num_heads, mask=mask)
        else:
            out = optimized_attention(q_flat, k_flat, v, heads=self.num_heads)
        return self.o(out)
class ReWanT2VRawCrossAttention(ReWanSelfAttention):
    """Plain (maskless) text-to-video cross-attention."""

    def forward(self, x, context, context_clip=None, mask=None, grid_sizes=None):
        r"""
        Args:
            x(Tensor): Shape [B, L1, C]
            context(Tensor): Shape [B, L2, C]
        """
        # Queries from the video stream; keys/values from the text context.
        query = self.norm_q(self.q(x))
        key = self.norm_k(self.k(context))
        value = self.v(context)
        attn = optimized_attention(query, key, value, heads=self.num_heads, mask=None)
        return self.o(attn)
class ReWanT2VCrossAttention(ReWanSelfAttention):
    """Text-to-video cross-attention with an optional boolean attention mask."""

    def forward(self, x, context, context_clip=None, mask=None, grid_sizes=None):
        r"""
        Args:
            x(Tensor): Shape [B, L1, C]
            context(Tensor): Shape [B, L2, C]
            mask: optional attention mask; moved to q's device and cast to bool.

        Returns:
            Tensor of shape [B, L1, C].
        """
        # compute query, key, value
        q = self.norm_q(self.q(x))
        k = self.norm_k(self.k(context))
        v = self.v(context)

        # Masked attention must go through the pytorch fallback path.
        # (Removed: a dead `dtype = torch.bool` local that was never read.)
        if mask is not None:
            x = attention_pytorch(q, k, v, heads=self.num_heads, mask=mask.to(q.device).bool())
        else:
            x = optimized_attention(q, k, v, heads=self.num_heads, mask=None)

        x = self.o(x)
        return x
class ReWanI2VCrossAttention(ReWanSelfAttention): # image2video only
    # Cross-attention with two key/value sources: the text context and CLIP
    # image features (context_clip); the two attention outputs are summed.
    def __init__(self,
                 dim,
                 num_heads,
                 window_size=(-1, -1),
                 qk_norm=True,
                 eps=1e-6, operation_settings={}, ):
        super().__init__(dim, num_heads, window_size, qk_norm, eps, operation_settings=operation_settings)

        # Dedicated key/value projections for the CLIP image branch.
        self.k_img = operation_settings.get("operations").Linear(dim, dim, device=operation_settings.get("device"), dtype=operation_settings.get("dtype"))
        self.v_img = operation_settings.get("operations").Linear(dim, dim, device=operation_settings.get("device"), dtype=operation_settings.get("dtype"))
        # self.alpha = nn.Parameter(torch.zeros((1, )))
        self.norm_k_img = RMSNorm(dim, eps=eps, elementwise_affine=True, device=operation_settings.get("device"), dtype=operation_settings.get("dtype")) if qk_norm else nn.Identity()

    def forward(self, x, context, context_clip=None, mask=None, grid_sizes=None):
        r"""
        Args:
            x(Tensor): Shape [B, L1, C]
            context(Tensor): Shape [B, L2, C]
        """
        """context_img = context[:, :257]
        context = context[:, 257:]
        mask_clip = None"""
        context_img = context_clip
        mask_clip = None
        if mask is not None:
            # Resample the text mask to the CLIP token count: 257 CLIP tokens
            # per 512 text tokens — presumably matching the embedder layout;
            # TODO confirm against the caller.
            mask_clip = F.interpolate(mask[None, None, ...].to(torch.float16), (mask.shape[0], 257 * mask.shape[1]//512), mode='nearest-exact').squeeze().to(mask.dtype)
            """mask_clip = []
            for i in range(mask.shape[-1]//512):
                mask_clip.append(mask[:,i*512:i*512 + 257])
            mask_clip = torch.cat(mask_clip, dim=-1)"""

        # compute query, key, value
        q = self.norm_q(self.q(x))
        k = self.norm_k(self.k(context))
        v = self.v(context)
        k_img = self.norm_k_img(self.k_img(context_img))
        v_img = self.v_img(context_img)
        img_x = optimized_attention(q, k_img, v_img, heads=self.num_heads, mask=mask_clip)

        # compute attention
        x = optimized_attention(q, k, v, heads=self.num_heads, mask=mask)

        # output: text-branch plus image-branch attention
        x = x + img_x
        x = self.o(x)
        return x
# Registry mapping the model's cross_attn_type flag to its cross-attention class.
WAN_CROSSATTENTION_CLASSES = {
    't2v_cross_attn': ReWanT2VCrossAttention,
    'i2v_cross_attn': ReWanI2VCrossAttention,
}
class ReWanAttentionBlock(nn.Module):
    # One Wan transformer layer: modulated self-attention, cross-attention to
    # the text (and optionally CLIP image) context, and a modulated GELU FFN.
    def __init__(self,
                 cross_attn_type,
                 dim,
                 ffn_dim,
                 num_heads,
                 window_size=(-1, -1),
                 qk_norm=True,
                 cross_attn_norm=False,
                 eps=1e-6,
                 operation_settings={}):
        super().__init__()
        self.dim = dim
        self.ffn_dim = ffn_dim
        self.num_heads = num_heads
        self.window_size = window_size
        self.qk_norm = qk_norm
        self.cross_attn_norm = cross_attn_norm
        self.eps = eps

        # layers
        self.norm1 = operation_settings.get("operations").LayerNorm(dim, eps, elementwise_affine=False, device=operation_settings.get("device"), dtype=operation_settings.get("dtype"))
        self.self_attn = ReWanSelfAttention(dim, num_heads, window_size, qk_norm,
                                            eps, operation_settings=operation_settings)
        # norm3 gates the cross-attention input; Identity when disabled.
        self.norm3 = operation_settings.get("operations").LayerNorm(
            dim, eps,
            elementwise_affine=True, device=operation_settings.get("device"), dtype=operation_settings.get("dtype")) if cross_attn_norm else nn.Identity()
        self.cross_attn = WAN_CROSSATTENTION_CLASSES[cross_attn_type](
            dim,
            num_heads,
            (-1, -1),
            qk_norm,
            eps,
            operation_settings=operation_settings)
        self.norm2 = operation_settings.get("operations").LayerNorm(dim, eps, elementwise_affine=False, device=operation_settings.get("device"), dtype=operation_settings.get("dtype"))
        self.ffn = nn.Sequential(
            operation_settings.get("operations").Linear(dim, ffn_dim, device=operation_settings.get("device"), dtype=operation_settings.get("dtype")), nn.GELU(approximate='tanh'),
            operation_settings.get("operations").Linear(ffn_dim, dim, device=operation_settings.get("device"), dtype=operation_settings.get("dtype")))

        # modulation: 6 learned per-channel vectors added to the conditioning
        # signal and consumed as e[0..5] in forward().
        self.modulation = nn.Parameter(torch.empty(1, 6, dim, device=operation_settings.get("device"), dtype=operation_settings.get("dtype")))

    def forward(
        self,
        x,
        e,
        freqs,
        context,
        context_clip=None,
        self_mask=None,
        cross_mask=None,
        grid_sizes=None,
        #mask=None,
    ):
        r"""
        Args:
            x(Tensor): Shape [B, L, C]
            e(Tensor): Shape [B, 6, C]
            freqs(Tensor): Rope freqs, shape [1024, C / num_heads / 2]
        """
        # assert e.dtype == torch.float32
        # Combine learned modulation with the conditioning and split into six
        # vectors: e[0]/e[1] shift/scale the self-attn input, e[2] gates its
        # output; e[3]/e[4] shift/scale the FFN input, e[5] gates its output.
        e = (comfy.model_management.cast_to(self.modulation, dtype=x.dtype, device=x.device) + e).chunk(6, dim=1)
        # assert e[0].dtype == torch.float32
        # e = tuple with 6 elem, shape = 2,1,1536 # with length = 33 so 9 frames

        # self-attention (residual, gated by e[2])
        y = self.self_attn(
            self.norm1(x) * (1 + e[1]) + e[0],
            freqs,
            grid_sizes=grid_sizes,
            mask=self_mask) # mask[:,txt_len:])

        x = x + y * e[2]

        # cross-attention & ffn # x,y.shape 2,14040,1536
        x = x + self.cross_attn(self.norm3(x), context, context_clip=context_clip, mask=cross_mask, grid_sizes=grid_sizes,) #mask[:,:txt_len])
        #print("before norm2 ", torch.cuda.memory_allocated() / 1024**3)
        y = self.ffn(self.norm2(x) * (1 + e[4]) + e[3])
        #print("after norm2 ", torch.cuda.memory_allocated() / 1024**3)
        x = x + y * e[5]
        return x
class Head(nn.Module):
    """Output head: modulated LayerNorm followed by a linear projection to
    one flattened patch of output channels per token."""

    def __init__(self, dim, out_dim, patch_size, eps=1e-6, operation_settings={}):
        super().__init__()
        self.dim = dim
        self.out_dim = out_dim
        self.patch_size = patch_size
        self.eps = eps

        ops = operation_settings.get("operations")
        dev = operation_settings.get("device")
        dt = operation_settings.get("dtype")

        # Each token expands to a full patch: out channels * patch volume.
        proj_dim = math.prod(patch_size) * out_dim
        self.norm = ops.LayerNorm(dim, eps, elementwise_affine=False, device=dev, dtype=dt)
        self.head = ops.Linear(dim, proj_dim, device=dev, dtype=dt)

        # Two learned modulation vectors: shift and scale.
        self.modulation = nn.Parameter(torch.empty(1, 2, dim, device=dev, dtype=dt))

    def forward(self, x, e):
        r"""
        Args:
            x(Tensor): Shape [B, L1, C]
            e(Tensor): Shape [B, C]
        """
        mod = comfy.model_management.cast_to(self.modulation, dtype=x.dtype, device=x.device) + e.unsqueeze(1)
        shift, scale = mod.chunk(2, dim=1)
        return self.head(self.norm(x) * (1 + scale) + shift)
class MLPProj(torch.nn.Module):
    """Projects CLIP image embeddings into the model dimension:
    LayerNorm -> Linear -> GELU -> Linear -> LayerNorm."""

    def __init__(self, in_dim, out_dim, operation_settings={}):
        super().__init__()
        ops = operation_settings.get("operations")
        dev = operation_settings.get("device")
        dt = operation_settings.get("dtype")
        self.proj = torch.nn.Sequential(
            ops.LayerNorm(in_dim, device=dev, dtype=dt),
            ops.Linear(in_dim, in_dim, device=dev, dtype=dt),
            torch.nn.GELU(),
            ops.Linear(in_dim, out_dim, device=dev, dtype=dt),
            ops.LayerNorm(out_dim, device=dev, dtype=dt),
        )

    def forward(self, image_embeds):
        """Return projected context tokens for the given image embeddings."""
        return self.proj(image_embeds)
class ReWanModel(torch.nn.Module):
r"""
Wan diffusion backbone supporting both text-to-video and image-to-video.
"""
def __init__(self,
model_type = 't2v',
patch_size = (1, 2, 2),
text_len = 512,
in_dim = 16,
dim = 2048,
ffn_dim = 8192,
freq_dim = 256,
text_dim = 4096,
out_dim = 16,
num_heads = 16,
num_layers = 32,
window_size = (-1, -1),
qk_norm = True,
cross_attn_norm = True,
eps = 1e-6,
image_model = None,
device = None,
dtype = None,
operations = None,
):
r"""
Initialize the diffusion model backbone.
Args:
model_type (`str`, *optional*, defaults to 't2v'):
Model variant - 't2v' (text-to-video) or 'i2v' (image-to-video)
patch_size (`tuple`, *optional*, defaults to (1, 2, 2)):
3D patch dimensions for video embedding (t_patch, h_patch, w_patch)
text_len (`int`, *optional*, defaults to 512):
Fixed length for text embeddings
in_dim (`int`, *optional*, defaults to 16):
Input video channels (C_in)
dim (`int`, *optional*, defaults to 2048):
Hidden dimension of the transformer
ffn_dim (`int`, *optional*, defaults to 8192):
Intermediate dimension in feed-forward network
freq_dim (`int`, *optional*, defaults to 256):
Dimension for sinusoidal time embeddings
text_dim (`int`, *optional*, defaults to 4096):
Input dimension for text embeddings
out_dim (`int`, *optional*, defaults to 16):
Output video channels (C_out)
num_heads (`int`, *optional*, defaults to 16):
Number of attention heads
num_layers (`int`, *optional*, defaults to 32):
Number of transformer blocks
window_size (`tuple`, *optional*, defaults to (-1, -1)):
Window size for local attention (-1 indicates global attention)
qk_norm (`bool`, *optional*, defaults to True):
Enable query/key normalization
cross_attn_norm (`bool`, *optional*, defaults to False):
Enable cross-attention normalization
eps (`float`, *optional*, defaults to 1e-6):
Epsilon value for normalization layers
"""
super().__init__()
self.dtype = dtype
operation_settings = {"operations": operations, "device": device, "dtype": dtype}
assert model_type in ['t2v', 'i2v']
self.model_type = model_type
self.patch_size = patch_size
self.text_len = text_len
self.in_dim = in_dim
self.dim = dim
self.ffn_dim = ffn_dim
self.freq_dim = freq_dim
self.text_dim = text_dim
self.out_dim = out_dim
self.num_heads = num_heads
self.num_layers = num_layers
self.window_size = window_size
self.qk_norm = qk_norm
self.cross_attn_norm = cross_attn_norm
self.eps = eps
# embeddings
self.patch_embedding = operations.Conv3d(
in_dim, dim, kernel_size=patch_size, stride=patch_size, device=operation_settings.get("device"), dtype=operation_settings.get("dtype")) #dtype=torch.float32)
self.text_embedding = nn.Sequential(
operations.Linear(text_dim, dim, device=operation_settings.get("device"), dtype=operation_settings.get("dtype")), nn.GELU(approximate='tanh'),
operations.Linear(dim, dim, device=operation_settings.get("device"), dtype=operation_settings.get("dtype")))
self.time_embedding = nn.Sequential(
operations.Linear(freq_dim, dim, device=operation_settings.get("device"), dtype=operation_settings.get("dtype")), nn.SiLU(), operations.Linear(dim, dim, device=operation_settings.get("device"), dtype=operation_settings.get("dtype")))
self.time_projection = nn.Sequential(nn.SiLU(), operations.Linear(dim, dim * 6, device=operation_settings.get("device"), dtype=operation_settings.get("dtype")))
# blocks
cross_attn_type = 't2v_cross_attn' if model_type == 't2v' else 'i2v_cross_attn'
self.blocks = nn.ModuleList([
ReWanAttentionBlock(
cross_attn_type,
dim,
ffn_dim, num_heads,
window_size,
qk_norm,
cross_attn_norm,
eps,
operation_settings=operation_settings)
for _ in range(num_layers)
])
# head
self.head = Head(dim, out_dim, patch_size, eps, operation_settings=operation_settings)
d = dim // num_heads
self.rope_embedder = EmbedND(dim=d, theta=10000.0, axes_dim=[d - 4 * (d // 6), 2 * (d // 6), 2 * (d // 6)])
if model_type == 'i2v':
self.img_emb = MLPProj(1280, dim, operation_settings=operation_settings)
else:
self.img_emb = None
    def invert_patch_embedding(self, z: torch.Tensor, original_shape: torch.Size, grid_sizes: Optional[Tuple[int,int,int]] = None) -> torch.Tensor:
        # Approximately invert self.patch_embedding (a Conv3d whose stride
        # equals its kernel, i.e. non-overlapping patches) via the Moore-Penrose
        # pseudo-inverse of the flattened 2D filter.
        # NOTE(review): w3.squeeze(2) assumes the temporal patch size pD == 1
        # (the default patch_size is (1, 2, 2)); a larger pD would leave the
        # depth kernel axis in place and break the 2D fold — confirm.
        import torch.nn.functional as F  # shadows the module-level F; harmless
        B, C_in, D, H, W = original_shape
        pD, pH, pW = self.patch_size
        sD, sH, sW = pD, pH, pW  # stride == patch size for this embedding

        if z.ndim == 3:
            # [B, S, C_out] -> reshape to [B, C_out, D', H', W']
            S = z.shape[1]
            if grid_sizes is None:
                Dp = D // pD
                Hp = H // pH
                Wp = W // pW
            else:
                Dp, Hp, Wp = grid_sizes
            C_out = z.shape[2]
            z = z.transpose(1, 2).reshape(B, C_out, Dp, Hp, Wp)
        else:
            B2, C_out, Dp, Hp, Wp = z.shape
            assert B2 == B, "Batch size mismatch... ya sharked it."

        # knock out the conv bias before inverting the linear map
        b = self.patch_embedding.bias.view(1, C_out, 1, 1, 1)
        z_nobias = z - b

        # 2D filter -> pinv
        w3 = self.patch_embedding.weight          # [C_out, C_in, 1, pH, pW]
        w2 = w3.squeeze(2)                        # [C_out, C_in, pH, pW]
        out_ch, in_ch, kH, kW = w2.shape
        W_flat = w2.view(out_ch, -1)              # [C_out, in_ch*pH*pW]
        W_pinv = torch.linalg.pinv(W_flat)        # [in_ch*pH*pW, C_out]

        # merge depth into the batch axis so a single 2D fold suffices
        z2 = z_nobias.permute(0,2,1,3,4).reshape(B*Dp, C_out, Hp, Wp)

        # apply pinv ... recover one flattened input patch per token
        z_flat = z2.reshape(B*Dp, C_out, -1)      # [B*Dp, C_out, L]
        x_patches = W_pinv @ z_flat               # [B*Dp, in_ch*pH*pW, L]

        # fold -> spatial frames (non-overlapping, so no normalization needed)
        x2 = F.fold(
            x_patches,
            output_size=(H, W),
            kernel_size=(pH, pW),
            stride=(sH, sW)
        )                                         # [B*Dp, C_in, H, W]

        # un-merge depth
        x2 = x2.reshape(B, Dp, in_ch, H, W)       # [B, Dp, C_in, H, W]
        x_recon = x2.permute(0,2,1,3,4).contiguous()  # [B, C_in, D, H, W]
        return x_recon
def forward_orig(
    self,
    x,
    t,
    context,
    clip_fea = None,
    freqs = None,
    transformer_options = {},
    UNCOND = False,
):
    r"""
    Forward pass through the diffusion model

    Args:
        x (Tensor):
            List of input video tensors with shape [B, C_in, F, H, W]
        t (Tensor):
            Diffusion timesteps tensor of shape [B]
        context (List[Tensor]):
            List of text embeddings each with shape [B, L, C]
        seq_len (`int`):
            Maximum sequence length for positional encoding
        clip_fea (Tensor, *optional*):
            CLIP image features for image-to-video mode
        y (List[Tensor], *optional*):
            Conditional video inputs for image-to-video mode, same shape as x

    Returns:
        List[Tensor]:
            List of denoised video tensors with original input shapes [C_out, F, H / 8, W / 8]
    """
    # NOTE(review): transformer_options={} is a mutable default argument; this
    # relies on the caller (forward) always passing its own dict — verify.

    # Dead experimental code (frame-flip patch embedding), kept for reference:
    """trash = x[:,16:,...]
    x_slice_flip = torch.cat([x[:,:16,...], torch.flip(trash, dims=[2])], dim=1)
    x_slice_flip = self.patch_embedding(x_slice_flip.float()).to(x.dtype)
    x = self.patch_embedding(x.float()).to(x.dtype)
    x = torch.cat([x[:,:,:9,...], x_slice_flip[:,:,9:,...]], dim=2)"""

    """x1 = self.patch_embedding(x[:,:,:8,...].float()).to(x.dtype)
    x_slice = torch.cat([x[:,:16,8:,...], trash[:,:,0:9, ...]], dim=1)
    x2 = self.patch_embedding(x_slice.float()).to(x.dtype)
    x = torch.cat([x1, x2], dim=2)"""

    # optional style-guide latents for epsilon restyling after the backbone
    y0_style_pos = transformer_options.get("y0_style_pos")
    y0_style_neg = transformer_options.get("y0_style_neg")

    SIGMA = t[0].clone() / 1000   # timestep -> sigma (flow-style, sigma in [0, 1])
    EO = transformer_options.get("ExtraOptions", ExtraOptions(""))

    # embeddings
    #self.patch_embedding.to(self.time_embedding[0].weight.dtype)
    x_orig = x.clone()
    #x = self.patch_embedding(x.float()).to(self.time_embedding[0].weight.dtype) #next line to torch.Size([1, 5120, 17, 30, 30]) from 1,36,17,30,30
    x = self.patch_embedding(x.float()).to(x.dtype)   # vram jumped from ~16-16.5 up to 17.98, gained 300mb with weights at torch.float8_e4m3fn
    grid_sizes = x.shape[2:]                          # (F', H', W') patch grid
    x = x.flatten(2).transpose(1, 2)                  # tokens: [B, S, dim]

    # time embeddings
    e = self.time_embedding(
        sinusoidal_embedding_1d(self.freq_dim, t).to(dtype=x[0].dtype))
    e0 = self.time_projection(e).unflatten(1, (6, self.dim))   # 6 modulation vectors per block

    # context
    context = self.text_embedding(context)

    context_clip = None
    if clip_fea is not None and self.img_emb is not None:
        context_clip = self.img_emb(clip_fea)  # bs x 257 x dim
        #context = torch.concat([context_clip, context], dim=1)

    # arguments shared by every transformer block
    kwargs = dict(
        e            = e0,
        freqs        = freqs,          # 1,32400,1,64,2,2 bfloat16 15.8 MB
        context      = context,        # 1,1536,5120 bfloat16 15.0 MB
        context_clip = context_clip,
        grid_sizes   = grid_sizes)

    # regional-conditioning schedule parameters
    weight = transformer_options['reg_cond_weight'] if 'reg_cond_weight' in transformer_options else 0.0
    floor  = transformer_options['reg_cond_floor']  if 'reg_cond_floor'  in transformer_options else 0.0
    floor  = min(floor, weight)

    # weight may arrive as a 0-dim tensor; normalize to a python scalar
    if type(weight) == float or type(weight) == int:
        pass
    else:
        weight = weight.item()

    AttnMask = transformer_options.get('AttnMask')
    mask = None
    if AttnMask is not None and weight > 0:
        mask = AttnMask.get(weight=weight)   # 32400,33936 bool 1048.6 MB
        mask_type_bool = type(mask[0][0].item()) == bool if mask is not None else False
        if not mask_type_bool:
            mask = mask.to(x.dtype)
        #text_len = context.shape[1]
        #mask[text_len:,text_len:] = torch.clamp(mask[text_len:,text_len:], min=floor.to(mask.device)) #ORIGINAL SELF-ATTN REGION BLEED

    mask_type_bool = type(mask[0][0].item()) == bool if mask is not None else False
    # mask columns are [text | image]: first txt_len columns gate cross-attn
    txt_len = context.shape[1]

    for i, block in enumerate(self.blocks):
        # once the scheduled depth fraction exceeds `weight`, demote a boolean
        # mask to a float mask (soft attention weighting) for remaining blocks
        if mask_type_bool and weight < (i / (len(self.blocks)-1)) and mask is not None:
            mask = mask.to(x.dtype)
        if mask is not None:
            if mask_type_bool and floor < 0 and (i / (len(self.blocks)-1)) < (-floor):  # use self-attn mask until block number
                x = block(x, self_mask=mask[:,txt_len:], cross_mask=mask[:,:txt_len].bool(), **kwargs)
            elif mask_type_bool and floor > 0 and floor < (i / (len(self.blocks)-1)):   # use self-attn mask after block number
                x = block(x, self_mask=mask[:,txt_len:], cross_mask=mask[:,:txt_len].bool(), **kwargs)
            elif floor == 0:
                x = block(x, self_mask=mask[:,txt_len:], cross_mask=mask[:,:txt_len].bool(), **kwargs)
            else:
                x = block(x, self_mask=None, cross_mask=mask[:,:txt_len].bool(), **kwargs)
        else:
            x = block(x, **kwargs)

    # head
    x = self.head(x, e)

    # unpatchify
    eps = self.unpatchify(x, grid_sizes)

    dtype = eps.dtype if self.style_dtype is None else self.style_dtype
    pinv_dtype = torch.float32 if dtype != torch.float64 else dtype
    W_inv = None   # NOTE(review): pinv_dtype and W_inv appear unused in this path

    # --- style transfer on the positive (cond) epsilon ---
    #if eps.shape[0] == 2 or (eps.shape[0] == 1 and not UNCOND):
    if y0_style_pos is not None:
        y0_style_pos_weight    = transformer_options.get("y0_style_pos_weight")
        y0_style_pos_synweight = transformer_options.get("y0_style_pos_synweight")
        y0_style_pos_synweight *= y0_style_pos_weight

        y0_style_pos = y0_style_pos.to(torch.float32)
        x = x_orig.clone().to(torch.float32)
        eps = eps.to(torch.float32)
        eps_orig = eps.clone()

        sigma = SIGMA   #t_orig[0].to(torch.float32) / 1000
        denoised = x - sigma * eps   # predicted x0 under the flow parameterization

        img = comfy.ldm.common_dit.pad_to_patch_size(denoised, self.patch_size)
        patch_size = self.patch_size

        denoised_embed = self.patch_embedding(img.float())
        grid_sizes = denoised_embed.shape[2:]
        denoised_embed = denoised_embed.flatten(2).transpose(1, 2)

        img_y0_adain = comfy.ldm.common_dit.pad_to_patch_size(y0_style_pos, self.patch_size)
        patch_size = self.patch_size

        y0_adain_embed = self.patch_embedding(img_y0_adain.float())
        grid_sizes = y0_adain_embed.shape[2:]
        y0_adain_embed = y0_adain_embed.flatten(2).transpose(1, 2)

        if transformer_options['y0_style_method'] == "AdaIN":
            denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed)
            # optional fixed-point refinement: round-trip through the (pseudo-)
            # inverse patch embedding and re-apply AdaIN each iteration
            for adain_iter in range(EO("style_iter", 0)):
                denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed)
                denoised_embed = self.invert_patch_embedding(denoised_embed, x_orig.shape, grid_sizes)
                denoised_embed = self.patch_embedding(denoised_embed.float())
                grid_sizes = denoised_embed.shape[2:]
                denoised_embed = denoised_embed.flatten(2).transpose(1, 2)
                denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed)

        elif transformer_options['y0_style_method'] == "WCT":
            # recompute the style coloring transform only when the style
            # embedding actually changed (cached on self between calls)
            if self.y0_adain_embed is None or self.y0_adain_embed.shape != y0_adain_embed.shape or torch.norm(self.y0_adain_embed - y0_adain_embed) > 0:
                self.y0_adain_embed = y0_adain_embed

                f_s = y0_adain_embed[0].clone()
                self.mu_s = f_s.mean(dim=0, keepdim=True)
                f_s_centered = f_s - self.mu_s

                cov = (f_s_centered.T.double() @ f_s_centered.double()) / (f_s_centered.size(0) - 1)

                S_eig, U_eig = torch.linalg.eigh(cov + 1e-5 * torch.eye(cov.size(0), dtype=cov.dtype, device=cov.device))
                S_eig_sqrt = S_eig.clamp(min=0).sqrt()   # eigenvalues -> singular values
                whiten = U_eig @ torch.diag(S_eig_sqrt) @ U_eig.T   # cov^(1/2): coloring transform
                self.y0_color = whiten.to(f_s_centered)

            # whiten each batch entry's content features, then recolor with the
            # style covariance and mean
            for wct_i in range(eps.shape[0]):
                f_c = denoised_embed[wct_i].clone()
                mu_c = f_c.mean(dim=0, keepdim=True)
                f_c_centered = f_c - mu_c

                cov = (f_c_centered.T.double() @ f_c_centered.double()) / (f_c_centered.size(0) - 1)

                S_eig, U_eig = torch.linalg.eigh(cov + 1e-5 * torch.eye(cov.size(0), dtype=cov.dtype, device=cov.device))
                inv_sqrt_eig = S_eig.clamp(min=0).rsqrt()
                whiten = U_eig @ torch.diag(inv_sqrt_eig) @ U_eig.T   # cov^(-1/2): whitening transform
                whiten = whiten.to(f_c_centered)

                f_c_whitened = f_c_centered @ whiten.T
                f_cs = f_c_whitened @ self.y0_color.T + self.mu_s

                denoised_embed[wct_i] = f_cs

        denoised_approx = self.invert_patch_embedding(denoised_embed, x_orig.shape, grid_sizes)
        denoised_approx = denoised_approx.to(eps)

        # re-derive epsilon from the restyled denoised estimate, then blend
        eps = (x - denoised_approx) / sigma

        if not UNCOND:
            if eps.shape[0] == 2:
                eps[1] = eps_orig[1] + y0_style_pos_weight    * (eps[1] - eps_orig[1])
                eps[0] = eps_orig[0] + y0_style_pos_synweight * (eps[0] - eps_orig[0])
            else:
                eps[0] = eps_orig[0] + y0_style_pos_weight    * (eps[0] - eps_orig[0])
        elif eps.shape[0] == 1 and UNCOND:
            eps[0] = eps_orig[0] + y0_style_pos_synweight * (eps[0] - eps_orig[0])

        eps = eps.float()

    # --- style transfer on the negative (uncond) epsilon ---
    #if eps.shape[0] == 2 or (eps.shape[0] == 1 and UNCOND):
    if y0_style_neg is not None:
        y0_style_neg_weight    = transformer_options.get("y0_style_neg_weight")
        y0_style_neg_synweight = transformer_options.get("y0_style_neg_synweight")
        y0_style_neg_synweight *= y0_style_neg_weight

        y0_style_neg = y0_style_neg.to(torch.float32)
        x = x_orig.clone().to(torch.float32)
        eps = eps.to(torch.float32)
        eps_orig = eps.clone()

        sigma = SIGMA   #t_orig[0].to(torch.float32) / 1000
        denoised = x - sigma * eps

        img = comfy.ldm.common_dit.pad_to_patch_size(denoised, self.patch_size)
        patch_size = self.patch_size

        denoised_embed = self.patch_embedding(img.float())
        grid_sizes = denoised_embed.shape[2:]
        denoised_embed = denoised_embed.flatten(2).transpose(1, 2)

        img_y0_adain = comfy.ldm.common_dit.pad_to_patch_size(y0_style_neg, self.patch_size)
        patch_size = self.patch_size

        y0_adain_embed = self.patch_embedding(img_y0_adain.float())
        grid_sizes = y0_adain_embed.shape[2:]
        y0_adain_embed = y0_adain_embed.flatten(2).transpose(1, 2)

        if transformer_options['y0_style_method'] == "AdaIN":
            denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed)
            for adain_iter in range(EO("style_iter", 0)):
                denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed)
                denoised_embed = self.invert_patch_embedding(denoised_embed, x_orig.shape, grid_sizes)
                denoised_embed = self.patch_embedding(denoised_embed.float())
                grid_sizes = denoised_embed.shape[2:]
                denoised_embed = denoised_embed.flatten(2).transpose(1, 2)
                denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed)

        elif transformer_options['y0_style_method'] == "WCT":
            # NOTE(review): shares self.y0_adain_embed / self.y0_color with the
            # positive branch above — if pos and neg styles differ, the cache is
            # recomputed every call; confirm this is intended.
            if self.y0_adain_embed is None or self.y0_adain_embed.shape != y0_adain_embed.shape or torch.norm(self.y0_adain_embed - y0_adain_embed) > 0:
                self.y0_adain_embed = y0_adain_embed

                f_s = y0_adain_embed[0].clone()
                self.mu_s = f_s.mean(dim=0, keepdim=True)
                f_s_centered = f_s - self.mu_s

                cov = (f_s_centered.T.double() @ f_s_centered.double()) / (f_s_centered.size(0) - 1)

                S_eig, U_eig = torch.linalg.eigh(cov + 1e-5 * torch.eye(cov.size(0), dtype=cov.dtype, device=cov.device))
                S_eig_sqrt = S_eig.clamp(min=0).sqrt()   # eigenvalues -> singular values
                whiten = U_eig @ torch.diag(S_eig_sqrt) @ U_eig.T
                self.y0_color = whiten.to(f_s_centered)

            for wct_i in range(eps.shape[0]):
                f_c = denoised_embed[wct_i].clone()
                mu_c = f_c.mean(dim=0, keepdim=True)
                f_c_centered = f_c - mu_c

                cov = (f_c_centered.T.double() @ f_c_centered.double()) / (f_c_centered.size(0) - 1)

                S_eig, U_eig = torch.linalg.eigh(cov + 1e-5 * torch.eye(cov.size(0), dtype=cov.dtype, device=cov.device))
                inv_sqrt_eig = S_eig.clamp(min=0).rsqrt()
                whiten = U_eig @ torch.diag(inv_sqrt_eig) @ U_eig.T
                whiten = whiten.to(f_c_centered)

                f_c_whitened = f_c_centered @ whiten.T
                f_cs = f_c_whitened @ self.y0_color.T + self.mu_s

                denoised_embed[wct_i] = f_cs

        denoised_approx = self.invert_patch_embedding(denoised_embed, x_orig.shape, grid_sizes)
        denoised_approx = denoised_approx.to(eps)

        # unlike the positive branch, eps is only re-derived on the uncond pass;
        # the elif below is then a no-op blend since eps == eps_orig there
        if UNCOND:
            eps = (x - denoised_approx) / sigma
            eps[0] = eps_orig[0] + y0_style_neg_weight * (eps[0] - eps_orig[0])
            if eps.shape[0] == 2:
                eps[1] = eps_orig[1] + y0_style_neg_synweight * (eps[1] - eps_orig[1])
        elif eps.shape[0] == 1 and not UNCOND:
            eps[0] = eps_orig[0] + y0_style_neg_synweight * (eps[0] - eps_orig[0])

        eps = eps.float()

    return eps
# context.shape = 2,512,1536 x.shape = 2,14040,1536 timestep.shape h_len=30, w_len=52 30 * 52 = 1560
def forward(self, x, timestep, context, clip_fea=None, transformer_options={}, **kwargs):
    """Batched entry point: runs ``forward_orig`` once per cond/uncond entry in
    ``transformer_options['cond_or_uncond']``, building per-entry regional
    contexts and RoPE frequencies, then stacks the per-entry outputs.

    NOTE(review): transformer_options={} is a mutable default and is written to
    below ('original_shape', 'reg_cond_weight', ...) — relies on ComfyUI always
    passing a fresh dict per call; verify.
    """
    # Dead experimental path kept for reference:
    """if False: #clip_fea is not None:
        bs, c, t, h, w = x.shape
        x = comfy.ldm.common_dit.pad_to_patch_size(x, self.patch_size)
        patch_size = self.patch_size   # tuple = 1,2,2,
        t_len = ((t + (patch_size[0] // 2)) // patch_size[0])
        h_len = ((h + (patch_size[1] // 2)) // patch_size[1])
        w_len = ((w + (patch_size[2] // 2)) // patch_size[2])
        img_ids = torch.zeros((t_len, h_len, w_len, 3), device=x.device, dtype=x.dtype)
        img_ids[:, :, :, 0] = img_ids[:, :, :, 0] + torch.linspace(0, t_len - 1, steps=t_len, device=x.device, dtype=x.dtype).reshape(-1, 1, 1)
        img_ids[:, :, :, 1] = img_ids[:, :, :, 1] + torch.linspace(0, h_len - 1, steps=h_len, device=x.device, dtype=x.dtype).reshape(1, -1, 1)
        img_ids[:, :, :, 2] = img_ids[:, :, :, 2] + torch.linspace(0, w_len - 1, steps=w_len, device=x.device, dtype=x.dtype).reshape(1, 1, -1)
        img_ids = repeat(img_ids, "t h w c -> b (t h w) c", b=bs)
        # 14040 = 9 * 1560    1560 = 1536 + 24    1560/24 = 65
        freqs = self.rope_embedder(img_ids).movedim(1, 2)
        return self.forward_orig(x, timestep, context, clip_fea=clip_fea, freqs=freqs)[:, :, :t, :h, :w]"""

    #x = torch.cat([x[:,:,:8,...], torch.flip(x[:,:,8:,...], dims=[2])], dim=2)

    # keep pristine copies; each loop iteration below restarts from these
    x_orig        = x.clone()          # 1,16,36,60,60 bfloat16
    timestep_orig = timestep.clone()   # 1000 float32
    context_orig  = context.clone()    # 1,512,4096 bfloat16

    out_list = []
    for i in range(len(transformer_options['cond_or_uncond'])):
        UNCOND = transformer_options['cond_or_uncond'][i] == 1

        x        = x_orig.clone()
        timestep = timestep_orig.clone()
        context  = context_orig.clone()

        bs, c, t, h, w = x.shape
        x = comfy.ldm.common_dit.pad_to_patch_size(x, self.patch_size)
        patch_size = self.patch_size

        transformer_options['original_shape'] = x.shape
        transformer_options['patch_size']     = patch_size

        """if UNCOND:
            transformer_options['reg_cond_weight'] = 0.0 # -1
            context_tmp = context[i][None,...].clone()"""

        if UNCOND:
            #transformer_options['reg_cond_weight'] = -1
            #context_tmp = context[i][None,...].clone()
            transformer_options['reg_cond_weight']    = transformer_options.get("regional_conditioning_weight", 0.0)
            transformer_options['reg_cond_floor']     = transformer_options.get("regional_conditioning_floor", 0.0)
            transformer_options['reg_cond_mask_orig'] = transformer_options.get('regional_conditioning_mask_orig')

            AttnMask   = transformer_options.get('AttnMask', None)
            RegContext = transformer_options.get('RegContext', None)
            if AttnMask is not None and transformer_options['reg_cond_weight'] != 0.0:
                AttnMask.attn_mask_recast(x.dtype)
                context_tmp = RegContext.get().to(context.dtype)
                clip_fea = RegContext.get_clip_fea()
                # NOTE(review): `if clip_fea` truthiness raises on multi-element
                # tensors; presumably get_clip_fea() returns None or a tensor
                # where bool() is valid — confirm against RegContext.
                clip_fea = clip_fea.to(x.dtype) if clip_fea else None
                # tile the uncond text embedding up to the regional context length
                A = context[i][None,...].clone()
                B = context_tmp
                context_tmp = A.repeat(1, (B.shape[1] // A.shape[1]) + 1, 1)[:, :B.shape[1], :]
            else:
                context_tmp = context[i][None,...].clone()

        elif UNCOND == False:
            transformer_options['reg_cond_weight']    = transformer_options.get("regional_conditioning_weight", 0.0)
            transformer_options['reg_cond_floor']     = transformer_options.get("regional_conditioning_floor", 0.0)
            transformer_options['reg_cond_mask_orig'] = transformer_options.get('regional_conditioning_mask_orig')

            AttnMask   = transformer_options.get('AttnMask', None)
            RegContext = transformer_options.get('RegContext', None)
            if AttnMask is not None and transformer_options['reg_cond_weight'] != 0.0:
                AttnMask.attn_mask_recast(x.dtype)
                # cond pass uses the regional context directly (no tiling)
                context_tmp = RegContext.get()
                clip_fea = RegContext.get_clip_fea()
                clip_fea = clip_fea.to(x.dtype) if clip_fea else None
            else:
                context_tmp = context[i][None,...].clone()

        if context_tmp is None:
            context_tmp = context[i][None,...].clone()
        context_tmp = context_tmp.to(context.dtype)

        # rounded-up patch-grid lengths for positional ids
        t_len = ((t + (patch_size[0] // 2)) // patch_size[0])
        h_len = ((h + (patch_size[1] // 2)) // patch_size[1])
        w_len = ((w + (patch_size[2] // 2)) // patch_size[2])
        img_ids = torch.zeros((t_len, h_len, w_len, 3), device=x.device, dtype=x.dtype)
        img_ids[:, :, :, 0] = img_ids[:, :, :, 0] + torch.linspace(0, t_len - 1, steps=t_len, device=x.device, dtype=x.dtype).reshape(-1, 1, 1)
        img_ids[:, :, :, 1] = img_ids[:, :, :, 1] + torch.linspace(0, h_len - 1, steps=h_len, device=x.device, dtype=x.dtype).reshape(1, -1, 1)
        img_ids[:, :, :, 2] = img_ids[:, :, :, 2] + torch.linspace(0, w_len - 1, steps=w_len, device=x.device, dtype=x.dtype).reshape(1, 1, -1)
        img_ids = repeat(img_ids, "t h w c -> b (t h w) c", b=bs)
        # 14040 = 9 * 1560    1560 = 1536 + 24    1560/24 = 65

        freqs = self.rope_embedder(img_ids).movedim(1, 2).to(x.dtype)

        out_x = self.forward_orig(
            x        [i][None,...],
            timestep [i][None,...],
            context_tmp,
            clip_fea            = clip_fea,
            freqs               = freqs[i][None,...],
            transformer_options = transformer_options,
            UNCOND              = UNCOND,
        )[:, :, :t, :h, :w]   # crop any patch padding back off
        #out_x = torch.cat([out_x[:,:,:8,...], torch.flip(out_x[:,:,8:,...], dims=[2])], dim=2)

        out_list.append(out_x)

    out_stack = torch.stack(out_list, dim=0).squeeze(dim=1)
    return out_stack
def unpatchify(self, x, grid_sizes):
    r"""
    Reassemble video tensors from flattened patch embeddings.

    Args:
        x (Tensor):
            Patchified features of shape [B, L, C_out * prod(patch_size)];
            any tokens beyond prod(grid_sizes) are ignored.
        grid_sizes (tuple):
            Patch-grid dimensions (F_patches, H_patches, W_patches) used
            before flattening.

    Returns:
        Tensor:
            Video tensor of shape
            [B, C_out, F_patches * pF, H_patches * pH, W_patches * pW].
    """
    channels = self.out_dim
    batch = x.shape[0]
    token_count = math.prod(grid_sizes)

    # [B, L, ...] -> [B, F', H', W', pF, pH, pW, C]
    patches = x[:, :token_count].view(batch, *grid_sizes, *self.patch_size, channels)
    # interleave grid axes with their patch axes back into contiguous dims
    patches = torch.einsum('bfhwpqrc->bcfphqwr', patches)
    full_dims = [g * p for g, p in zip(grid_sizes, self.patch_size)]
    return patches.reshape(batch, channels, *full_dims)
def adain_seq_inplace(content: torch.Tensor, style: torch.Tensor, eps: float = 1e-7) -> torch.Tensor:
    """
    In-place AdaIN over the sequence axis: shift and rescale `content` so its
    per-channel mean/std along dim 1 match those of `style`.

    Args:
        content: Tensor [B, seq, dim]; mutated in place.
        style:   Tensor whose dim-1 statistics are transferred onto `content`.
        eps:     Added to both stds to guard against division by zero.

    Returns:
        The mutated `content` tensor (same object, for chaining).
    """
    style_mean = style.mean(1, keepdim=True)
    style_std = style.std(1, keepdim=True).add_(eps)
    content_mean = content.mean(1, keepdim=True)
    content_std = content.std(1, keepdim=True).add_(eps)

    content.sub_(content_mean)
    content.div_(content_std)
    content.mul_(style_std)
    content.add_(style_mean)
    return content
================================================
FILE: wan/vae.py
================================================
# original version: https://github.com/Wan-Video/Wan2.1/blob/main/wan/modules/vae.py
# Copyright 2024-2025 The Alibaba Wan Team Authors. All rights reserved.
import torch
import torch.nn as nn
import torch.nn.functional as F
from einops import rearrange
from comfy.ldm.modules.diffusionmodules.model import vae_attention
import comfy.ops
ops = comfy.ops.disable_weight_init
CACHE_T = 2
class CausalConv3d(ops.Conv3d):
    """
    Causal 3D convolution: spatial padding stays symmetric, while temporal
    padding is applied entirely on the past side so no future frames leak in.
    Supports streaming by prepending cached frames via `cache_x`.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # F.pad order is (W_left, W_right, H_top, H_bottom, T_front, T_back):
        # double the temporal pad on the front, none on the back.
        w_pad = self.padding[2]
        h_pad = self.padding[1]
        t_pad = self.padding[0]
        self._padding = (w_pad, w_pad, h_pad, h_pad, 2 * t_pad, 0)
        # disable the conv's own symmetric padding; we pad explicitly instead
        self.padding = (0, 0, 0)

    def forward(self, x, cache_x=None):
        pad = list(self._padding)
        if cache_x is not None and self._padding[4] > 0:
            # cached past frames substitute for (part of) the front padding
            cache_x = cache_x.to(x.device)
            x = torch.cat([cache_x, x], dim=2)
            pad[4] -= cache_x.shape[2]
        return super().forward(F.pad(x, pad))
class RMS_norm(nn.Module):
    """RMS normalization with a learned per-channel gain and optional bias.

    Normalizes along the channel axis (dim 1 when `channel_first`, else the
    last dim) and rescales by sqrt(dim) so unit-RMS inputs keep their scale.
    """

    def __init__(self, dim, channel_first=True, images=True, bias=False):
        super().__init__()
        # parameters broadcast over 2 trailing spatial dims (images) or 3 (video)
        trailing = (1, 1) if images else (1, 1, 1)
        shape = (dim, *trailing) if channel_first else (dim,)

        self.channel_first = channel_first
        self.scale = dim ** 0.5
        self.gamma = nn.Parameter(torch.ones(shape))
        self.bias = nn.Parameter(torch.zeros(shape)) if bias else None

    def forward(self, x):
        norm_dim = 1 if self.channel_first else -1
        out = F.normalize(x, dim=norm_dim) * self.scale * self.gamma.to(x)
        if self.bias is not None:
            out = out + self.bias.to(x)
        return out
class Upsample(nn.Upsample):
    def forward(self, x):
        """
        Fix bfloat16 support for nearest neighbor interpolation: interpolate
        in float32, then cast back to the input dtype.
        """
        out = super().forward(x.float())
        return out.type_as(x)
class Resample(nn.Module):
    """Spatial (and optionally temporal) up/downsampling with streaming
    support. 3D modes keep a per-layer temporal cache in `feat_cache`; the
    string sentinel 'Rep' marks the very first chunk (no past frames yet)."""

    def __init__(self, dim, mode):
        assert mode in ('none', 'upsample2d', 'upsample3d', 'downsample2d',
                        'downsample3d')
        super().__init__()
        self.dim = dim
        self.mode = mode

        # layers
        if mode == 'upsample2d':
            self.resample = nn.Sequential(
                Upsample(scale_factor=(2., 2.), mode='nearest-exact'),
                ops.Conv2d(dim, dim // 2, 3, padding=1))
        elif mode == 'upsample3d':
            self.resample = nn.Sequential(
                Upsample(scale_factor=(2., 2.), mode='nearest-exact'),
                ops.Conv2d(dim, dim // 2, 3, padding=1))
            # doubles channels; the extra copy becomes the interleaved new frame
            self.time_conv = CausalConv3d(
                dim, dim * 2, (3, 1, 1), padding=(1, 0, 0))
        elif mode == 'downsample2d':
            self.resample = nn.Sequential(
                nn.ZeroPad2d((0, 1, 0, 1)),
                ops.Conv2d(dim, dim, 3, stride=(2, 2)))
        elif mode == 'downsample3d':
            self.resample = nn.Sequential(
                nn.ZeroPad2d((0, 1, 0, 1)),
                ops.Conv2d(dim, dim, 3, stride=(2, 2)))
            # temporal stride 2 halves the frame count
            self.time_conv = CausalConv3d(
                dim, dim, (3, 1, 1), stride=(2, 1, 1), padding=(0, 0, 0))
        else:
            self.resample = nn.Identity()

    def forward(self, x, feat_cache=None, feat_idx=[0]):
        # NOTE: feat_idx is a shared mutable cursor into feat_cache; callers
        # pass a fresh [0] per streaming pass.
        b, c, t, h, w = x.size()
        if self.mode == 'upsample3d':
            if feat_cache is not None:
                idx = feat_idx[0]
                if feat_cache[idx] is None:
                    # first chunk: mark with sentinel, skip temporal upsampling
                    feat_cache[idx] = 'Rep'
                    feat_idx[0] += 1
                else:
                    cache_x = x[:, :, -CACHE_T:, :, :].clone()
                    if cache_x.shape[2] < 2 and feat_cache[
                            idx] is not None and feat_cache[idx] != 'Rep':
                        # cache last frame of last two chunk
                        cache_x = torch.cat([
                            feat_cache[idx][:, :, -1, :, :].unsqueeze(2).to(
                                cache_x.device), cache_x
                        ],
                                            dim=2)
                    if cache_x.shape[2] < 2 and feat_cache[
                            idx] is not None and feat_cache[idx] == 'Rep':
                        # sentinel chunk: pad the cache with zeros instead
                        cache_x = torch.cat([
                            torch.zeros_like(cache_x).to(cache_x.device),
                            cache_x
                        ],
                                            dim=2)
                    if feat_cache[idx] == 'Rep':
                        x = self.time_conv(x)
                    else:
                        x = self.time_conv(x, feat_cache[idx])
                    feat_cache[idx] = cache_x
                    feat_idx[0] += 1

                    # split doubled channels into two frame sets and interleave
                    # them along time: t -> 2t
                    x = x.reshape(b, 2, c, t, h, w)
                    x = torch.stack((x[:, 0, :, :, :, :], x[:, 1, :, :, :, :]),
                                    3)
                    x = x.reshape(b, c, t * 2, h, w)
        t = x.shape[2]
        # spatial resampling runs per-frame: fold time into batch
        x = rearrange(x, 'b c t h w -> (b t) c h w')
        x = self.resample(x)
        x = rearrange(x, '(b t) c h w -> b c t h w', t=t)

        if self.mode == 'downsample3d':
            if feat_cache is not None:
                idx = feat_idx[0]
                if feat_cache[idx] is None:
                    feat_cache[idx] = x.clone()
                    feat_idx[0] += 1
                else:
                    cache_x = x[:, :, -1:, :, :].clone()
                    # if cache_x.shape[2] < 2 and feat_cache[idx] is not None and feat_cache[idx]!='Rep':
                    #     # cache last frame of last two chunk
                    #     cache_x = torch.cat([feat_cache[idx][:, :, -1, :, :].unsqueeze(2).to(cache_x.device), cache_x], dim=2)

                    # prepend the cached last frame so the strided conv stays causal
                    x = self.time_conv(
                        torch.cat([feat_cache[idx][:, :, -1:, :, :], x], 2))
                    feat_cache[idx] = cache_x
                    feat_idx[0] += 1
        return x

    def init_weight(self, conv):
        # initialize the temporal conv as (half-)identity at the current frame
        conv_weight = conv.weight
        nn.init.zeros_(conv_weight)
        c1, c2, t, h, w = conv_weight.size()
        one_matrix = torch.eye(c1, c2)
        init_matrix = one_matrix
        nn.init.zeros_(conv_weight)
        #conv_weight.data[:,:,-1,1,1] = init_matrix * 0.5
        conv_weight.data[:, :, 1, 0, 0] = init_matrix #* 0.5
        conv.weight.data.copy_(conv_weight)
        nn.init.zeros_(conv.bias.data)

    def init_weight2(self, conv):
        # initialize the channel-doubling temporal conv so both halves pass
        # the last input frame through unchanged
        conv_weight = conv.weight.data
        nn.init.zeros_(conv_weight)
        c1, c2, t, h, w = conv_weight.size()
        init_matrix = torch.eye(c1 // 2, c2)
        #init_matrix = repeat(init_matrix, 'o ... -> (o 2) ...').permute(1,0,2).contiguous().reshape(c1,c2)
        conv_weight[:c1 // 2, :, -1, 0, 0] = init_matrix
        conv_weight[c1 // 2:, :, -1, 0, 0] = init_matrix
        conv.weight.data.copy_(conv_weight)
        nn.init.zeros_(conv.bias.data)
class ResidualBlock(nn.Module):
    """Causal 3D residual block: two RMS-normed CausalConv3d stages plus a
    shortcut (1x1x1 projection only when channel count changes). Supports
    chunked streaming through feat_cache/feat_idx."""

    def __init__(self, in_dim, out_dim, dropout=0.0):
        super().__init__()
        self.in_dim = in_dim
        self.out_dim = out_dim

        # layers
        self.residual = nn.Sequential(
            RMS_norm(in_dim, images=False), nn.SiLU(),
            CausalConv3d(in_dim, out_dim, 3, padding=1),
            RMS_norm(out_dim, images=False), nn.SiLU(), nn.Dropout(dropout),
            CausalConv3d(out_dim, out_dim, 3, padding=1))
        self.shortcut = CausalConv3d(in_dim, out_dim, 1) \
            if in_dim != out_dim else nn.Identity()

    def forward(self, x, feat_cache=None, feat_idx=[0]):
        # NOTE: feat_idx is a shared mutable cursor into feat_cache; callers
        # pass a fresh [0] per streaming pass.
        h = self.shortcut(x)
        for layer in self.residual:
            if isinstance(layer, CausalConv3d) and feat_cache is not None:
                idx = feat_idx[0]
                # keep the trailing CACHE_T frames as temporal context for the
                # next chunk's causal padding
                cache_x = x[:, :, -CACHE_T:, :, :].clone()
                if cache_x.shape[2] < 2 and feat_cache[idx] is not None:
                    # cache last frame of last two chunk
                    cache_x = torch.cat([
                        feat_cache[idx][:, :, -1, :, :].unsqueeze(2).to(
                            cache_x.device), cache_x
                    ],
                                        dim=2)
                x = layer(x, feat_cache[idx])
                feat_cache[idx] = cache_x
                feat_idx[0] += 1
            else:
                x = layer(x)
        return x + h
class AttentionBlock(nn.Module):
    """
    Causal self-attention with a single head, applied independently per frame
    over the spatial positions, with a residual connection.
    """

    def __init__(self, dim):
        super().__init__()
        self.dim = dim

        # layers
        self.norm = RMS_norm(dim)
        self.to_qkv = ops.Conv2d(dim, dim * 3, 1)
        self.proj = ops.Conv2d(dim, dim, 1)
        self.optimized_attention = vae_attention()

    def forward(self, x):
        residual = x
        b, c, t, h, w = x.size()
        # fold time into batch so attention runs independently on each frame
        x = rearrange(x, 'b c t h w -> (b t) c h w')
        x = self.norm(x)

        # compute query, key, value via a single 1x1 conv, then split
        q, k, v = self.to_qkv(x).chunk(3, dim=1)
        x = self.optimized_attention(q, k, v)

        # output projection and un-fold time
        x = self.proj(x)
        x = rearrange(x, '(b t) c h w-> b c t h w', t=t)
        return x + residual
class Encoder3d(nn.Module):
    """Causal 3D VAE encoder: conv stem -> residual/attention downsampling
    stages -> middle blocks -> z projection head. Supports chunked streaming
    of long videos via feat_cache/feat_idx (one cache slot per CausalConv3d)."""

    def __init__(self,
                 dim=128,
                 z_dim=4,
                 dim_mult=[1, 2, 4, 4],
                 num_res_blocks=2,
                 attn_scales=[],
                 temperal_downsample=[True, True, False],
                 dropout=0.0):
        super().__init__()
        self.dim = dim
        self.z_dim = z_dim
        self.dim_mult = dim_mult
        self.num_res_blocks = num_res_blocks
        self.attn_scales = attn_scales
        self.temperal_downsample = temperal_downsample

        # dimensions
        dims = [dim * u for u in [1] + dim_mult]
        scale = 1.0   # current spatial scale relative to the input

        # init block
        self.conv1 = CausalConv3d(3, dims[0], 3, padding=1)

        # downsample blocks
        downsamples = []
        for i, (in_dim, out_dim) in enumerate(zip(dims[:-1], dims[1:])):
            # residual (+attention) blocks
            for _ in range(num_res_blocks):
                downsamples.append(ResidualBlock(in_dim, out_dim, dropout))
                if scale in attn_scales:
                    downsamples.append(AttentionBlock(out_dim))
                in_dim = out_dim

            # downsample block (temporal downsampling only where configured)
            if i != len(dim_mult) - 1:
                mode = 'downsample3d' if temperal_downsample[
                    i] else 'downsample2d'
                downsamples.append(Resample(out_dim, mode=mode))
                scale /= 2.0
        self.downsamples = nn.Sequential(*downsamples)

        # middle blocks
        self.middle = nn.Sequential(
            ResidualBlock(out_dim, out_dim, dropout), AttentionBlock(out_dim),
            ResidualBlock(out_dim, out_dim, dropout))

        # output blocks
        self.head = nn.Sequential(
            RMS_norm(out_dim, images=False), nn.SiLU(),
            CausalConv3d(out_dim, z_dim, 3, padding=1))

    def forward(self, x, feat_cache=None, feat_idx=[0]):
        # NOTE: feat_idx is a shared mutable cursor into feat_cache; callers
        # pass a fresh [0] per streaming pass.
        if feat_cache is not None:
            idx = feat_idx[0]
            cache_x = x[:, :, -CACHE_T:, :, :].clone()
            if cache_x.shape[2] < 2 and feat_cache[idx] is not None:
                # cache last frame of last two chunk
                cache_x = torch.cat([
                    feat_cache[idx][:, :, -1, :, :].unsqueeze(2).to(
                        cache_x.device), cache_x
                ],
                                    dim=2)
            x = self.conv1(x, feat_cache[idx])
            feat_cache[idx] = cache_x
            feat_idx[0] += 1
        else:
            x = self.conv1(x)

        ## downsamples
        for layer in self.downsamples:
            if feat_cache is not None:
                x = layer(x, feat_cache, feat_idx)
            else:
                x = layer(x)

        ## middle
        for layer in self.middle:
            if isinstance(layer, ResidualBlock) and feat_cache is not None:
                x = layer(x, feat_cache, feat_idx)
            else:
                x = layer(x)

        ## head
        for layer in self.head:
            if isinstance(layer, CausalConv3d) and feat_cache is not None:
                idx = feat_idx[0]
                cache_x = x[:, :, -CACHE_T:, :, :].clone()
                if cache_x.shape[2] < 2 and feat_cache[idx] is not None:
                    # cache last frame of last two chunk
                    cache_x = torch.cat([
                        feat_cache[idx][:, :, -1, :, :].unsqueeze(2).to(
                            cache_x.device), cache_x
                    ],
                                        dim=2)
                x = layer(x, feat_cache[idx])
                feat_cache[idx] = cache_x
                feat_idx[0] += 1
            else:
                x = layer(x)
        return x
class Decoder3d(nn.Module):
    """Causal 3D VAE decoder: conv stem -> middle blocks -> residual/attention
    upsampling stages -> RGB head. Mirrors Encoder3d and supports chunked
    streaming via feat_cache/feat_idx (one cache slot per CausalConv3d)."""

    def __init__(self,
                 dim=128,
                 z_dim=4,
                 dim_mult=[1, 2, 4, 4],
                 num_res_blocks=2,
                 attn_scales=[],
                 temperal_upsample=[False, True, True],
                 dropout=0.0):
        super().__init__()
        self.dim = dim
        self.z_dim = z_dim
        self.dim_mult = dim_mult
        self.num_res_blocks = num_res_blocks
        self.attn_scales = attn_scales
        self.temperal_upsample = temperal_upsample

        # dimensions (reverse of the encoder)
        dims = [dim * u for u in [dim_mult[-1]] + dim_mult[::-1]]
        scale = 1.0 / 2**(len(dim_mult) - 2)   # starting spatial scale

        # init block
        self.conv1 = CausalConv3d(z_dim, dims[0], 3, padding=1)

        # middle blocks
        self.middle = nn.Sequential(
            ResidualBlock(dims[0], dims[0], dropout), AttentionBlock(dims[0]),
            ResidualBlock(dims[0], dims[0], dropout))

        # upsample blocks
        upsamples = []
        for i, (in_dim, out_dim) in enumerate(zip(dims[:-1], dims[1:])):
            # residual (+attention) blocks; later stages receive half the
            # channels because upsample2d/3d Resample halves them
            if i == 1 or i == 2 or i == 3:
                in_dim = in_dim // 2
            for _ in range(num_res_blocks + 1):
                upsamples.append(ResidualBlock(in_dim, out_dim, dropout))
                if scale in attn_scales:
                    upsamples.append(AttentionBlock(out_dim))
                in_dim = out_dim

            # upsample block (temporal upsampling only where configured)
            if i != len(dim_mult) - 1:
                mode = 'upsample3d' if temperal_upsample[i] else 'upsample2d'
                upsamples.append(Resample(out_dim, mode=mode))
                scale *= 2.0
        self.upsamples = nn.Sequential(*upsamples)

        # output blocks
        self.head = nn.Sequential(
            RMS_norm(out_dim, images=False), nn.SiLU(),
            CausalConv3d(out_dim, 3, 3, padding=1))

    def forward(self, x, feat_cache=None, feat_idx=[0]):
        # NOTE: feat_idx is a shared mutable cursor into feat_cache; callers
        # pass a fresh [0] per streaming pass.
        ## conv1
        if feat_cache is not None:
            idx = feat_idx[0]
            cache_x = x[:, :, -CACHE_T:, :, :].clone()
            if cache_x.shape[2] < 2 and feat_cache[idx] is not None:
                # cache last frame of last two chunk
                cache_x = torch.cat([
                    feat_cache[idx][:, :, -1, :, :].unsqueeze(2).to(
                        cache_x.device), cache_x
                ],
                                    dim=2)
            x = self.conv1(x, feat_cache[idx])
            feat_cache[idx] = cache_x
            feat_idx[0] += 1
        else:
            x = self.conv1(x)

        ## middle
        for layer in self.middle:
            if isinstance(layer, ResidualBlock) and feat_cache is not None:
                x = layer(x, feat_cache, feat_idx)
            else:
                x = layer(x)

        ## upsamples
        for layer in self.upsamples:
            if feat_cache is not None:
                x = layer(x, feat_cache, feat_idx)
            else:
                x = layer(x)

        ## head
        for layer in self.head:
            if isinstance(layer, CausalConv3d) and feat_cache is not None:
                idx = feat_idx[0]
                cache_x = x[:, :, -CACHE_T:, :, :].clone()
                if cache_x.shape[2] < 2 and feat_cache[idx] is not None:
                    # cache last frame of last two chunk
                    cache_x = torch.cat([
                        feat_cache[idx][:, :, -1, :, :].unsqueeze(2).to(
                            cache_x.device), cache_x
                    ],
                                        dim=2)
                x = layer(x, feat_cache[idx])
                feat_cache[idx] = cache_x
                feat_idx[0] += 1
            else:
                x = layer(x)
        return x
def count_conv3d(model):
    """Return the number of CausalConv3d modules contained in ``model``."""
    return sum(1 for module in model.modules()
               if isinstance(module, CausalConv3d))
class WanVAE(nn.Module):
    """Wan video VAE: Encoder3d + 1x1 latent convs + Decoder3d.

    encode()/decode() process the video in temporal chunks (1 frame first,
    then groups of 4) while per-CausalConv3d feature caches
    (_enc_feat_map / _feat_map) carry causal context between chunks.
    """

    def __init__(self,
                 dim=128,
                 z_dim=4,
                 dim_mult=[1, 2, 4, 4],
                 num_res_blocks=2,
                 attn_scales=[],
                 temperal_downsample=[True, True, False],
                 dropout=0.0):
        super().__init__()
        self.dim = dim
        self.z_dim = z_dim
        self.dim_mult = dim_mult
        self.num_res_blocks = num_res_blocks
        self.attn_scales = attn_scales
        self.temperal_downsample = temperal_downsample
        # decoder upsamples time in the reverse order the encoder downsampled
        self.temperal_upsample = temperal_downsample[::-1]

        # modules: encoder emits 2*z_dim channels (mean + log-variance)
        self.encoder = Encoder3d(dim, z_dim * 2, dim_mult, num_res_blocks,
                                 attn_scales, self.temperal_downsample, dropout)
        self.conv1 = CausalConv3d(z_dim * 2, z_dim * 2, 1)
        self.conv2 = CausalConv3d(z_dim, z_dim, 1)
        self.decoder = Decoder3d(dim, z_dim, dim_mult, num_res_blocks,
                                 attn_scales, self.temperal_upsample, dropout)

    def forward(self, x):
        # NOTE(review): encode() below returns only ``mu`` (a single tensor),
        # so this two-value unpack would fail at runtime — forward() appears
        # stale/unused; confirm before relying on it.
        mu, log_var = self.encode(x)
        z = self.reparameterize(mu, log_var)
        x_recon = self.decode(z)
        return x_recon, mu, log_var

    def encode(self, x):
        """Encode video x [b,c,t,h,w] to the latent mean; resets caches."""
        self.clear_cache()
        ## cache
        t = x.shape[2]
        iter_ = 1 + (t - 1) // 4
        ## split the encoder input x along time into chunks of 1, 4, 4, 4, ...
        for i in range(iter_):
            self._enc_conv_idx = [0]
            if i == 0:
                out = self.encoder(
                    x[:, :, :1, :, :],
                    feat_cache=self._enc_feat_map,
                    feat_idx=self._enc_conv_idx)
            else:
                out_ = self.encoder(
                    x[:, :, 1 + 4 * (i - 1):1 + 4 * i, :, :],
                    feat_cache=self._enc_feat_map,
                    feat_idx=self._enc_conv_idx)
                out = torch.cat([out, out_], 2)
        # conv1 predicts mean and log-variance; only the mean is returned
        mu, log_var = self.conv1(out).chunk(2, dim=1)
        self.clear_cache()
        return mu

    def decode(self, z):
        """Decode latents z [b,c,t,h,w] one temporal slice at a time."""
        self.clear_cache()
        # z: [b,c,t,h,w]
        iter_ = z.shape[2]
        x = self.conv2(z)
        for i in range(iter_):
            self._conv_idx = [0]
            if i == 0:
                out = self.decoder(
                    x[:, :, i:i + 1, :, :],
                    feat_cache=self._feat_map,
                    feat_idx=self._conv_idx)
            else:
                out_ = self.decoder(
                    x[:, :, i:i + 1, :, :],
                    feat_cache=self._feat_map,
                    feat_idx=self._conv_idx)
                out = torch.cat([out, out_], 2)
        self.clear_cache()
        return out

    def reparameterize(self, mu, log_var):
        # Standard VAE reparameterization trick: z = mu + sigma * eps.
        std = torch.exp(0.5 * log_var)
        eps = torch.randn_like(std)
        return eps * std + mu

    def sample(self, imgs, deterministic=False):
        # NOTE(review): encode() returns a single tensor, so this unpack
        # would fail at runtime — sample() also appears stale/unused.
        mu, log_var = self.encode(imgs)
        if deterministic:
            return mu
        std = torch.exp(0.5 * log_var.clamp(-30.0, 20.0))
        return mu + std * torch.randn_like(std)

    def clear_cache(self):
        """Reset per-CausalConv3d feature caches for decoder and encoder."""
        self._conv_num = count_conv3d(self.decoder)
        self._conv_idx = [0]
        self._feat_map = [None] * self._conv_num
        #cache encode
        self._enc_conv_num = count_conv3d(self.encoder)
        self._enc_conv_idx = [0]
        self._enc_feat_map = [None] * self._enc_conv_num
================================================
FILE: web/js/RES4LYF_dynamicWidgets.js
================================================
import { app } from "../../scripts/app.js";
import { ComfyWidgets } from "../../scripts/widgets.js";
// Module-level state mirrored with the Python backend via the settings UI below.
let RESDEBUG = false;          // gates debugLog console/backend output
let TOP_CLOWNDOG = true;       // sort RES4LYF categories to the top of the node menu
let DISPLAY_CATEGORY = true;   // forwarded to the backend; not read elsewhere in this file
let nodeCounter = 1;           // monotonically increasing id handed to processed nodes
const processedNodeMap = new WeakMap();  // node -> id; marks nodes already set up
const originalGetNodeTypesCategories = typeof LiteGraph.getNodeTypesCategories === 'function' ? LiteGraph.getNodeTypesCategories : null;
// Override the getNodeTypesCategories method if it exists, so RES4LYF
// categories sort to the top of the add-node menu while TOP_CLOWNDOG is on.
if (originalGetNodeTypesCategories) {
    LiteGraph.getNodeTypesCategories = function(filter) {
        // Strict comparison (was `==`); feature off -> stock behavior.
        if (TOP_CLOWNDOG === false) {
            return originalGetNodeTypesCategories.call(this, filter);
        }
        try {
            // Get the original categories, then float RES4LYF entries first.
            const categories = originalGetNodeTypesCategories.call(this, filter);
            categories.sort((a, b) => {
                const isARes4Lyf = a.startsWith("RES4LYF");
                const isBRes4Lyf = b.startsWith("RES4LYF");
                if (isARes4Lyf && !isBRes4Lyf) return -1;
                if (!isARes4Lyf && isBRes4Lyf) return 1;
                // Do the other auto sorting if enabled
                if (LiteGraph.auto_sort_node_types) {
                    return a.localeCompare(b);
                }
                return 0;
            });
            return categories;
        } catch (error) {
            // Don't break the menu on failure, but don't swallow the error
            // silently either (was an empty catch).
            console.error("RES4LYF: category sort failed, using defaults:", error);
            return originalGetNodeTypesCategories.call(this, filter);
        }
    };
}
// Log helper: prints to the console and mirrors the message to the Python
// backend. An optional trailing boolean argument forces output even when
// RESDEBUG is off.
function debugLog(...args) {
    let force = false;
    if (typeof args[args.length - 1] === "boolean") {
        force = args.pop();
    }
    if (!RESDEBUG && !force) {
        return;
    }
    console.log(...args);
    // Attempt to post the log text to the Python backend
    const logText = args.join(' ');
    fetch('/reslyf/log', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ log: logText }),
    }).catch(error => {
        console.error('Error posting log to backend:', error);
    });
}
const resDebugLog = debugLog;
// Adapted from essentials.DisplayAny from ComfyUI_essentials
// Shows the text payload of "Latent Display State Info" executions in a
// read-only multiline widget on the node.
app.registerExtension({
    name: "Comfy.RES4LYF.DisplayInfo",
    async beforeRegisterNodeDef(nodeType, nodeData, app) {
        if (!nodeData?.category?.startsWith("RES4LYF")) {
            return;
        }
        if (nodeData.name === "Latent Display State Info") {
            const onExecuted = nodeType.prototype.onExecuted;
            nodeType.prototype.onExecuted = function (message) {
                onExecuted?.apply(this, arguments);
                // (Removed a dead branch: the original gated a widget-removal
                // loop behind `widgets.length === 0`, under which the loop
                // could never execute.)
                // Reuse the "displaytext" widget if present; otherwise create
                // it once as a read-only, borderless text area.
                let textWidget = this.widgets && this.widgets.length > 0 && this.widgets.find(w => w.name === "displaytext");
                if (!textWidget) {
                    textWidget = ComfyWidgets["STRING"](this, "displaytext", ["STRING", { multiline: true }], app).widget;
                    textWidget.inputEl.readOnly = true;
                    textWidget.inputEl.style.border = "none";
                    textWidget.inputEl.style.backgroundColor = "transparent";
                }
                textWidget.value = message["text"].join("");
            };
        }
    },
});
app.registerExtension({
    name: "Comfy.RES4LYF.DynamicWidgets",
    async setup(app) {
        // Mirror a settings change to the Python backend (fire-and-forget;
        // failures are logged only). Deduplicates the three copies of this
        // fetch that each onChange handler previously carried.
        function postSetting(setting, value) {
            fetch('/reslyf/settings', {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/json'
                },
                body: JSON.stringify({
                    setting: setting,
                    value: value
                })
            }).catch(error => {
                debugLog(`Error updating ${setting} setting: ${error}`);
            });
        }

        app.ui.settings.addSetting({
            id: "RES4LYF.topClownDog",
            name: "RES4LYF: Top ClownDog",
            defaultValue: true,
            type: "boolean",
            options: [
                { value: true, text: "On" },
                { value: false, text: "Off" },
            ],
            onChange: (value) => {
                TOP_CLOWNDOG = value;
                debugLog(`Top ClownDog ${value ? "enabled" : "disabled"}`);
                postSetting("topClownDog", value);
            },
        });

        app.ui.settings.addSetting({
            id: "RES4LYF.enableDebugLogs",
            name: "RES4LYF: Enable debug logging to console",
            defaultValue: false,
            type: "boolean",
            options: [
                { value: true, text: "On" },
                { value: false, text: "Off" },
            ],
            onChange: (value) => {
                RESDEBUG = value;
                debugLog(`Debug logging ${value ? "enabled" : "disabled"}`);
                postSetting("enableDebugLogs", value);
            },
        });

        app.ui.settings.addSetting({
            id: "RES4LYF.displayCategory",
            name: "RES4LYF: Display Category in Sampler Names (requires browser refresh)",
            defaultValue: true,
            type: "boolean",
            options: [
                { value: true, text: "On" },
                { value: false, text: "Off" },
            ],
            onChange: (value) => {
                DISPLAY_CATEGORY = value;
                debugLog(`Display Category ${value ? "enabled" : "disabled"}`);
                postSetting("displayCategory", value);
            },
        });
    },

    nodeCreated(node) {
        // Attach the dynamic "options" input behavior to supported samplers.
        if (NODES_WITH_EXPANDABLE_OPTIONS.includes(node.comfyClass)) {
            setupExpandableOptions(node);
        }
    }
});
// Node classes whose "options" input list grows/shrinks dynamically
// (handled by setupExpandableOptions below).
const NODES_WITH_EXPANDABLE_OPTIONS = [
    "ClownsharKSampler_Beta",
    "ClownsharkChainsampler_Beta",
    "SharkChainsampler_Beta",
    "ClownSampler_Beta",
    "ClownSamplerAdvanced_Beta",
    "SharkSampler",
    "SharkSampler_Beta",
    "SharkSamplerAdvanced_Beta",
    "ClownOptions_Combine",
];
// Attaches dynamic "options" input management to `node`: whenever every
// "options" socket is connected a fresh numbered one is appended, and on
// disconnect redundant empty sockets are pruned and renumbered.
function setupExpandableOptions(node) {
    // Guard: wire each node up exactly once (configure/clone can re-trigger).
    if (!processedNodeMap.has(node)) {
        processedNodeMap.set(node, ++nodeCounter);
    } else {
        return;
    }

    const originalOnConnectionsChange = node.onConnectionsChange;

    // Only nodes declaring a base "options" input participate.
    const hasOptionsInput = node.inputs.some(input => input.name === "options");
    if (!hasOptionsInput) {
        return;
    }

    node.onConnectionsChange = function(type, index, connected, link_info) {
        if (originalOnConnectionsChange) {
            originalOnConnectionsChange.call(this, type, index, connected, link_info);
        }

        if (type === LiteGraph.INPUT && !connected) {
            const input = this.inputs[index];
            if (!input || !input.name.startsWith("options")) {
                return;
            }
            // setTimeout to let the graph update first
            setTimeout(() => {
                cleanupOptionsInputs(this);
            }, 100);
            return;
        }

        if (type === LiteGraph.INPUT && connected && link_info) {
            const input = this.inputs[index];
            if (!input || !input.name.startsWith("options")) {
                return;
            }

            // Is any options socket still free?
            let hasEmptyOptions = false;
            for (let i = 0; i < this.inputs.length; i++) {
                const input = this.inputs[i];
                if (input.name.startsWith("options") && input.link === null) {
                    hasEmptyOptions = true;
                    break;
                }
            }

            if (!hasEmptyOptions) {
                // Find the highest index number in existing options inputs
                let maxIndex = 0;
                for (let i = 0; i < this.inputs.length; i++) {
                    const input = this.inputs[i];
                    if (input.name === "options") {
                        continue; // Skip the base "options" input
                    } else if (input.name.startsWith("options ")) {
                        const match = input.name.match(/options (\d+)/);
                        if (match) {
                            // Explicit radix (was parseInt without one).
                            const index = parseInt(match[1], 10) - 1;
                            maxIndex = Math.max(maxIndex, index);
                        }
                    }
                }

                // Numbered inputs start at "options 2".
                const newName = maxIndex === 0 ? "options 2" : `options ${maxIndex + 2}`;
                this.addInput(newName, "OPTIONS");
                this.setDirtyCanvas(true, true);
            }
        }
    };

    // Re-run the "append a free socket" logic when a saved workflow loads,
    // since it may have every options input already connected.
    const originalOnConfigure = node.onConfigure;
    node.onConfigure = function(info) {
        if (originalOnConfigure) {
            originalOnConfigure.call(this, info);
        }

        let hasEmptyOptions = false;
        for (let i = 0; i < this.inputs.length; i++) {
            const input = this.inputs[i];
            if (input.name.startsWith("options") && input.link === null) {
                hasEmptyOptions = true;
                break;
            }
        }

        if (!hasEmptyOptions && this.inputs.some(i => i.name.startsWith("options"))) {
            let maxIndex = 0;
            for (let i = 0; i < this.inputs.length; i++) {
                const input = this.inputs[i];
                if (input.name === "options") {
                    continue;
                } else if (input.name.startsWith("options ")) {
                    const match = input.name.match(/options (\d+)/);
                    if (match) {
                        const index = parseInt(match[1], 10) - 1;
                        maxIndex = Math.max(maxIndex, index);
                    }
                }
            }
            const newName = maxIndex === 0 ? "options 2" : `options ${maxIndex + 2}`;
            this.addInput(newName, "OPTIONS");
        }
    };

    // Removes redundant empty options inputs after a disconnect, keeping one
    // free socket while any options input remains connected. Declared as a
    // function declaration so it is hoisted for the setTimeout above.
    function cleanupOptionsInputs(node) {
        const optionsInputs = [];
        for (let i = 0; i < node.inputs.length; i++) {
            const input = node.inputs[i];
            if (input.name.startsWith("options")) {
                optionsInputs.push({
                    index: i,
                    name: input.name,
                    connected: input.link !== null,
                    isBase: input.name === "options"
                });
            }
        }

        const baseInput = optionsInputs.find(info => info.isBase);
        const nonBaseInputs = optionsInputs.filter(info => !info.isBase);
        let needsRenumbering = false;

        // Nothing connected at all: drop every numbered options input.
        if (baseInput && !baseInput.connected && nonBaseInputs.every(info => !info.connected)) {
            // Remove back-to-front so earlier indices stay valid.
            nonBaseInputs.sort((a, b) => b.index - a.index);
            for (const inputInfo of nonBaseInputs) {
                node.removeInput(inputInfo.index);
                needsRenumbering = true;
            }
            node.setDirtyCanvas(true, true);
            return;
        }

        // Keep at most one disconnected numbered input.
        const disconnectedInputs = nonBaseInputs.filter(info => !info.connected);
        if (disconnectedInputs.length > 1) {
            disconnectedInputs.sort((a, b) => b.index - a.index);
            for (let i = 1; i < disconnectedInputs.length; i++) {
                node.removeInput(disconnectedInputs[i].index);
                needsRenumbering = true;
            }
        }

        // Every remaining options input is connected: add a fresh socket.
        const hasConnectedOptions = optionsInputs.some(info => info.connected);
        const hasEmptyOptions = optionsInputs.some(info => !info.connected && !info.isBase);
        if (hasConnectedOptions && !hasEmptyOptions) {
            node.addInput("options temp", "OPTIONS");
            needsRenumbering = true;
        }

        if (needsRenumbering) {
            renumberOptionsInputs(node);
            node.setDirtyCanvas(true, true);
        }
    }

    // Renames numbered options inputs into a dense "options 2..N" sequence,
    // listing connected inputs first.
    function renumberOptionsInputs(node) {
        const optionsInfo = [];
        for (let i = 0; i < node.inputs.length; i++) {
            const input = node.inputs[i];
            if (input.name.startsWith("options")) {
                if (input.name === "options") {
                    continue;
                }
                optionsInfo.push({
                    index: i,
                    connected: input.link !== null,
                    name: input.name
                });
            }
        }

        optionsInfo.sort((a, b) => {
            if (a.connected !== b.connected) {
                return b.connected ? 1 : -1; // Connected inputs first
            }
            return a.index - b.index;
        });

        for (let i = 0; i < optionsInfo.length; i++) {
            const inputInfo = optionsInfo[i];
            const newName = `options ${i + 2}`;
            if (inputInfo.name !== newName) {
                node.inputs[inputInfo.index].name = newName;
            }
        }
    }
}
================================================
FILE: web/js/conditioningToBase64.js
================================================
import { app } from "../../../scripts/app.js";
import { ComfyWidgets } from "../../../scripts/widgets.js";
// Displays input text on a node
app.registerExtension({
    name: "res4lyf.ConditioningToBase64",
    async beforeRegisterNodeDef(nodeType, nodeData, app) {
        if (nodeData.name === "ConditioningToBase64") {
            // Rebuilds the node's display widgets: keeps widget 0 (the node's
            // own widget) and creates one read-only STRING widget per entry.
            function populate(text) {
                if (this.widgets) {
                    // Remove previously created display widgets (index >= 1).
                    for (let i = 1; i < this.widgets.length; i++) {
                        this.widgets[i].onRemove?.();
                    }
                    this.widgets.length = 1;
                }

                const v = [...text];
                if (!v[0]) {
                    // Drop a leading empty entry so it doesn't render blank.
                    v.shift();
                }

                for (const list of v) {
                    const w = ComfyWidgets["STRING"](this, "text2", ["STRING", { multiline: true }], app).widget;
                    w.inputEl.readOnly = true;
                    w.inputEl.style.opacity = 0.6;
                    w.value = list;
                }

                requestAnimationFrame(() => {
                    // Grow the node (never shrink it) to fit the new widgets.
                    const sz = this.computeSize();
                    if (sz[0] < this.size[0]) {
                        sz[0] = this.size[0];
                    }
                    if (sz[1] < this.size[1]) {
                        sz[1] = this.size[1];
                    }
                    this.onResize?.(sz);
                    app.graph.setDirtyCanvas(true, false);
                });
            }

            // When the node is executed we will be sent the input text, display this in the widget
            const onExecuted = nodeType.prototype.onExecuted;
            nodeType.prototype.onExecuted = function (message) {
                onExecuted?.apply(this, arguments);
                populate.call(this, message.text);
            };

            const onConfigure = nodeType.prototype.onConfigure;
            nodeType.prototype.onConfigure = function () {
                onConfigure?.apply(this, arguments);
                if (this.widgets_values?.length) {
                    // `+this.widgets_values.length > 1` evaluates to a boolean
                    // that slice() coerces to 1 (skip the node's own widget
                    // value) when more than one value was saved, else 0.
                    populate.call(this, this.widgets_values.slice(+this.widgets_values.length > 1));
                }
            };
        }
    },
});
================================================
FILE: web/js/res4lyf.default.json
================================================
{
"name": "RES4LYF",
"topClownDog": true,
"enableDebugLogs": false,
"displayCategory": true
}