Repository: ClownsharkBatwing/RES4LYF Branch: main Commit: 0dc91c00c4c3 Files: 134 Total size: 3.9 MB Directory structure: gitextract__f8xgulp/ ├── .gitignore ├── LICENSE ├── README.md ├── __init__.py ├── attention_masks.py ├── aura/ │ └── mmdit.py ├── beta/ │ ├── __init__.py │ ├── constants.py │ ├── deis_coefficients.py │ ├── noise_classes.py │ ├── phi_functions.py │ ├── rk_coefficients_beta.py │ ├── rk_guide_func_beta.py │ ├── rk_method_beta.py │ ├── rk_noise_sampler_beta.py │ ├── rk_sampler_beta.py │ ├── samplers.py │ └── samplers_extensions.py ├── chroma/ │ ├── layers.py │ ├── math.py │ └── model.py ├── conditioning.py ├── example_workflows/ │ ├── chroma regional antiblur.json │ ├── chroma txt2img.json │ ├── comparison ksampler vs csksampler chain workflows.json │ ├── flux faceswap sync pulid.json │ ├── flux faceswap sync.json │ ├── flux faceswap.json │ ├── flux inpaint area.json │ ├── flux inpaint bongmath.json │ ├── flux inpainting.json │ ├── flux regional antiblur.json │ ├── flux regional redux (2 zone).json │ ├── flux regional redux (3 zone, nested).json │ ├── flux regional redux (3 zone, overlapping).json │ ├── flux regional redux (3 zones).json │ ├── flux style antiblur.json │ ├── flux style transfer gguf.json │ ├── flux upscale thumbnail large multistage.json │ ├── flux upscale thumbnail large.json │ ├── flux upscale thumbnail widescreen.json │ ├── hidream guide data projection.json │ ├── hidream guide epsilon projection.json │ ├── hidream guide flow.json │ ├── hidream guide fully_pseudoimplicit.json │ ├── hidream guide lure.json │ ├── hidream guide pseudoimplicit.json │ ├── hidream hires fix.json │ ├── hidream regional 3 zones.json │ ├── hidream regional antiblur.json │ ├── hidream style antiblur.json │ ├── hidream style transfer txt2img.json │ ├── hidream style transfer v2.json │ ├── hidream style transfer.json │ ├── hidream txt2img.json │ ├── hidream unsampling data WF.json │ ├── hidream unsampling data.json │ ├── hidream unsampling 
pseudoimplicit.json │ ├── hidream unsampling.json │ ├── intro to clownsampling.json │ ├── sd35 medium unsampling data.json │ ├── sd35 medium unsampling.json │ ├── sdxl regional antiblur.json │ ├── sdxl style transfer.json │ ├── style transfer.json │ ├── ultracascade txt2img style transfer.json │ ├── ultracascade txt2img.json │ ├── wan img2vid 720p (fp8 fast).json │ ├── wan txt2img (fp8 fast).json │ └── wan vid2vid.json ├── flux/ │ ├── controlnet.py │ ├── layers.py │ ├── math.py │ ├── model.py │ └── redux.py ├── helper.py ├── helper_sigma_preview_image_preproc.py ├── hidream/ │ └── model.py ├── images.py ├── latent_images.py ├── latents.py ├── legacy/ │ ├── __init__.py │ ├── conditioning.py │ ├── constants.py │ ├── deis_coefficients.py │ ├── flux/ │ │ ├── controlnet.py │ │ ├── layers.py │ │ ├── math.py │ │ ├── model.py │ │ └── redux.py │ ├── helper.py │ ├── latents.py │ ├── legacy_sampler_rk.py │ ├── legacy_samplers.py │ ├── models.py │ ├── noise_classes.py │ ├── noise_sigmas_timesteps_scaling.py │ ├── phi_functions.py │ ├── rk_coefficients.py │ ├── rk_guide_func.py │ ├── rk_method.py │ ├── rk_sampler.py │ ├── samplers.py │ ├── samplers_extensions.py │ ├── samplers_tiled.py │ ├── sigmas.py │ └── tiling.py ├── lightricks/ │ ├── model.py │ ├── symmetric_patchifier.py │ └── vae/ │ ├── causal_conv3d.py │ ├── causal_video_autoencoder.py │ ├── conv_nd_factory.py │ ├── dual_conv3d.py │ └── pixel_norm.py ├── loaders.py ├── misc_scripts/ │ └── replace_metadata.py ├── models.py ├── nodes_latents.py ├── nodes_misc.py ├── nodes_precision.py ├── requirements.txt ├── res4lyf.py ├── rk_method_beta.py ├── samplers_extensions.py ├── sd/ │ ├── attention.py │ └── openaimodel.py ├── sd35/ │ └── mmdit.py ├── sigmas.py ├── style_transfer.py ├── wan/ │ ├── model.py │ └── vae.py └── web/ └── js/ ├── RES4LYF_dynamicWidgets.js ├── conditioningToBase64.js └── res4lyf.default.json ================================================ FILE CONTENTS ================================================ 
================================================ FILE: .gitignore ================================================ __pycache__/ .idea/ .vscode/ .tmp .cache tests/ /*.json *.config.json ================================================ FILE: LICENSE ================================================ The use of this software or any derivative work for the purpose of providing a commercial service, such as (but not limited to) an AI image generation service, is strictly prohibited without obtaining permission and/or a separate commercial license from the copyright holder. This includes any service that charges users directly or indirectly for access to this software's functionality, whether standalone or integrated into a larger product. GNU AFFERO GENERAL PUBLIC LICENSE Version 3, 19 November 2007 Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/> Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The GNU Affero General Public License is a free, copyleft license for software and other kinds of works, specifically designed to ensure cooperation with the community in the case of network server software. The licenses for most software and other practical works are designed to take away your freedom to share and change the works. By contrast, our General Public Licenses are intended to guarantee your freedom to share and change all versions of a program--to make sure it remains free software for all its users. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for them if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs, and that you know you can do these things. 
Developers that use our General Public Licenses protect your rights with two steps: (1) assert copyright on the software, and (2) offer you this License which gives you legal permission to copy, distribute and/or modify the software. A secondary benefit of defending all users' freedom is that improvements made in alternate versions of the program, if they receive widespread use, become available for other developers to incorporate. Many developers of free software are heartened and encouraged by the resulting cooperation. However, in the case of software used on network servers, this result may fail to come about. The GNU General Public License permits making a modified version and letting the public access it on a server without ever releasing its source code to the public. The GNU Affero General Public License is designed specifically to ensure that, in such cases, the modified source code becomes available to the community. It requires the operator of a network server to provide the source code of the modified version running there to the users of that server. Therefore, public use of a modified version, on a publicly accessible server, gives the public access to the source code of the modified version. An older license, called the Affero General Public License and published by Affero, was designed to accomplish similar goals. This is a different license, not a version of the Affero GPL, but Affero has released a new version of the Affero GPL which permits relicensing under this license. The precise terms and conditions for copying, distribution and modification follow. TERMS AND CONDITIONS 0. Definitions. "This License" refers to version 3 of the GNU Affero General Public License. "Copyright" also means copyright-like laws that apply to other kinds of works, such as semiconductor masks. "The Program" refers to any copyrightable work licensed under this License. Each licensee is addressed as "you". 
"Licensees" and "recipients" may be individuals or organizations. To "modify" a work means to copy from or adapt all or part of the work in a fashion requiring copyright permission, other than the making of an exact copy. The resulting work is called a "modified version" of the earlier work or a work "based on" the earlier work. A "covered work" means either the unmodified Program or a work based on the Program. To "propagate" a work means to do anything with it that, without permission, would make you directly or secondarily liable for infringement under applicable copyright law, except executing it on a computer or modifying a private copy. Propagation includes copying, distribution (with or without modification), making available to the public, and in some countries other activities as well. To "convey" a work means any kind of propagation that enables other parties to make or receive copies. Mere interaction with a user through a computer network, with no transfer of a copy, is not conveying. An interactive user interface displays "Appropriate Legal Notices" to the extent that it includes a convenient and prominently visible feature that (1) displays an appropriate copyright notice, and (2) tells the user that there is no warranty for the work (except to the extent that warranties are provided), that licensees may convey the work under this License, and how to view a copy of this License. If the interface presents a list of user commands or options, such as a menu, a prominent item in the list meets this criterion. 1. Source Code. The "source code" for a work means the preferred form of the work for making modifications to it. "Object code" means any non-source form of a work. A "Standard Interface" means an interface that either is an official standard defined by a recognized standards body, or, in the case of interfaces specified for a particular programming language, one that is widely used among developers working in that language. 
The "System Libraries" of an executable work include anything, other than the work as a whole, that (a) is included in the normal form of packaging a Major Component, but which is not part of that Major Component, and (b) serves only to enable use of the work with that Major Component, or to implement a Standard Interface for which an implementation is available to the public in source code form. A "Major Component", in this context, means a major essential component (kernel, window system, and so on) of the specific operating system (if any) on which the executable work runs, or a compiler used to produce the work, or an object code interpreter used to run it. The "Corresponding Source" for a work in object code form means all the source code needed to generate, install, and (for an executable work) run the object code and to modify the work, including scripts to control those activities. However, it does not include the work's System Libraries, or general-purpose tools or generally available free programs which are used unmodified in performing those activities but which are not part of the work. For example, Corresponding Source includes interface definition files associated with source files for the work, and the source code for shared libraries and dynamically linked subprograms that the work is specifically designed to require, such as by intimate data communication or control flow between those subprograms and other parts of the work. The Corresponding Source need not include anything that users can regenerate automatically from other parts of the Corresponding Source. The Corresponding Source for a work in source code form is that same work. 2. Basic Permissions. All rights granted under this License are granted for the term of copyright on the Program, and are irrevocable provided the stated conditions are met. This License explicitly affirms your unlimited permission to run the unmodified Program. 
The output from running a covered work is covered by this License only if the output, given its content, constitutes a covered work. This License acknowledges your rights of fair use or other equivalent, as provided by copyright law. You may make, run and propagate covered works that you do not convey, without conditions so long as your license otherwise remains in force. You may convey covered works to others for the sole purpose of having them make modifications exclusively for you, or provide you with facilities for running those works, provided that you comply with the terms of this License in conveying all material for which you do not control copyright. Those thus making or running the covered works for you must do so exclusively on your behalf, under your direction and control, on terms that prohibit them from making any copies of your copyrighted material outside their relationship with you. Conveying under any other circumstances is permitted solely under the conditions stated below. Sublicensing is not allowed; section 10 makes it unnecessary. 3. Protecting Users' Legal Rights From Anti-Circumvention Law. No covered work shall be deemed part of an effective technological measure under any applicable law fulfilling obligations under article 11 of the WIPO copyright treaty adopted on 20 December 1996, or similar laws prohibiting or restricting circumvention of such measures. When you convey a covered work, you waive any legal power to forbid circumvention of technological measures to the extent such circumvention is effected by exercising rights under this License with respect to the covered work, and you disclaim any intention to limit operation or modification of the work as a means of enforcing, against the work's users, your or third parties' legal rights to forbid circumvention of technological measures. 4. Conveying Verbatim Copies. 
You may convey verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice; keep intact all notices stating that this License and any non-permissive terms added in accord with section 7 apply to the code; keep intact all notices of the absence of any warranty; and give all recipients a copy of this License along with the Program. You may charge any price or no price for each copy that you convey, and you may offer support or warranty protection for a fee. 5. Conveying Modified Source Versions. You may convey a work based on the Program, or the modifications to produce it from the Program, in the form of source code under the terms of section 4, provided that you also meet all of these conditions: a) The work must carry prominent notices stating that you modified it, and giving a relevant date. b) The work must carry prominent notices stating that it is released under this License and any conditions added under section 7. This requirement modifies the requirement in section 4 to "keep intact all notices". c) You must license the entire work, as a whole, under this License to anyone who comes into possession of a copy. This License will therefore apply, along with any applicable section 7 additional terms, to the whole of the work, and all its parts, regardless of how they are packaged. This License gives no permission to license the work in any other way, but it does not invalidate such permission if you have separately received it. d) If the work has interactive user interfaces, each must display Appropriate Legal Notices; however, if the Program has interactive interfaces that do not display Appropriate Legal Notices, your work need not make them do so. 
A compilation of a covered work with other separate and independent works, which are not by their nature extensions of the covered work, and which are not combined with it such as to form a larger program, in or on a volume of a storage or distribution medium, is called an "aggregate" if the compilation and its resulting copyright are not used to limit the access or legal rights of the compilation's users beyond what the individual works permit. Inclusion of a covered work in an aggregate does not cause this License to apply to the other parts of the aggregate. 6. Conveying Non-Source Forms. You may convey a covered work in object code form under the terms of sections 4 and 5, provided that you also convey the machine-readable Corresponding Source under the terms of this License, in one of these ways: a) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by the Corresponding Source fixed on a durable physical medium customarily used for software interchange. b) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by a written offer, valid for at least three years and valid for as long as you offer spare parts or customer support for that product model, to give anyone who possesses the object code either (1) a copy of the Corresponding Source for all the software in the product that is covered by this License, on a durable physical medium customarily used for software interchange, for a price no more than your reasonable cost of physically performing this conveying of source, or (2) access to copy the Corresponding Source from a network server at no charge. c) Convey individual copies of the object code with a copy of the written offer to provide the Corresponding Source. This alternative is allowed only occasionally and noncommercially, and only if you received the object code with such an offer, in accord with subsection 6b. 
d) Convey the object code by offering access from a designated place (gratis or for a charge), and offer equivalent access to the Corresponding Source in the same way through the same place at no further charge. You need not require recipients to copy the Corresponding Source along with the object code. If the place to copy the object code is a network server, the Corresponding Source may be on a different server (operated by you or a third party) that supports equivalent copying facilities, provided you maintain clear directions next to the object code saying where to find the Corresponding Source. Regardless of what server hosts the Corresponding Source, you remain obligated to ensure that it is available for as long as needed to satisfy these requirements. e) Convey the object code using peer-to-peer transmission, provided you inform other peers where the object code and Corresponding Source of the work are being offered to the general public at no charge under subsection 6d. A separable portion of the object code, whose source code is excluded from the Corresponding Source as a System Library, need not be included in conveying the object code work. A "User Product" is either (1) a "consumer product", which means any tangible personal property which is normally used for personal, family, or household purposes, or (2) anything designed or sold for incorporation into a dwelling. In determining whether a product is a consumer product, doubtful cases shall be resolved in favor of coverage. For a particular product received by a particular user, "normally used" refers to a typical or common use of that class of product, regardless of the status of the particular user or of the way in which the particular user actually uses, or expects or is expected to use, the product. 
A product is a consumer product regardless of whether the product has substantial commercial, industrial or non-consumer uses, unless such uses represent the only significant mode of use of the product. "Installation Information" for a User Product means any methods, procedures, authorization keys, or other information required to install and execute modified versions of a covered work in that User Product from a modified version of its Corresponding Source. The information must suffice to ensure that the continued functioning of the modified object code is in no case prevented or interfered with solely because modification has been made. If you convey an object code work under this section in, or with, or specifically for use in, a User Product, and the conveying occurs as part of a transaction in which the right of possession and use of the User Product is transferred to the recipient in perpetuity or for a fixed term (regardless of how the transaction is characterized), the Corresponding Source conveyed under this section must be accompanied by the Installation Information. But this requirement does not apply if neither you nor any third party retains the ability to install modified object code on the User Product (for example, the work has been installed in ROM). The requirement to provide Installation Information does not include a requirement to continue to provide support service, warranty, or updates for a work that has been modified or installed by the recipient, or for the User Product in which it has been modified or installed. Access to a network may be denied when the modification itself materially and adversely affects the operation of the network or violates the rules and protocols for communication across the network. 
Corresponding Source conveyed, and Installation Information provided, in accord with this section must be in a format that is publicly documented (and with an implementation available to the public in source code form), and must require no special password or key for unpacking, reading or copying. 7. Additional Terms. "Additional permissions" are terms that supplement the terms of this License by making exceptions from one or more of its conditions. Additional permissions that are applicable to the entire Program shall be treated as though they were included in this License, to the extent that they are valid under applicable law. If additional permissions apply only to part of the Program, that part may be used separately under those permissions, but the entire Program remains governed by this License without regard to the additional permissions. When you convey a copy of a covered work, you may at your option remove any additional permissions from that copy, or from any part of it. (Additional permissions may be written to require their own removal in certain cases when you modify the work.) You may place additional permissions on material, added by you to a covered work, for which you have or can give appropriate copyright permission. 
Notwithstanding any other provision of this License, for material you add to a covered work, you may (if authorized by the copyright holders of that material) supplement the terms of this License with terms: a) Disclaiming warranty or limiting liability differently from the terms of sections 15 and 16 of this License; or b) Requiring preservation of specified reasonable legal notices or author attributions in that material or in the Appropriate Legal Notices displayed by works containing it; or c) Prohibiting misrepresentation of the origin of that material, or requiring that modified versions of such material be marked in reasonable ways as different from the original version; or d) Limiting the use for publicity purposes of names of licensors or authors of the material; or e) Declining to grant rights under trademark law for use of some trade names, trademarks, or service marks; or f) Requiring indemnification of licensors and authors of that material by anyone who conveys the material (or modified versions of it) with contractual assumptions of liability to the recipient, for any liability that these contractual assumptions directly impose on those licensors and authors. All other non-permissive additional terms are considered "further restrictions" within the meaning of section 10. If the Program as you received it, or any part of it, contains a notice stating that it is governed by this License along with a term that is a further restriction, you may remove that term. If a license document contains a further restriction but permits relicensing or conveying under this License, you may add to a covered work material governed by the terms of that license document, provided that the further restriction does not survive such relicensing or conveying. 
If you add terms to a covered work in accord with this section, you must place, in the relevant source files, a statement of the additional terms that apply to those files, or a notice indicating where to find the applicable terms. Additional terms, permissive or non-permissive, may be stated in the form of a separately written license, or stated as exceptions; the above requirements apply either way. 8. Termination. You may not propagate or modify a covered work except as expressly provided under this License. Any attempt otherwise to propagate or modify it is void, and will automatically terminate your rights under this License (including any patent licenses granted under the third paragraph of section 11). However, if you cease all violation of this License, then your license from a particular copyright holder is reinstated (a) provisionally, unless and until the copyright holder explicitly and finally terminates your license, and (b) permanently, if the copyright holder fails to notify you of the violation by some reasonable means prior to 60 days after the cessation. Moreover, your license from a particular copyright holder is reinstated permanently if the copyright holder notifies you of the violation by some reasonable means, this is the first time you have received notice of violation of this License (for any work) from that copyright holder, and you cure the violation prior to 30 days after your receipt of the notice. Termination of your rights under this section does not terminate the licenses of parties who have received copies or rights from you under this License. If your rights have been terminated and not permanently reinstated, you do not qualify to receive new licenses for the same material under section 10. 9. Acceptance Not Required for Having Copies. You are not required to accept this License in order to receive or run a copy of the Program. 
Ancillary propagation of a covered work occurring solely as a consequence of using peer-to-peer transmission to receive a copy likewise does not require acceptance. However, nothing other than this License grants you permission to propagate or modify any covered work. These actions infringe copyright if you do not accept this License. Therefore, by modifying or propagating a covered work, you indicate your acceptance of this License to do so. 10. Automatic Licensing of Downstream Recipients. Each time you convey a covered work, the recipient automatically receives a license from the original licensors, to run, modify and propagate that work, subject to this License. You are not responsible for enforcing compliance by third parties with this License. An "entity transaction" is a transaction transferring control of an organization, or substantially all assets of one, or subdividing an organization, or merging organizations. If propagation of a covered work results from an entity transaction, each party to that transaction who receives a copy of the work also receives whatever licenses to the work the party's predecessor in interest had or could give under the previous paragraph, plus a right to possession of the Corresponding Source of the work from the predecessor in interest, if the predecessor has it or can get it with reasonable efforts. You may not impose any further restrictions on the exercise of the rights granted or affirmed under this License. For example, you may not impose a license fee, royalty, or other charge for exercise of rights granted under this License, and you may not initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging that any patent claim is infringed by making, using, selling, offering for sale, or importing the Program or any portion of it. 11. Patents. A "contributor" is a copyright holder who authorizes use under this License of the Program or a work on which the Program is based. 
The work thus licensed is called the contributor's "contributor version". A contributor's "essential patent claims" are all patent claims owned or controlled by the contributor, whether already acquired or hereafter acquired, that would be infringed by some manner, permitted by this License, of making, using, or selling its contributor version, but do not include claims that would be infringed only as a consequence of further modification of the contributor version. For purposes of this definition, "control" includes the right to grant patent sublicenses in a manner consistent with the requirements of this License. Each contributor grants you a non-exclusive, worldwide, royalty-free patent license under the contributor's essential patent claims, to make, use, sell, offer for sale, import and otherwise run, modify and propagate the contents of its contributor version. In the following three paragraphs, a "patent license" is any express agreement or commitment, however denominated, not to enforce a patent (such as an express permission to practice a patent or covenant not to sue for patent infringement). To "grant" such a patent license to a party means to make such an agreement or commitment not to enforce a patent against the party. If you convey a covered work, knowingly relying on a patent license, and the Corresponding Source of the work is not available for anyone to copy, free of charge and under the terms of this License, through a publicly available network server or other readily accessible means, then you must either (1) cause the Corresponding Source to be so available, or (2) arrange to deprive yourself of the benefit of the patent license for this particular work, or (3) arrange, in a manner consistent with the requirements of this License, to extend the patent license to downstream recipients. 
"Knowingly relying" means you have actual knowledge that, but for the patent license, your conveying the covered work in a country, or your recipient's use of the covered work in a country, would infringe one or more identifiable patents in that country that you have reason to believe are valid. If, pursuant to or in connection with a single transaction or arrangement, you convey, or propagate by procuring conveyance of, a covered work, and grant a patent license to some of the parties receiving the covered work authorizing them to use, propagate, modify or convey a specific copy of the covered work, then the patent license you grant is automatically extended to all recipients of the covered work and works based on it. A patent license is "discriminatory" if it does not include within the scope of its coverage, prohibits the exercise of, or is conditioned on the non-exercise of one or more of the rights that are specifically granted under this License. You may not convey a covered work if you are a party to an arrangement with a third party that is in the business of distributing software, under which you make payment to the third party based on the extent of your activity of conveying the work, and under which the third party grants, to any of the parties who would receive the covered work from you, a discriminatory patent license (a) in connection with copies of the covered work conveyed by you (or copies made from those copies), or (b) primarily for and in connection with specific products or compilations that contain the covered work, unless you entered into that arrangement, or that patent license was granted, prior to 28 March 2007. Nothing in this License shall be construed as excluding or limiting any implied license or other defenses to infringement that may otherwise be available to you under applicable patent law. 12. No Surrender of Others' Freedom. 
If conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot convey a covered work so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not convey it at all. For example, if you agree to terms that obligate you to collect a royalty for further conveying from those to whom you convey the Program, the only way you could satisfy both those terms and this License would be to refrain entirely from conveying the Program. 13. Remote Network Interaction; Use with the GNU General Public License. Notwithstanding any other provision of this License, if you modify the Program, your modified version must prominently offer all users interacting with it remotely through a computer network (if your version supports such interaction) an opportunity to receive the Corresponding Source of your version by providing access to the Corresponding Source from a network server at no charge, through some standard or customary means of facilitating copying of software. This Corresponding Source shall include the Corresponding Source for any work covered by version 3 of the GNU General Public License that is incorporated pursuant to the following paragraph. Notwithstanding any other provision of this License, you have permission to link or combine any covered work with a work licensed under version 3 of the GNU General Public License into a single combined work, and to convey the resulting work. The terms of this License will continue to apply to the part which is the covered work, but the work with which it is combined will remain governed by version 3 of the GNU General Public License. 14. Revised Versions of this License. The Free Software Foundation may publish revised and/or new versions of the GNU Affero General Public License from time to time. 
Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies that a certain numbered version of the GNU Affero General Public License "or any later version" applies to it, you have the option of following the terms and conditions either of that numbered version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of the GNU Affero General Public License, you may choose any version ever published by the Free Software Foundation. If the Program specifies that a proxy can decide which future versions of the GNU Affero General Public License can be used, that proxy's public statement of acceptance of a version permanently authorizes you to choose that version for the Program. Later license versions may give you additional or different permissions. However, no additional obligations are imposed on any author or copyright holder as a result of your choosing to follow a later version. 15. Disclaimer of Warranty. THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 16. Limitation of Liability. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 17. Interpretation of Sections 15 and 16. If the disclaimer of warranty and limitation of liability provided above cannot be given local legal effect according to their terms, reviewing courts shall apply local law that most closely approximates an absolute waiver of all civil liability in connection with the Program, unless a warranty or assumption of liability accompanies a copy of the Program in return for a fee. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively state the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. <one line to give the program's name and a brief idea of what it does.> Copyright (C) <year> <name of author> This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. 
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see <https://www.gnu.org/licenses/>. Also add information on how to contact you by electronic and paper mail. If your software can interact with users remotely through a computer network, you should also make sure that it provides a way for users to get its source. For example, if your program is a web application, its interface could display a "Source" link that leads users to an archive of the code. There are many ways you could offer source, and different solutions will be better for different programs; see section 13 for the specific requirements. You should also get your employer (if you work as a programmer) or school, if any, to sign a "copyright disclaimer" for the program, if necessary. For more information on this, and how to apply and follow the GNU AGPL, see <https://www.gnu.org/licenses/>. ================================================ FILE: README.md ================================================ # SUPERIOR SAMPLING WITH RES4LYF: THE POWER OF BONGMATH RES_3M vs. Uni-PC (WAN). Typically only 20 steps are needed with RES samplers. Far more are needed with Uni-PC and other common samplers, and they never reach the same level of quality. 
![res_3m_vs_unipc_1](https://github.com/user-attachments/assets/9321baf9-2d68-4fe8-9427-fcf0609bd02b) ![res_3m_vs_unipc_2](https://github.com/user-attachments/assets/d7ab48e4-51dd-4fa7-8622-160c8f9e33d6) # INSTALLATION If you are using a venv, you will need to first run from within your ComfyUI folder (that contains your "venv" folder): _Linux:_ source venv/bin/activate _Windows:_ venv\Scripts\activate _Then, "cd" into your "custom_nodes" folder and run the following commands:_ git clone https://github.com/ClownsharkBatwing/RES4LYF/ cd RES4LYF _If you are using a venv, run these commands:_ pip install -r requirements.txt _Alternatively, if you are using the portable version of ComfyUI you will need to replace "pip" with the path to your embedded pip executable. For example, on Windows:_ X:\path\to\your\comfy_portable_folder\python_embedded\Scripts\pip.exe install -r requirements.txt # IMPORTANT UPDATE INFO The previous versions will remain available but with "Legacy" prepended to their names. If you wish to use the sampler menu shown below, you will need to install https://github.com/rgthree/rgthree-comfy (which I highly recommend you have regardless). ![image](https://github.com/user-attachments/assets/b36360bb-a59e-4654-aed7-6b6f53673826) If these menus do not show up after restarting ComfyUI and refreshing the page (hit F5, not just "r") verify that these menus are enabled in the rgthree settings (click the gear in the bottom left of ComfyUI, select rgthree, and ensure "Auto Nest Subdirectories" is checked): ![image](https://github.com/user-attachments/assets/db46fc90-df1a-4d1c-b6ed-c44d26b8a9b3) # NEW VERSION DOCUMENTATION I have prepared a detailed explanation of many of the concepts of sampling with examples in this workflow. There are also many tips, explanations of parameters, and all of the most important nodes are laid out for you to see. 
Some new workflow-enhancing tricks like "chainsamplers" are demonstrated, and **regional AND temporal prompting** are explained (supporting Flux, HiDream, SD3.5, AuraFlow, and WAN - you can even change the conditioning on a frame-by-frame basis!). [example_workflows/intro to clownsampling.json](https://github.com/ClownsharkBatwing/RES4LYF/blob/main/example_workflows/intro%20to%20clownsampling.json) ![intro to clownsampling](https://github.com/user-attachments/assets/40c23993-c70e-4a71-9207-4cee4b7e71e0) # STYLE TRANSFER Supported models: HiDream, Flux, Chroma, AuraFlow, SD1.5, SDXL, SD3.5, Stable Cascade, LTXV, and WAN. Also supported: Stable Cascade (and UltraPixel) which has an excellent understanding of style (https://github.com/ClownsharkBatwing/UltraCascade). Currently, best results are with HiDream or Chroma, or Flux with a style lora (Flux Dev is very lacking with style knowledge). Include some mention of the style you wish to use in the prompt. (Try with the guide off to confirm the prompt is not doing the heavy lifting!) ![image](https://github.com/user-attachments/assets/a62593fa-b104-4347-bf69-e1e50217ce2d) For example, the prompt for the below was simply "a gritty illustration of a japanese woman with traditional hair in traditional clothes". Mostly you just need to make clear whether it's supposed to be a photo or an illustration, etc. so that the conditioning isn't fighting the style guide (every model has its inherent biases). 
![image](https://github.com/user-attachments/assets/e872e258-c786-4475-8369-c8487ee5ec72) **COMPOSITION GUIDE; OUTPUT; STYLE GUIDE** ![style example](https://github.com/user-attachments/assets/4970c6ea-d142-4e4e-967a-59ff93528840) ![image](https://github.com/user-attachments/assets/fb071885-48b8-4698-9288-63a2866cb67b) # KILL FLUX BLUR (and HiDream blur) **Consecutive seeds, no cherrypicking.** ![antiblur](https://github.com/user-attachments/assets/5bc0e1e3-82e1-4ccc-8d39-64a939815e57) # REGIONAL CONDITIONING Unlimited zones! Over 10 zones have been used in one image before. Currently supported models: HiDream, Flux, Chroma, SD3.5, SD1.5, SDXL, AuraFlow, and WAN. Masks can be drawn freely, or more traditional rigid ones may be used, such as in this example: ![image](https://github.com/user-attachments/assets/edfb076a-78e2-4077-b53f-3e8bab07040a) ![ComfyUI_16020_](https://github.com/user-attachments/assets/5f45cdcb-f879-43ca-bcf4-bcae60aa4bbc) ![ComfyUI_12157_](https://github.com/user-attachments/assets/b9e385d2-3359-4a13-99b9-4a7243863b0d) ![ComfyUI_12039_](https://github.com/user-attachments/assets/6d36ae62-ce8c-41e3-b52c-823e9c1b1d50) # TEMPORAL CONDITIONING Unlimited zones! Ability to change the prompt for each frame. Currently supported models: WAN. ![image](https://github.com/user-attachments/assets/743bc972-cfbf-45a8-8745-d6ca1a6b0bab) ![temporal conditioning 09580](https://github.com/user-attachments/assets/eef0e04c-d1b2-49b7-a1ca-f8cb651dd3a7) # VIDEO 2 VIDEO EDITING Viable with any video model, demo with WAN: ![wan vid2vid compressed](https://github.com/user-attachments/assets/431c30f7-339e-4b86-8d02-6180b09b15b2) # PREVIOUS VERSION NODE DOCUMENTATION At the heart of this repository is the "ClownsharKSampler", which was specifically designed to support both rectified flow and probability flow models. 
It features 69 different selectable samplers (44 explicit, 18 fully implicit, 7 diagonally implicit) all available in both ODE and SDE modes with 20 noise types, 9 noise scaling modes, and options for implicit Runge-Kutta sampling refinement steps. Several new explicit samplers are implemented, most notably RES_2M, RES_3S, and RES_5S. Additionally, img2img capabilities include both latent image guidance and unsampling/resampling (via new forms of rectified noise inversion). A particular emphasis of this project has been to facilitate modulating parameters vs. time, which can facilitate large gains in image quality from the sampling process. To this end, a wide variety of sigma, latent, and noise manipulation nodes are included. Much of this work remains experimental and is subject to further changes. # ClownSampler ![image](https://github.com/user-attachments/assets/f787ad74-0d95-4d8f-84b6-af4c4c1ac5e5) # SharkSampler ![image](https://github.com/user-attachments/assets/299c9285-b298-4452-b0dd-48ae425ce30a) # ClownsharKSampler ![image](https://github.com/user-attachments/assets/430fb77a-7353-4b40-acb6-cbd33392f7fc) This is an all-in-one sampling node designed for convenience without compromising on control or quality. There are several key sections to the parameters which will be explained below. ## INPUTS ![image](https://github.com/user-attachments/assets/e8fe825d-2fb1-4e93-874c-89fb73ba68f7) The only two mandatory inputs here are "model" and "latent_image". **POSITIVE and NEGATIVE:** If you connect nothing to either of these inputs, the node will automatically generate null conditioning. If you are unsampling, you actually don't need to hook up any conditioning at all (and will set CFG = 1.0). In most cases, merely using the positive conditioning will suffice, unless you really need to use a specific negative prompt. **SIGMAS:** If a sigmas scheduler node is connected to this input, it will override the scheduler and steps settings chosen within the node. 
## NOISE SETTINGS ![image](https://github.com/user-attachments/assets/caaa41a4-5afa-4c3c-8fb2-003b9a6b2578) **NOISE_TYPE_INIT:** This sets the initial noise type applied to the latent image. **NOISE_TYPE_SDE:** This sets the noise type used during SDE sampling. Note that SDE sampling is identical to ODE sampling in most ways - the difference is that noise is added after each step. It's like a form of carefully controlled continuous noise injection. **NOISE_MODE_SDE:** This determines what method is used for scaling the amount of noise to be added based on the "eta" setting below. They are listed in order of strength of the effect. **ETA:** This controls how much noise is added after each step. Note that for most of the noise modes, anything equal to or greater than 1.0 will trigger internal scaling to prevent NaN errors. The exception is the noise mode "exp" which allows for settings far above 1.0. **NOISE_SEED:** Largely identical to the setting in KSampler. Set to -1 to have it increment the most recently used seed (by the workflow) by 1. **CONTROL_AFTER_GENERATE:** Self-explanatory. I recommend setting to "fixed" or "increment" (as you don't have to reload the workflow to regenerate something, you can just decrement it by one). ## SAMPLER SETTINGS ![image](https://github.com/user-attachments/assets/d5ef0bef-7388-44f0-a119-220beec9883d) **SAMPLER_MODE:** In virtually all situations, use "standard". However, if you are unsampling, set to "unsample", and if you are resampling (the stage after unsampling), set to "resample". Both of these modes will disable noise addition within ComfyUI, which is essential for these methods to work properly. **SAMPLER_NAME:** This is used similarly to the KSampler setting. This selects the explicit sampler type. Note the use of numbers and letters at the end of each sampler name: "2m, 3m, 2s, 3s, 5s, etc." Samplers that end in "s" use substeps between each step. 
One ending with "2s" has two stages per step, therefore costs two model calls per step (Euler costs one - model calls are what determine inference time). "3s" would take three model calls per step, and therefore take three times as long to run as Euler. However, the increase in accuracy can be very dramatic, especially when using noise (SDE sampling). The "res" family of samplers are particularly notable (they are effectively refinements of the dpmpp family, with new, higher order, much more accurate versions implemented here). Samplers that end in "m" are "multistep" samplers, which instead of issuing new model calls for substeps, recycle previous steps as estimations for these substeps. They're less accurate, but all run at Euler speed (one model call per step). Sometimes this can be an advantage, as multistep samplers tend to converge more linearly toward a target image. This can be useful for img2img transformations, unsampling, or when using latent image guides. **IMPLICIT_SAMPLER_NAME:** This is very useful with SD3.5 Medium for improving coherence, reducing artifacts and mutations, etc. It may be difficult to use with a model like Flux unless you plan on setting up a queue of generations and walking away. It will use the explicit step type as a predictor for each of the implicit substeps, so if you choose a slow explicit sampler, you will be waiting a long time. Euler, res_2m, deis_2m, etc. will often suffice as a predictor for implicit sampling, though any sampler may be used. Try "res_5s" as your explicit sampler type, and "gauss-legendre_5s", if you wish to demonstrate your commitment to climate change (and image quality). Setting this to "none" has the same effect as setting implicit_steps = 0. ## SCHEDULER AND DENOISE SETTINGS ![image](https://github.com/user-attachments/assets/b89d3956-1734-4368-8bb4-429b9989cd4d) These are identical in most ways to the settings by the same name in KSampler. 
**SCHEDULER:** There is one extra sigma scheduler offered by default: "beta57" which is the beta schedule with modified parameters (alpha = 0.5, beta = 0.7). **IMPLICIT_STEPS:** This controls the number of implicit steps to run. Note that it will double, triple, etc. the runtime as you increase the stepcount. Typically, gains diminish quickly after 2-3 implicit steps. **DENOISE:** This is identical to the KSampler setting. Controls the amount of noise removed from the image. Note that with this method, the effect will change significantly depending on your choice of scheduler. **DENOISE_ALT:** Instead of splitting the sigma schedule like "denoise", this multiplies them. The results are different, but track more closely from one scheduler to another when using the same value. This can be particularly useful for img2img workflows. **CFG:** This is identical to the KSampler setting. Typically, you'll set this to 1.0 (to disable it) when using Flux, if you're using Flux guidance. However, the effect is quite nice when using dedistilled models if you use "CLIP Text Encode" without any Flux guidance, and set CFG to 3.0. If you've never quite understood CFG, you can think of it this way. Imagine you're walking down the street and see what looks like an enticing music festival in the distance (your positive conditioning). You're on the fence about attending, but then, suddenly, a horde of pickleshark cannibals come storming out of a nearby bar (your negative conditioning). Together, the two team up to drive you toward the music festival. That's CFG. ## SHIFT SETTINGS ![image](https://github.com/user-attachments/assets/e9a2e2d7-be5c-4b63-8647-275409600b56) These are present for convenience as they are used in virtually every workflow. **SHIFT:** This is the same as "shift" for the ModelSampling nodes for SD3.5, AuraFlow, etc., and is equivalent to "max_shift" for Flux. Set this value to -1 to disable setting shift (or max_shift) within the node. 
**BASE_SHIFT:** This is only used by Flux. Set this value to -1 to disable setting base_shift within the node. **SHIFT_SCALING:** This changes how the shift values are calculated. "exponential" is the default used by Flux, whereas "linear" is the default used by SD3.5 and AuraFlow. In most cases, "exponential" leads to better results, though "linear" has some niche uses. # Sampler and noise mode list ## Explicit samplers Bolded samplers are added as options to the sampler dropdown in ComfyUI (an ODE and SDE version for each). **res_2m** **res_2/3/5s** **deis_2/3/4m** ralston_2/3/4s dpmpp_2/3m dpmpp_sde_2s dpmpp_2/3s midpoint_2s heun_2/3s houwen-wray_3s kutta_3s ssprk3_3s rk38_4s rk4_4s dormand-prince_6s dormand-prince_13s bogacki-shampine_7s ddim euler ## Fully Implicit Samplers gauss-legendre_2/3/4/5s radau_(i/ii)a_2/3s lobatto_iii(a/b/c/d/star)_2/3s ## Diagonally Implicit Samplers kraaijevanger_spijker_2s qin_zhang_2s pareschi_russo_2s pareschi_russo_alt_2s crouzeix_2/3s irk_exp_diag_2s (features an exponential integrator) # PREVIOUS FLUX WORKFLOWS ## TXT2IMG: This uses my amateur cell phone lora, which is freely available (https://huggingface.co/ClownsharkBatwing/CSBW_Style/blob/main/amateurphotos_1_amateurcellphonephoto_recapt2.safetensors). It significantly reduces the plastic, blurred look of Flux Dev. 
![image](https://github.com/ClownsharkBatwing/RES4LYF/blob/main/workflows/txt2img%20flux.png) ![image](https://github.com/ClownsharkBatwing/RES4LYF/blob/main/workflows/txt2img%20WF%20flux.png) ## INPAINTING: ![image](https://github.com/ClownsharkBatwing/RES4LYF/blob/main/workflows/inpainting%20flux.png) ![image](https://github.com/ClownsharkBatwing/RES4LYF/blob/main/workflows/inpainting%20WF%20flux.png) ## UNSAMPLING (Dual guides with masks): ![image](https://github.com/ClownsharkBatwing/RES4LYF/blob/main/workflows/txt2img%20dual%20guides%20masked%20flux.png) ![image](https://github.com/ClownsharkBatwing/RES4LYF/blob/main/workflows/txt2img%20dual%20guides%20masked%20WF%20flux.png) # PREVIOUS WORKFLOWS **THE FOLLOWING WORKFLOWS ARE FOR A PREVIOUS VERSION OF THE NODE.** These will still work! You will, however, need to manually delete and recreate the sampler and guide nodes and input the settings as they appear in the screenshots. The layout of the nodes has been changed slightly. To replicate their behavior precisely, add to the new extra_options box in ClownsharKSampler: truncate_conditioning=true (if that setting was used in the screenshot for the node). 
![image](https://github.com/user-attachments/assets/a55ec484-1339-45a2-bcc4-76934f4648d4) **TXT2IMG Workflow:** ![image](https://github.com/ClownsharkBatwing/RES4LYF/blob/main/workflows/txt2img%20SD35M%20output.png) ![image](https://github.com/ClownsharkBatwing/RES4LYF/blob/main/workflows/txt2img%20SD35M.png) **TXT2IMG Workflow (Latent Image Guides):** ![image](https://github.com/ClownsharkBatwing/RES4LYF/blob/main/workflows/txt2img%20guided%20SD35M%20output.png) ![image](https://github.com/ClownsharkBatwing/RES4LYF/blob/main/workflows/txt2img%20guided%20SD35M.png) Input image: https://github.com/ClownsharkBatwing/RES4LYF/blob/main/workflows/txt2img%20guided%20SD35M%20input.png **TXT2IMG Workflow (Dual Guides with Masking):** ![image](https://github.com/ClownsharkBatwing/RES4LYF/blob/main/workflows/txt2img%20dual%20guides%20with%20mask%20SD35M%20output.png) ![image](https://github.com/ClownsharkBatwing/RES4LYF/blob/main/workflows/txt2img%20dual%20guides%20with%20mask%20SD35M.png) Input images and mask: https://github.com/ClownsharkBatwing/RES4LYF/blob/main/workflows/txt2img%20dual%20guides%20with%20mask%20SD35M%20input1.png https://github.com/ClownsharkBatwing/RES4LYF/blob/main/workflows/txt2img%20dual%20guides%20with%20mask%20SD35M%20input2.png https://github.com/ClownsharkBatwing/RES4LYF/blob/main/workflows/txt2img%20dual%20guides%20with%20mask%20SD35M%20mask.png **IMG2IMG Workflow (Unsampling):** ![image](https://github.com/ClownsharkBatwing/RES4LYF/blob/main/workflows/img2img%20unsampling%20SD35L%20output.png) ![image](https://github.com/ClownsharkBatwing/RES4LYF/blob/main/workflows/img2img%20unsampling%20SD35L.png) Input image: https://github.com/ClownsharkBatwing/RES4LYF/blob/main/workflows/img2img%20unsampling%20SD35L%20input.png **IMG2IMG Workflow (Unsampling with SDXL):** ![image](https://github.com/ClownsharkBatwing/RES4LYF/blob/main/workflows/img2img%20unsampling%20SDXL%20output.png) 
![image](https://github.com/ClownsharkBatwing/RES4LYF/blob/main/workflows/img2img%20unsampling%20SDXL.png) Input image: https://github.com/ClownsharkBatwing/RES4LYF/blob/main/workflows/img2img%20unsampling%20SDXL%20input.png **IMG2IMG Workflow (Unsampling with latent image guide):** ![image](https://github.com/ClownsharkBatwing/RES4LYF/blob/main/workflows/img2img%20guided%20unsampling%20SD35M%20output.png) ![image](https://github.com/ClownsharkBatwing/RES4LYF/blob/main/workflows/img2img%20guided%20unsampling%20SD35M.png) Input image: https://github.com/ClownsharkBatwing/RES4LYF/blob/main/workflows/img2img%20guided%20unsampling%20SD35M%20input.png **IMG2IMG Workflow (Unsampling with dual latent image guides and masking):** ![image](https://github.com/ClownsharkBatwing/RES4LYF/blob/main/workflows/img2img%20dual%20guided%20masked%20unsampling%20SD35M%20output.png) ![image](https://github.com/ClownsharkBatwing/RES4LYF/blob/main/workflows/img2img%20dual%20guided%20masked%20unsampling%20SD35M.png) Input images and mask: https://github.com/ClownsharkBatwing/RES4LYF/blob/main/workflows/img2img%20dual%20guided%20masked%20unsampling%20SD35M%20input1.png https://github.com/ClownsharkBatwing/RES4LYF/blob/main/workflows/img2img%20dual%20guided%20masked%20unsampling%20SD35M%20input2.png https://github.com/ClownsharkBatwing/RES4LYF/blob/main/workflows/img2img%20dual%20guided%20masked%20unsampling%20SD35M%20mask.png ================================================ FILE: __init__.py ================================================ import importlib import os from . import loaders from . import sigmas from . import conditioning from . import images from . import models from . import helper_sigma_preview_image_preproc from . import nodes_misc from . import nodes_latents from . 
import nodes_precision import torch from math import * from comfy.samplers import SchedulerHandler, SCHEDULER_HANDLERS, SCHEDULER_NAMES new_scheduler_name = "bong_tangent" if new_scheduler_name not in SCHEDULER_HANDLERS: bong_tangent_handler = SchedulerHandler(handler=sigmas.bong_tangent_scheduler, use_ms=True) SCHEDULER_HANDLERS[new_scheduler_name] = bong_tangent_handler SCHEDULER_NAMES.append(new_scheduler_name) from .res4lyf import RESplain #torch.use_deterministic_algorithms(True) #torch.backends.cudnn.deterministic = True #torch.backends.cudnn.benchmark = False res4lyf.init() discard_penultimate_sigma_samplers = set(( )) def add_samplers(): from comfy.samplers import KSampler, k_diffusion_sampling if hasattr(KSampler, "DISCARD_PENULTIMATE_SIGMA_SAMPLERS"): KSampler.DISCARD_PENULTIMATE_SIGMA_SAMPLERS |= discard_penultimate_sigma_samplers added = 0 for sampler in extra_samplers: #getattr(self, "sample_{}".format(extra_samplers)) if sampler not in KSampler.SAMPLERS: try: idx = KSampler.SAMPLERS.index("uni_pc_bh2") # *should* be last item in samplers list KSampler.SAMPLERS.insert(idx+1, sampler) # add custom samplers (presumably) to end of list setattr(k_diffusion_sampling, "sample_{}".format(sampler), extra_samplers[sampler]) added += 1 except ValueError as _err: pass if added > 0: import importlib importlib.reload(k_diffusion_sampling) extra_samplers = {} extra_samplers = dict(reversed(extra_samplers.items())) NODE_CLASS_MAPPINGS = { "FluxLoader" : loaders.FluxLoader, "SD35Loader" : loaders.SD35Loader, "ClownModelLoader" : loaders.RES4LYFModelLoader, "TextBox1" : nodes_misc.TextBox1, "TextBox2" : nodes_misc.TextBox2, "TextBox3" : nodes_misc.TextBox3, "TextConcatenate" : nodes_misc.TextConcatenate, "TextBoxConcatenate" : nodes_misc.TextBoxConcatenate, "TextLoadFile" : nodes_misc.TextLoadFile, "TextShuffle" : nodes_misc.TextShuffle, "TextShuffleAndTruncate" : nodes_misc.TextShuffleAndTruncate, "TextTruncateTokens" : nodes_misc.TextTruncateTokens, "SeedGenerator" : 
nodes_misc.SeedGenerator, "ClownRegionalConditioning" : conditioning.ClownRegionalConditioning, "ClownRegionalConditionings" : conditioning.ClownRegionalConditionings, "ClownRegionalConditioning2" : conditioning.ClownRegionalConditioning2, "ClownRegionalConditioning3" : conditioning.ClownRegionalConditioning3, "ClownRegionalConditioning_AB" : conditioning.ClownRegionalConditioning_AB, "ClownRegionalConditioning_ABC" : conditioning.ClownRegionalConditioning_ABC, "CLIPTextEncodeFluxUnguided" : conditioning.CLIPTextEncodeFluxUnguided, "ConditioningOrthoCollin" : conditioning.ConditioningOrthoCollin, "ConditioningAverageScheduler" : conditioning.ConditioningAverageScheduler, "ConditioningMultiply" : conditioning.ConditioningMultiply, "ConditioningAdd" : conditioning.ConditioningAdd, "Conditioning Recast FP64" : conditioning.Conditioning_Recast64, "StableCascade_StageB_Conditioning64" : conditioning.StableCascade_StageB_Conditioning64, "ConditioningZeroAndTruncate" : conditioning.ConditioningZeroAndTruncate, "ConditioningTruncate" : conditioning.ConditioningTruncate, "StyleModelApplyStyle" : conditioning.StyleModelApplyStyle, "CrossAttn_EraseReplace_HiDream" : conditioning.CrossAttn_EraseReplace_HiDream, "ConditioningDownsample (T5)" : conditioning.ConditioningDownsampleT5, "ConditioningToBase64" : conditioning.ConditioningToBase64, "Base64ToConditioning" : conditioning.Base64ToConditioning, "ConditioningBatch4" : conditioning.ConditioningBatch4, "ConditioningBatch8" : conditioning.ConditioningBatch8, "TemporalMaskGenerator" : conditioning.TemporalMaskGenerator, "TemporalSplitAttnMask" : conditioning.TemporalSplitAttnMask, "TemporalSplitAttnMask (Midframe)" : conditioning.TemporalSplitAttnMask_Midframe, "TemporalCrossAttnMask" : conditioning.TemporalCrossAttnMask, "Set Precision" : nodes_precision.set_precision, "Set Precision Universal" : nodes_precision.set_precision_universal, "Set Precision Advanced" : nodes_precision.set_precision_advanced, "LatentUpscaleWithVAE" : 
helper_sigma_preview_image_preproc.LatentUpscaleWithVAE, "LatentNoised" : nodes_latents.LatentNoised, "LatentNoiseList" : nodes_latents.LatentNoiseList, "AdvancedNoise" : nodes_latents.AdvancedNoise, "LatentNoiseBatch_perlin" : nodes_latents.LatentNoiseBatch_perlin, "LatentNoiseBatch_fractal" : nodes_latents.LatentNoiseBatch_fractal, "LatentNoiseBatch_gaussian" : nodes_latents.LatentNoiseBatch_gaussian, "LatentNoiseBatch_gaussian_channels" : nodes_latents.LatentNoiseBatch_gaussian_channels, "LatentBatch_channels" : nodes_latents.LatentBatch_channels, "LatentBatch_channels_16" : nodes_latents.LatentBatch_channels_16, "Latent Get Channel Means" : nodes_latents.latent_get_channel_means, "Latent Match Channelwise" : nodes_latents.latent_channelwise_match, "Latent to RawX" : nodes_latents.latent_to_raw_x, "Latent Clear State Info" : nodes_latents.latent_clear_state_info, "Latent Replace State Info" : nodes_latents.latent_replace_state_info, "Latent Display State Info" : nodes_latents.latent_display_state_info, "Latent Transfer State Info" : nodes_latents.latent_transfer_state_info, "Latent TrimVideo State Info" : nodes_latents.TrimVideoLatent_state_info, "Latent to Cuda" : nodes_latents.latent_to_cuda, "Latent Batcher" : nodes_latents.latent_batch, "Latent Normalize Channels" : nodes_latents.latent_normalize_channels, "Latent Channels From To" : nodes_latents.latent_mean_channels_from_to, "LatentPhaseMagnitude" : nodes_latents.LatentPhaseMagnitude, "LatentPhaseMagnitudeMultiply" : nodes_latents.LatentPhaseMagnitudeMultiply, "LatentPhaseMagnitudeOffset" : nodes_latents.LatentPhaseMagnitudeOffset, "LatentPhaseMagnitudePower" : nodes_latents.LatentPhaseMagnitudePower, "MaskFloatToBoolean" : nodes_latents.MaskFloatToBoolean, "MaskToggle" : nodes_latents.MaskToggle, "MaskEdge" : nodes_latents.MaskEdge, #"MaskEdgeRatio" : nodes_latents.MaskEdgeRatio, "Frames Masks Uninterpolate" : nodes_latents.Frames_Masks_Uninterpolate, "Frames Masks ZeroOut" : 
nodes_latents.Frames_Masks_ZeroOut, "Frames Latent ReverseOrder" : nodes_latents.Frames_Latent_ReverseOrder, "EmptyLatentImage64" : nodes_latents.EmptyLatentImage64, "EmptyLatentImageCustom" : nodes_latents.EmptyLatentImageCustom, "StableCascade_StageC_VAEEncode_Exact": nodes_latents.StableCascade_StageC_VAEEncode_Exact, "PrepForUnsampling" : helper_sigma_preview_image_preproc.VAEEncodeAdvanced, "VAEEncodeAdvanced" : helper_sigma_preview_image_preproc.VAEEncodeAdvanced, "VAEStyleTransferLatent" : helper_sigma_preview_image_preproc.VAEStyleTransferLatent, "SigmasPreview" : helper_sigma_preview_image_preproc.SigmasPreview, "SigmasSchedulePreview" : helper_sigma_preview_image_preproc.SigmasSchedulePreview, "TorchCompileModelFluxAdv" : models.TorchCompileModelFluxAdvanced, "TorchCompileModelAura" : models.TorchCompileModelAura, "TorchCompileModelSD35" : models.TorchCompileModelSD35, "TorchCompileModels" : models.TorchCompileModels, "ClownpileModelWanVideo" : models.ClownpileModelWanVideo, "ModelTimestepPatcher" : models.ModelSamplingAdvanced, "ModelSamplingAdvanced" : models.ModelSamplingAdvanced, "ModelSamplingAdvancedResolution" : models.ModelSamplingAdvancedResolution, "FluxGuidanceDisable" : models.FluxGuidanceDisable, "ReWanPatcher" : models.ReWanPatcher, "ReFluxPatcher" : models.ReFluxPatcher, "ReChromaPatcher" : models.ReChromaPatcher, "ReSD35Patcher" : models.ReSD35Patcher, "ReAuraPatcher" : models.ReAuraPatcher, "ReLTXVPatcher" : models.ReLTXVPatcher, "ReHiDreamPatcher" : models.ReHiDreamPatcher, "ReSDPatcher" : models.ReSDPatcher, "ReReduxPatcher" : models.ReReduxPatcher, "ReWanPatcherAdvanced" : models.ReWanPatcherAdvanced, "ReFluxPatcherAdvanced" : models.ReFluxPatcherAdvanced, "ReChromaPatcherAdvanced" : models.ReChromaPatcherAdvanced, "ReSD35PatcherAdvanced" : models.ReSD35PatcherAdvanced, "ReAuraPatcherAdvanced" : models.ReAuraPatcherAdvanced, "ReLTXVPatcherAdvanced" : models.ReLTXVPatcherAdvanced, "ReHiDreamPatcherAdvanced" : 
models.ReHiDreamPatcherAdvanced, "LayerPatcher" : loaders.LayerPatcher, "FluxOrthoCFGPatcher" : models.FluxOrthoCFGPatcher, "UNetSave" : models.UNetSave, "Sigmas Recast" : sigmas.set_precision_sigmas, "Sigmas Noise Inversion" : sigmas.sigmas_noise_inversion, "Sigmas From Text" : sigmas.sigmas_from_text, "Sigmas Variance Floor" : sigmas.sigmas_variance_floor, "Sigmas Truncate" : sigmas.sigmas_truncate, "Sigmas Start" : sigmas.sigmas_start, "Sigmas Split" : sigmas.sigmas_split, "Sigmas Split Value" : sigmas.sigmas_split_value, "Sigmas Concat" : sigmas.sigmas_concatenate, "Sigmas Pad" : sigmas.sigmas_pad, "Sigmas Unpad" : sigmas.sigmas_unpad, "Sigmas SetFloor" : sigmas.sigmas_set_floor, "Sigmas DeleteBelowFloor" : sigmas.sigmas_delete_below_floor, "Sigmas DeleteDuplicates" : sigmas.sigmas_delete_consecutive_duplicates, "Sigmas Cleanup" : sigmas.sigmas_cleanup, "Sigmas Mult" : sigmas.sigmas_mult, "Sigmas Modulus" : sigmas.sigmas_modulus, "Sigmas Quotient" : sigmas.sigmas_quotient, "Sigmas Add" : sigmas.sigmas_add, "Sigmas Power" : sigmas.sigmas_power, "Sigmas Abs" : sigmas.sigmas_abs, "Sigmas2 Mult" : sigmas.sigmas2_mult, "Sigmas2 Add" : sigmas.sigmas2_add, "Sigmas Rescale" : sigmas.sigmas_rescale, "Sigmas Count" : sigmas.sigmas_count, "Sigmas Resample" : sigmas.sigmas_interpolate, "Sigmas Math1" : sigmas.sigmas_math1, "Sigmas Math3" : sigmas.sigmas_math3, "Sigmas Iteration Karras" : sigmas.sigmas_iteration_karras, "Sigmas Iteration Polyexp" : sigmas.sigmas_iteration_polyexp, # New Sigma Nodes "Sigmas Lerp" : sigmas.sigmas_lerp, "Sigmas InvLerp" : sigmas.sigmas_invlerp, "Sigmas ArcSine" : sigmas.sigmas_arcsine, "Sigmas LinearSine" : sigmas.sigmas_linearsine, "Sigmas Append" : sigmas.sigmas_append, "Sigmas ArcCosine" : sigmas.sigmas_arccosine, "Sigmas ArcTangent" : sigmas.sigmas_arctangent, "Sigmas CrossProduct" : sigmas.sigmas_crossproduct, "Sigmas DotProduct" : sigmas.sigmas_dotproduct, "Sigmas Fmod" : sigmas.sigmas_fmod, "Sigmas Frac" : sigmas.sigmas_frac, "Sigmas 
If" : sigmas.sigmas_if, "Sigmas Logarithm2" : sigmas.sigmas_logarithm2, "Sigmas SmoothStep" : sigmas.sigmas_smoothstep, "Sigmas SquareRoot" : sigmas.sigmas_squareroot, "Sigmas TimeStep" : sigmas.sigmas_timestep, "Sigmas Sigmoid" : sigmas.sigmas_sigmoid, "Sigmas Easing" : sigmas.sigmas_easing, "Sigmas Hyperbolic" : sigmas.sigmas_hyperbolic, "Sigmas Gaussian" : sigmas.sigmas_gaussian, "Sigmas Percentile" : sigmas.sigmas_percentile, "Sigmas KernelSmooth" : sigmas.sigmas_kernel_smooth, "Sigmas QuantileNorm" : sigmas.sigmas_quantile_norm, "Sigmas AdaptiveStep" : sigmas.sigmas_adaptive_step, "Sigmas Chaos" : sigmas.sigmas_chaos, "Sigmas ReactionDiffusion" : sigmas.sigmas_reaction_diffusion, "Sigmas Attractor" : sigmas.sigmas_attractor, "Sigmas CatmullRom" : sigmas.sigmas_catmull_rom, "Sigmas LambertW" : sigmas.sigmas_lambert_w, "Sigmas ZetaEta" : sigmas.sigmas_zeta_eta, "Sigmas GammaBeta" : sigmas.sigmas_gamma_beta, "Sigmas GaussianCDF" : sigmas.sigmas_gaussian_cdf, "Sigmas StepwiseMultirate" : sigmas.sigmas_stepwise_multirate, "Sigmas HarmonicDecay" : sigmas.sigmas_harmonic_decay, "Sigmas AdaptiveNoiseFloor" : sigmas.sigmas_adaptive_noise_floor, "Sigmas CollatzIteration" : sigmas.sigmas_collatz_iteration, "Sigmas ConwaySequence" : sigmas.sigmas_conway_sequence, "Sigmas GilbreathSequence" : sigmas.sigmas_gilbreath_sequence, "Sigmas CNFInverse" : sigmas.sigmas_cnf_inverse, "Sigmas RiemannianFlow" : sigmas.sigmas_riemannian_flow, "Sigmas LangevinDynamics" : sigmas.sigmas_langevin_dynamics, "Sigmas PersistentHomology" : sigmas.sigmas_persistent_homology, "Sigmas NormalizingFlows" : sigmas.sigmas_normalizing_flows, "ClownScheduler" : sigmas.ClownScheduler, # for modulating parameters "Tan Scheduler" : sigmas.tan_scheduler, "Tan Scheduler 2" : sigmas.tan_scheduler_2stage, "Tan Scheduler 2 Simple" : sigmas.tan_scheduler_2stage_simple, "Constant Scheduler" : sigmas.constant_scheduler, "Linear Quadratic Advanced" : sigmas.linear_quadratic_advanced, "SetImageSizeWithScale" : 
nodes_misc.SetImageSizeWithScale, "SetImageSize" : nodes_misc.SetImageSize, "Mask Bounding Box Aspect Ratio" : images.MaskBoundingBoxAspectRatio, "Image Get Color Swatches" : images.Image_Get_Color_Swatches, "Masks From Color Swatches" : images.Masks_From_Color_Swatches, "Masks From Colors" : images.Masks_From_Colors, "Masks Unpack 4" : images.Masks_Unpack4, "Masks Unpack 8" : images.Masks_Unpack8, "Masks Unpack 16" : images.Masks_Unpack16, "Image Sharpen FS" : images.ImageSharpenFS, "Image Channels LAB" : images.Image_Channels_LAB, "Image Median Blur" : images.ImageMedianBlur, "Image Gaussian Blur" : images.ImageGaussianBlur, "Image Pair Split" : images.Image_Pair_Split, "Image Crop Location Exact" : images.Image_Crop_Location_Exact, "Film Grain" : images.Film_Grain, "Frequency Separation Linear Light" : images.Frequency_Separation_Linear_Light, "Frequency Separation Hard Light" : images.Frequency_Separation_Hard_Light, "Frequency Separation Hard Light LAB" : images.Frequency_Separation_Hard_Light_LAB, "Frame Select" : images.Frame_Select, "Frames Slice" : images.Frames_Slice, "Frames Concat" : images.Frames_Concat, "Mask Sketch" : images.MaskSketch, "Image Grain Add" : images.Image_Grain_Add, "Image Repeat Tile To Size" : images.ImageRepeatTileToSize, "Frames Concat Masks" : nodes_latents.Frames_Concat_Masks, "Frame Select Latent" : nodes_latents.Frame_Select_Latent, "Frames Slice Latent" : nodes_latents.Frames_Slice_Latent, "Frames Concat Latent" : nodes_latents.Frames_Concat_Latent, "Frame Select Latent Raw" : nodes_latents.Frame_Select_Latent_Raw, "Frames Slice Latent Raw" : nodes_latents.Frames_Slice_Latent_Raw, "Frames Concat Latent Raw" : nodes_latents.Frames_Concat_Latent_Raw, } NODE_DISPLAY_NAME_MAPPINGS = { } WEB_DIRECTORY = "./web/js" flags = { "zampler" : False, "beta_samplers" : False, "legacy_samplers": False, } file_path = os.path.join(os.path.dirname(__file__), "zampler_test_code.txt") if os.path.exists(file_path): try: from .zampler import 
add_zamplers NODE_CLASS_MAPPINGS, extra_samplers = add_zamplers(NODE_CLASS_MAPPINGS, extra_samplers) flags["zampler"] = True RESplain("Importing zampler.") except ImportError: try: import importlib for module_name in ["RES4LYF.zampler", "res4lyf.zampler"]: try: zampler_module = importlib.import_module(module_name) add_zamplers = zampler_module.add_zamplers NODE_CLASS_MAPPINGS, extra_samplers = add_zamplers(NODE_CLASS_MAPPINGS, extra_samplers) flags["zampler"] = True RESplain(f"Importing zampler via {module_name}.") break except ImportError: continue else: raise ImportError("Zampler module not found in any path") except Exception as e: print(f"(RES4LYF) Failed to import zamplers: {e}") try: from .beta import add_beta NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS, extra_samplers = add_beta(NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS, extra_samplers) flags["beta_samplers"] = True RESplain("Importing beta samplers.") except ImportError: try: import importlib for module_name in ["RES4LYF.beta", "res4lyf.beta"]: try: beta_module = importlib.import_module(module_name) add_beta = beta_module.add_beta NODE_CLASS_MAPPINGS, extra_samplers = add_beta(NODE_CLASS_MAPPINGS, extra_samplers) flags["beta_samplers"] = True RESplain(f"Importing beta samplers via {module_name}.") break except ImportError: continue else: raise ImportError("Beta module not found in any path") except Exception as e: print(f"(RES4LYF) Failed to import beta samplers: {e}") try: from .legacy import add_legacy NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS, extra_samplers = add_legacy(NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS, extra_samplers) flags["legacy_samplers"] = True RESplain("Importing legacy samplers.") except ImportError: try: import importlib for module_name in ["RES4LYF.legacy", "res4lyf.legacy"]: try: legacy_module = importlib.import_module(module_name) add_legacy = legacy_module.add_legacy NODE_CLASS_MAPPINGS, extra_samplers = add_legacy(NODE_CLASS_MAPPINGS, extra_samplers) 
def fp_not(tensor):
    """Return the complement ``1 - tensor`` of a mask."""
    return 1 - tensor


def fp_or(tensor1, tensor2):
    """Return the elementwise maximum of two masks (used as a soft OR)."""
    return torch.maximum(tensor1, tensor2)


def fp_and(tensor1, tensor2):
    """Return the elementwise minimum of two masks (used as a soft AND)."""
    return torch.minimum(tensor1, tensor2)


def fp_and2(tensor1, tensor2):
    """Splice two matrices along the diagonal.

    The result takes the strictly-upper triangle from ``tensor1``, the
    strictly-lower triangle from ``tensor2`` and sets the diagonal to 1.
    """
    triu = torch.triu(torch.ones_like(tensor1))
    tril = torch.tril(torch.ones_like(tensor2))
    # Drop the diagonal from both selectors so it can be forced to 1 below.
    triu.diagonal().fill_(0.0)
    tril.diagonal().fill_(0.0)

    new_tensor = tensor1 * triu + tensor2 * tril
    new_tensor.diagonal().fill_(1.0)
    return new_tensor


class CoreAttnMask:
    """Attention mask plus the block/sigma window in which it is active.

    The mask is stored on ``idle_device`` and moved to ``work_device`` only
    when it is requested through ``__call__``.
    """

    def __init__(self, mask, mask_type=None, start_sigma=None, end_sigma=None, start_block=0, end_block=-1, idle_device='cpu', work_device='cuda'):
        self.mask = mask.to(idle_device)
        self.start_sigma = start_sigma
        self.end_sigma = end_sigma
        self.start_block = start_block
        self.end_block = end_block
        self.work_device = work_device
        self.idle_device = idle_device
        self.mask_type = mask_type

    def set_sigma_range(self, start_sigma, end_sigma):
        """Set the sigma window; the mask applies while start >= sigma > end."""
        self.start_sigma = start_sigma
        self.end_sigma = end_sigma

    def set_block_range(self, start_block, end_block):
        """Set the block window; an ``end_block`` <= 0 means no upper bound."""
        self.start_block = start_block
        self.end_block = end_block

    def _on_work_device(self, weight, mask_type):
        """Return the mask on the work device in the form ``mask_type`` asks for.

        Types starting with ``"gradient"`` yield ``mask * weight``; every other
        type yields the boolean tensor ``mask > 0``.
        """
        mask = self.mask.to(self.work_device)
        if mask_type.startswith("gradient"):
            return mask * weight
        return mask > 0

    def __call__(self, weight=1.0, mask_type=None, transformer_options=None, block_idx=0):
        """Return the mask if it is active for this call, else ``None``.

        Args:
            weight: Multiplier applied to "gradient" masks.
            mask_type: Overrides the type given at construction when not None.
                One of the two must be a string.
            transformer_options: Optional dict; ``['sigmas'][0]`` is compared
                against the sigma window when a full window is configured.
            block_idx: Index of the calling block, checked against the block
                window.

        Returns:
            The mask on ``work_device``, or ``None`` when ``block_idx`` or the
            current sigma falls outside the configured window. If no sigma
            window (or no ``transformer_options``) is given, the sigma check
            is skipped and the mask is returned.
        """
        if block_idx < self.start_block:
            return None
        if block_idx > self.end_block and self.end_block > 0:
            return None

        mask_type = self.mask_type if mask_type is None else mask_type

        if transformer_options is None:
            return self._on_work_device(weight, mask_type)

        # Without a complete sigma window there is nothing to compare against.
        # Checking this first avoids dereferencing ``start_sigma.device`` on
        # None, which previously raised AttributeError.
        if self.start_sigma is None or self.end_sigma is None:
            return self._on_work_device(weight, mask_type)

        sigma = transformer_options['sigmas'][0].to(self.start_sigma.device)
        if self.start_sigma >= sigma > self.end_sigma:
            return self._on_work_device(weight, mask_type)

        return None


class BaseAttentionMask:
    """Bookkeeping shared by the regional attention-mask builders.

    Collects per-region context lengths and spatial masks, tracks the latent
    geometry, and exposes the generated ``CoreAttnMask`` through ``get``.
    Subclasses override ``generate`` to build ``self.attn_mask``.
    """

    def __init__(self, mask_type="gradient", edge_width=0, edge_width_list=None, use_self_attn_mask_list=None, dtype=torch.float16):
        self.t = 1
        self.img_len = 0
        self.text_len = 0
        self.text_off = 0
        self.h = 0
        self.w = 0

        self.text_register_tokens = 0

        self.context_lens = []
        self.context_lens_list = []
        self.masks = []

        self.num_regions = 0

        self.attn_mask = None
        self.mask_type = mask_type
        self.edge_width = edge_width
        self.edge_width_list = edge_width_list
        self.use_self_attn_mask_list = use_self_attn_mask_list

        # Only the exact type "gradient" keeps the requested dtype; any other
        # mask type is stored as a boolean mask.
        if mask_type == "gradient":
            self.dtype = dtype
        else:
            self.dtype = torch.bool

    def set_latent(self, latent):
        """Record latent geometry from a 4D (b, c, h, w) or 5D (b, c, t, h, w) tensor."""
        if latent.ndim == 4:
            self.b, self.c, self.h, self.w = latent.shape
        elif latent.ndim == 5:
            self.b, self.c, self.t, self.h, self.w = latent.shape

        # NOTE: a Stable Cascade stage-C exemption for the halving below used
        # to be sketched here; the halving is currently unconditional.
        self.h //= 2  # 16x16 PE patch_size = 2 1024x1024 rgb -> 128x128 16ch latent -> 64x64 img
        self.w //= 2

        self.img_len = self.h * self.w

    def add_region(self, context, mask):
        """Register a region from its context tensor and spatial mask.

        The region's token count is read from ``context.shape[-2]``.
        """
        self.context_lens.append(context.shape[-2])
        self.masks.append(mask)

        self.text_len = sum(self.context_lens)
        self.text_off = self.text_len

        self.num_regions += 1

    def add_region_sizes(self, context_size_list, mask):
        """Register a region whose context is described by a list of lengths.

        ``context_size_list`` holds one length per sub-context; their sum is
        stored as the region's total context length.
        """
        self.context_lens.append(sum(context_size_list))
        self.context_lens_list.append(context_size_list)
        self.masks.append(mask)

        self.text_len = sum(sum(sublist) for sublist in self.context_lens_list)
        self.text_off = self.text_len

        self.num_regions += 1

    def add_regions(self, contexts, masks):
        """Register several regions pairwise via ``add_region``."""
        for context, mask in zip(contexts, masks):
            self.add_region(context, mask)

    def clear_regions(self):
        """Forget every registered region and reset the text lengths."""
        self.context_lens = []
        # Must be reset as well: add_region_sizes derives text_len from this
        # list, so stale entries would inflate text_len after a clear.
        self.context_lens_list = []
        self.masks = []
        self.text_len = 0
        self.text_off = 0
        self.num_regions = 0

    def generate(self):
        """Placeholder; subclasses build ``self.attn_mask`` here."""
        print("Initializing ergosphere.")

    def get(self, **kwargs):
        """Forward ``kwargs`` to the generated ``CoreAttnMask`` and return its result."""
        return self.attn_mask(**kwargs)

    def attn_mask_recast(self, dtype):
        """Convert the stored mask tensor to ``dtype`` if it differs."""
        if self.attn_mask.mask.dtype != dtype:
            self.attn_mask.mask = self.attn_mask.mask.to(dtype)
mode='nearest-exact').to(dtype).flatten().unsqueeze(1).repeat(1, img_len) curr_len = prev_len + context_len attn_mask[prev_len:curr_len, prev_len:curr_len] = 1.0 # self TXT 2 TXT attn_mask[prev_len:curr_len, text_len: ] = img2txt_mask.transpose(-1, -2).repeat(1,t) # cross TXT 2 regional IMG # txt2img_mask attn_mask[text_off: , prev_len:curr_len] = img2txt_mask.repeat(t,1) # cross regional IMG 2 TXT attn_mask[text_off:, text_len:] = fp_or(attn_mask[text_off:, text_len:], fp_and(img2txt_mask_sq.repeat(t,t), img2txt_mask_sq.transpose(-1, -2).repeat(t,t))) # img2txt_mask_sq, txt2img_mask_sq #cross_self_mask[:,:] = fp_or(cross_self_mask, fp_and(img2txt_mask_sq.repeat(t,t), (1-img2txt_mask_sq).transpose(-1, -2).repeat(t,t))) prev_len = curr_len if self.mask_type.endswith("_masked") or self.mask_type.endswith("_A") or self.mask_type.endswith("_AB") or self.mask_type.endswith("_AC") or self.mask_type.endswith("_A,unmasked"): img2txt_mask_sq = F.interpolate(self.masks[0].unsqueeze(0).to(torch.float16), (h, w), mode='nearest-exact').to(dtype).flatten().unsqueeze(1).repeat(1, img_len) attn_mask[text_off:, text_len:] = fp_or(attn_mask[text_off:, text_len:], img2txt_mask_sq) if self.mask_type.endswith("_unmasked") or self.mask_type.endswith("_C") or self.mask_type.endswith("_BC") or self.mask_type.endswith("_AC") or self.mask_type.endswith("_B,unmasked") or self.mask_type.endswith("_A,unmasked"): img2txt_mask_sq = F.interpolate(self.masks[-1].unsqueeze(0).to(torch.float16), (h, w), mode='nearest-exact').to(dtype).flatten().unsqueeze(1).repeat(1, img_len) attn_mask[text_off:, text_len:] = fp_or(attn_mask[text_off:, text_len:], img2txt_mask_sq) if self.mask_type.endswith("_B") or self.mask_type.endswith("_AB") or self.mask_type.endswith("_BC") or self.mask_type.endswith("_B,unmasked"): img2txt_mask_sq = F.interpolate(self.masks[1].unsqueeze(0).to(torch.float16), (h, w), mode='nearest-exact').to(dtype).flatten().unsqueeze(1).repeat(1, img_len) attn_mask[text_off:, text_len:] = 
fp_or(attn_mask[text_off:, text_len:], img2txt_mask_sq) if self.edge_width > 0: edge_mask = torch.zeros_like(self.masks[0]) for mask in self.masks: edge_mask = fp_or(edge_mask, get_edge_mask(mask, dilation=self.edge_width)) img2txt_mask_sq = F.interpolate(edge_mask.unsqueeze(0).to(torch.float16), (h, w), mode='nearest-exact').to(dtype).flatten().unsqueeze(1).repeat(1, img_len) attn_mask[text_off:, text_len:] = fp_or(attn_mask[text_off:, text_len:], img2txt_mask_sq) elif self.edge_width_list is not None: edge_mask = torch.zeros_like(self.masks[0]) for mask, edge_width in zip(self.masks, self.edge_width_list): if edge_width != 0: edge_mask_new = get_edge_mask(mask, dilation=abs(edge_width)) edge_mask = fp_or(edge_mask, fp_and(edge_mask_new, mask)) #fp_and here is to ensure edge_mask only grows into the region for current mask img2txt_mask_sq = F.interpolate(edge_mask.unsqueeze(0).to(torch.float16), (h, w), mode='nearest-exact').to(dtype).flatten().unsqueeze(1).repeat(1, img_len) attn_mask[text_off:, text_len:] = fp_or(attn_mask[text_off:, text_len:], img2txt_mask_sq) if self.use_self_attn_mask_list is not None: for mask, use_self_attn_mask in zip(self.masks, self.use_self_attn_mask_list): if not use_self_attn_mask: img2txt_mask_sq = F.interpolate(mask.unsqueeze(0).to(torch.float16), (h, w), mode='nearest-exact').to(dtype).flatten().unsqueeze(1).repeat(1, img_len) attn_mask[text_off:, text_len:] = fp_or(attn_mask[text_off:, text_len:], img2txt_mask_sq) #cmask = torch.zeros((text_len+t*img_len), dtype=torch.bfloat16) #cmask[text_len:] = cross_self_mask #cmask[text_len:] + 0.25 * cross_self_mask #self.cross_self_mask = CoreAttnMask(cmask[None,None,...,None], mask_type=mask_type) # shape: 1, 1, txt_len+img_len, 1 #self.cross_self_mask = CoreAttnMask(cross_self_mask[None,None,...,None], mask_type=mask_type) # shape: 1, 1, txt_len+img_len, 1 #self.cross_self_mask = CoreAttnMask(cross_self_mask[None,None,...,None], mask_type=mask_type) # shape: 1, 1, txt_len+img_len, 1 """ 
cross_self_mask = F.interpolate(self.masks[0].unsqueeze(0).to(torch.bfloat16), (h, w), mode='nearest-exact').to(torch.bfloat16).flatten()#.unsqueeze(1) # .repeat(1, img_len) edge_mask = get_edge_mask(self.masks[0], dilation=80) edge_mask = F.interpolate(edge_mask.unsqueeze(0).to(torch.bfloat16), (h, w), mode='nearest-exact').flatten().unsqueeze(1).repeat(1, img_len) attn_mask[text_off:, text_len:] = F.interpolate((1-self.masks[0]).unsqueeze(0).to(torch.float16), (h, w), mode='nearest-exact').to(dtype).flatten().unsqueeze(1).repeat(1, img_len) attn_mask = attn_mask.to(torch.bfloat16) edge_mask = edge_mask.to(torch.bfloat16)""" self.cross_self_mask = CoreAttnMask(torch.zeros_like(img2txt_mask_sq).to(torch.bfloat16).squeeze(), mask_type=mask_type) self.attn_mask = CoreAttnMask(attn_mask, mask_type=mask_type) class FullAttentionMaskHiDream(BaseAttentionMask): def generate(self, mask_type=None, dtype=None): mask_type = self.mask_type if mask_type is None else mask_type dtype = self.dtype if dtype is None else dtype text_off = self.text_off text_len = self.text_len img_len = self.img_len t = self.t h = self.h w = self.w if self.edge_width_list is None: self.edge_width_list = [self.edge_width] * self.num_regions attn_mask = torch.zeros((text_off+t*img_len, text_len+t*img_len), dtype=dtype) reg_num = 0 prev_len = 0 for context_len, mask in zip(self.context_lens, self.masks): img2txt_mask_sq = F.interpolate(mask.unsqueeze(0).to(torch.float16), (h, w), mode='nearest-exact').to(dtype).flatten().unsqueeze(1).repeat(1, img_len) curr_len = prev_len + context_len attn_mask[text_off:, text_len:] = fp_or(attn_mask[text_off:, text_len:], fp_and(img2txt_mask_sq.repeat(t,t), img2txt_mask_sq.transpose(-1,-2).repeat(t,t))) # img2txt_mask_sq, txt2img_mask_sq prev_len = curr_len reg_num += 1 self.self_attn_mask = attn_mask[text_off:, text_len:].clone() if self.mask_type.endswith("_masked") or self.mask_type.endswith("_A") or self.mask_type.endswith("_AB") or self.mask_type.endswith("_AC") 
or self.mask_type.endswith("_A,unmasked"): img2txt_mask_sq = F.interpolate(self.masks[0].unsqueeze(0).to(torch.float16), (h, w), mode='nearest-exact').to(dtype).flatten().unsqueeze(1).repeat(1, img_len) attn_mask[text_off:, text_len:] = fp_or(attn_mask[text_off:, text_len:], img2txt_mask_sq) if self.mask_type.endswith("_unmasked") or self.mask_type.endswith("_C") or self.mask_type.endswith("_BC") or self.mask_type.endswith("_AC") or self.mask_type.endswith("_B,unmasked") or self.mask_type.endswith("_A,unmasked"): img2txt_mask_sq = F.interpolate(self.masks[-1].unsqueeze(0).to(torch.float16), (h, w), mode='nearest-exact').to(dtype).flatten().unsqueeze(1).repeat(1, img_len) attn_mask[text_off:, text_len:] = fp_or(attn_mask[text_off:, text_len:], img2txt_mask_sq) if self.mask_type.endswith("_B") or self.mask_type.endswith("_AB") or self.mask_type.endswith("_BC") or self.mask_type.endswith("_B,unmasked"): img2txt_mask_sq = F.interpolate(self.masks[1].unsqueeze(0).to(torch.float16), (h, w), mode='nearest-exact').to(dtype).flatten().unsqueeze(1).repeat(1, img_len) attn_mask[text_off:, text_len:] = fp_or(attn_mask[text_off:, text_len:], img2txt_mask_sq) if self.edge_width > 0: edge_mask = torch.zeros_like(self.masks[0]) for mask in self.masks: edge_mask_new = get_edge_mask(mask, dilation=abs(self.edge_width)) edge_mask = fp_or(edge_mask, edge_mask_new) #edge_mask = fp_or(edge_mask, get_edge_mask(mask, dilation=self.edge_width)) img2txt_mask_sq = F.interpolate(edge_mask.unsqueeze(0).to(torch.float16), (h, w), mode='nearest-exact').to(dtype).flatten().unsqueeze(1).repeat(1, img_len) attn_mask[text_off:, text_len:] = fp_or(attn_mask[text_off:, text_len:], img2txt_mask_sq) elif self.edge_width < 0: # edge masks using cross-attn too edge_mask = torch.zeros_like(self.masks[0]) for mask in self.masks: edge_mask = fp_or(edge_mask, get_edge_mask(mask, dilation=abs(self.edge_width))) img2txt_mask_sq = F.interpolate(edge_mask.unsqueeze(0).to(torch.float16), (h, w), 
mode='nearest-exact').to(dtype).flatten().unsqueeze(1).repeat(1, img_len) attn_mask[text_off:, text_len:] = fp_or(attn_mask[text_off:, text_len:], img2txt_mask_sq) elif self.edge_width_list is not None: edge_mask = torch.zeros_like(self.masks[0]) for mask, edge_width in zip(self.masks, self.edge_width_list): if edge_width != 0: edge_mask_new = get_edge_mask(mask, dilation=abs(edge_width)) edge_mask = fp_or(edge_mask, fp_and(edge_mask_new, mask)) #fp_and here is to ensure edge_mask only grows into the region for current mask img2txt_mask_sq = F.interpolate(edge_mask.unsqueeze(0).to(torch.float16), (h, w), mode='nearest-exact').to(dtype).flatten().unsqueeze(1).repeat(1, img_len) attn_mask[text_off:, text_len:] = fp_or(attn_mask[text_off:, text_len:], img2txt_mask_sq) if self.use_self_attn_mask_list is not None: for mask, use_self_attn_mask in zip(self.masks, self.use_self_attn_mask_list): if not use_self_attn_mask: img2txt_mask_sq = F.interpolate(mask.unsqueeze(0).to(torch.float16), (h, w), mode='nearest-exact').to(dtype).flatten().unsqueeze(1).repeat(1, img_len) attn_mask[text_off:, text_len:] = fp_or(attn_mask[text_off:, text_len:], img2txt_mask_sq) text_len_t5 = sum(sublist[0] for sublist in self.context_lens_list) img2txt_mask_t5 = torch.empty((img_len, text_len_t5)).to(attn_mask) offset_t5_start = 0 reg_num_slice = 0 for context_len, mask_slice, edge_width in zip(self.context_lens, self.masks, self.edge_width_list): if self.edge_width < 0: # edge masks using cross-attn too mask_slice = fp_or(mask_slice, get_edge_mask(mask_slice, dilation=abs(self.edge_width))) if edge_width < 0: # edge masks using cross-attn too mask_slice = fp_or(mask_slice, get_edge_mask(mask_slice, dilation=abs(edge_width))) slice_len = self.context_lens_list[reg_num_slice][0] offset_t5_end = offset_t5_start + slice_len img2txt_mask_slice = F.interpolate(mask_slice.unsqueeze(0).to(torch.float16), (h, w), mode='nearest-exact').to(dtype).flatten().unsqueeze(1).repeat(1, slice_len) 
img2txt_mask_t5[:, offset_t5_start:offset_t5_end] = img2txt_mask_slice offset_t5_start = offset_t5_end reg_num_slice += 1 text_len_llama = sum(sublist[1] for sublist in self.context_lens_list) img2txt_mask_llama = torch.empty((img_len, text_len_llama)).to(attn_mask) offset_llama_start = 0 reg_num_slice = 0 for context_len, mask_slice, edge_width in zip(self.context_lens, self.masks, self.edge_width_list): if self.edge_width < 0: # edge masks using cross-attn too mask_slice = fp_or(mask_slice, get_edge_mask(mask_slice, dilation=abs(self.edge_width))) if edge_width < 0: # edge masks using cross-attn too mask_slice = fp_or(mask_slice, get_edge_mask(mask_slice, dilation=abs(edge_width))) slice_len = self.context_lens_list[reg_num_slice][1] offset_llama_end = offset_llama_start + slice_len img2txt_mask_slice = F.interpolate(mask_slice.unsqueeze(0).to(torch.float16), (h, w), mode='nearest-exact').to(dtype).flatten().unsqueeze(1).repeat(1, slice_len) img2txt_mask_llama[:, offset_llama_start:offset_llama_end] = img2txt_mask_slice offset_llama_start = offset_llama_end reg_num_slice += 1 img2txt_mask = torch.cat([img2txt_mask_t5, img2txt_mask_llama.repeat(1,2)], dim=-1) attn_mask[:-text_off , :-text_len ] = attn_mask[text_off:, text_len:].clone() attn_mask[:-text_off , -text_len:] = img2txt_mask attn_mask[ -text_off:, :-text_len ] = img2txt_mask.transpose(-2,-1) attn_mask[img_len:,img_len:] = 1.0 # txt -> txt "self-cross" attn is critical with hidream in most cases. 
checkerboard strategies are generally poo # mask cross attention between text embeds flat = [v for group in zip(*self.context_lens_list) for v in group] checkvar = checkerboard_variable(flat) attn_mask[img_len:, img_len:] = checkvar self.attn_mask = CoreAttnMask(attn_mask, mask_type=mask_type) #flat = [v for group in zip(*self.context_lens_list) for v in group] def gen_edge_mask(self, block_idx): mask_type = self.mask_type dtype = self.dtype text_off = self.text_off text_len = self.text_len img_len = self.img_len t = self.t h = self.h w = self.w if self.edge_width_list is None: return self.attn_mask.mask else: #attn_mask = self.attn_mask.mask.clone() attn_mask = torch.zeros_like(self.attn_mask.mask) attn_mask[text_off:, text_len:] = self.self_attn_mask.clone() edge_mask = torch.zeros_like(self.masks[0]) for mask, edge_width in zip(self.masks, self.edge_width_list): #edge_width *= (block_idx/48) edge_width *= torch.rand(1).item() edge_width = int(edge_width) if edge_width != 0: #edge_width *= (block_idx/48) #edge_width = int(edge_width) edge_mask_new = get_edge_mask(mask, dilation=abs(edge_width)) edge_mask = fp_or(edge_mask, fp_and(edge_mask_new, mask)) #fp_and here is to ensure edge_mask only grows into the region for current mask img2txt_mask_sq = F.interpolate(edge_mask.unsqueeze(0).to(torch.float16), (h, w), mode='nearest-exact').to(dtype).flatten().unsqueeze(1).repeat(1, img_len) attn_mask[text_off:, text_len:] = fp_or(attn_mask[text_off:, text_len:], img2txt_mask_sq) if self.use_self_attn_mask_list is not None: for mask, use_self_attn_mask in zip(self.masks, self.use_self_attn_mask_list): if not use_self_attn_mask: img2txt_mask_sq = F.interpolate(mask.unsqueeze(0).to(torch.float16), (h, w), mode='nearest-exact').to(dtype).flatten().unsqueeze(1).repeat(1, img_len) attn_mask[text_off:, text_len:] = fp_or(attn_mask[text_off:, text_len:], img2txt_mask_sq) text_len_t5 = sum(sublist[0] for sublist in self.context_lens_list) img2txt_mask_t5 = torch.empty((img_len, 
text_len_t5)).to(attn_mask) offset_t5_start = 0 reg_num_slice = 0 for context_len, mask_slice, edge_width in zip(self.context_lens, self.masks, self.edge_width_list): if self.edge_width < 0: # edge masks using cross-attn too mask_slice = fp_or(mask_slice, get_edge_mask(mask_slice, dilation=abs(self.edge_width))) if edge_width < 0: # edge masks using cross-attn too mask_slice = fp_or(mask_slice, get_edge_mask(mask_slice, dilation=abs(edge_width))) slice_len = self.context_lens_list[reg_num_slice][0] offset_t5_end = offset_t5_start + slice_len img2txt_mask_slice = F.interpolate(mask_slice.unsqueeze(0).to(torch.float16), (h, w), mode='nearest-exact').to(dtype).flatten().unsqueeze(1).repeat(1, slice_len) img2txt_mask_t5[:, offset_t5_start:offset_t5_end] = img2txt_mask_slice offset_t5_start = offset_t5_end reg_num_slice += 1 text_len_llama = sum(sublist[1] for sublist in self.context_lens_list) img2txt_mask_llama = torch.empty((img_len, text_len_llama)).to(attn_mask) offset_llama_start = 0 reg_num_slice = 0 for context_len, mask_slice, edge_width in zip(self.context_lens, self.masks, self.edge_width_list): if self.edge_width < 0: # edge masks using cross-attn too mask_slice = fp_or(mask_slice, get_edge_mask(mask_slice, dilation=abs(self.edge_width))) if edge_width < 0: # edge masks using cross-attn too mask_slice = fp_or(mask_slice, get_edge_mask(mask_slice, dilation=abs(edge_width))) slice_len = self.context_lens_list[reg_num_slice][1] offset_llama_end = offset_llama_start + slice_len img2txt_mask_slice = F.interpolate(mask_slice.unsqueeze(0).to(torch.float16), (h, w), mode='nearest-exact').to(dtype).flatten().unsqueeze(1).repeat(1, slice_len) img2txt_mask_llama[:, offset_llama_start:offset_llama_end] = img2txt_mask_slice offset_llama_start = offset_llama_end reg_num_slice += 1 img2txt_mask = torch.cat([img2txt_mask_t5, img2txt_mask_llama.repeat(1,2)], dim=-1) attn_mask[:-text_off , :-text_len ] = attn_mask[text_off:, text_len:].clone() attn_mask[:-text_off , 
class RegionalContext:
    """Accumulates per-region conditioning tensors for regional prompting.

    Each added region is concatenated onto a running tensor and also kept in a
    per-region list. The ``get*`` accessors return the running tensors moved to
    ``work_device``.
    """

    def __init__(self, idle_device='cpu', work_device='cuda'):
        self.context = None
        self.clip_fea = None
        self.llama3 = None

        self.context_list = []
        self.clip_fea_list = []
        self.clip_pooled_list = []
        self.llama3_list = []
        self.t5_list = []

        self.pooled_output = None
        self.idle_device = idle_device
        self.work_device = work_device

    def add_region(self, context, pooled_output=None, clip_fea=None):
        """Append one region's context, plus optional pooled output and clip features.

        ``context`` is concatenated onto the running context along dim 1.
        """
        if self.context is not None:
            self.context = torch.cat([self.context, context], dim=1)
        else:
            self.context = context
        self.context_list.append(context)

        if pooled_output is not None:
            self.clip_pooled_list.append(pooled_output)

        if clip_fea is not None:
            self.add_region_clip_fea(clip_fea)

    def add_region_clip_fea(self, clip_fea):
        """Concatenate ``clip_fea`` onto the running clip features along dim 1."""
        if self.clip_fea is not None:
            self.clip_fea = torch.cat([self.clip_fea, clip_fea], dim=1)
        else:
            self.clip_fea = clip_fea
        self.clip_fea_list.append(clip_fea)

    def add_region_llama3(self, llama3):
        """Concatenate ``llama3`` onto the running llama3 tensor along dim -2."""
        if self.llama3 is not None:
            self.llama3 = torch.cat([self.llama3, llama3], dim=-2)  # base shape 1,32,128,4096
        else:
            self.llama3 = llama3

    def add_region_hidream(self, t5, llama3):
        """Store one region's t5 and llama3 tensors without concatenating them."""
        self.t5_list.append(t5)
        self.llama3_list.append(llama3)

    def clear_regions(self):
        """Drop every accumulated tensor and per-region list.

        The per-region lists for context, clip features and pooled outputs are
        reset too; otherwise they keep tensors from earlier regions alive and
        mix them with regions added after the clear.
        """
        self.context = None
        self.clip_fea = None
        self.llama3 = None

        self.context_list = []
        self.clip_fea_list = []
        self.clip_pooled_list = []
        self.t5_list = []
        self.llama3_list = []

    def get(self):
        """Return the running context on ``work_device``.

        Raises ``AttributeError`` if no region has been added yet, because the
        running context is still ``None``.
        """
        return self.context.to(self.work_device)

    def get_clip_fea(self):
        """Return the running clip features on ``work_device``, or ``None`` if unset."""
        if self.clip_fea is not None:
            return self.clip_fea.to(self.work_device)
        return None

    def get_llama3(self):
        """Return the running llama3 tensor on ``work_device``, or ``None`` if unset."""
        if self.llama3 is not None:
            return self.llama3.to(self.work_device)
        return None
class SplitAttentionMask(BaseAttentionMask):
    """Attention mask made of an image->text part and an image->image part, concatenated column-wise."""

    def generate(self, mask_type=None, dtype=None):
        """Build the combined mask and store it in ``self.attn_mask``.

        Args:
            mask_type: overrides ``self.mask_type`` for the resulting ``CoreAttnMask``.
            dtype: overrides ``self.dtype`` when given.

        The stored tensor is ``cat([cross_attn_mask, self_attn_mask], dim=1)``, i.e.
        ``t * img_len`` rows and ``text_len + t * img_len`` columns.
        """
        mask_type = self.mask_type if mask_type is None else mask_type
        dtype     = self.dtype     if dtype     is None else dtype
        text_off  = self.text_off   # read but not used in this method
        text_len  = self.text_len
        img_len   = self.img_len
        t         = self.t
        h         = self.h
        w         = self.w

        # Default: every region uses the same edge width. This also persists on the instance.
        if self.edge_width_list is None:
            self.edge_width_list = [self.edge_width] * self.num_regions

        cross_attn_mask = torch.zeros((t * img_len,    text_len), dtype=dtype)
        self_attn_mask  = torch.zeros((t * img_len, t * img_len), dtype=dtype)

        prev_len = 0
        self_masks = []   # per-region image-space masks, reused after the loop
        for context_len, mask in zip(self.context_lens, self.masks):
            cross_mask, self_mask = None, None
            # NOTE(review): squeeze_/unsqueeze_ below are in-place, so the tensors held in
            # self.masks change shape; confirm repeated generate() calls are expected to see that.
            if mask.ndim == 6:
                mask.squeeze_(0)
            if mask.ndim == 3:
                t_mask = mask.shape[0]
            elif mask.ndim == 4:
                if mask.shape[0] > 1:
                    # Index 0 along dim 0 feeds the cross-attention part, index 1 the self-attention part.
                    # Each is cropped, or zero-padded at the end, so its first axis has length self.t.
                    cross_mask = mask[0]
                    if cross_mask.shape[-3] > self.t:
                        cross_mask = cross_mask[:self.t,...]
                    elif cross_mask.shape[-3] < self.t:
                        cross_mask = F.pad(cross_mask.permute(1,2,0), [0,self.t-cross_mask.shape[-3]], value=0).permute(2,0,1)

                    self_mask = mask[1]
                    if self_mask.shape[-3] > self.t:
                        self_mask = self_mask[:self.t,...]
                    elif self_mask.shape[-3] < self.t:
                        self_mask = F.pad(self_mask.permute(1,2,0), [0,self.t-self_mask.shape[-3]], value=0).permute(2,0,1)
                    t_mask = self.t
                else:
                    t_mask = mask.shape[-3]
                    mask.squeeze_(0)
            elif mask.ndim == 5:
                t_mask = mask.shape[-3]
            else:
                t_mask = 1
                mask.unsqueeze_(0)

            # Image->text column for this region: resample to (t_mask, h, w), flatten to one column.
            if cross_mask is not None:
                img2txt_mask = F.interpolate(cross_mask.unsqueeze(0).unsqueeze(0).to(torch.float16), (t_mask, h, w), mode='nearest-exact').to(dtype).flatten().unsqueeze(1)
            else:
                img2txt_mask = F.interpolate(      mask.unsqueeze(0).unsqueeze(0).to(torch.float16), (t_mask, h, w), mode='nearest-exact').to(dtype).flatten().unsqueeze(1)

            if t_mask == 1: # ...why only if == 1?  (original author's question; for t_mask > 1 the single column is broadcast by the assignment below)
                img2txt_mask = img2txt_mask.repeat(1, context_len)

            # This region owns text columns [prev_len, curr_len).
            curr_len = prev_len + context_len

            if t_mask == 1:
                cross_attn_mask[:, prev_len:curr_len] = img2txt_mask.repeat(t,1)
            else:
                cross_attn_mask[:, prev_len:curr_len] = img2txt_mask

            # Image-space mask for this region, repeated across t_mask * img_len columns.
            if self_mask is not None:
                img2txt_mask_sq = F.interpolate(self_mask.unsqueeze(0).to(torch.float16), (h, w), mode='nearest-exact').to(dtype).flatten().unsqueeze(1).repeat(1, t_mask * img_len)
            else:
                img2txt_mask_sq = F.interpolate(     mask.unsqueeze(0).to(torch.float16), (h, w), mode='nearest-exact').to(dtype).flatten().unsqueeze(1).repeat(1, t_mask * img_len)
            self_masks.append(img2txt_mask_sq)

            # Combine the mask with its transpose via fp_and, then merge into the running mask via fp_or.
            if t_mask > 1:
                self_attn_mask = fp_or(self_attn_mask, fp_and(img2txt_mask_sq, img2txt_mask_sq.transpose(-1,-2)))
            else:
                # NOTE(review): the first operand is tiled (t,t) before fp_and while the transposed one
                # is not, and the result is tiled again; shapes only obviously agree for t == 1 -- confirm.
                self_attn_mask = fp_or(self_attn_mask, fp_and(img2txt_mask_sq.repeat(t,t), img2txt_mask_sq.transpose(-1,-2)).repeat(t,t))

            prev_len = curr_len

        # Suffix of the mask type selects which regions' masks are OR-ed in unmodified:
        # first region (self_masks[0]), last region (self_masks[-1]), second region (self_masks[1]).
        # NOTE(review): these checks read self.mask_type, not the local mask_type override -- confirm intended.
        if self.mask_type.endswith("_masked") or self.mask_type.endswith("_A") or self.mask_type.endswith("_AB") or self.mask_type.endswith("_AC") or self.mask_type.endswith("_A,unmasked"):
            self_attn_mask = fp_or(self_attn_mask, self_masks[0])

        if self.mask_type.endswith("_unmasked") or self.mask_type.endswith("_C") or self.mask_type.endswith("_BC") or self.mask_type.endswith("_AC") or self.mask_type.endswith("_B,unmasked") or self.mask_type.endswith("_A,unmasked"):
            self_attn_mask = fp_or(self_attn_mask, self_masks[-1])

        if self.mask_type.endswith("_B") or self.mask_type.endswith("_AB") or self.mask_type.endswith("_BC") or self.mask_type.endswith("_B,unmasked"):
            self_attn_mask = fp_or(self_attn_mask, self_masks[1])

        # Edge handling. t_mask here is whatever the last loop iteration left behind.
        if self.edge_width > 0:
            # One global edge width: union of every region's edge mask.
            edge_mask = torch.zeros_like(self.masks[0])
            for mask in self.masks:
                edge_mask_new = get_edge_mask(mask, dilation=abs(self.edge_width))
                edge_mask = fp_or(edge_mask, edge_mask_new)
                #edge_mask = fp_or(edge_mask, get_edge_mask(mask, dilation=self.edge_width))

            img2txt_mask_sq = F.interpolate(edge_mask.unsqueeze(0).to(torch.float16), (h, w), mode='nearest-exact').to(dtype).flatten().unsqueeze(1).repeat(1, t_mask * img_len)
            self_attn_mask = fp_or(self_attn_mask, img2txt_mask_sq)

        elif self.edge_width_list is not None:
            # Per-region edge widths (always set by the default above when edge_width <= 0).
            edge_mask = torch.zeros_like(self.masks[0])

            for mask, edge_width in zip(self.masks, self.edge_width_list):
                if edge_width != 0:
                    edge_mask_new = get_edge_mask(mask, dilation=abs(edge_width))
                    edge_mask = fp_or(edge_mask, fp_and(edge_mask_new, mask)) #fp_and here is to ensure edge_mask only grows into the region for current mask

            img2txt_mask_sq = F.interpolate(edge_mask.unsqueeze(0).to(torch.float16), (h, w), mode='nearest-exact').to(dtype).flatten().unsqueeze(1).repeat(1, t_mask * img_len)
            self_attn_mask = fp_or(self_attn_mask, img2txt_mask_sq)

        # Regions flagged with a falsy entry get their whole mask OR-ed into the self-attention part.
        if self.use_self_attn_mask_list is not None:
            for mask, use_self_attn_mask in zip(self.masks, self.use_self_attn_mask_list):
                if not use_self_attn_mask:
                    img2txt_mask_sq = F.interpolate(mask.unsqueeze(0).to(torch.float16), (h, w), mode='nearest-exact').to(dtype).flatten().unsqueeze(1).repeat(1, t_mask * img_len)
                    self_attn_mask = fp_or(self_attn_mask, img2txt_mask_sq)

        attn_mask = torch.cat([cross_attn_mask, self_attn_mask], dim=1)

        self.attn_mask = CoreAttnMask(attn_mask, mask_type=mask_type)
def modulate(x, shift, scale):
    """Return ``x * (1 + scale) + shift`` with ``shift``/``scale`` given a new axis at position 1."""
    scaled = x * (1 + scale.unsqueeze(1))
    return scaled + shift.unsqueeze(1)


def find_multiple(n: int, k: int) -> int:
    """Round ``n`` up to the next multiple of ``k`` (``n`` itself if already a multiple)."""
    remainder = n % k
    return n if remainder == 0 else n + k - remainder


class MLP(nn.Module):   # not executed directly with ReAura?
    """Gated feed-forward block: ``c_proj(silu(c_fc1(x)) * c_fc2(x))``, all layers bias-free."""

    def __init__(self, dim, hidden_dim=None, dtype=None, device=None, operations=None) -> None:
        """Size the hidden layer as 2/3 of ``hidden_dim`` (default ``4 * dim``), rounded up to a multiple of 256."""
        super().__init__()
        hidden_dim = 4 * dim if hidden_dim is None else hidden_dim
        n_hidden = find_multiple(int(2 * hidden_dim / 3), 256)

        factory = dict(bias=False, dtype=dtype, device=device)
        self.c_fc1  = operations.Linear(dim, n_hidden, **factory)
        self.c_fc2  = operations.Linear(dim, n_hidden, **factory)
        self.c_proj = operations.Linear(n_hidden, dim, **factory)

    #@torch.compile(mode="default", dynamic=False, fullgraph=False, backend="inductor")
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply the gated projection to ``x``."""
        gate = F.silu(self.c_fc1(x))
        return self.c_proj(gate * self.c_fc2(x))


class MultiHeadLayerNorm(nn.Module):
    """Bias-free layer norm over the last axis, computed in float32 and scaled by ``weight``."""

    def __init__(self, hidden_size=None, eps=1e-5, dtype=None, device=None):
        # Copy pasta from https://github.com/huggingface/transformers/blob/e5f71ecaae50ea476d1e12351003790273c4b2ed/src/transformers/models/cohere/modeling_cohere.py#L78
        super().__init__()
        # weight is allocated uninitialised; values are expected to come from elsewhere (e.g. a checkpoint).
        self.weight = nn.Parameter(torch.empty(hidden_size, dtype=dtype, device=device))
        self.variance_epsilon = eps

    #@torch.compile(mode="default", dynamic=False, fullgraph=False, backend="inductor")
    def forward(self, hidden_states):
        """Normalise ``hidden_states`` over its last axis and return it in the input dtype."""
        original_dtype = hidden_states.dtype
        as_fp32  = hidden_states.to(torch.float32)
        centered = as_fp32 - as_fp32.mean(-1, keepdim=True)
        variance = centered.pow(2).mean(-1, keepdim=True)
        normed   = centered * torch.rsqrt(variance + self.variance_epsilon)
        scaled   = self.weight.to(torch.float32) * normed
        return scaled.to(original_dtype)
class ReSingleAttention(nn.Module):
    """Multi-head self-attention over one token sequence, with Q/K normalisation per head."""

    def __init__(self, dim, n_heads, mh_qknorm=False, dtype=None, device=None, operations=None):
        """Create bias-free Q/K/V/output projections and the Q/K norms.

        ``mh_qknorm`` selects ``MultiHeadLayerNorm`` over ``(n_heads, head_dim)``;
        otherwise a non-affine ``operations.LayerNorm`` over ``head_dim`` is used.
        """
        super().__init__()

        self.n_heads  = n_heads
        self.head_dim = dim // n_heads

        def square_projection():
            return operations.Linear(dim, dim, bias=False, dtype=dtype, device=device)

        def qk_norm():
            if mh_qknorm:
                return MultiHeadLayerNorm((self.n_heads, self.head_dim), dtype=dtype, device=device)
            return operations.LayerNorm(self.head_dim, elementwise_affine=False, dtype=dtype, device=device)

        # this is for cond
        self.w1q = square_projection()
        self.w1k = square_projection()
        self.w1v = square_projection()
        self.w1o = square_projection()

        self.q_norm1 = qk_norm()
        self.k_norm1 = qk_norm()

    #@torch.compile(mode="default", dynamic=False, fullgraph=False, backend="inductor")
    # Original author's example: c = 1,4552,3072; operations.Linear = torch.nn.Linear with recast
    def forward(self, c, mask=None):
        """Attend ``c`` to itself (optionally masked) and apply the output projection."""
        bsz, seqlen1, _ = c.shape
        per_head = (bsz, seqlen1, self.n_heads, self.head_dim)

        q = self.q_norm1(self.w1q(c).view(per_head))
        k = self.k_norm1(self.w1k(c).view(per_head))
        v = self.w1v(c).view(per_head)

        # attention_pytorch is given head-major tensors, hence the (0, 2, 1, 3) permutes and skip_reshape.
        attended = attention_pytorch(
            q.permute(0, 2, 1, 3),
            k.permute(0, 2, 1, 3),
            v.permute(0, 2, 1, 3),
            self.n_heads,
            skip_reshape=True,
            mask=mask,
        )
        return self.w1o(attended)
class ReDoubleAttention(nn.Module):
    """Joint attention over two streams (``c`` and ``x``) that keep separate projections and norms."""

    def __init__(self, dim, n_heads, mh_qknorm=False, dtype=None, device=None, operations=None):
        """Create Q/K/V/output projections and Q/K norms for each of the two streams.

        ``mh_qknorm`` selects ``MultiHeadLayerNorm`` over ``(n_heads, head_dim)``;
        otherwise a non-affine ``operations.LayerNorm`` over ``head_dim`` is used.
        """
        super().__init__()

        self.n_heads  = n_heads
        self.head_dim = dim // n_heads

        def square_projection():
            return operations.Linear(dim, dim, bias=False, dtype=dtype, device=device)

        def qk_norm():
            if mh_qknorm:
                return MultiHeadLayerNorm((self.n_heads, self.head_dim), dtype=dtype, device=device)
            return operations.LayerNorm(self.head_dim, elementwise_affine=False, dtype=dtype, device=device)

        # this is for cond   1 (one) not l (L)
        self.w1q = square_projection()
        self.w1k = square_projection()
        self.w1v = square_projection()
        self.w1o = square_projection()

        # this is for x
        self.w2q = square_projection()
        self.w2k = square_projection()
        self.w2v = square_projection()
        self.w2o = square_projection()

        self.q_norm1 = qk_norm()
        self.k_norm1 = qk_norm()
        self.q_norm2 = qk_norm()
        self.k_norm2 = qk_norm()

    #@torch.compile(mode="default", dynamic=False, fullgraph=False, backend="inductor")
    # Original author's example shapes: c.shape 1,264,3072    x.shape 1,4032,3072
    def forward(self, c, x, mask=None):
        """Run one attention call over the concatenation ``[c, x]`` and split the result back.

        Returns:
            ``(c, x)`` after their respective output projections.
        """
        _, seqlen1, _   = c.shape
        bsz, seqlen2, _ = x.shape   # the batch size used for both streams comes from x
        c_heads = (bsz, seqlen1, self.n_heads, self.head_dim)
        x_heads = (bsz, seqlen2, self.n_heads, self.head_dim)

        cq = self.q_norm1(self.w1q(c).view(c_heads))
        ck = self.k_norm1(self.w1k(c).view(c_heads))
        cv = self.w1v(c).view(c_heads)

        xq = self.q_norm2(self.w2q(x).view(x_heads))
        xk = self.k_norm2(self.w2k(x).view(x_heads))
        xv = self.w2v(x).view(x_heads)

        # Sequence axis is dim 1: c tokens first, then x tokens.
        # Original author's example: q,k,v.shape 1,4299,12,256; a mask would be 4299,4299.
        q = torch.cat([cq, xq], dim=1)
        k = torch.cat([ck, xk], dim=1)
        v = torch.cat([cv, xv], dim=1)

        attended = attention_pytorch(
            q.permute(0, 2, 1, 3),
            k.permute(0, 2, 1, 3),
            v.permute(0, 2, 1, 3),
            self.n_heads,
            skip_reshape=True,
            mask=mask,
        )

        c_out, x_out = attended.split([seqlen1, seqlen2], dim=1)
        return self.w1o(c_out), self.w2o(x_out)
class ReMMDiTBlock(nn.Module):
    """Two-stream transformer block: ``c`` and ``x`` are modulated separately and attend jointly."""

    def __init__(self, dim, heads=8, global_conddim=1024, is_last=False, dtype=None, device=None, operations=None):
        """Build norms, modulation layers, MLPs and the joint attention for both streams.

        With ``is_last=True`` the ``c`` stream gets no MLP and a 2-way modulation layer.
        """
        super().__init__()

        def plain_norm():
            return operations.LayerNorm(dim, elementwise_affine=False, dtype=dtype, device=device)

        def modulation(n_chunks):
            return nn.Sequential(
                nn.SiLU(),
                operations.Linear(global_conddim, n_chunks * dim, bias=False, dtype=dtype, device=device),
            )

        def feed_forward():
            return MLP(dim, hidden_dim=dim * 4, dtype=dtype, device=device, operations=operations)

        self.normC1 = plain_norm()
        self.normC2 = plain_norm()
        if is_last:
            self.modC = modulation(2)
        else:
            self.mlpC = feed_forward()
            self.modC = modulation(6)

        self.normX1 = plain_norm()
        self.normX2 = plain_norm()
        self.mlpX   = feed_forward()
        self.modX   = modulation(6)

        self.attn    = ReDoubleAttention(dim, heads, dtype=dtype, device=device, operations=operations)
        self.is_last = is_last

    #@torch.compile(mode="default", dynamic=False, fullgraph=False, backend="inductor")
    def forward(self, c, x, global_cond, mask=None, **kwargs):   # MAIN BLOCK
        """Apply modulated joint attention, then a gated MLP, with a residual on each stream.

        Returns:
            ``(c, x)`` after the block.
        """
        # NOTE(review): this path always unpacks six chunks from modC and calls self.mlpC,
        # neither of which matches what __init__ builds when is_last=True -- confirm that
        # is_last is never enabled, or handle it here.
        c_skip, x_skip = c, x

        c_mod = self.modC(global_cond).chunk(6, dim=1)
        c_shift_attn, c_scale_attn, c_gate_attn, c_shift_mlp, c_scale_mlp, c_gate_mlp = c_mod
        c = modulate(self.normC1(c), c_shift_attn, c_scale_attn)

        # xpath
        x_mod = self.modX(global_cond).chunk(6, dim=1)
        x_shift_attn, x_scale_attn, x_gate_attn, x_shift_mlp, x_scale_mlp, x_gate_mlp = x_mod
        x = modulate(self.normX1(x), x_shift_attn, x_scale_attn)

        # attention (original author's example shapes: c.shape 1,520,3072   x.shape 1,6144,3072)
        c, x = self.attn(c, x, mask=mask)

        # Both residuals below add the block *input*, not the post-attention sum.
        c = self.normC2(c_skip + c_gate_attn.unsqueeze(1) * c)
        c_out = c_skip + c_gate_mlp.unsqueeze(1) * self.mlpC(modulate(c, c_shift_mlp, c_scale_mlp))

        x = self.normX2(x_skip + x_gate_attn.unsqueeze(1) * x)
        x_out = x_skip + x_gate_mlp.unsqueeze(1) * self.mlpX(modulate(x, x_shift_mlp, x_scale_mlp))

        return c_out, x_out
class ReDiTBlock(nn.Module):
    """Single-stream counterpart of the MMDiT block."""
    # like MMDiTBlock, but it only has X

    def __init__(self, dim, heads=8, global_conddim=1024, dtype=None, device=None, operations=None):
        """Build the two non-affine norms, the 6-way modulation layer, self-attention and MLP."""
        super().__init__()

        def plain_norm():
            return operations.LayerNorm(dim, elementwise_affine=False, dtype=dtype, device=device)

        self.norm1 = plain_norm()
        self.norm2 = plain_norm()

        to_modulation = operations.Linear(global_conddim, 6 * dim, bias=False, dtype=dtype, device=device)
        self.modCX = nn.Sequential(nn.SiLU(), to_modulation)

        self.attn = ReSingleAttention(dim, heads, dtype=dtype, device=device, operations=operations)
        self.mlp  = MLP(dim, hidden_dim=dim * 4, dtype=dtype, device=device, operations=operations)

    #@torch.compile(mode="default", dynamic=False, fullgraph=False, backend="inductor")
    # Original author's example: cx.shape 1,6664,3072   global_cond.shape 1,3072   mlpout.shape 1,6664,3072   float16
    def forward(self, cx, global_cond, mask=None, **kwargs):
        """Apply modulated self-attention followed by a gated MLP and one residual connection."""
        skip = cx
        modulation = self.modCX(global_cond).chunk(6, dim=1)
        shift_attn, scale_attn, gate_attn, shift_mlp, scale_mlp, gate_mlp = modulation

        attended = self.attn(modulate(self.norm1(cx), shift_attn, scale_attn), mask=mask)
        normed   = self.norm2(skip + gate_attn.unsqueeze(1) * attended)
        mlp_out  = self.mlp(modulate(normed, shift_mlp, scale_mlp))

        # residual connection back to the block input
        return skip + gate_mlp.unsqueeze(1) * mlp_out
class TimestepEmbedder(nn.Module):
    """Turn scalar timesteps into vectors: sinusoidal features followed by a two-layer MLP."""

    def __init__(self, hidden_size, frequency_embedding_size=256, dtype=None, device=None, operations=None):
        """Build ``Linear -> SiLU -> Linear`` mapping ``frequency_embedding_size`` to ``hidden_size``."""
        super().__init__()
        project_in  = operations.Linear(frequency_embedding_size, hidden_size, dtype=dtype, device=device)
        project_out = operations.Linear(hidden_size, hidden_size, dtype=dtype, device=device)
        self.mlp = nn.Sequential(project_in, nn.SiLU(), project_out)
        self.frequency_embedding_size = frequency_embedding_size

    @staticmethod
    def timestep_embedding(t, dim, max_period=10000):
        """Return ``[cos, sin]`` features of ``t`` with ``dim`` columns.

        Frequencies are ``1000 * exp(-ln(max_period) * i / half)`` for ``i`` in
        ``range(dim // 2)``; an odd ``dim`` gets one trailing zero column.
        """
        half = dim // 2
        exponent = -math.log(max_period) * torch.arange(start=0, end=half) / half
        # Built on the default device, then moved to where t lives.
        freqs  = 1000 * torch.exp(exponent).to(t.device)
        phases = t[:, None] * freqs[None]

        columns = [torch.cos(phases), torch.sin(phases)]
        if dim % 2:
            columns.append(torch.zeros_like(phases[:, :1]))
        return torch.cat(columns, dim=-1)

    #@torch.compile(mode="default", dynamic=False, fullgraph=False, backend="inductor")
    def forward(self, t, dtype):
        """Embed ``t``; the sinusoidal features are cast to ``dtype`` before the MLP."""
        features = self.timestep_embedding(t, self.frequency_embedding_size).to(dtype)
        return self.mlp(features)
def __init__(
    self,
    in_channels=4,
    out_channels=4,
    patch_size=2,
    dim=3072,
    n_layers=36,
    n_double_layers=4,
    n_heads=12,
    global_conddim=3072,
    cond_seq_dim=2048,
    max_seq=32 * 32,
    device=None,
    dtype=None,
    operations=None,
):
    """Build the embedders, learned position/register tokens, transformer stacks and output head.

    The first ``n_double_layers`` blocks are two-stream ``ReMMDiTBlock``s; the
    remaining ``n_layers - n_double_layers`` are single-stream ``ReDiTBlock``s.
    """
    super().__init__()
    self.dtype = dtype
    factory = dict(dtype=dtype, device=device)
    patch_dim = patch_size * patch_size

    self.t_embedder = TimestepEmbedder(global_conddim, operations=operations, **factory)

    # linear for something like text sequence.
    self.cond_seq_linear = operations.Linear(cond_seq_dim, dim, bias=False, **factory)
    # init linear for patchified image.
    self.init_x_linear = operations.Linear(patch_dim * in_channels, dim, **factory)

    # Both parameters are allocated uninitialised (torch.empty).
    self.positional_encoding = nn.Parameter(torch.empty(1, max_seq, dim, **factory))
    self.register_tokens     = nn.Parameter(torch.empty(1, 8, dim, **factory))

    self.double_layers = nn.ModuleList([
        ReMMDiTBlock(dim, n_heads, global_conddim, is_last=(idx == n_layers - 1), operations=operations, **factory)
        for idx in range(n_double_layers)
    ])
    self.single_layers = nn.ModuleList([
        ReDiTBlock(dim, n_heads, global_conddim, operations=operations, **factory)
        for _ in range(n_double_layers, n_layers)
    ])

    self.final_linear = operations.Linear(dim, patch_dim * out_channels, bias=False, **factory)
    self.modF = nn.Sequential(
        nn.SiLU(),
        operations.Linear(global_conddim, 2 * dim, bias=False, **factory),
    )

    self.out_channels    = out_channels
    self.patch_size      = patch_size
    self.n_double_layers = n_double_layers
    self.n_layers        = n_layers

    # The positional table is treated as a square grid with this side length.
    grid_side = round(max_seq**0.5)
    self.h_max = grid_side
    self.w_max = grid_side
@torch.no_grad()
def extend_pe(self, init_dim=(16, 16), target_dim=(64, 64)):
    """Resize the positional-encoding table from an ``init_dim`` grid to a ``target_dim`` grid.

    The first ``init_dim[0] * init_dim[1]`` entries are viewed as a 2D grid,
    bilinearly interpolated, and written back; ``h_max``/``w_max`` are updated.
    """
    rows, cols = init_dim[0], init_dim[1]
    table = self.positional_encoding.data.squeeze(0)[: rows * cols]
    # channels-first layout for F.interpolate
    grid = table.view(rows, cols, -1).permute(2, 0, 1)
    resized = F.interpolate(grid.unsqueeze(0), size=target_dim, mode="bilinear")
    tokens = resized.squeeze(0).permute(1, 2, 0).flatten(0, 1)
    self.positional_encoding.data = tokens.unsqueeze(0).contiguous()
    self.h_max, self.w_max = target_dim

def pe_selection_index_based_on_dim(self, h, w):
    """Return flat indices of the centred ``(h // p, w // p)`` window of the positional grid."""
    rows = h // self.patch_size
    cols = w // self.patch_size
    index_grid = torch.arange(self.positional_encoding.shape[1]).view(self.h_max, self.w_max)
    top  = self.h_max // 2 - rows // 2
    left = self.w_max // 2 - cols // 2
    window = index_grid[top:top + rows, left:left + cols]
    return window.flatten()

def unpatchify(self, x, h, w):
    """Reassemble ``h * w`` patch tokens per sample into ``(N, C, h * p, w * p)`` images."""
    channels = self.out_channels
    patch    = self.patch_size
    batch    = x.shape[0]
    patches  = x.reshape(shape=(batch, h, w, patch, patch, channels))
    patches  = torch.einsum("nhwpqc->nchpwq", patches)
    return patches.reshape(shape=(batch, channels, h * patch, w * patch))

def patchify(self, x):
    """Pad ``x`` to the patch size and flatten it into one token per patch."""
    B, C, H, W = x.size()
    patch = self.patch_size
    padded = comfy.ldm.common_dit.pad_to_patch_size(x, (patch, patch))
    # NOTE(review): (H + 1) // patch equals the padded patch count only when patch_size == 2 -- confirm.
    tiles = padded.view(B, C, (H + 1) // patch, patch, (W + 1) // patch, patch)
    return tiles.permute(0, 2, 4, 1, 3, 5).flatten(-3).flatten(1, 2)

def apply_pos_embeds(self, x, h, w):
    """Add the centred crop of the positional grid (upsampled if too small) to ``x``."""
    rows = (h + 1) // self.patch_size
    cols = (w + 1) // self.patch_size
    needed = max(rows, cols)
    side = self.h_max   # the table is reshaped as a side x side grid
    grid = comfy.ops.cast_to_input(self.positional_encoding.reshape(1, side, side, -1), x)
    if needed > side:
        grid = F.interpolate(grid.movedim(-1, 1), (needed, needed), mode="bilinear").movedim(1, -1)
        side = needed
    top  = (side - rows) // 2
    left = (side - cols) // 2
    crop = grid[:, top:top + rows, left:left + cols]
    return x + crop.reshape(1, -1, self.positional_encoding.shape[-1])
= timestep[0].unsqueeze(0) #/ 1000 EO = transformer_options.get("ExtraOptions", ExtraOptions("")) if EO is not None: EO.mute = True y0_style_pos = transformer_options.get("y0_style_pos") y0_style_neg = transformer_options.get("y0_style_neg") y0_style_pos_weight = transformer_options.get("y0_style_pos_weight", 0.0) y0_style_pos_synweight = transformer_options.get("y0_style_pos_synweight", 0.0) y0_style_pos_synweight *= y0_style_pos_weight y0_style_neg_weight = transformer_options.get("y0_style_neg_weight", 0.0) y0_style_neg_synweight = transformer_options.get("y0_style_neg_synweight", 0.0) y0_style_neg_synweight *= y0_style_neg_weight out_list = [] for i in range(len(transformer_options['cond_or_uncond'])): UNCOND = transformer_options['cond_or_uncond'][i] == 1 x = x_orig[i][None,...].clone() context = context_orig.clone() patches_replace = transformer_options.get("patches_replace", {}) # patchify x, add PE b, c, h, w = x.shape h_len = ((h + (self.patch_size // 2)) // self.patch_size) # h_len 96 w_len = ((w + (self.patch_size // 2)) // self.patch_size) # w_len 96 x = self.init_x_linear(self.patchify(x)) # B, T_x, D x = self.apply_pos_embeds(x, h, w) if UNCOND: transformer_options['reg_cond_weight'] = transformer_options.get("regional_conditioning_weight", 0.0) transformer_options['reg_cond_floor'] = transformer_options.get("regional_conditioning_floor", 0.0) transformer_options['reg_cond_mask_orig'] = transformer_options.get('regional_conditioning_mask_orig') AttnMask = transformer_options.get('AttnMask', None) RegContext = transformer_options.get('RegContext', None) if AttnMask is not None and transformer_options['reg_cond_weight'] > 0.0: AttnMask.attn_mask_recast(x.dtype) context_tmp = RegContext.get().to(context.dtype) #context_tmp = 0 * context_tmp.clone() # If it's not a perfect factor, repeat and slice: A = context[i][None,...].clone() B = context_tmp context_tmp = A.repeat(1, (B.shape[1] // A.shape[1]) + 1, 1)[:, :B.shape[1], :] else: context_tmp = 
context[i][None,...].clone() elif UNCOND == False: transformer_options['reg_cond_weight'] = transformer_options.get("regional_conditioning_weight", 0.0) transformer_options['reg_cond_floor'] = transformer_options.get("regional_conditioning_floor", 0.0) transformer_options['reg_cond_mask_orig'] = transformer_options.get('regional_conditioning_mask_orig') AttnMask = transformer_options.get('AttnMask', None) RegContext = transformer_options.get('RegContext', None) if AttnMask is not None and transformer_options['reg_cond_weight'] > 0.0: AttnMask.attn_mask_recast(x.dtype) context_tmp = RegContext.get().to(context.dtype) else: context_tmp = context[i][None,...].clone() if context_tmp is None: context_tmp = context[i][None,...].clone() # process conditions for MMDiT Blocks #c_seq = context # B, T_c, D_c c_seq = context_tmp # B, T_c, D_c t = timestep c = self.cond_seq_linear(c_seq) # B, T_c, D # 1,256,2048 -> c = torch.cat([comfy.ops.cast_to_input(self.register_tokens, c).repeat(c.size(0), 1, 1), c], dim=1) #1,256,3072 -> 1,264,3072 global_cond = self.t_embedder(t, x.dtype) # B, D global_cond = global_cond[i][None] weight = transformer_options['reg_cond_weight'] if 'reg_cond_weight' in transformer_options else 0.0 floor = transformer_options['reg_cond_floor'] if 'reg_cond_floor' in transformer_options else 0.0 floor = min(floor, weight) reg_cond_mask_expanded = transformer_options.get('reg_cond_mask_expanded') reg_cond_mask_expanded = reg_cond_mask_expanded.to(img.dtype).to(img.device) if reg_cond_mask_expanded is not None else None reg_cond_mask = None AttnMask = transformer_options.get('AttnMask') mask = None if AttnMask is not None and weight > 0: mask = AttnMask.get(weight=weight) #mask_obj[0](transformer_options, weight.item()) mask_type_bool = type(mask[0][0].item()) == bool if mask is not None else False if not mask_type_bool: mask = mask.to(x.dtype) if mask_type_bool: mask = F.pad(mask, (8, 0, 8, 0), value=True) #mask = F.pad(mask, (0, 8, 0, 8), value=True) else: 
mask = F.pad(mask, (8, 0, 8, 0), value=1.0) text_len = context.shape[1] # mask_obj[0].text_len mask[text_len:,text_len:] = torch.clamp(mask[text_len:,text_len:], min=floor.to(mask.device)) #ORIGINAL SELF-ATTN REGION BLEED reg_cond_mask = reg_cond_mask_expanded.unsqueeze(0).clone() if reg_cond_mask_expanded is not None else None mask_type_bool = type(mask[0][0].item()) == bool if mask is not None else False total_layers = len(self.double_layers) + len(self.single_layers) blocks_replace = patches_replace.get("dit", {}) # context 1,259,2048 x 1,4032,3072 if len(self.double_layers) > 0: for i, layer in enumerate(self.double_layers): if mask_type_bool and weight < (i / (total_layers-1)) and mask is not None: mask = mask.to(x.dtype) if ("double_block", i) in blocks_replace: def block_wrap(args): out = {} out["txt"], out["img"] = layer( args["txt"], args["img"], args["vec"]) return out out = blocks_replace[("double_block", i)]({"img": x, "txt": c, "vec": global_cond}, {"original_block": block_wrap}) c = out["txt"] x = out["img"] else: c, x = layer(c, x, global_cond, mask=mask, **kwargs) if len(self.single_layers) > 0: c_len = c.size(1) cx = torch.cat([c, x], dim=1) for i, layer in enumerate(self.single_layers): if mask_type_bool and weight < ((len(self.double_layers) + i) / (total_layers-1)) and mask is not None: mask = mask.to(x.dtype) if ("single_block", i) in blocks_replace: def block_wrap(args): out = {} out["img"] = layer(args["img"], args["vec"]) return out out = blocks_replace[("single_block", i)]({"img": cx, "vec": global_cond}, {"original_block": block_wrap}) cx = out["img"] else: cx = layer(cx, global_cond, mask=mask, **kwargs) x = cx[:, c_len:] fshift, fscale = self.modF(global_cond).chunk(2, dim=1) x = modulate(x, fshift, fscale) x = self.final_linear(x) x = self.unpatchify(x, (h + 1) // self.patch_size, (w + 1) // self.patch_size)[:,:,:h,:w] out_list.append(x) eps = torch.stack(out_list, dim=0).squeeze(dim=1) freqsep_lowpass_method = 
transformer_options.get("freqsep_lowpass_method") freqsep_sigma = transformer_options.get("freqsep_sigma") freqsep_kernel_size = transformer_options.get("freqsep_kernel_size") freqsep_inner_kernel_size = transformer_options.get("freqsep_inner_kernel_size") freqsep_stride = transformer_options.get("freqsep_stride") freqsep_lowpass_weight = transformer_options.get("freqsep_lowpass_weight") freqsep_highpass_weight= transformer_options.get("freqsep_highpass_weight") freqsep_mask = transformer_options.get("freqsep_mask") dtype = eps.dtype if self.style_dtype is None else self.style_dtype if y0_style_pos is not None: y0_style_pos_weight = transformer_options.get("y0_style_pos_weight") y0_style_pos_synweight = transformer_options.get("y0_style_pos_synweight") y0_style_pos_synweight *= y0_style_pos_weight y0_style_pos_mask = transformer_options.get("y0_style_pos_mask") y0_style_pos_mask_edge = transformer_options.get("y0_style_pos_mask_edge") y0_style_pos = y0_style_pos.to(dtype) x = x_orig.clone().to(dtype) #x = x.to(dtype) eps = eps.to(dtype) eps_orig = eps.clone() sigma = SIGMA #t_orig[0].to(torch.float32) / 1000 denoised = x - sigma * eps denoised_embed = self.Retrojector.embed(denoised) y0_adain_embed = self.Retrojector.embed(y0_style_pos) if transformer_options['y0_style_method'] == "scattersort": tile_h, tile_w = transformer_options.get('y0_style_tile_height'), transformer_options.get('y0_style_tile_width') pad = transformer_options.get('y0_style_tile_padding') if pad is not None and tile_h is not None and tile_w is not None: denoised_spatial = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) y0_adain_spatial = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) if EO("scattersort_median_LP"): denoised_spatial_LP = median_blur_2d(denoised_spatial, kernel_size=EO("scattersort_median_LP",7)) y0_adain_spatial_LP = median_blur_2d(y0_adain_spatial, kernel_size=EO("scattersort_median_LP",7)) denoised_spatial_HP = denoised_spatial - 
denoised_spatial_LP y0_adain_spatial_HP = y0_adain_spatial - y0_adain_spatial_LP denoised_spatial_LP = apply_scattersort_tiled(denoised_spatial_LP, y0_adain_spatial_LP, tile_h, tile_w, pad) denoised_spatial = denoised_spatial_LP + denoised_spatial_HP denoised_embed = rearrange(denoised_spatial, "b c h w -> b (h w) c") else: denoised_spatial = apply_scattersort_tiled(denoised_spatial, y0_adain_spatial, tile_h, tile_w, pad) denoised_embed = rearrange(denoised_spatial, "b c h w -> b (h w) c") else: denoised_embed = apply_scattersort_masked(denoised_embed, y0_adain_embed, y0_style_pos_mask, y0_style_pos_mask_edge, h_len, w_len) elif transformer_options['y0_style_method'] == "AdaIN": if freqsep_mask is not None: freqsep_mask = freqsep_mask.view(1, 1, *freqsep_mask.shape[-2:]).float() freqsep_mask = F.interpolate(freqsep_mask.float(), size=(h_len, w_len), mode='nearest-exact') if hasattr(self, "adain_tile"): tile_h, tile_w = self.adain_tile denoised_pretile = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) y0_adain_pretile = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) if self.adain_flag: h_off = tile_h // 2 w_off = tile_w // 2 denoised_pretile = denoised_pretile[:,:,h_off:-h_off, w_off:-w_off] self.adain_flag = False else: h_off = 0 w_off = 0 self.adain_flag = True tiles, orig_shape, grid, strides = tile_latent(denoised_pretile, tile_size=(tile_h,tile_w)) y0_tiles, orig_shape, grid, strides = tile_latent(y0_adain_pretile, tile_size=(tile_h,tile_w)) tiles_out = [] for i in range(tiles.shape[0]): tile = tiles[i].unsqueeze(0) y0_tile = y0_tiles[i].unsqueeze(0) tile = rearrange(tile, "b c h w -> b (h w) c", h=tile_h, w=tile_w) y0_tile = rearrange(y0_tile, "b c h w -> b (h w) c", h=tile_h, w=tile_w) tile = adain_seq_inplace(tile, y0_tile) tiles_out.append(rearrange(tile, "b (h w) c -> b c h w", h=tile_h, w=tile_w)) tiles_out_tensor = torch.cat(tiles_out, dim=0) tiles_out_tensor = untile_latent(tiles_out_tensor, orig_shape, grid, 
strides) if h_off == 0: denoised_pretile = tiles_out_tensor else: denoised_pretile[:,:,h_off:-h_off, w_off:-w_off] = tiles_out_tensor denoised_embed = rearrange(denoised_pretile, "b c h w -> b (h w) c", h=h_len, w=w_len) elif freqsep_lowpass_method is not None and freqsep_lowpass_method.endswith("pw"): #EO("adain_pw"): denoised_spatial = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) y0_adain_spatial = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) if freqsep_lowpass_method == "median_pw": denoised_spatial_new = adain_patchwise_row_batch_med(denoised_spatial.clone(), y0_adain_spatial.clone().repeat(denoised_spatial.shape[0],1,1,1), sigma=freqsep_sigma, kernel_size=freqsep_kernel_size, use_median_blur=True, lowpass_weight=freqsep_lowpass_weight, highpass_weight=freqsep_highpass_weight) elif freqsep_lowpass_method == "gaussian_pw": denoised_spatial_new = adain_patchwise_row_batch(denoised_spatial.clone(), y0_adain_spatial.clone().repeat(denoised_spatial.shape[0],1,1,1), sigma=freqsep_sigma, kernel_size=freqsep_kernel_size) denoised_embed = rearrange(denoised_spatial_new, "b c h w -> b (h w) c", h=h_len, w=w_len) elif freqsep_lowpass_method is not None: denoised_spatial = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) y0_adain_spatial = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) if freqsep_lowpass_method == "median": denoised_spatial_LP = median_blur_2d(denoised_spatial, kernel_size=freqsep_kernel_size) y0_adain_spatial_LP = median_blur_2d(y0_adain_spatial, kernel_size=freqsep_kernel_size) elif freqsep_lowpass_method == "gaussian": denoised_spatial_LP = gaussian_blur_2d(denoised_spatial, sigma=freqsep_sigma, kernel_size=freqsep_kernel_size) y0_adain_spatial_LP = gaussian_blur_2d(y0_adain_spatial, sigma=freqsep_sigma, kernel_size=freqsep_kernel_size) denoised_spatial_HP = denoised_spatial - denoised_spatial_LP if EO("adain_fs_uhp"): y0_adain_spatial_HP = y0_adain_spatial - 
y0_adain_spatial_LP denoised_spatial_ULP = gaussian_blur_2d(denoised_spatial, sigma=EO("adain_fs_uhp_sigma", 1.0), kernel_size=EO("adain_fs_uhp_kernel_size", 3)) y0_adain_spatial_ULP = gaussian_blur_2d(y0_adain_spatial, sigma=EO("adain_fs_uhp_sigma", 1.0), kernel_size=EO("adain_fs_uhp_kernel_size", 3)) denoised_spatial_UHP = denoised_spatial_HP - denoised_spatial_ULP y0_adain_spatial_UHP = y0_adain_spatial_HP - y0_adain_spatial_ULP #denoised_spatial_HP = y0_adain_spatial_ULP + denoised_spatial_UHP denoised_spatial_HP = denoised_spatial_ULP + y0_adain_spatial_UHP denoised_spatial_new = freqsep_lowpass_weight * y0_adain_spatial_LP + freqsep_highpass_weight * denoised_spatial_HP denoised_embed = rearrange(denoised_spatial_new, "b c h w -> b (h w) c", h=h_len, w=w_len) else: denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed) for adain_iter in range(EO("style_iter", 0)): denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed) denoised_embed = self.Retrojector.embed(self.Retrojector.unembed(denoised_embed)) denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed) elif transformer_options['y0_style_method'] == "WCT": self.StyleWCT.set(y0_adain_embed) denoised_embed = self.StyleWCT.get(denoised_embed) if transformer_options.get('y0_standard_guide') is not None: y0_standard_guide = transformer_options.get('y0_standard_guide') y0_standard_guide_embed = self.Retrojector.embed(y0_standard_guide) f_cs = self.StyleWCT.get(y0_standard_guide_embed) self.y0_standard_guide = self.Retrojector.unembed(f_cs) if transformer_options.get('y0_inv_standard_guide') is not None: y0_inv_standard_guide = transformer_options.get('y0_inv_standard_guide') y0_inv_standard_guide_embed = self.Retrojector.embed(y0_inv_standard_guide) f_cs = self.StyleWCT.get(y0_inv_standard_guide_embed) self.y0_inv_standard_guide = self.Retrojector.unembed(f_cs) denoised_approx = self.Retrojector.unembed(denoised_embed) eps = (x - denoised_approx) / sigma if not UNCOND: if 
eps.shape[0] == 2: eps[1] = eps_orig[1] + y0_style_pos_weight * (eps[1] - eps_orig[1]) eps[0] = eps_orig[0] + y0_style_pos_synweight * (eps[0] - eps_orig[0]) else: eps[0] = eps_orig[0] + y0_style_pos_weight * (eps[0] - eps_orig[0]) elif eps.shape[0] == 1 and UNCOND: eps[0] = eps_orig[0] + y0_style_pos_synweight * (eps[0] - eps_orig[0]) eps = eps.float() if y0_style_neg is not None: y0_style_neg_weight = transformer_options.get("y0_style_neg_weight") y0_style_neg_synweight = transformer_options.get("y0_style_neg_synweight") y0_style_neg_synweight *= y0_style_neg_weight y0_style_neg_mask = transformer_options.get("y0_style_neg_mask") y0_style_neg_mask_edge = transformer_options.get("y0_style_neg_mask_edge") y0_style_neg = y0_style_neg.to(dtype) x = x.to(dtype) eps = eps.to(dtype) eps_orig = eps.clone() sigma = SIGMA #t_orig[0].to(torch.float32) / 1000 denoised = x - sigma * eps denoised_embed = self.Retrojector.embed(denoised) y0_adain_embed = self.Retrojector.embed(y0_style_neg) if transformer_options['y0_style_method'] == "scattersort": tile_h, tile_w = transformer_options.get('y0_style_tile_height'), transformer_options.get('y0_style_tile_width') pad = transformer_options.get('y0_style_tile_padding') if pad is not None and tile_h is not None and tile_w is not None: denoised_spatial = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) y0_adain_spatial = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) denoised_spatial = apply_scattersort_tiled(denoised_spatial, y0_adain_spatial, tile_h, tile_w, pad) denoised_embed = rearrange(denoised_spatial, "b c h w -> b (h w) c") else: denoised_embed = apply_scattersort_masked(denoised_embed, y0_adain_embed, y0_style_neg_mask, y0_style_neg_mask_edge, h_len, w_len) elif transformer_options['y0_style_method'] == "AdaIN": denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed) for adain_iter in range(EO("style_iter", 0)): denoised_embed = adain_seq_inplace(denoised_embed, 
def unpatchify2(x: torch.Tensor, H: int, W: int, patch_size: int) -> torch.Tensor:
    """
    Reassemble an image batch from its flattened patch sequence (inverse of patchify).

    x:       (B, N, C*p*p) patch tokens, N = ceil(H/p) * ceil(W/p)
    returns: (B, C, H, W); rows/columns past H and W (padding) are cut off
    """
    batch, num_patches, patch_dim = x.shape
    rows = math.ceil(H / patch_size)
    cols = math.ceil(W / patch_size)
    channels = patch_dim // (patch_size * patch_size)
    assert num_patches == rows * cols, f"Expected N={rows*cols} patches, got {num_patches}"

    # (B, N, C*p*p) -> (B, Hp, Wp, C, p, p) -> (B, C, Hp, p, Wp, p)
    grid = x.view(batch, rows, cols, channels, patch_size, patch_size)
    grid = grid.permute(0, 3, 1, 4, 2, 5)

    # merge (Hp, p) and (Wp, p) into the padded height and width
    padded = grid.reshape(batch, channels, rows * patch_size, cols * patch_size)
    return padded[:, :, :H, :W]
def add_beta(NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS, extra_samplers):
    """
    Register the beta nodes, their display names and the preset samplers.

    The three dictionaries are updated in place and also returned, in the same
    order they were passed in.
    """
    # Node classes that live in samplers_extensions; the registry key is the class name.
    extension_node_names = (
        "SharkOptions_Beta",
        "ClownOptions_SDE_Beta",
        "ClownOptions_DetailBoost_Beta",
        "ClownGuide_Style_Beta",
        "ClownGuide_Style_EdgeWidth",
        "ClownGuide_Style_TileSize",
        "ClownGuide_Beta",
        "ClownGuides_Beta",
        "ClownGuidesAB_Beta",
        "ClownGuides_Sync",
        "ClownGuides_Sync_Advanced",
        "ClownGuide_FrequencySeparation",
        "SharkOptions_GuiderInput",
        "ClownOptions_ImplicitSteps_Beta",
        "ClownOptions_Cycles_Beta",
        "SharkOptions_GuideCond_Beta",
        "SharkOptions_GuideConds_Beta",
        "ClownOptions_Tile_Beta",
        "ClownOptions_Tile_Advanced_Beta",
        "ClownGuide_Mean_Beta",
        "ClownGuide_AdaIN_MMDiT_Beta",
        "ClownGuide_AttnInj_MMDiT_Beta",
        "ClownGuide_StyleNorm_Advanced_HiDream",
        "ClownOptions_SDE_Mask_Beta",
        "ClownOptions_StepSize_Beta",
        "ClownOptions_SigmaScaling_Beta",
        "ClownOptions_Momentum_Beta",
        "ClownOptions_SwapSampler_Beta",
        "ClownOptions_ExtraOptions_Beta",
        "ClownOptions_Automation_Beta",
        "SharkOptions_UltraCascade_Latent_Beta",
        "SharkOptions_StartStep_Beta",
        "ClownOptions_Combine",
        "ClownOptions_Frameweights",
        "ClownOptions_FlowGuide",
        "ClownStyle_Block_MMDiT",
        "ClownStyle_MMDiT",
        "ClownStyle_Attn_MMDiT",
        "ClownStyle_Boost",
        "ClownStyle_UNet",
        "ClownStyle_Block_UNet",
        "ClownStyle_Attn_UNet",
        "ClownStyle_ResBlock_UNet",
        "ClownStyle_SpatialBlock_UNet",
        "ClownStyle_TransformerBlock_UNet",
        "ClownSamplerSelector_Beta",
    )
    # Node classes that live in samplers; again keyed by class name.
    sampler_node_names = (
        "SharkSampler_Beta",
        "SharkChainsampler_Beta",
        "ClownsharKSampler_Beta",
        "ClownsharkChainsampler_Beta",
        "ClownSampler_Beta",
        "ClownSamplerAdvanced_Beta",
        "BongSampler",
    )

    # Resolve every class before touching the caller's dict, then apply one update.
    node_classes = {name: getattr(samplers_extensions, name) for name in extension_node_names}
    for name in sampler_node_names:
        node_classes[name] = getattr(samplers, name)
    NODE_CLASS_MAPPINGS.update(node_classes)

    preset_samplers = {
        "res_2m": sample_res_2m,
        "res_3m": sample_res_3m,
        "res_2s": sample_res_2s,
        "res_3s": sample_res_3s,
        "res_5s": sample_res_5s,
        "res_6s": sample_res_6s,
        "res_2m_ode": sample_res_2m_ode,
        "res_3m_ode": sample_res_3m_ode,
        "res_2s_ode": sample_res_2s_ode,
        "res_3s_ode": sample_res_3s_ode,
        "res_5s_ode": sample_res_5s_ode,
        "res_6s_ode": sample_res_6s_ode,
        "deis_2m": sample_deis_2m,
        "deis_3m": sample_deis_3m,
        "deis_2m_ode": sample_deis_2m_ode,
        "deis_3m_ode": sample_deis_3m_ode,
        "rk_beta": rk_sampler_beta.sample_rk_beta,
    }
    extra_samplers.update(preset_samplers)

    display_names = {
        "SharkSampler_Beta": "SharkSampler",
        "SharkChainsampler_Beta": "SharkChainsampler",
        "BongSampler": "BongSampler",
        "ClownsharKSampler_Beta": "ClownsharKSampler",
        "ClownsharkChainsampler_Beta": "ClownsharkChainsampler",
        "ClownSampler_Beta": "ClownSampler",
        "ClownSamplerAdvanced_Beta": "ClownSamplerAdvanced",
        "ClownGuide_Mean_Beta": "ClownGuide Mean",
        "ClownGuide_AdaIN_MMDiT_Beta": "ClownGuide AdaIN (HiDream)",
        "ClownGuide_AttnInj_MMDiT_Beta": "ClownGuide AttnInj (HiDream)",
        "ClownGuide_StyleNorm_Advanced_HiDream": "ClownGuide_StyleNorm_Advanced_HiDream",
        "ClownGuide_Style_Beta": "ClownGuide Style",
        "ClownGuide_Beta": "ClownGuide",
        "ClownGuides_Beta": "ClownGuides",
        "ClownGuides_Sync": "ClownGuides Sync",
        "ClownGuides_Sync_Advanced": "ClownGuides Sync_Advanced",
        "ClownGuidesAB_Beta": "ClownGuidesAB",
        "ClownSamplerSelector_Beta": "ClownSamplerSelector",
        "ClownOptions_SDE_Mask_Beta": "ClownOptions SDE Mask",
        "ClownOptions_SDE_Beta": "ClownOptions SDE",
        "ClownOptions_StepSize_Beta": "ClownOptions Step Size",
        "ClownOptions_DetailBoost_Beta": "ClownOptions Detail Boost",
        "ClownOptions_SigmaScaling_Beta": "ClownOptions Sigma Scaling",
        "ClownOptions_Momentum_Beta": "ClownOptions Momentum",
        "ClownOptions_ImplicitSteps_Beta": "ClownOptions Implicit Steps",
        "ClownOptions_Cycles_Beta": "ClownOptions Cycles",
        "ClownOptions_SwapSampler_Beta": "ClownOptions Swap Sampler",
        "ClownOptions_ExtraOptions_Beta": "ClownOptions Extra Options",
        "ClownOptions_Automation_Beta": "ClownOptions Automation",
        "SharkOptions_GuideCond_Beta": "SharkOptions Guide Cond",
        "SharkOptions_GuideConds_Beta": "SharkOptions Guide Conds",
        "SharkOptions_Beta": "SharkOptions",
        "SharkOptions_StartStep_Beta": "SharkOptions Start Step",
        "SharkOptions_UltraCascade_Latent_Beta": "SharkOptions UltraCascade Latent",
        "ClownOptions_Combine": "ClownOptions Combine",
        "ClownOptions_Frameweights": "ClownOptions Frameweights",
        "SharkOptions_GuiderInput": "SharkOptions Guider Input",
        "ClownOptions_Tile_Beta": "ClownOptions Tile",
        "ClownOptions_Tile_Advanced_Beta": "ClownOptions Tile Advanced",
    }
    NODE_DISPLAY_NAME_MAPPINGS.update(display_names)

    return NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS, extra_samplers
def _sample_rk_beta_preset(rk_type, model, x, sigmas, extra_args, callback, disable, ode=False):
    """
    Forward to rk_sampler_beta.sample_rk_beta with a fixed rk_type.

    The fourth positional argument of sample_rk_beta is always passed as None.
    With ode=True the call additionally pins eta=0.0 and eta_substep=0.0;
    otherwise those keywords are not passed at all.
    """
    overrides = {"eta": 0.0, "eta_substep": 0.0} if ode else {}
    return rk_sampler_beta.sample_rk_beta(
        model, x, sigmas, None, extra_args, callback, disable,
        rk_type=rk_type,
        **overrides,
    )


def sample_res_6s(model, x, sigmas, extra_args=None, callback=None, disable=None):
    """Preset: sample_rk_beta with rk_type="res_6s"."""
    return _sample_rk_beta_preset("res_6s", model, x, sigmas, extra_args, callback, disable)


def sample_res_2m_ode(model, x, sigmas, extra_args=None, callback=None, disable=None):
    """Preset: sample_rk_beta with rk_type="res_2m", eta=0.0, eta_substep=0.0."""
    return _sample_rk_beta_preset("res_2m", model, x, sigmas, extra_args, callback, disable, ode=True)


def sample_res_3m_ode(model, x, sigmas, extra_args=None, callback=None, disable=None):
    """Preset: sample_rk_beta with rk_type="res_3m", eta=0.0, eta_substep=0.0."""
    return _sample_rk_beta_preset("res_3m", model, x, sigmas, extra_args, callback, disable, ode=True)


def sample_res_2s_ode(model, x, sigmas, extra_args=None, callback=None, disable=None):
    """Preset: sample_rk_beta with rk_type="res_2s", eta=0.0, eta_substep=0.0."""
    return _sample_rk_beta_preset("res_2s", model, x, sigmas, extra_args, callback, disable, ode=True)


def sample_res_3s_ode(model, x, sigmas, extra_args=None, callback=None, disable=None):
    """Preset: sample_rk_beta with rk_type="res_3s", eta=0.0, eta_substep=0.0."""
    return _sample_rk_beta_preset("res_3s", model, x, sigmas, extra_args, callback, disable, ode=True)


def sample_res_5s_ode(model, x, sigmas, extra_args=None, callback=None, disable=None):
    """Preset: sample_rk_beta with rk_type="res_5s", eta=0.0, eta_substep=0.0."""
    return _sample_rk_beta_preset("res_5s", model, x, sigmas, extra_args, callback, disable, ode=True)


def sample_res_6s_ode(model, x, sigmas, extra_args=None, callback=None, disable=None):
    """Preset: sample_rk_beta with rk_type="res_6s", eta=0.0, eta_substep=0.0."""
    return _sample_rk_beta_preset("res_6s", model, x, sigmas, extra_args, callback, disable, ode=True)


def sample_deis_2m(model, x, sigmas, extra_args=None, callback=None, disable=None):
    """Preset: sample_rk_beta with rk_type="deis_2m"."""
    return _sample_rk_beta_preset("deis_2m", model, x, sigmas, extra_args, callback, disable)


def sample_deis_3m(model, x, sigmas, extra_args=None, callback=None, disable=None):
    """Preset: sample_rk_beta with rk_type="deis_3m"."""
    return _sample_rk_beta_preset("deis_3m", model, x, sigmas, extra_args, callback, disable)
# Shared limits and option-name lists for the beta sampler package.
# NOTE(review): the consumers of these values are not visible from this module;
# the per-constant notes below are inferred from the identifiers only and
# should be confirmed at the call sites.

# Presumably an upper bound on the number of sampling steps.
MAX_STEPS = 10000

# Presumably the selectable implicit-step strategies.
IMPLICIT_TYPE_NAMES = [
    "rebound",
    "retro-eta",
    "bongmath",
    "predictor-corrector",
]

# Presumably the guide modes offered by the simple guide nodes; "none" is listed last.
GUIDE_MODE_NAMES_BETA_SIMPLE = [
    "flow",
    "sync",
    "lure",
    "data",
    "epsilon",
    "inversion",
    "pseudoimplicit",
    "fully_pseudoimplicit",
    "none",
]

# Presumably the kinds of frame-weight configuration that can be supplied.
FRAME_WEIGHTS_CONFIG_NAMES = [
    "frame_weights",
    "frame_weights_inv",
    "frame_targets"
]

# Presumably the curve shapes available for frame weights.
FRAME_WEIGHTS_DYNAMICS_NAMES = [
    "constant",
    "linear",
    "ease_out",
    "ease_in",
    "middle",
    "trough",
]

# Presumably the frame-weight schedules; every name is "<speed>_<early|late>".
FRAME_WEIGHTS_SCHEDULE_NAMES = [
    "moderate_early",
    "moderate_late",
    "fast_early",
    "fast_late",
    "slow_early",
    "slow_late",
]

# All guide-mode names containing "pseudoimplicit", including the
# "fully_", "_projection" and "_cw" variants.
GUIDE_MODE_NAMES_PSEUDOIMPLICIT = [
    "pseudoimplicit",
    "pseudoimplicit_cw",
    "pseudoimplicit_projection",
    "pseudoimplicit_projection_cw",
    "fully_pseudoimplicit",
    "fully_pseudoimplicit_projection",
    "fully_pseudoimplicit_cw",
    "fully_pseudoimplicit_projection_cw"
]
def edm2t(edm_steps, epsilon_s=1e-3, sigma_min=0.002, sigma_max=80):
    """
    Transfer the time variable used in EDM (sigma) to the one used in DEIS (t).

    Returns a 3-tuple (t_steps, vp_beta_min, vp_beta_d + vp_beta_min); the
    converted steps are computed on CPU from a detached copy of edm_steps.
    """
    # log(sigma^2 + 1) at both ends of the sigma range
    log_var_min = np.log(torch.tensor(sigma_min).cpu() ** 2 + 1)
    log_var_max = np.log(torch.tensor(sigma_max).cpu() ** 2 + 1)

    vp_beta_d = 2 * (log_var_min / epsilon_s - log_var_max) / (epsilon_s - 1)
    vp_beta_min = log_var_max - 0.5 * vp_beta_d

    beta_d = vp_beta_d.clone().detach().cpu()
    beta_min = vp_beta_min.clone().detach().cpu()
    sigma = edm_steps.clone().detach().cpu()

    # inverse of sigma(t) = sqrt(exp(0.5 * beta_d * t^2 + beta_min * t) - 1)
    t_steps = ((beta_min ** 2 + 2 * beta_d * (sigma ** 2 + 1).log()).sqrt() - beta_min) / beta_d
    return t_steps, vp_beta_min, vp_beta_d + vp_beta_min


def cal_poly(prev_t, j, taus):
    """
    Evaluate at taus the j-th Lagrange basis polynomial over the nodes prev_t.

    Returns the plain integer 1 when prev_t holds no node other than j.
    """
    basis = 1
    node_j = prev_t[j]
    for k, node_k in enumerate(prev_t):
        if k != j:
            basis *= (taus - node_k) / (node_j - node_k)
    return basis
def t2alpha_fn(beta_0, beta_1, t): return torch.exp(-0.5 * t ** 2 * (beta_1 - beta_0) - t * beta_0) #---------------------------------------------------------------------------- def cal_integrand(beta_0, beta_1, taus): with torch.inference_mode(mode=False): taus = taus.clone() beta_0 = beta_0.clone() beta_1 = beta_1.clone() with torch.enable_grad(): taus.requires_grad_(True) alpha = t2alpha_fn(beta_0, beta_1, taus) log_alpha = alpha.log() log_alpha.sum().backward() d_log_alpha_dtau = taus.grad integrand = -0.5 * d_log_alpha_dtau / torch.sqrt(alpha * (1 - alpha)) return integrand #---------------------------------------------------------------------------- def get_deis_coeff_list(t_steps, max_order, N=10000, deis_mode='tab'): """ Get the coefficient list for DEIS sampling. Args: t_steps: A pytorch tensor. The time steps for sampling. max_order: A `int`. Maximum order of the solver. 1 <= max_order <= 4 N: A `int`. Use how many points to perform the numerical integration when deis_mode=='tab'. deis_mode: A `str`. Select between 'tab' and 'rhoab'. Type of DEIS. Returns: A pytorch tensor. A batch of generated samples or sampling trajectories if return_inters=True. 
""" if deis_mode == 'tab': t_steps, beta_0, beta_1 = edm2t(t_steps) C = [] for i, (t_cur, t_next) in enumerate(zip(t_steps[:-1], t_steps[1:])): order = min(i+1, max_order) if order == 1: C.append([]) else: taus = torch.linspace(t_cur, t_next, N) # split the interval for integral approximation dtau = (t_next - t_cur) / N prev_t = t_steps[[i - k for k in range(order)]] coeff_temp = [] integrand = cal_integrand(beta_0, beta_1, taus) for j in range(order): poly = cal_poly(prev_t, j, taus) coeff_temp.append(torch.sum(integrand * poly) * dtau) C.append(coeff_temp) elif deis_mode == 'rhoab': # Analytical solution, second order def get_def_integral_2(a, b, start, end, c): coeff = (end**3 - start**3) / 3 - (end**2 - start**2) * (a + b) / 2 + (end - start) * a * b return coeff / ((c - a) * (c - b)) # Analytical solution, third order def get_def_integral_3(a, b, c, start, end, d): coeff = (end**4 - start**4) / 4 - (end**3 - start**3) * (a + b + c) / 3 \ + (end**2 - start**2) * (a*b + a*c + b*c) / 2 - (end - start) * a * b * c return coeff / ((d - a) * (d - b) * (d - c)) C = [] for i, (t_cur, t_next) in enumerate(zip(t_steps[:-1], t_steps[1:])): order = min(i+1, max_order) #fixed order calcs if order == 1: C.append([]) else: prev_t = t_steps[[i - k for k in range(order+1)]] if order == 2: coeff_cur = ((t_next - prev_t[1])**2 - (t_cur - prev_t[1])**2) / (2 * (t_cur - prev_t[1])) coeff_prev1 = (t_next - t_cur)**2 / (2 * (prev_t[1] - t_cur)) coeff_temp = [coeff_cur, coeff_prev1] elif order == 3: coeff_cur = get_def_integral_2(prev_t[1], prev_t[2], t_cur, t_next, t_cur) coeff_prev1 = get_def_integral_2(t_cur, prev_t[2], t_cur, t_next, prev_t[1]) coeff_prev2 = get_def_integral_2(t_cur, prev_t[1], t_cur, t_next, prev_t[2]) coeff_temp = [coeff_cur, coeff_prev1, coeff_prev2] elif order == 4: coeff_cur = get_def_integral_3(prev_t[1], prev_t[2], prev_t[3], t_cur, t_next, t_cur) coeff_prev1 = get_def_integral_3(t_cur, prev_t[2], prev_t[3], t_cur, t_next, prev_t[1]) coeff_prev2 = 
get_def_integral_3(t_cur, prev_t[1], prev_t[3], t_cur, t_next, prev_t[2]) coeff_prev3 = get_def_integral_3(t_cur, prev_t[1], prev_t[2], t_cur, t_next, prev_t[3]) coeff_temp = [coeff_cur, coeff_prev1, coeff_prev2, coeff_prev3] C.append(coeff_temp) return C ================================================ FILE: beta/noise_classes.py ================================================ import torch import torch.nn.functional as F from torch import nn, Tensor, Generator, lerp from torch.nn.functional import unfold from torch.distributions import StudentT, Laplace import numpy as np import pywt import functools from typing import Callable, Tuple from math import pi from comfy.k_diffusion.sampling import BrownianTreeNoiseSampler from ..res4lyf import RESplain # Set this to "True" if you have installed OpenSimplex. Recommended to install without dependencies due to conflicting packages: pip3 install opensimplex --no-deps OPENSIMPLEX_ENABLE = False if OPENSIMPLEX_ENABLE: from opensimplex import OpenSimplex class PrecisionTool: def __init__(self, cast_type='fp64'): self.cast_type = cast_type def cast_tensor(self, func): @functools.wraps(func) def wrapper(*args, **kwargs): if self.cast_type not in ['fp64', 'fp32', 'fp16']: return func(*args, **kwargs) target_device = None for arg in args: if torch.is_tensor(arg): target_device = arg.device break if target_device is None: for v in kwargs.values(): if torch.is_tensor(v): target_device = v.device break # recursively zs_recast tensors in nested dictionaries def cast_and_move_to_device(data): if torch.is_tensor(data): if self.cast_type == 'fp64': return data.to(torch.float64).to(target_device) elif self.cast_type == 'fp32': return data.to(torch.float32).to(target_device) elif self.cast_type == 'fp16': return data.to(torch.float16).to(target_device) elif isinstance(data, dict): return {k: cast_and_move_to_device(v) for k, v in data.items()} return data new_args = [cast_and_move_to_device(arg) for arg in args] new_kwargs = {k: 
class NoiseGenerator:
    """
    Base class for the noise generators in this module.

    Holds the geometry of the noise to produce (size, dtype, layout, device), the
    sigma range, the seed and a seeded torch.Generator. Subclasses implement
    __call__ to actually draw noise.
    """

    def __init__(self, x=None, size=None, dtype=None, layout=None, device=None, seed=42, generator=None, sigma_min=None, sigma_max=None):
        """
        Args:
            x: optional reference tensor; size, dtype, layout and device default to its properties.
            size, dtype, layout, device: explicit values; each one that is not None
                overrides what was taken from x. When x is None a zero tensor is
                built from them and size is mandatory.
            seed: seed for the generator created when `generator` is None.
            generator: optional ready-made torch.Generator to use instead.
            sigma_min, sigma_max: stored as given; tensors are moved to `device` when one is passed.

        Raises:
            ValueError: if neither x nor size is provided.
        """
        self.seed = seed

        if x is None:
            if size is None:
                raise ValueError("NoiseGenerator requires either a reference tensor 'x' or an explicit 'size'.")
            # dtype/layout/device are keyword-only for torch.zeros; passing them
            # positionally (as before) always raised a TypeError.
            tensor_opts = {
                name: value
                for name, value in (("dtype", dtype), ("layout", layout), ("device", device))
                if value is not None
            }
            x = torch.zeros(size, **tensor_opts)

        # Defaults come from the reference tensor so every attribute is always defined.
        self.x      = x
        self.size   = x.shape
        self.dtype  = x.dtype
        self.layout = x.layout
        self.device = x.device

        # allow overriding parameters imported from latent 'x' if specified
        if size is not None:
            self.size   = size
        if dtype is not None:
            self.dtype  = dtype
        if layout is not None:
            self.layout = layout
        if device is not None:
            self.device = device

        def _to_requested_device(value):
            # Only tensors are moved, and only when a device was explicitly requested.
            if isinstance(value, torch.Tensor) and device is not None:
                return value.to(device)
            return value

        self.sigma_max = _to_requested_device(sigma_max)
        self.sigma_min = _to_requested_device(sigma_min)

        self.last_seed = seed

        if generator is None:
            self.generator = torch.Generator(device=self.device).manual_seed(seed)
        else:
            self.generator = generator

    def __call__(self):
        """Draw noise; must be provided by subclasses."""
        raise NotImplementedError("This method got clownsharked!")

    def update(self, **kwargs):
        """
        Set every attribute whose keyword value is not None.

        Returns:
            A tuple with the current value of each named attribute, in keyword
            order (a None keyword leaves the attribute untouched and reports
            its existing value).
        """
        updated_values = []
        for attribute_name, value in kwargs.items():
            if value is not None:
                setattr(self, attribute_name, value)
            updated_values.append(getattr(self, attribute_name))
        return tuple(updated_values)
class HiresPyramidNoiseGenerator(NoiseGenerator):
    """
    Pyramid noise: uniform base noise plus up to four Gaussian layers drawn at
    randomly growing resolutions, each resampled to the target size and weighted
    by discount ** i. The result is divided by its standard deviation.
    """
    def __init__(self, x=None, size=None, dtype=None, layout=None, device=None, seed=42, generator=None, sigma_min=None, sigma_max=None,
                 discount=0.7, mode='nearest-exact'):
        super().__init__(x, size, dtype, layout, device, seed, generator, sigma_min, sigma_max)
        self.update(discount=discount, mode=mode)

    def __call__(self, *, discount=None, mode=None, **kwargs):
        """
        Draw one noise tensor of shape self.size.

        discount / mode, when not None, replace the stored values first.
        Extra keyword arguments are accepted and ignored.
        """
        self.update(discount=discount, mode=mode)
        self.last_seed += 1

        is_video = len(self.size) == 5
        if is_video:
            b, c, t, h, w = self.size
            orig_t, orig_h, orig_w = t, h, w
            # The target size must follow the tensor's trailing dims (t, h, w);
            # the former (h, w, t) order only worked when all three were equal.
            u = nn.Upsample(size=(orig_t, orig_h, orig_w), mode=self.mode).to(self.device)
        else:
            b, c, h, w = self.size
            orig_h, orig_w = h, w
            orig_t = t = 1
            u = nn.Upsample(size=(orig_h, orig_w), mode=self.mode).to(self.device)

        # uniform noise in [-1.73, 1.73)
        noise = ((torch.rand(size=self.size, dtype=self.dtype, layout=self.layout, device=self.device, generator=self.generator) - 0.5) * 2 * 1.73)

        for i in range(4):
            # random growth factor in [2, 4); layer sizes are capped at 15x the target
            r = torch.rand(1, device=self.device, generator=self.generator).item() * 2 + 2
            h, w = min(orig_h * 15, int(h * (r ** i))), min(orig_w * 15, int(w * (r ** i)))
            if is_video:
                t = min(orig_t * 15, int(t * (r ** i)))
                new_noise = torch.randn((b, c, t, h, w), dtype=self.dtype, layout=self.layout, device=self.device, generator=self.generator)
            else:
                new_noise = torch.randn((b, c, h, w), dtype=self.dtype, layout=self.layout, device=self.device, generator=self.generator)

            upsampled_noise = u(new_noise)
            noise += upsampled_noise * self.discount ** i

            if h >= orig_h * 15 or w >= orig_w * 15 or t >= orig_t * 15:
                break  # if resolution is too high

        return noise / noise.std()


class PyramidNoiseGenerator(NoiseGenerator):
    """
    Pyramid noise: five Gaussian layers drawn at 2x, 4x, ... 32x the target
    resolution with std 0.5 ** i, resampled to the target size, weighted by
    discount ** i and summed. The result is divided by its standard deviation.
    """
    def __init__(self, x=None, size=None, dtype=None, layout=None, device=None, seed=42, generator=None, sigma_min=None, sigma_max=None,
                 discount=0.8, mode='nearest-exact'):
        super().__init__(x, size, dtype, layout, device, seed, generator, sigma_min, sigma_max)
        self.update(discount=discount, mode=mode)

    def __call__(self, *, discount=None, mode=None, **kwargs):
        """
        Draw one noise tensor of shape self.size.

        discount / mode, when not None, replace the stored values first.
        Extra keyword arguments are accepted and ignored.
        """
        self.update(discount=discount, mode=mode)
        self.last_seed += 1

        x = torch.zeros(self.size, dtype=self.dtype, layout=self.layout, device=self.device)

        is_video = len(self.size) == 5
        if is_video:
            b, c, t, h, w = self.size
            # interpolate expects the target size in the tensor's own (t, h, w) order
            origSize = (t, h, w)
        else:
            b, c, h, w = self.size
            origSize = (h, w)

        r = 1
        for i in range(5):
            r *= 2

            if is_video:
                scaledSize = (b, c, t * r, h * r, w * r)
            else:
                scaledSize = (b, c, h * r, w * r)

            x += torch.nn.functional.interpolate(
                torch.normal(mean=0, std=0.5 ** i, size=scaledSize, dtype=self.dtype, layout=self.layout, device=self.device, generator=self.generator),
                size=origSize, mode=self.mode
            ) * self.discount ** i
        return x / x.std()
class CascadeBPyramidNoiseGenerator(NoiseGenerator):
    """
    Gaussian noise plus progressively coarser Gaussian offsets (level i is drawn
    at 1 / 2**i of the resolution and weighted by 0.75 ** i), renormalised by the
    root of the summed squared weights. Only 4D (b, c, h, w) sizes are supported.
    """
    def __init__(self, x=None, size=None, dtype=None, layout=None, device=None, seed=42, generator=None, sigma_min=None, sigma_max=None,
                 levels=10, mode='nearest', size_range=[1,16]):
        super().__init__(x, size, dtype, layout, device, seed, generator, sigma_min, sigma_max)
        self.update(epsilon=x, levels=levels, mode=mode, size_range=size_range)

    def __call__(self, *, levels=10, mode='nearest', size_range=[1,16], **kwargs):
        """
        Draw one noise tensor of shape self.size.

        Args:
            levels: number of pyramid levels to attempt (level 0 is the base noise).
            mode: interpolation mode used to resample the offsets; stored on self.
            size_range: [lo, hi]; a level is added only when its height or its
                width lies in that range. None adds every level.

        Raises:
            NotImplementedError: for 5D sizes.

        NOTE(review): the call-time defaults are not None, so levels/mode given to
        the constructor are overwritten on every call that omits them.
        """
        self.update(levels=levels, mode=mode)
        if len(self.size) == 5:
            raise NotImplementedError("CascadeBPyramidNoiseGenerator is not implemented for 5D tensors (eg. video).")
        self.last_seed += 1

        epsilon = torch.randn(self.size, dtype=self.dtype, layout=self.layout, device=self.device, generator=self.generator)
        full_h, full_w = epsilon.size(-2), epsilon.size(-1)
        multipliers = [1]
        for i in range(1, levels):
            m = 0.75 ** i

            # Height and width are reduced independently; width used to be taken
            # from size(-2) as well, which produced square offsets for non-square latents.
            h, w = int(full_h // (2 ** i)), int(full_w // (2 ** i))
            if h < 1 or w < 1:
                break  # nothing left to sample at this level
            if size_range is None or (size_range[0] <= h <= size_range[1] or size_range[0] <= w <= size_range[1]):
                offset = torch.randn(epsilon.size(0), epsilon.size(1), h, w, device=self.device, generator=self.generator)
                epsilon = epsilon + torch.nn.functional.interpolate(offset, size=epsilon.shape[-2:], mode=self.mode) * m
                multipliers.append(m)

            if h <= 1 or w <= 1:
                break
        # divide by the square root of the sum of the squared multipliers
        epsilon = epsilon / sum([m ** 2 for m in multipliers]) ** 0.5
        return epsilon
class LaplacianNoiseGenerator(NoiseGenerator):
    """Gaussian noise (scaled by 1/4) plus a Laplace-distributed sample, renormalized.

    ``loc`` and ``scale`` are handed to ``self.update`` (defined on the base class,
    outside this block) and read back as ``self.loc`` / ``self.scale``.
    """

    def __init__(self, x=None, size=None, dtype=None, layout=None, device=None, seed=42, generator=None, sigma_min=None, sigma_max=None,
                 loc=0, scale=1.0):
        super().__init__(x, size, dtype, layout, device, seed, generator, sigma_min, sigma_max)
        self.update(loc=loc, scale=scale)

    def __call__(self, *, loc=None, scale=None, **kwargs):
        """Return one noise tensor of shape ``self.size`` divided by its own std.

        Args:
            loc: optional override forwarded to ``self.update``.
            scale: optional override forwarded to ``self.update``.
            **kwargs: accepted and ignored.
        """
        self.update(loc=loc, scale=scale)
        self.last_seed += 1

        noise = torch.randn(self.size, dtype=self.dtype, layout=self.layout, device=self.device, generator=self.generator) / 4.0

        # Laplace.rsample() takes no generator argument, so the global RNG is seeded
        # temporarily from this generator's seed.  The saved state is restored in
        # `finally` so a failure while sampling cannot leave the global RNG reseeded.
        rng_state = torch.random.get_rng_state()
        try:
            torch.manual_seed(self.generator.initial_seed())
            laplacian_noise = Laplace(loc=self.loc, scale=self.scale).rsample(self.size).to(self.device)
            # Re-seed the private generator with the next seed value.
            self.generator.manual_seed(self.generator.initial_seed() + 1)
        finally:
            torch.random.set_rng_state(rng_state)

        noise += laplacian_noise
        return noise / noise.std()
class PerlinNoiseGenerator(NoiseGenerator):
    """Gaussian noise (scaled by 1/2) with two Perlin-noise fields added per slice.

    The Perlin fields are built from random unit gradient vectors placed on the
    corners of a grid and interpolated with a smooth-step weight.  ``detail`` is
    stored through ``self.update`` but is not read anywhere in this class.
    """

    def __init__(self, x=None, size=None, dtype=None, layout=None, device=None, seed=42, generator=None, sigma_min=None, sigma_max=None,
                 detail=0.0):
        super().__init__(x, size, dtype, layout, device, seed, generator, sigma_min, sigma_max)
        self.update(detail=detail)

    @staticmethod
    def get_positions(block_shape: Tuple[int, int]) -> Tensor:
        """Return cell-local sample coordinates, shaped ``(1, bh, bw, 1, 1, 2)``.

        Coordinates are the cell-centred fractions ``(k + 0.5) / n`` along each axis.
        """
        bh, bw = block_shape
        positions = torch.stack(
            torch.meshgrid(
                [(torch.arange(b) + 0.5) / b for b in (bw, bh)],
                indexing="xy",
            ),
            -1,
        ).view(1, bh, bw, 1, 1, 2)
        return positions

    @staticmethod
    def unfold_grid(vectors: Tensor) -> Tensor:
        """Gather the four corner vectors of every grid cell.

        Takes a ``(batch, 2, gpy, gpx)`` tensor of grid-point vectors and returns a
        ``(batch, 4, gpy - 1, gpx - 1, 2)`` tensor built from 2x2 sliding windows.
        """
        batch_size, _, gpy, gpx = vectors.shape
        return (
            unfold(vectors, (2, 2))
            .view(batch_size, 2, 4, -1)
            .permute(0, 2, 3, 1)
            .view(batch_size, 4, gpy - 1, gpx - 1, 2)
        )

    @staticmethod
    def smooth_step(t: Tensor) -> Tensor:
        """Cubic smooth-step ``3t^2 - 2t^3``."""
        return t * t * (3.0 - 2.0 * t)

    # Declared as a staticmethod that nevertheless takes `self` explicitly; callers
    # invoke it as `self.perlin_noise_tensor(self, ...)`.  Kept for compatibility.
    @staticmethod
    def perlin_noise_tensor(
        self,
        vectors: Tensor,
        positions: Tensor,
        step: Callable = None
    ) -> Tensor:
        """Interpolate corner gradient vectors into a ``(batch, gh * bh, gw * bw)`` field.

        Args:
            vectors: corner vectors as returned by ``unfold_grid``.
            positions: cell-local coordinates as returned by ``get_positions``.
            step: interpolation weight function; defaults to ``smooth_step``.

        Raises:
            Exception: if the block or batch dimensions of the two inputs disagree.
        """
        if step is None:
            step = self.smooth_step

        batch_size = vectors.shape[0]
        gh, gw = vectors.shape[2:4]    # grid height, grid width
        bh, bw = positions.shape[1:3]  # block height, block width

        for i in range(2):
            if positions.shape[i + 3] not in (1, vectors.shape[i + 2]):
                # The message now reports the dimensions that were actually compared.
                raise Exception(
                    f"Blocks shapes do not match: vectors ({gh}, {gw}), positions ({positions.shape[3]}, {positions.shape[4]})"
                )

        if positions.shape[0] not in (1, batch_size):
            raise Exception(
                f"Batch sizes do not match: vectors ({vectors.shape[0]}), positions ({positions.shape[0]})"
            )

        vectors = vectors.view(batch_size, 4, 1, gh * gw, 2)
        positions = positions.view(positions.shape[0], bh * bw, -1, 2)

        step_x = step(positions[..., 0])
        step_y = step(positions[..., 1])

        # Dot each corner vector with the offset from that corner, then blend along x.
        row0 = lerp(
            (vectors[:, 0] * positions).sum(dim=-1),
            (vectors[:, 1] * (positions - positions.new_tensor((1, 0)))).sum(dim=-1),
            step_x,
        )
        row1 = lerp(
            (vectors[:, 2] * (positions - positions.new_tensor((0, 1)))).sum(dim=-1),
            (vectors[:, 3] * (positions - positions.new_tensor((1, 1)))).sum(dim=-1),
            step_x,
        )
        noise = lerp(row0, row1, step_y)
        return (
            noise.view(batch_size, bh, bw, gh, gw)
            .permute(0, 3, 1, 4, 2)
            .reshape(batch_size, gh * bh, gw * bw)
        )

    def perlin_noise(
        self,
        grid_shape: Tuple[int, int],
        out_shape: Tuple[int, int],
        batch_size: int = 1,
        generator: Generator = None,
        *args,
        **kwargs,
    ) -> Tensor:
        """Generate ``batch_size`` Perlin fields of shape ``out_shape``.

        Args:
            grid_shape: number of grid cells as ``(height, width)``.
            out_shape: output ``(height, width)``; each must be a multiple of the
                matching grid dimension.
            batch_size: number of independent fields.
            generator: RNG used for the gradient angles; falls back to
                ``self.generator`` when ``None``.
            *args, **kwargs: forwarded to ``torch.empty`` when allocating the angles.

        Returns:
            A ``(batch_size, oh, ow)`` tensor with a leading dimension of size 1
            squeezed away.

        Raises:
            Exception: if an output dimension is not divisible by the grid dimension.
        """
        gh, gw = grid_shape         # grid height and width
        oh, ow = out_shape          # output height and width
        bh, bw = oh // gh, ow // gw # block height and width

        if oh != bh * gh:
            raise Exception(f"Output height {oh} must be divisible by grid height {gh}")
        # Previously written as `ow != bw * gw != 0`, a chained comparison that
        # skipped the check whenever bw * gw was 0.
        if ow != bw * gw:
            raise Exception(f"Output width {ow} must be divisible by grid width {gw}")

        rng = self.generator if generator is None else generator

        angle = torch.empty(
            [batch_size] + [s + 1 for s in grid_shape], device=self.device, *args, **kwargs
        ).uniform_(to=2.0 * pi, generator=rng)
        # random unit vectors on grid points
        vectors = self.unfold_grid(torch.stack((torch.cos(angle), torch.sin(angle)), dim=1))
        # positions inside grid cells [0, 1)
        positions = self.get_positions((bh, bw)).to(vectors)
        return self.perlin_noise_tensor(self, vectors, positions).squeeze(0)

    def __call__(self, *, detail=None, **kwargs):
        """Return one noise tensor of shape ``self.size`` divided by its own std.

        Args:
            detail: forwarded to ``self.update``; currently unused.
            **kwargs: accepted and ignored.
        """
        self.update(detail=detail) #currently unused
        self.last_seed += 1

        noise = torch.randn(self.size, dtype=self.dtype, layout=self.layout, device=self.device, generator=self.generator) / 2.0

        if len(self.size) == 5:
            b, c, t, h, w = self.size
            for tt in range(t):
                for _ in range(2):
                    perlin_slice = self.perlin_noise((h, w), (h, w), batch_size=c, generator=self.generator).to(self.device)
                    # perlin_noise() squeezes a leading dimension of size 1, so the
                    # channel axis is restored explicitly instead of via unsqueeze.
                    noise[:, :, tt:tt+1, :, :] += perlin_slice.reshape(1, c, 1, h, w)
        else:
            b, c, h, w = self.size
            for _ in range(2):
                noise += self.perlin_noise((h, w), (h, w), batch_size=c, generator=self.generator).to(self.device)

        return noise / noise.std()
UniformNoiseGenerator, "pyramid-cascade_B" : CascadeBPyramidNoiseGenerator, "pyramid-interpolated" : InterpolatedPyramidNoiseGenerator, "pyramid-bilinear" : noise_generator_factory(PyramidNoiseGenerator, mode='bilinear'), "pyramid-bicubic" : noise_generator_factory(PyramidNoiseGenerator, mode='bicubic'), "pyramid-nearest" : noise_generator_factory(PyramidNoiseGenerator, mode='nearest'), "hires-pyramid-bilinear": noise_generator_factory(HiresPyramidNoiseGenerator, mode='bilinear'), "hires-pyramid-bicubic" : noise_generator_factory(HiresPyramidNoiseGenerator, mode='bicubic'), "hires-pyramid-nearest" : noise_generator_factory(HiresPyramidNoiseGenerator, mode='nearest'), "brownian" : BrownianNoiseGenerator, "laplacian" : LaplacianNoiseGenerator, "studentt" : StudentTNoiseGenerator, "wavelet" : WaveletNoiseGenerator, "perlin" : PerlinNoiseGenerator, } NOISE_GENERATOR_CLASSES_SIMPLE = { "none" : GaussianNoiseGenerator, "brownian" : BrownianNoiseGenerator, "gaussian" : GaussianNoiseGenerator, "gaussian_backwards" : GaussianBackwardsNoiseGenerator, "laplacian" : LaplacianNoiseGenerator, "perlin" : PerlinNoiseGenerator, "studentt" : StudentTNoiseGenerator, "uniform" : UniformNoiseGenerator, "wavelet" : WaveletNoiseGenerator, "brown" : noise_generator_factory(FractalNoiseGenerator, alpha=2.0), "pink" : noise_generator_factory(FractalNoiseGenerator, alpha=1.0), "white" : noise_generator_factory(FractalNoiseGenerator, alpha=0.0), "blue" : noise_generator_factory(FractalNoiseGenerator, alpha=-1.0), "violet" : noise_generator_factory(FractalNoiseGenerator, alpha=-2.0), "ultraviolet_A" : noise_generator_factory(FractalNoiseGenerator, alpha=-3.0), "ultraviolet_B" : noise_generator_factory(FractalNoiseGenerator, alpha=-4.0), "ultraviolet_C" : noise_generator_factory(FractalNoiseGenerator, alpha=-5.0), "hires-pyramid-bicubic" : noise_generator_factory(HiresPyramidNoiseGenerator, mode='bicubic'), "hires-pyramid-bilinear": noise_generator_factory(HiresPyramidNoiseGenerator, 
mode='bilinear'), "hires-pyramid-nearest" : noise_generator_factory(HiresPyramidNoiseGenerator, mode='nearest'), "pyramid-bicubic" : noise_generator_factory(PyramidNoiseGenerator, mode='bicubic'), "pyramid-bilinear" : noise_generator_factory(PyramidNoiseGenerator, mode='bilinear'), "pyramid-nearest" : noise_generator_factory(PyramidNoiseGenerator, mode='nearest'), "pyramid-interpolated" : InterpolatedPyramidNoiseGenerator, "pyramid-cascade_B" : CascadeBPyramidNoiseGenerator, } if OPENSIMPLEX_ENABLE: NOISE_GENERATOR_CLASSES.update({ "simplex": SimplexNoiseGenerator, }) NOISE_GENERATOR_NAMES = tuple(NOISE_GENERATOR_CLASSES.keys()) NOISE_GENERATOR_NAMES_SIMPLE = tuple(NOISE_GENERATOR_CLASSES_SIMPLE.keys()) @precision_tool.cast_tensor def prepare_noise(latent_image, seed, noise_type, noise_inds=None, alpha=1.0, k=1.0): # adapted from comfy/sample.py: https://github.com/comfyanonymous/ComfyUI #optional arg skip can be used to skip and discard x number of noise generations for a given seed noise_func = NOISE_GENERATOR_CLASSES.get(noise_type)(x=latent_image, seed=seed, sigma_min=0.0291675, sigma_max=14.614642) # WARNING: HARDCODED SDXL SIGMA RANGE! 
if noise_type == "fractal": noise_func.alpha = alpha noise_func.k = k # from here until return is very similar to comfy/sample.py if noise_inds is None: return noise_func(sigma=14.614642, sigma_next=0.0291675) unique_inds, inverse = np.unique(noise_inds, return_inverse=True) noises = [] for i in range(unique_inds[-1]+1): noise = noise_func(size = [1] + list(latent_image.size())[1:], dtype=latent_image.dtype, layout=latent_image.layout, device=latent_image.device) if i in unique_inds: noises.append(noise) noises = [noises[i] for i in inverse] noises = torch.cat(noises, axis=0) return noises ================================================ FILE: beta/phi_functions.py ================================================ import torch import math from typing import Optional # Remainder solution def _phi(j, neg_h): remainder = torch.zeros_like(neg_h) for k in range(j): remainder += (neg_h)**k / math.factorial(k) phi_j_h = ((neg_h).exp() - remainder) / (neg_h)**j return phi_j_h def calculate_gamma(c2, c3): return (3*(c3**3) - 2*c3) / (c2*(2 - 3*c2)) # Exact analytic solution originally calculated by Clybius. https://github.com/Clybius/ComfyUI-Extra-Samplers/tree/main def _gamma(n: int,) -> int: """ https://en.wikipedia.org/wiki/Gamma_function for every positive integer n, Γ(n) = (n-1)! """ return math.factorial(n-1) def _incomplete_gamma(s: int, x: float, gamma_s: Optional[int] = None) -> float: """ https://en.wikipedia.org/wiki/Incomplete_gamma_function#Special_values if s is a positive integer, Γ(s, x) = (s-1)!*∑{k=0..s-1}(x^k/k!) 
""" if gamma_s is None: gamma_s = _gamma(s) sum_: float = 0 # {k=0..s-1} inclusive for k in range(s): numerator: float = x**k denom: int = math.factorial(k) quotient: float = numerator/denom sum_ += quotient incomplete_gamma_: float = sum_ * math.exp(-x) * gamma_s return incomplete_gamma_ def phi(j: int, neg_h: float, ): """ For j={1,2,3}: you could alternatively use Kat's phi_1, phi_2, phi_3 which perform fewer steps Lemma 1 https://arxiv.org/abs/2308.02157 ϕj(-h) = 1/h^j*∫{0..h}(e^(τ-h)*(τ^(j-1))/((j-1)!)dτ) https://www.wolframalpha.com/input?i=integrate+e%5E%28%CF%84-h%29*%28%CF%84%5E%28j-1%29%2F%28j-1%29%21%29d%CF%84 = 1/h^j*[(e^(-h)*(-τ)^(-j)*τ(j))/((j-1)!)]{0..h} https://www.wolframalpha.com/input?i=integrate+e%5E%28%CF%84-h%29*%28%CF%84%5E%28j-1%29%2F%28j-1%29%21%29d%CF%84+between+0+and+h = 1/h^j*((e^(-h)*(-h)^(-j)*h^j*(Γ(j)-Γ(j,-h)))/(j-1)!) = (e^(-h)*(-h)^(-j)*h^j*(Γ(j)-Γ(j,-h))/((j-1)!*h^j) = (e^(-h)*(-h)^(-j)*(Γ(j)-Γ(j,-h))/(j-1)! = (e^(-h)*(-h)^(-j)*(Γ(j)-Γ(j,-h))/Γ(j) = (e^(-h)*(-h)^(-j)*(1-Γ(j,-h)/Γ(j)) requires j>0 """ assert j > 0 gamma_: float = _gamma(j) incomp_gamma_: float = _incomplete_gamma(j, neg_h, gamma_s=gamma_) phi_: float = math.exp(neg_h) * neg_h**-j * (1-incomp_gamma_/gamma_) return phi_ from mpmath import mp, mpf, factorial, exp mp.dps = 80 # e.g. 80 decimal digits (~ float256) def phi_mpmath_series(j: int, neg_h: float) -> float: """ Arbitrary‐precision phi_j(-h) via the remainder‐series definition, using mpmath’s mpf and factorial. """ j = int(j) z = mpf(float(neg_h)) S = mp.mpf('0') # S = sum_{k=0..j-1} z^k / k! 
class Phi:
    """Cached evaluator for phi_j(-h * c) at a fixed step size and node list.

    Args:
        h: step size.
        c: sequence of node values, addressed one-based by ``__call__``.
        analytic_solution: when true, ``h`` and ``c`` are converted to ``mpf`` and
            ``phi_mpmath_series`` is used; otherwise ``phi`` is used.
    """

    def __init__(self, h, c, analytic_solution=False):
        self.h = h
        self.c = c
        self.cache = {}  # (j, i) -> previously computed value
        if analytic_solution:
            self.phi_f = phi_mpmath_series
            self.h = mpf(float(h))
            self.c = [mpf(c_val) for c_val in c]
        else:
            self.phi_f = phi

    def __call__(self, j, i=-1):
        """Return phi_j(-h * c_i), memoized per ``(j, i)``.

        Args:
            j: order of the phi function.
            i: one-based index into ``c``; a negative value selects ``c = 1``.
               Because the lookup is ``self.c[i - 1]``, ``i == 0`` reads the last
               entry of ``c``.

        Returns:
            ``0`` when the selected node equals 0, ``exp(-h * c)`` for ``j == 0`` with
            a plain numeric or tensor node, otherwise the value of ``self.phi_f``.
        """
        key = (j, i)
        if key in self.cache:
            return self.cache[key]

        c = 1 if i < 0 else self.c[i - 1]

        if c == 0:
            self.cache[key] = 0
            return 0

        # phi_0(z) = exp(z).  The previous exact-type test (`type(c) in {float, Tensor}`)
        # rejected ints - including the default c = 1 above - and sent j == 0 on to
        # phi(), which asserts j > 0.
        if j == 0 and isinstance(c, (int, float, torch.Tensor)):
            result = math.exp(float(-self.h * c))
        else:
            result = self.phi_f(j, -self.h * c)

        self.cache[key] = result
        return result
is_exponential(rk_type:str) -> bool: return rk_type.startswith(RK_EXPONENTIAL_PREFIXES) RK_SAMPLER_NAMES_BETA_FOLDERS = ["none", "multistep/res_2m", "multistep/res_3m", "multistep/dpmpp_2m", "multistep/dpmpp_3m", "multistep/abnorsett_2m", "multistep/abnorsett_3m", "multistep/abnorsett_4m", "multistep/deis_2m", "multistep/deis_3m", "multistep/deis_4m", "exponential/res_2s_rkmk2e", "exponential/res_2s", "exponential/res_2s_stable", "exponential/res_3s", "exponential/res_3s_non-monotonic", "exponential/res_3s_alt", "exponential/res_3s_cox_matthews", "exponential/res_3s_lie", "exponential/res_3s_sunstar", "exponential/res_3s_strehmel_weiner", "exponential/res_4s_krogstad", "exponential/res_4s_krogstad_alt", "exponential/res_4s_strehmel_weiner", "exponential/res_4s_strehmel_weiner_alt", "exponential/res_4s_cox_matthews", "exponential/res_4s_cfree4", "exponential/res_4s_friedli", "exponential/res_4s_minchev", "exponential/res_4s_munthe-kaas", "exponential/res_5s", "exponential/res_5s_hochbruck-ostermann", "exponential/res_6s", "exponential/res_8s", "exponential/res_8s_alt", "exponential/res_10s", "exponential/res_15s", "exponential/res_16s", "exponential/etdrk2_2s", "exponential/etdrk3_a_3s", "exponential/etdrk3_b_3s", "exponential/etdrk4_4s", "exponential/etdrk4_4s_alt", "exponential/dpmpp_2s", "exponential/dpmpp_sde_2s", "exponential/dpmpp_3s", "exponential/lawson2a_2s", "exponential/lawson2b_2s", "exponential/lawson4_4s", "exponential/lawson41-gen_4s", "exponential/lawson41-gen-mod_4s", "exponential/ddim", "hybrid/pec423_2h2s", "hybrid/pec433_2h3s", "hybrid/abnorsett2_1h2s", "hybrid/abnorsett3_2h2s", "hybrid/abnorsett4_3h2s", "hybrid/lawson42-gen-mod_1h4s", "hybrid/lawson43-gen-mod_2h4s", "hybrid/lawson44-gen-mod_3h4s", "hybrid/lawson45-gen-mod_4h4s", "linear/ralston_2s", "linear/ralston_3s", "linear/ralston_4s", "linear/midpoint_2s", "linear/heun_2s", "linear/heun_3s", "linear/houwen-wray_3s", "linear/kutta_3s", "linear/ssprk3_3s", "linear/ssprk4_4s", 
"linear/rk38_4s", "linear/rk4_4s", "linear/rk5_7s", "linear/rk6_7s", "linear/bogacki-shampine_4s", "linear/bogacki-shampine_7s", "linear/dormand-prince_6s", "linear/dormand-prince_13s", "linear/tsi_7s", #"verner_robust_16s", "linear/euler", "diag_implicit/irk_exp_diag_2s", "diag_implicit/kraaijevanger_spijker_2s", "diag_implicit/qin_zhang_2s", "diag_implicit/pareschi_russo_2s", "diag_implicit/pareschi_russo_alt_2s", "diag_implicit/crouzeix_2s", "diag_implicit/crouzeix_3s", "diag_implicit/crouzeix_3s_alt", "fully_implicit/gauss-legendre_2s", "fully_implicit/gauss-legendre_3s", "fully_implicit/gauss-legendre_4s", "fully_implicit/gauss-legendre_4s_alternating_a", "fully_implicit/gauss-legendre_4s_ascending_a", "fully_implicit/gauss-legendre_4s_alt", "fully_implicit/gauss-legendre_5s", "fully_implicit/gauss-legendre_5s_ascending", #"gauss-legendre_diag_8s", "fully_implicit/radau_ia_2s", "fully_implicit/radau_ia_3s", "fully_implicit/radau_iia_2s", "fully_implicit/radau_iia_3s", "fully_implicit/radau_iia_3s_alt", "fully_implicit/radau_iia_5s", "fully_implicit/radau_iia_7s", "fully_implicit/radau_iia_9s", "fully_implicit/radau_iia_11s", "fully_implicit/lobatto_iiia_2s", "fully_implicit/lobatto_iiia_3s", "fully_implicit/lobatto_iiia_4s", "fully_implicit/lobatto_iiib_2s", "fully_implicit/lobatto_iiib_3s", "fully_implicit/lobatto_iiib_4s", "fully_implicit/lobatto_iiic_2s", "fully_implicit/lobatto_iiic_3s", "fully_implicit/lobatto_iiic_4s", "fully_implicit/lobatto_iiic_star_2s", "fully_implicit/lobatto_iiic_star_3s", "fully_implicit/lobatto_iiid_2s", "fully_implicit/lobatto_iiid_3s", ] RK_SAMPLER_NAMES_BETA_NO_FOLDERS = [] for orig_sampler_name in RK_SAMPLER_NAMES_BETA_FOLDERS[1:]: sampler_name = orig_sampler_name.split("/")[-1] if "/" in orig_sampler_name else orig_sampler_name RK_SAMPLER_NAMES_BETA_NO_FOLDERS.append(sampler_name) IRK_SAMPLER_NAMES_BETA_FOLDERS = ["none", "use_explicit"] for orig_sampler_name in RK_SAMPLER_NAMES_BETA_FOLDERS[1:]: if "implicit" in 
class DualFormatList(list):
    """list that can match items with or without category prefixes.

    ``"euler" in lst`` and ``"linear/euler" in lst`` both succeed whether the list
    stores ``"euler"`` or ``"<folder>/euler"``.  Only the part after the last ``/``
    is compared, and it must match a whole name, not merely a suffix.
    """

    def __contains__(self, item):
        # Exact membership first, for any item type.
        if super().__contains__(item):
            return True
        # Prefix-insensitive matching only makes sense for strings.
        if not isinstance(item, str):
            return False

        base_name = item.rsplit("/", 1)[-1]
        if not base_name:
            # e.g. "linear/": an empty base name would otherwise match everything.
            return False

        # Match the bare name or "<anything>/<name>".  Anchoring on the slash keeps
        # "x/2s" from matching "res_2s".
        suffix = "/" + base_name
        return any(
            isinstance(opt, str) and (opt == base_name or opt.endswith(suffix))
            for opt in self
        )
def get_full_sampler_name(sampler_name_in: str) -> str:
    """Return ``"<folder>/<name>"`` for a sampler name.

    A name that already contains a ``/`` (and does not start with one) is returned
    unchanged.  Otherwise the folder is looked up in ``RK_SAMPLER_FOLDER_MAP``.  A
    name missing from the map is returned as given; the previous code returned the
    last key of the map in that case.

    NOTE(review): a name whose folder is the empty string yields a leading slash
    (e.g. ``"/none"``) - confirm callers expect that.
    """
    if "/" in sampler_name_in and sampler_name_in[0] != "/":
        return sampler_name_in

    if sampler_name_in in RK_SAMPLER_FOLDER_MAP:
        folder_name = RK_SAMPLER_FOLDER_MAP[sampler_name_in]
        return f"{folder_name}/{sampler_name_in}"

    return sampler_name_in


def process_sampler_name(sampler_name_in: str) -> Tuple[str, str]:
    """Split a selected sampler into ``(sampler_name, implicit_sampler_name)``.

    Names under the ``fully_implicit`` or ``diag_implicit`` folders become the
    implicit sampler, paired with ``"euler"`` as the explicit one.  Every other name
    is the explicit sampler, paired with ``"use_explicit"``.

    NOTE(review): the folder test runs on the name as given, so an implicit sampler
    passed without its folder prefix is treated as explicit.  The original computed
    the full name here but never used it - confirm the intended behavior.
    """
    processed_name = sampler_name_in.split("/")[-1] if "/" in sampler_name_in else sampler_name_in

    if sampler_name_in.startswith(("fully_implicit", "diag_implicit")):
        return "euler", processed_name
    return processed_name, "use_explicit"
gamma_crouzeix = (1/(3**0.5)) * math.cos(math.pi / 18) + 1/2 # Crouzeix & Raviart 1980; A-stable; pg 100 in Solving Ordinary Differential Equations II delta_crouzeix = 1 / (6 * (2 * gamma_crouzeix - 1)**2) # Crouzeix & Raviart 1980; A-stable; pg 100 in Solving Ordinary Differential Equations II rk_coeff = { "gauss-legendre_diag_8s": ( # https://github.com/SciML/IRKGaussLegendre.jl/blob/master/src/IRKCoefficients.jl Antoñana, M., Makazaga, J., Murua, Ander. "Reducing and monitoring round-off error propagation for symplectic implicit Runge-Kutta schemes." Numerical Algorithms. 2017. [ [ 0.5, 0,0,0,0,0,0,0, ], [ 1.0818949631055814971365081647359309e00, 0.5, 0,0,0,0,0,0, ], [ 9.5995729622205494766003095439844678e-01, 1.0869589243008327233290709646162480e00, 0.5, 0,0,0,0,0, ], [ 1.0247213458032003748680445816450829e00, 9.5505887369737431186016905653386876e-01, 1.0880938387323083134422138713913203e00, 0.5, 0,0,0,0, ], [ 9.8302382676362890697311829123888390e-01, 1.0287597754747493109782305570410685e00, 9.5383453518519996588326911440754302e-01, 1.0883471611098277842507073806008045e00, 0.5, 0,0,0, ], [ 1.0122259141132982060539425317219435e00, 9.7998287236359129082628958290257329e-01, 1.0296038730649779374630125982121223e00, 9.5383453518519996588326911440754302e-01, 1.0880938387323083134422138713913203e00, 0.5, 0,0, ], [ 9.9125143323080263118822334698608777e-01, 1.0140743558891669291459735166525994e00, 9.7998287236359129082628958290257329e-01, 1.0287597754747493109782305570410685e00, 9.5505887369737431186016905653386876e-01, 1.0869589243008327233290709646162480e00, 0.5, 0, ], [ 1.0054828082532158826793409353214951e00, 9.9125143323080263118822334698608777e-01, 1.0122259141132982060539425317219435e00, 9.8302382676362890697311829123888390e-01, 1.0247213458032003748680445816450829e00, 9.5995729622205494766003095439844678e-01, 1.0818949631055814971365081647359309e00, 0.5, ], ], [ [ 5.0614268145188129576265677154981094e-02, 1.1119051722668723527217799721312045e-01, 
1.5685332293894364366898110099330067e-01, 1.8134189168918099148257522463859781e-01, 1.8134189168918099148257522463859781e-01, 1.5685332293894364366898110099330067e-01, 1.1119051722668723527217799721312045e-01, 5.0614268145188129576265677154981094e-02,] ], [ 1.9855071751231884158219565715263505e-02, # 0.019855071751231884158219565715263505 1.0166676129318663020422303176208480e-01, 2.3723379504183550709113047540537686e-01, 4.0828267875217509753026192881990801e-01, 5.9171732124782490246973807118009203e-01, 7.6276620495816449290886952459462321e-01, 8.9833323870681336979577696823791522e-01, 9.8014492824876811584178043428473653e-01, ] ), "gauss-legendre_5s": ( [ [4563950663 / 32115191526, (310937500000000 / 2597974476091533 + 45156250000 * (739**0.5) / 8747388808389), (310937500000000 / 2597974476091533 - 45156250000 * (739**0.5) / 8747388808389), (5236016175 / 88357462711 + 709703235 * (739**0.5) / 353429850844), (5236016175 / 88357462711 - 709703235 * (739**0.5) / 353429850844)], [(4563950663 / 32115191526 - 38339103 * (739**0.5) / 6250000000), (310937500000000 / 2597974476091533 + 9557056475401 * (739**0.5) / 3498955523355600000), (310937500000000 / 2597974476091533 - 14074198220719489 * (739**0.5) / 3498955523355600000), (5236016175 / 88357462711 + 5601362553163918341 * (739**0.5) / 2208936567775000000000), (5236016175 / 88357462711 - 5040458465159165409 * (739**0.5) / 2208936567775000000000)], [(4563950663 / 32115191526 + 38339103 * (739**0.5) / 6250000000), (310937500000000 / 2597974476091533 + 14074198220719489 * (739**0.5) / 3498955523355600000), (310937500000000 / 2597974476091533 - 9557056475401 * (739**0.5) / 3498955523355600000), (5236016175 / 88357462711 + 5040458465159165409 * (739**0.5) / 2208936567775000000000), (5236016175 / 88357462711 - 5601362553163918341 * (739**0.5) / 2208936567775000000000)], [(4563950663 / 32115191526 - 38209 * (739**0.5) / 7938810), (310937500000000 / 2597974476091533 - 359369071093750 * (739**0.5) / 70145310854471391), 
(310937500000000 / 2597974476091533 - 323282178906250 * (739**0.5) / 70145310854471391), (5236016175 / 88357462711 - 470139 * (739**0.5) / 1413719403376), (5236016175 / 88357462711 - 44986764863 * (739**0.5) / 21205791050640)], [(4563950663 / 32115191526 + 38209 * (739**0.5) / 7938810), (310937500000000 / 2597974476091533 + 359369071093750 * (739**0.5) / 70145310854471391), (310937500000000 / 2597974476091533 + 323282178906250 * (739**0.5) / 70145310854471391), (5236016175 / 88357462711 + 44986764863 * (739**0.5) / 21205791050640), (5236016175 / 88357462711 + 470139 * (739**0.5) / 1413719403376)], ], [ [ 4563950663 / 16057595763, 621875000000000 / 2597974476091533, 621875000000000 / 2597974476091533, 10472032350 / 88357462711, 10472032350 / 88357462711] ], [ 1 / 2, 1 / 2 - 99 * (739**0.5) / 10000, # smallest # 0.06941899716778028758987101075583196 1 / 2 + 99 * (739**0.5) / 10000, # largest 1 / 2 - (739**0.5) / 60, 1 / 2 + (739**0.5) / 60 ] ), "gauss-legendre_5s_ascending": ( [ [(4563950663 / 32115191526 - 38339103 * (739**0.5) / 6250000000), (310937500000000 / 2597974476091533 + 9557056475401 * (739**0.5) / 3498955523355600000), (310937500000000 / 2597974476091533 - 14074198220719489 * (739**0.5) / 3498955523355600000), (5236016175 / 88357462711 + 5601362553163918341 * (739**0.5) / 2208936567775000000000), (5236016175 / 88357462711 - 5040458465159165409 * (739**0.5) / 2208936567775000000000)], [(4563950663 / 32115191526 - 38209 * (739**0.5) / 7938810), (310937500000000 / 2597974476091533 - 359369071093750 * (739**0.5) / 70145310854471391), (310937500000000 / 2597974476091533 - 323282178906250 * (739**0.5) / 70145310854471391), (5236016175 / 88357462711 - 470139 * (739**0.5) / 1413719403376), (5236016175 / 88357462711 - 44986764863 * (739**0.5) / 21205791050640)], [4563950663 / 32115191526, (310937500000000 / 2597974476091533 + 45156250000 * (739**0.5) / 8747388808389), (310937500000000 / 2597974476091533 - 45156250000 * (739**0.5) / 8747388808389), (5236016175 / 
88357462711 + 709703235 * (739**0.5) / 353429850844), (5236016175 / 88357462711 - 709703235 * (739**0.5) / 353429850844)], [(4563950663 / 32115191526 + 38209 * (739**0.5) / 7938810), (310937500000000 / 2597974476091533 + 359369071093750 * (739**0.5) / 70145310854471391), (310937500000000 / 2597974476091533 + 323282178906250 * (739**0.5) / 70145310854471391), (5236016175 / 88357462711 + 44986764863 * (739**0.5) / 21205791050640), (5236016175 / 88357462711 + 470139 * (739**0.5) / 1413719403376)], [(4563950663 / 32115191526 + 38339103 * (739**0.5) / 6250000000), (310937500000000 / 2597974476091533 + 14074198220719489 * (739**0.5) / 3498955523355600000), (310937500000000 / 2597974476091533 - 9557056475401 * (739**0.5) / 3498955523355600000), (5236016175 / 88357462711 + 5040458465159165409 * (739**0.5) / 2208936567775000000000), (5236016175 / 88357462711 - 5601362553163918341 * (739**0.5) / 2208936567775000000000)], ], [ [621875000000000 / 2597974476091533, 10472032350 / 88357462711, 4563950663 / 16057595763, 10472032350 / 88357462711, 621875000000000 / 2597974476091533,] ], [ 1 / 2 - 99 * (739**0.5) / 10000, # smallest # 0.06941899716778028758987101075583196 1 / 2 - (739**0.5) / 60, 1 / 2, 1 / 2 + (739**0.5) / 60, 1 / 2 + 99 * (739**0.5) / 10000, # largest ] ), "gauss-legendre_4s_alt": ( # https://ijstre.com/Publish/072016/371428231.pdf Four Point Gauss Quadrature Runge – Kuta Method Of Order 8 For Ordinary Differential Equations [ [1633/18780 - 71*206**0.5/96717000, 134689/939000 - 927*206**0.5/78250, 171511/939000 - 927*206**0.5/78250, 1633/18780 - 121979*206**0.5/19343400,], [7623/78250 - 1629507*206**0.5/257912000, 347013/21284000, -118701/4256800, 7623/78250 + 1629507*206**0.5/257912000,], [8978/117375 + 1629507*206**0.5/257912000, 4520423/12770400, 10410661/63852000, 8978/117375 + 1629507*206**0.5/257912000,], [1633/18780 + 121979*206**0.5/19343400, 134689/939000 + 927*206**0.5/78250, 171511/939000 + 927*206**0.5/78250, 1633/18780 + 71*206**0.5/96717000,], ], [ 
[1633/9390, 1531/4695, 1531/4695, 1633/9390,] ], [ 1/2 - 3*206**0.5 / 100, # 0.06941899716778028758987101075583196 33/100, 67/100, 1/2 + 3*206**0.5 / 100, ] ), "gauss-legendre_4s": ( [ [1/4, 1/4 - 15**0.5 / 6, 1/4 + 15**0.5 / 6, 1/4], [1/4 + 15**0.5 / 6, 1/4, 1/4 - 15**0.5 / 6, 1/4], [1/4, 1/4 + 15**0.5 / 6, 1/4, 1/4 - 15**0.5 / 6], [1/4 - 15**0.5 / 6, 1/4, 1/4 + 15**0.5 / 6, 1/4], ], [ [ 1/8, 3/8, 3/8, 1/8,] ], [ 1/2 - 15**0.5 / 10, # 0.11270166537925831148207346002176004 1/2 + 15**0.5 / 10, 1/2 + 15**0.5 / 10, 1/2 - 15**0.5 / 10 ] ), "gauss-legendre_4s_alternating_a": ( [ [1/4, 1/4 - 15**0.5 / 6, 1/4 + 15**0.5 / 6, 1/4], [1/4 + 15**0.5 / 6, 1/4, 1/4 - 15**0.5 / 6, 1/4], [1/4 - 15**0.5 / 6, 1/4, 1/4 + 15**0.5 / 6, 1/4], [1/4, 1/4 + 15**0.5 / 6, 1/4, 1/4 - 15**0.5 / 6], ], [ [ 1/8, 3/8, 1/8, 3/8,] ], [ 1/2 - 15**0.5 / 10, # 0.11270166537925831148207346002176004 1/2 + 15**0.5 / 10, 1/2 - 15**0.5 / 10, 1/2 + 15**0.5 / 10, ] ), "gauss-legendre_4s_ascending_a": ( [ [1/4 - 15**0.5 / 6, 1/4, 1/4 + 15**0.5 / 6, 1/4], [1/4, 1/4 - 15**0.5 / 6, 1/4 + 15**0.5 / 6, 1/4], [1/4, 1/4 + 15**0.5 / 6, 1/4, 1/4 - 15**0.5 / 6], [1/4 + 15**0.5 / 6, 1/4, 1/4 - 15**0.5 / 6, 1/4], ], [ [ 1/8, 3/8, 1/8, 3/8,] ], [ 1/2 - 15**0.5 / 10, 1/2 - 15**0.5 / 10, 1/2 + 15**0.5 / 10, 1/2 + 15**0.5 / 10, ] ), "gauss-legendre_3s": ( # Kunzmann-Butcher, IRK, order 6 https://www.math.umd.edu/~mariakc/SymplecticMethods.pdf [ [5/36, 2/9 - 15**0.5 / 15, 5/36 - 15**0.5 / 30], [5/36 + 15**0.5 / 24, 2/9, 5/36 - 15**0.5 / 24], [5/36 + 15**0.5 / 30, 2/9 + 15**0.5 / 15, 5/36], ], [ [5/18, 4/9, 5/18] ], [1/2 - 15**0.5 / 10, 1/2, 1/2 + 15**0.5 / 10] # 0.11270166537925831148207346002176004 ), "gauss-legendre_2s": ( # Hammer-Hollingsworth, IRK, order 4 https://www.math.umd.edu/~mariakc/SymplecticMethods.pdf [ [1/4, 1/4 - 3**0.5 / 6], [1/4 + 3**0.5 / 6, 1/4], ], [ [1/2, 1/2], ], [1/2 - 3**0.5 / 6, 1/2 + 3**0.5 / 6] # 0.21132486540518711774542560974902127 # 1/2 - (1/2)*(1/3**0.5) 1/2 + (1/2)*(1/3**0.5) ), 
"radau_iia_4s": ( [ [], [], [], [], ], [ [1/4, 1/4, 1/4, 1/4], ], [(1/11)*(4-6**0.5), (1/11)*(4+6**0.5), 1/2, 1] ), "radau_iia_11s": ( # https://github.com/ryanelandt/Radau.jl [ [0.015280520789530369, -0.0057824996781311875, 0.00438010324638053, -0.0036210375473319026, 0.003092977042211754, -0.0026728314041491816, 0.0023050911672361017, -0.001955651803123845, 0.001593873849612843, -0.0011728625554916522, 0.00046993032567176855], [0.03288397668119629, 0.03451351173940448, -0.009285420023734383, 0.00641324617083941, -0.005095455838865143, 0.0042460913690415955, -0.0035876743372353984, 0.003006834900018004, -0.0024326697483255453, 0.0017827773828584467, -0.0007131464180496306], [0.029332502147155125, 0.0741624250777296, 0.0511486756872502, -0.012005023334430185, 0.00777794727524923, -0.005944695307870806, 0.004802655736401176, -0.003923600687657003, 0.003127328539609814, -0.0022731432208609507, 0.0009063777304940358], [0.03111455337650569, 0.06578995121943092, 0.10929962691877611, 0.06381051663919307, -0.013853591907177828, 0.008557435524870741, -0.0063076358492939275, 0.004913357548166058, -0.0038139969541068734, 0.0027334306074068546, -0.0010839711153145738], [0.03005269275666326, 0.07011284530154153, 0.09714692306747527, 0.1353916024839275, 0.07147107644479529, -0.014710238851905252, 0.008733191499420551, -0.00619941303527863, 0.004591640852897801, -0.003213330884490774, 0.001262857250740274], [0.030728073929609766, 0.06751925856657341, 0.10334060375222286, 0.12083525997663601, 0.1503267876654705, 0.07350931976920085, -0.014512880052768446, 0.008296645645701008, -0.0056128275038367864, 0.003766229774466616, -0.001457705807615146], [0.030292022376401242, 0.06914472100762357, 0.09972096441656238, 0.12801064060853223, 0.13493180383303127, 0.15289670039157693, 0.06975993047996924, -0.013274545709987746, 0.007258767272883859, -0.0044843888202694155, 0.0016878458203415244], [0.03056654381836576, 0.06813851028407998, 0.10188107030389015, 0.12403361149690655, 
0.14211431622263265, 0.13829395377418516, 0.14289135336320447, 0.06052636121446275, -0.011077739682117822, 0.005598667203856668, -0.0019877269625674446], [0.030406629901865028, 0.06871880785022819, 0.10066095698900927, 0.12619527453091425, 0.13848875677027936, 0.14450773783254642, 0.13065188915037962, 0.1211140113707743, 0.046555483263607714, -0.008026200095719123, 0.002437640226261747], [0.030484119381553945, 0.06843924691254653, 0.10124184869598654, 0.1251873187759311, 0.14011843430039864, 0.14190386755377057, 0.13500342651951197, 0.11262869537051934, 0.08930604389562254, 0.028969664972192485, -0.0033116985395201413], [0.03046254890606557, 0.06851684106660112, 0.10108155427001221, 0.1254626888485642, 0.13968066655169153, 0.14258278197050367, 0.1339335430948421, 0.11443306192448831, 0.08565880960332992, 0.04992304095398403, 0.008264462809917356], ], [ [0.03046254890606557, 0.06851684106660112, 0.10108155427001221, 0.1254626888485642, 0.13968066655169153, 0.14258278197050367, 0.1339335430948421, 0.11443306192448831, 0.08565880960332992, 0.04992304095398403, 0.008264462809917356], ], [0.011917613432415597, 0.061732071877148124, 0.14711144964307024, 0.26115967600845624, 0.39463984688578685, 0.5367387657156606, 0.6759444616766651, 0.8009789210368988, 0.9017109877901468, 0.9699709678385136, 1.0] ), "radau_iia_9s": ( # https://github.com/ryanelandt/Radau.jl [ [0.022788378793458776, -0.008589639752938945, 0.0064510291769951465, -0.00525752869975012, 0.004388833809361376, -0.0036512155536904674, 0.0029404882137526148, -0.002149274163882554, 0.0008588433240576261], [0.04890795244749932, 0.05070205048082808, -0.013523807196021316, 0.009209373774305071, -0.0071557133175369604, 0.005747246699432309, -0.004542582976394536, 0.003288161681791406, -0.0013090736941094112], [0.04374276009157137, 0.10830189290274023, 0.07291956593742897, -0.016879877210016055, 0.010704551844802781, -0.007901946479238777, 0.005991406942179993, -0.0042480244399873135, 0.0016781498061495626], 
[0.04624923745394712, 0.09656073072680009, 0.1542987697900386, 0.0867193693031384, -0.018451639643617873, 0.011036658729835513, -0.007673280940281649, 0.005228224999889903, -0.00203590583647778], [0.044834436586910234, 0.10230684968594175, 0.13821763419236816, 0.18126393468214014, 0.09043360059943564, -0.018085063366782478, 0.010193387903855565, -0.006405265418866323, 0.0024271699384239612], [0.045658755719323395, 0.09914547048938806, 0.14574704049699233, 0.16364828123387398, 0.18594458734451902, 0.08361326023153276, -0.015809936146309538, 0.00813825269404473, -0.002910469207795258], [0.045200600187797244, 0.10085370671832047, 0.1419422367945749, 0.17118947183876332, 0.1697833861700019, 0.16776829117327952, 0.06707903432249304, -0.011792230536025322, 0.0036092462886493657], [0.045416516657427734, 0.10006040244594375, 0.143652840987038, 0.16801908098069296, 0.17556076841841367, 0.15588627045003361, 0.12889391351650395, 0.04281082602522101, -0.004934574771244536], [0.04535725246164146, 0.10027664901227598, 0.1431933481786156, 0.16884698348796479, 0.1741365013864833, 0.158421887835219, 0.12359468910229653, 0.0738270095231577, 0.012345679012345678], ], [ [0.04535725246164146, 0.10027664901227598, 0.1431933481786156, 0.16884698348796479, 0.1741365013864833, 0.158421887835219, 0.12359468910229653, 0.0738270095231577, 0.012345679012345678], ], [0.01777991514736345, 0.09132360789979396, 0.21430847939563075, 0.37193216458327233, 0.5451866848034267, 0.7131752428555694, 0.8556337429578544, 0.9553660447100302, 1.0] ), "radau_iia_7s": ( # https://github.com/ryanelandt/Radau.jl [ [0.03754626499392133, -0.0140393345564604, 0.0103527896007423, -0.008158322540275011, 0.006388413879534685, -0.004602326779148656, 0.0018289425614706437], [0.08014759651561897, 0.08106206398589154, -0.021237992120711036, 0.014000291238817119, -0.010234185730090163, 0.0071534651513645905, -0.0028126393724067235], [0.0720638469418819, 0.17106835498388662, 0.10961456404007211, -0.024619871728984055, 
0.014760377043950817, -0.009575259396791401, 0.0036726783971383057], [0.07570512581982441, 0.15409015514217114, 0.2271077366732024, 0.11747818703702478, -0.023810827153044174, 0.012709985533661206, -0.004608844281289633], [0.07391234216319184, 0.16135560761594242, 0.2068672415521042, 0.23700711534269422, 0.10308679353381345, -0.018854139152580447, 0.0058589009748887914], [0.07470556205979623, 0.1583072238724687, 0.21415342326720002, 0.21987784703186003, 0.19875212168063527, 0.06926550160550914, -0.00811600819772829], [0.07449423555601031, 0.15910211573365074, 0.21235188950297781, 0.22355491450728324, 0.19047493682211558, 0.1196137446126562, 0.02040816326530612], ], [ [0.07449423555601031, 0.15910211573365074, 0.21235188950297781, 0.22355491450728324, 0.19047493682211558, 0.1196137446126562, 0.02040816326530612], ], [0.029316427159784893, 0.1480785996684843, 0.3369846902811543, 0.5586715187715501, 0.7692338620300545, 0.9269456713197411, 1.0] ), "radau_iia_5s": ( # https://github.com/ryanelandt/Radau.jl [ [0.07299886431790333, -0.02673533110794557, 0.018676929763984353, -0.01287910609330644, 0.005042839233882015], [0.15377523147918246, 0.14621486784749352, -0.03644456890512809, 0.02123306311930472, -0.007935579902728777], [0.14006304568480987, 0.29896712949128346, 0.16758507013524895, -0.03396910168661774, 0.010944288744192253], [0.14489430810953477, 0.2765000687601592, 0.32579792291042103, 0.12875675325490976, -0.015708917378805327], [0.14371356079122594, 0.28135601514946207, 0.31182652297574126, 0.22310390108357075, 0.04], ], [ [0.14371356079122594, 0.28135601514946207, 0.31182652297574126, 0.22310390108357075, 0.04], ], [0.05710419611451768, 0.2768430136381238, 0.5835904323689168, 0.8602401356562195, 1.0] ), "radau_iia_3s": ( [ [11/45 - 7*6**0.5 / 360, 37/225 - 169*6**0.5 / 1800, -2/225 + 6**0.5 / 75], [37/225 + 169*6**0.5 / 1800, 11/45 + 7*6**0.5 / 360, -2/225 - 6**0.5 / 75], [4/9 - 6**0.5 / 36, 4/9 + 6**0.5 / 36, 1/9], ], [ [4/9 - 6**0.5 / 36, 4/9 + 6**0.5 / 36, 
1/9], ], [2/5 - 6**0.5 / 10, 2/5 + 6**0.5 / 10, 1.] ), "radau_iia_3s_alt": ( # https://www.unige.ch/~hairer/preprints/coimbra.pdf (page 7) Ehle [Eh69] and Axelsson [Ax69] [ [(88 - 7*6**0.5) / 360, (296 - 169*6**0.5) / 1800, (-2 + 3 * 6**0.5) / 225], [(296 + 169*6**0.5) / 1800, (88 + 7*6**0.5) / 360, (-2 - 3*6**0.5) / 225], [(16 - 6**0.5) / 36, (16 + 6**0.5) / 36, 1/9], ], [ [ (16 - 6**0.5) / 36, (16 + 6**0.5) / 36, 1/9], ], [ (4 - 6**0.5) / 10, (4 + 6**0.5) / 10, 1.] ), "radau_iia_2s": ( [ [5/12, -1/12], [3/4, 1/4], ], [ [3/4, 1/4], ], [1/3, 1] ), "radau_ia_3s": ( [ [1/9, (-1-6**0.5)/18, (-1+6**0.5)/18], [1/9, 11/45 + 7*6**0.5/360, 11/45-43*6**0.5/360], [1/9, 11/45-43*6**0.5/360, 11/45 + 7*6**0.5/360], ], [ [1/9, 4/9 + 6**0.5/36, 4/9 - 6**0.5/36], ], [0, 3/5-6**0.5/10, 3/5+6**0.5/10] ), "radau_ia_2s": ( [ [1/4, -1/4], [1/4, 5/12], ], [ [1/4, 3/4], ], [0, 2/3] ), "lobatto_iiia_4s": ( #6th order [ [0, 0, 0, 0], [(11+5**0.5)/120, (25-5**0.5)/120, (25-13*5**0.5)/120, (-1+5**0.5)/120], [(11-5**0.5)/120, (25+13*5**0.5)/120, (25+5**0.5)/120, (-1-5**0.5)/120], [1/12, 5/12, 5/12, 1/12], ], [ [1/12, 5/12, 5/12, 1/12], ], [0, (5-5**0.5)/10, (5+5**0.5)/10, 1] ), "lobatto_iiib_4s": ( #6th order [ [1/12, (-1-5**0.5)/24, (-1+5**0.5)/24, 0], [1/12, (25+5**0.5)/120, (25-13*5**0.5)/120, 0], [1/12, (25+13*5**0.5)/120, (25-5**0.5)/120, 0], [1/12, (11-5**0.5)/24, (11+5**0.5)/24, 0], ], [ [1/12, 5/12, 5/12, 1/12], ], [0, (5-5**0.5)/10, (5+5**0.5)/10, 1] ), "lobatto_iiic_4s": ( #6th order [ [1/12, (-5**0.5)/12, (5**0.5)/12, -1/12], [1/12, 1/4, (10-7*5**0.5)/60, (5**0.5)/60], [1/12, (10+7*5**0.5)/60, 1/4, (-5**0.5)/60], [1/12, 5/12, 5/12, 1/12], ], [ [1/12, 5/12, 5/12, 1/12], ], [0, (5-5**0.5)/10, (5+5**0.5)/10, 1] ), "lobatto_iiia_3s": ( [ [0, 0, 0], [5/24, 1/3, -1/24], [1/6, 2/3, 1/6], ], [ [1/6, 2/3, 1/6], ], [0, 1/2, 1] ), "lobatto_iiia_2s": ( [ [0, 0], [1/2, 1/2], ], [ [1/2, 1/2], ], [0, 1] ), "lobatto_iiib_3s": ( [ [1/6, -1/6, 0], [1/6, 1/3, 0], [1/6, 5/6, 0], ], [ [1/6, 2/3, 1/6], 
], [0, 1/2, 1] ), "lobatto_iiib_2s": ( [ [1/2, 0], [1/2, 0], ], [ [1/2, 1/2], ], [0, 1] ), "lobatto_iiic_3s": ( [ [1/6, -1/3, 1/6], [1/6, 5/12, -1/12], [1/6, 2/3, 1/6], ], [ [1/6, 2/3, 1/6], ], [0, 1/2, 1] ), "lobatto_iiic_2s": ( [ [1/2, -1/2], [1/2, 1/2], ], [ [1/2, 1/2], ], [0, 1] ), "lobatto_iiic_star_3s": ( [ [0, 0, 0], [1/4, 1/4, 0], [0, 1, 0], ], [ [1/6, 2/3, 1/6], ], [0, 1/2, 1] ), "lobatto_iiic_star_2s": ( [ [0, 0], [1, 0], ], [ [1/2, 1/2], ], [0, 1] ), "lobatto_iiid_3s": ( [ [1/6, 0, -1/6], [1/12, 5/12, 0], [1/2, 1/3, 1/6], ], [ [1/6, 2/3, 1/6], ], [0, 1/2, 1] ), "lobatto_iiid_2s": ( [ [1/2, 1/2], [-1/2, 1/2], ], [ [1/2, 1/2], ], [0, 1] ), "kraaijevanger_spijker_2s": ( #overshoots step [ [1/2, 0], [-1/2, 2], ], [ [-1/2, 3/2], ], [1/2, 3/2] ), "qin_zhang_2s": ( [ [1/4, 0], [1/2, 1/4], ], [ [1/2, 1/2], ], [1/4, 3/4] ), "pareschi_russo_2s": ( [ [(1-2**0.5/2), 0], [1-2*(1-2**0.5/2), (1-2**0.5/2)], ], [ [1/2, 1/2], ], [(1-2**0.5/2), 1-(1-2**0.5/2)] ), "pareschi_russo_alt_2s": ( [ [(1-2**0.5/2), 0], [1-(1-2**0.5/2), (1-2**0.5/2)], ], [ [1-(1-2**0.5/2), (1-2**0.5/2)], ], [(1-2**0.5/2), 1] ), "crouzeix_3s_alt": ( # Crouzeix & Raviart 1980; A-stable; pg 100 in Solving Ordinary Differential Equations II [ [gamma_crouzeix, 0, 0], [1/2 - gamma_crouzeix, gamma_crouzeix, 0], [2*gamma_crouzeix, 1-4*gamma_crouzeix, gamma_crouzeix], ], [ [delta_crouzeix, 1-2*delta_crouzeix, delta_crouzeix], ], [gamma_crouzeix, 1/2, 1-gamma_crouzeix], ), "crouzeix_3s": ( [ [(1+alpha_crouzeix)/2, 0, 0], [-alpha_crouzeix/2, (1+alpha_crouzeix)/2, 0], [1+alpha_crouzeix, -(1+2*alpha_crouzeix), (1+alpha_crouzeix)/2], ], [ [1/(6*alpha_crouzeix**2), 1-(1/(3*alpha_crouzeix**2)), 1/(6*alpha_crouzeix**2)], ], [(1+alpha_crouzeix)/2, 1/2, (1-alpha_crouzeix)/2], ), "crouzeix_2s": ( [ [1/2 + 3**0.5 / 6, 0], [-(3**0.5 / 3), 1/2 + 3**0.5 / 6] ], [ [1/2, 1/2], ], [1/2 + 3**0.5 / 6, 1/2 - 3**0.5 / 6], ), "verner_13s": ( #verner9. 
some values are missing, need to revise [ [], ], [ [], ], [ 0.03462, 0.09702435063878045, 0.14553652595817068, 0.561, 0.22900791159048503, 0.544992088409515, 0.645, 0.48375, 0.06757, 0.25, 0.6590650618730999, 0.8206, 0.9012, ] ), "verner_robust_16s": ( [ [], [0.04], [-0.01988527319182291, 0.11637263332969652], [0.0361827600517026, 0, 0.10854828015510781], [2.272114264290177, 0, -8.526886447976398, 6.830772183686221], [0.050943855353893744, 0, 0, 0.1755865049809071, 0.007022961270757467], [0.1424783668683285, 0, 0, -0.3541799434668684, 0.07595315450295101, 0.6765157656337123], [0.07111111111111111, 0, 0, 0, 0, 0.3279909287605898, 0.24089796012829906], [0.07125, 0, 0, 0, 0, 0.32688424515752457, 0.11561575484247544, -0.03375], [0.0482267732246581, 0, 0, 0, 0, 0.039485599804954, 0.10588511619346581, -0.021520063204743093, -0.10453742601833482], [-0.026091134357549235, 0, 0, 0, 0, 0.03333333333333333, -0.1652504006638105, 0.03434664118368617, 0.1595758283215209, 0.21408573218281934], [-0.03628423396255658, 0, 0, 0, 0, -1.0961675974272087, 0.1826035504321331, 0.07082254444170683, -0.02313647018482431, 0.2711204726320933, 1.3081337494229808], [-0.5074635056416975, 0, 0, 0, 0, -6.631342198657237, -0.2527480100908801, -0.49526123800360955, 0.2932525545253887, 1.440108693768281, 6.237934498647056, 0.7270192054526988], [0.6130118256955932, 0, 0, 0, 0, 9.088803891640463, -0.40737881562934486, 1.7907333894903747, 0.714927166761755, -1.4385808578417227, -8.26332931206474, -1.537570570808865, 0.34538328275648716], [-1.2116979103438739, 0, 0, 0, 0, -19.055818715595954, 1.263060675389875, -6.913916969178458, -0.6764622665094981, 3.367860445026608, 18.00675164312591, 6.83882892679428, -1.0315164519219504, 0.4129106232130623], [2.1573890074940536, 0, 0, 0, 0, 23.807122198095804, 0.8862779249216555, 13.139130397598764, -2.604415709287715, -5.193859949783872, -20.412340711541507, -12.300856252505723, 1.5215530950085394], ], [ 0.014588852784055396, 0, 0, 0, 0, 0, 0, 
0.0020241978878893325, 0.21780470845697167, 0.12748953408543898, 0.2244617745463132, 0.1787254491259903, 0.07594344758096556, 0.12948458791975614, 0.029477447612619417, 0 ], [ 0, 0.04, 0.09648736013787361, 0.1447310402068104, 0.576, 0.2272326564618766, 0.5407673435381234, 0.64, 0.48, 0.06754, 0.25, 0.6770920153543243, 0.8115, 0.906, 1, 1 ], ), "dormand-prince_13s": ( #non-monotonic [ [], [1/18], [1/48, 1/16], [1/32, 0, 3/32], [5/16, 0, -75/64, 75/64], [3/80, 0, 0, 3/16, 3/20], [29443841/614563906, 0, 0, 77736538/692538347, -28693883/1125000000, 23124283/1800000000], [16016141/946692911, 0, 0, 61564180/158732637, 22789713/633445777, 545815736/2771057229, -180193667/1043307555], [39632708/573591083, 0, 0, -433636366/683701615, -421739975/2616292301, 100302831/723423059, 790204164/839813087, 800635310/3783071287], [246121993/1340847787, 0, 0, -37695042795/15268766246, -309121744/1061227803, -12992083/490766935, 6005943493/2108947869, 393006217/1396673457, 123872331/1001029789], [-1028468189/846180014, 0, 0, 8478235783/508512852, 1311729495/1432422823, -10304129995/1701304382, -48777925059/3047939560, 15336726248/1032824649, -45442868181/3398467696, 3065993473/597172653], [185892177/718116043, 0, 0, -3185094517/667107341, -477755414/1098053517, -703635378/230739211, 5731566787/1027545527, 5232866602/850066563, -4093664535/808688257, 3962137247/1805957418, 65686358/487910083], [403863854/491063109, 0, 0, -5068492393/434740067, -411421997/543043805, 652783627/914296604, 11173962825/925320556, -13158990841/6184727034, 3936647629/1978049680, -160528059/685178525, 248638103/1413531060], ], [ [14005451/335480064, 0, 0, 0, 0, -59238493/1068277825, 181606767/758867731, 561292985/797845732, -1041891430/1371343529, 760417239/1151165299, 118820643/751138087, -528747749/2220607170, 1/4], ], [0, 1/18, 1/12, 1/8, 5/16, 3/8, 59/400, 93/200, 5490023248 / 9719169821, 13/20, 1201146811 / 1299019798, 1, 1], ), "dormand-prince_6s": ( [ [], [1/5], [3/40, 9/40], [44/45, -56/15, 32/9], 
[19372/6561, -25360/2187, 64448/6561, -212/729], [9017/3168, -355/33, 46732/5247, 49/176, -5103/18656], ], [ [35/384, 0, 500/1113, 125/192, -2187/6784, 11/84], ], [0, 1/5, 3/10, 4/5, 8/9, 1], ), "bogacki-shampine_7s": ( #5th order [ [], [1/6], [2/27, 4/27], [183/1372, -162/343, 1053/1372], [68/297, -4/11, 42/143, 1960/3861], [597/22528, 81/352, 63099/585728, 58653/366080, 4617/20480], [174197/959244, -30942/79937, 8152137/19744439, 666106/1039181, -29421/29068, 482048/414219], ], [ [587/8064, 0, 4440339/15491840, 24353/124800, 387/44800, 2152/5985, 7267/94080], ], [0, 1/6, 2/9, 3/7, 2/3, 3/4, 1] ), "bogacki-shampine_4s": ( #5th order [ [], [1/2], [0, 3/4], [2/9, 1/3, 4/9], ], [ [2/9, 1/3, 4/9, 0], ], [0, 1/2, 3/4, 1] ), "tsi_7s": ( #5th order [ [], [0.161], [-0.008480655492356989, 0.335480655492357], [2.8971530571054935, -6.359448489975075, 4.3622954328695815], [5.325864828439257, -11.748883564062828, 7.4955393428898365, -0.09249506636175525], [5.86145544294642, -12.92096931784711, 8.159367898576159, -0.071584973281401, -0.02826905039406838], [0.09646076681806523, 0.01, 0.4798896504144996, 1.379008574103742, -3.290069515436081, 2.324710524099774], ], [ [0.09646076681806523, 0.01, 0.4798896504144996, 1.379008574103742, -3.290069515436081, 2.324710524099774, 0.0], ], [0.0, 0.161, 0.327, 0.9, 0.9800255409045097, 1.0, 1.0], ), "rk6_7s": ( #non-monotonic #5th order [ [], [1/3], [0, 2/3], [1/12, 1/3, -1/12], [-1/16, 9/8, -3/16, -3/8], [0, 9/8, -3/8, -3/4, 1/2], [9/44, -9/11, 63/44, 18/11, 0, -16/11], ], [ [11/120, 0, 27/40, 27/40, -4/15, -4/15, 11/120], ], [0, 1/3, 2/3, 1/3, 1/2, 1/2, 1], ), "rk5_7s": ( #5th order [ [], [1/5], [3/40, 9/40], [44/45, -56/15, 32/9], [19372/6561, -25360/2187, 64448/6561, 212/729], #flipped 212 sign [-9017/3168, -355/33, 46732/5247, 49/176, -5103/18656], [35/384, 0, 500/1113, 125/192, -2187/6784, 11/84], ], [ [5179/57600, 0, 7571/16695, 393/640, -92097/339200, 187/2100, 1/40], ], [0, 1/5, 3/10, 4/5, 8/9, 1, 1], ), "ssprk4_4s": ( 
#non-monotonic #https://link.springer.com/article/10.1007/s41980-022-00731-x [ [], [1/2], [1/2, 1/2], [1/6, 1/6, 1/6], ], [ [1/6, 1/6, 1/6, 1/2], ], [0, 1/2, 1, 1/2], ), "rk4_4s": ( [ [], [1/2], [0, 1/2], [0, 0, 1], ], [ [1/6, 1/3, 1/3, 1/6], ], [0, 1/2, 1/2, 1], ), "rk38_4s": ( [ [], [1/3], [-1/3, 1], [1, -1, 1], ], [ [1/8, 3/8, 3/8, 1/8], ], [0, 1/3, 2/3, 1], ), "ralston_4s": ( [ [], [2/5], [(-2889+1428 * 5**0.5)/1024, (3785-1620 * 5**0.5)/1024], [(-3365+2094 * 5**0.5)/6040, (-975-3046 * 5**0.5)/2552, (467040+203968*5**0.5)/240845], ], [ [(263+24*5**0.5)/1812, (125-1000*5**0.5)/3828, (3426304+1661952*5**0.5)/5924787, (30-4*5**0.5)/123], ], [0, 2/5, (14-3 * 5**0.5)/16, 1], ), "heun_3s": ( [ [], [1/3], [0, 2/3], ], [ [1/4, 0, 3/4], ], [0, 1/3, 2/3], ), "kutta_3s": ( [ [], [1/2], [-1, 2], ], [ [1/6, 2/3, 1/6], ], [0, 1/2, 1], ), "ralston_3s": ( [ [], [1/2], [0, 3/4], ], [ [2/9, 1/3, 4/9], ], [0, 1/2, 3/4], ), "houwen-wray_3s": ( [ [], [8/15], [1/4, 5/12], ], [ [1/4, 0, 3/4], ], [0, 8/15, 2/3], ), "ssprk3_3s": ( #non-monotonic [ [], [1], [1/4, 1/4], ], [ [1/6, 1/6, 2/3], ], [0, 1, 1/2], ), "midpoint_2s": ( [ [], [1/2], ], [ [0, 1], ], [0, 1/2], ), "heun_2s": ( [ [], [1], ], [ [1/2, 1/2], ], [0, 1], ), "ralston_2s": ( [ [], [2/3], ], [ [1/4, 3/4], ], [0, 2/3], ), "euler": ( [ [], ], [ [1], ], [0], ), } def get_rk_methods_beta(rk_type : str, h : Tensor, c1 : float = 0.0, c2 : float = 0.5, c3 : float = 1.0, h_prev : Optional[Tensor] = None, step : int = 0, sigmas : Optional[Tensor] = None, sigma : Optional[Tensor] = None, sigma_next : Optional[Tensor] = None, sigma_down : Optional[Tensor] = None, extra_options : Optional[str] = None ): FSAL = False multistep_stages = 0 hybrid_stages = 0 u = None v = None EO = ExtraOptions(extra_options) use_analytic_solution = not EO("disable_analytic_solution") multistep_initial_sampler = EO("multistep_initial_sampler", "", debugMode=1) multistep_fallback_sampler = EO("multistep_fallback_sampler", "") multistep_extra_initial_steps = 
EO("multistep_extra_initial_steps", 1) #if RK_Method_Beta.is_exponential(rk_type): if rk_type.startswith(("res", "dpmpp", "ddim", "pec", "etdrk", "lawson")): h_no_eta = -torch.log(sigma_next/sigma) h_prev1_no_eta = -torch.log(sigmas[step]/sigmas[step-1]) if step >= 1 else None h_prev2_no_eta = -torch.log(sigmas[step]/sigmas[step-2]) if step >= 2 else None h_prev3_no_eta = -torch.log(sigmas[step]/sigmas[step-3]) if step >= 3 else None h_prev4_no_eta = -torch.log(sigmas[step]/sigmas[step-4]) if step >= 4 else None else: h_no_eta = sigma_next - sigma h_prev1_no_eta = sigmas[step] - sigmas[step-1] if step >= 1 else None h_prev2_no_eta = sigmas[step] - sigmas[step-2] if step >= 2 else None h_prev3_no_eta = sigmas[step] - sigmas[step-3] if step >= 3 else None h_prev4_no_eta = sigmas[step] - sigmas[step-4] if step >= 4 else None if type(c1) == torch.Tensor: c1 = c1.item() if type(c2) == torch.Tensor: c2 = c2.item() if type(c3) == torch.Tensor: c3 = c3.item() if c1 == -1: c1 = random.uniform(0, 1) if c2 == -1: c2 = random.uniform(0, 1) if c3 == -1: c3 = random.uniform(0, 1) if rk_type[:4] == "deis": order = int(rk_type[-2]) if step < order + multistep_extra_initial_steps: if order == 4: #rk_type = "res_4s_strehmel_weiner" rk_type = "ralston_4s" rk_type = multistep_initial_sampler if multistep_initial_sampler else rk_type order = 3 elif order == 3: #rk_type = "res_3s" rk_type = "ralston_3s" rk_type = multistep_initial_sampler if multistep_initial_sampler else rk_type elif order == 2: #rk_type = "res_2s" rk_type = "ralston_2s" rk_type = multistep_initial_sampler if multistep_initial_sampler else rk_type else: rk_type = "deis" multistep_stages = order-1 if rk_type[-2:] == "2m": #multistep method rk_type = rk_type[:-2] + "2s" #if h_prev is not None and step >= 1: if h_no_eta < 1.0: if step >= 1 + multistep_extra_initial_steps: multistep_stages = 1 c2 = (-h_prev1_no_eta / h_no_eta).item() else: rk_type = multistep_initial_sampler if multistep_initial_sampler else rk_type if 
rk_type.startswith("abnorsett"): rk_type = "res_2s" rk_type = multistep_initial_sampler if multistep_initial_sampler else rk_type else: #rk_type = "res_2s" rk_type = "euler" if sigma < 0.1 else "res_2s" rk_type = multistep_fallback_sampler if multistep_fallback_sampler else rk_type if rk_type[-2:] == "3m": #multistep method rk_type = rk_type[:-2] + "3s" #if h_prev2 is not None and step >= 2: if h_no_eta < 1.0: if step >= 2 + multistep_extra_initial_steps: multistep_stages = 2 c2 = (-h_prev1_no_eta / h_no_eta).item() c3 = (-h_prev2_no_eta / h_no_eta).item() else: rk_type = multistep_initial_sampler if multistep_initial_sampler else rk_type if rk_type.startswith("abnorsett"): rk_type = "res_3s" rk_type = multistep_initial_sampler if multistep_initial_sampler else rk_type else: #rk_type = "res_3s" rk_type = "euler" if sigma < 0.1 else "res_3s" rk_type = multistep_fallback_sampler if multistep_fallback_sampler else rk_type if rk_type[-2:] == "4m": #multistep method rk_type = rk_type[:-2] + "4s" #if h_prev2 is not None and step >= 2: if h_no_eta < 1.0: if step >= 3 + multistep_extra_initial_steps: multistep_stages = 3 c2 = (-h_prev1_no_eta / h_no_eta).item() c3 = (-h_prev2_no_eta / h_no_eta).item() # WOULD NEED A C4 (POW) TO IMPLEMENT RES_4M IF IT EXISTED else: rk_type = multistep_initial_sampler if multistep_initial_sampler else rk_type if rk_type == "res_4s": rk_type = "res_4s_strehmel_weiner" rk_type = multistep_initial_sampler if multistep_initial_sampler else rk_type if rk_type.startswith("abnorsett"): rk_type = "res_4s_strehmel_weiner" rk_type = multistep_initial_sampler if multistep_initial_sampler else rk_type else: #rk_type = "res_4s_strehmel_weiner" rk_type = "euler" if sigma < 0.1 else "res_4s_strehmel_weiner" rk_type = multistep_fallback_sampler if multistep_fallback_sampler else rk_type if rk_type[-3] == "h" and rk_type[-1] == "s": #hybrid method if step < int(rk_type[-4]) + multistep_extra_initial_steps: rk_type = "res_" + rk_type[-2:] rk_type = 
multistep_initial_sampler if multistep_initial_sampler else rk_type else: hybrid_stages = int(rk_type[-4]) #+1 adjustment needed? if rk_type == "res_4s": rk_type = "res_4s_strehmel_weiner" rk_type = multistep_initial_sampler if multistep_initial_sampler else rk_type if rk_type == "res_1s": rk_type = "res_2s" rk_type = multistep_initial_sampler if multistep_initial_sampler else rk_type if rk_type in rk_coeff: a, b, ci = copy.deepcopy(rk_coeff[rk_type]) a = [row + [0] * (len(ci) - len(row)) for row in a] match rk_type: case "deis": coeff_list = get_deis_coeff_list(sigmas, multistep_stages+1, deis_mode="rhoab") coeff_list = [[elem / h for elem in inner_list] for inner_list in coeff_list] if multistep_stages == 1: b1, b2 = coeff_list[step] a = [ [0, 0], [0, 0], ] b = [ [b1, b2], ] ci = [0, 0] if multistep_stages == 2: b1, b2, b3 = coeff_list[step] a = [ [0, 0, 0], [0, 0, 0], [0, 0, 0], ] b = [ [b1, b2, b3], ] ci = [0, 0, 0] if multistep_stages == 3: b1, b2, b3, b4 = coeff_list[step] a = [ [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], ] b = [ [b1, b2, b3, b4], ] ci = [0, 0, 0, 0] if multistep_stages > 0: for i in range(len(b[0])): b[0][i] *= ((sigma_down - sigma) / (sigma_next - sigma)) case "dormand-prince_6s": FSAL = True case "ddim": b1 = phi(1, -h) a = [ [0], ] b = [ [b1], ] ci = [0] case "res_2s": c2 = float(get_extra_options_kv("c2", str(c2), extra_options)) ci = [0, c2] φ = Phi(h, ci, use_analytic_solution) a2_1 = c2 * φ(1,2) b2 = φ(2)/c2 b1 = φ(1) - b2 a = [ [0,0], [a2_1, 0], ] b = [ [b1, b2], ] case "res_2s_stable": c2 = 1.0 #float(get_extra_options_kv("c2", str(c2), extra_options)) ci = [0, c2] φ = Phi(h, ci, use_analytic_solution) a2_1 = c2 * φ(1,2) b2 = φ(2)/c2 b1 = φ(1) - b2 a = [ [0,0], [a2_1, 0], ] b = [ [b1, b2], ] case "res_2s_rkmk2e": ci = [0, 1] φ = Phi(h, ci, use_analytic_solution) b2 = φ(2) a = [ [0,0], [0, 0], ] b = [ [0, b2], ] gen_first_col_exp(a, b, ci, φ) case "abnorsett2_1h2s": c1, c2 = 0, 1 ci = [c1, c2] φ = Phi(h, ci, 
use_analytic_solution) b1 = φ(1) #+ φ(2) a = [ [0, 0], [0, 0], ] b = [ [0, 0], ] if extra_options_flag("h_prev_h_h_no_eta", extra_options): φ1 = Phi(h_prev1_no_eta * h/h_no_eta, ci) elif extra_options_flag("h_only", extra_options): φ1 = Phi(h, ci, use_analytic_solution) else: φ1 = Phi(h_prev1_no_eta, ci) u1 = -φ1(2) v1 = -φ1(2) u = [ [0, 0], [u1, 0], ] v = [ [v1, 0], ] gen_first_col_exp_uv(a, b, ci, u, v, φ) case "abnorsett_2m": c1, c2 = 0, 1 ci = [c1, c2] φ = Phi(h, ci, use_analytic_solution) a = [ [0, 0], [0, 0], ] b = [ [0, -φ(2)], ] gen_first_col_exp(a, b, ci, φ) case "abnorsett_3m": c1, c2, c3 = 0, 0, 1 ci = [c1, c2, c3] φ = Phi(h, ci, use_analytic_solution) a = [ [0, 0, 0], [0, 0, 0], [0, 0, 0], ] b = [ [0, -2*φ(2) - 2*φ(3), (1/2)*φ(2) + φ(3)], ] gen_first_col_exp(a, b, ci, φ) case "abnorsett_4m": c1, c2, c3, c4 = 0, 0, 0, 1 ci = [c1, c2, c3, c4] φ = Phi(h, ci, use_analytic_solution) a = [ [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], ] b = [ [0, -3*φ(2) - 5*φ(3) - 3*φ(4), (3/2)*φ(2) + 4*φ(3) + 3*φ(4), (-1/3)*φ(2) - φ(3) - φ(4), ], ] gen_first_col_exp(a, b, ci, φ) case "abnorsett3_2h2s": c1,c2 = 0,1 ci = [c1, c2] φ = Phi(h, ci, use_analytic_solution) b2 = 0 a = [ [0, 0], [0, 0], ] b = [ [0, 0], ] if extra_options_flag("h_prev_h_h_no_eta", extra_options): φ1 = Phi(h_prev1_no_eta * h/h_no_eta, ci) φ2 = Phi(h_prev2_no_eta * h/h_no_eta, ci) elif extra_options_flag("h_only", extra_options): φ1 = Phi(h, ci, use_analytic_solution) φ2 = Phi(h, ci, use_analytic_solution) else: φ1 = Phi(h_prev1_no_eta, ci) φ2 = Phi(h_prev2_no_eta, ci) u2_1 = -2*φ1(2) - 2*φ1(3) u2_2 = (1/2)*φ2(2) + φ2(3) v1 = u2_1 # -φ1(2) + φ1(3) + 3*φ1(4) v2 = u2_2 # (1/6)*φ2(2) - φ2(4) u = [ [ 0, 0], [u2_1, u2_2], ] v = [ [v1, v2], ] gen_first_col_exp_uv(a, b, ci, u, v, φ) case "pec423_2h2s": #https://ora.ox.ac.uk/objects/uuid:cc001282-4285-4ca2-ad06-31787b540c61/files/m611df1a355ca243beb09824b70e5e774 c1,c2 = 0,1 ci = [c1, c2] φ = Phi(h, ci, use_analytic_solution) b2 = (1/3)*φ(2) + φ(3) + 
φ(4) a = [ [0, 0], [0, 0], ] b = [ [0, b2], ] if extra_options_flag("h_prev_h_h_no_eta", extra_options): φ1 = Phi(h_prev1_no_eta * h/h_no_eta, ci) φ2 = Phi(h_prev2_no_eta * h/h_no_eta, ci) elif extra_options_flag("h_only", extra_options): φ1 = Phi(h, ci, use_analytic_solution) φ2 = Phi(h, ci, use_analytic_solution) else: φ1 = Phi(h_prev1_no_eta, ci) φ2 = Phi(h_prev2_no_eta, ci) u2_1 = -2*φ1(2) - 2*φ1(3) u2_2 = (1/2)*φ2(2) + φ2(3) v1 = -φ1(2) + φ1(3) + 3*φ1(4) v2 = (1/6)*φ2(2) - φ2(4) u = [ [ 0, 0], [u2_1, u2_2], ] v = [ [v1, v2], ] gen_first_col_exp_uv(a, b, ci, u, v, φ) case "pec433_2h3s": #https://ora.ox.ac.uk/objects/uuid:cc001282-4285-4ca2-ad06-31787b540c61/files/m611df1a355ca243beb09824b70e5e774 c1,c2,c3 = 0, 1, 1 ci = [c1,c2,c3] φ = Phi(h, ci, use_analytic_solution) a3_2 = (1/3)*φ(2) + φ(3) + φ(4) b2 = 0 b3 = (1/3)*φ(2) + φ(3) + φ(4) a = [ [0, 0, 0], [0, 0, 0], [0, a3_2, 0], ] b = [ [0, b2, b3], ] if extra_options_flag("h_prev_h_h_no_eta", extra_options): φ1 = Phi(h_prev1_no_eta * h/h_no_eta, ci) φ2 = Phi(h_prev2_no_eta * h/h_no_eta, ci) elif extra_options_flag("h_only", extra_options): φ1 = Phi(h, ci, use_analytic_solution) φ2 = Phi(h, ci, use_analytic_solution) else: φ1 = Phi(h_prev1_no_eta, ci) φ2 = Phi(h_prev2_no_eta, ci) u2_1 = -2*φ1(2) - 2*φ1(3) u3_1 = -φ1(2) + φ1(3) + 3*φ1(4) v1 = -φ1(2) + φ1(3) + 3*φ1(4) u2_2 = (1/2)*φ2(2) + φ2(3) u3_2 = (1/6)*φ2(2) - φ2(4) v2 = (1/6)*φ2(2) - φ2(4) u = [ [ 0, 0, 0], [u2_1, u2_2, 0], [u3_1, u3_2, 0], ] v = [ [v1, v2, 0], ] gen_first_col_exp_uv(a, b, ci, u, v, φ) case "res_3s": c2 = float(get_extra_options_kv("c2", str(c2), extra_options)) c3 = float(get_extra_options_kv("c3", str(c3), extra_options)) ci = [0,c2,c3] φ = Phi(h, ci, use_analytic_solution) gamma = calculate_gamma(c2, c3) a3_2 = gamma * c2 * φ(2,2) + (c3 ** 2 / c2) * φ(2, 3) b3 = (1 / (gamma * c2 + c3)) * φ(2) b2 = gamma * b3 #simplified version of: b2 = (gamma / (gamma * c2 + c3)) * phi_2_h a = [ [0, 0, 0], [0, 0, 0], [0, a3_2, 0], ] b = [ [0, b2, b3], ] 
a, b = gen_first_col_exp(a,b,ci,φ) case "res_3s_non-monotonic": c2 = float(get_extra_options_kv("c2", "1.0", extra_options)) c3 = float(get_extra_options_kv("c3", "0.5", extra_options)) ci = [0,c2,c3] φ = Phi(h, ci, use_analytic_solution) gamma = calculate_gamma(c2, c3) a3_2 = gamma * c2 * φ(2,2) + (c3 ** 2 / c2) * φ(2, 3) b3 = (1 / (gamma * c2 + c3)) * φ(2) b2 = gamma * b3 #simplified version of: b2 = (gamma / (gamma * c2 + c3)) * phi_2_h a = [ [0, 0, 0], [0, 0, 0], [0, a3_2, 0], ] b = [ [0, b2, b3], ] a, b = gen_first_col_exp(a,b,ci,φ) case "res_3s_alt": c2 = 1/3 c2 = float(get_extra_options_kv("c2", str(c2), extra_options)) c1,c2,c3 = 0, c2, 2/3 ci = [c1,c2,c3] φ = Phi(h, ci, use_analytic_solution) a = [ [0, 0, 0], [0, 0, 0], [0, (4/(9*c2)) * φ(2,3), 0], ] b = [ [0, 0, (1/c3)*φ(2)], ] a, b = gen_first_col_exp(a,b,ci,φ) case "res_3s_strehmel_weiner": # c2 = 1/2 c2 = float(get_extra_options_kv("c2", str(c2), extra_options)) ci = [0,c2,1] φ = Phi(h, ci, use_analytic_solution) a = [ [0, 0, 0], [0, 0, 0], [0, (1/c2) * φ(2,3), 0], ] b = [ [0, 0, φ(2)], ] a, b = gen_first_col_exp(a,b,ci,φ) case "res_3s_cox_matthews": # Cox & Matthews; known as ETD3RK c2 = 1/2 # must be 1/2 ci = [0,c2,1] φ = Phi(h, ci, use_analytic_solution) a = [ [0, 0, 0], [0, 0, 0], [0, (1/c2) * φ(1,3), 0], # paper said 2 * φ(1,3), but this is the same and more consistent with res_3s_strehmel_weiner ] b = [ [0, -8*φ(3) + 4*φ(2), 4*φ(3) - φ(2)], ] a, b = gen_first_col_exp(a,b,ci,φ) case "res_3s_lie": # Lie; known as ETD2CF3 c1,c2,c3 = 0, 1/3, 2/3 ci = [c1,c2,c3] φ = Phi(h, ci, use_analytic_solution) a = [ [0, 0, 0], [0, 0, 0], [0, (4/3)*φ(2,3), 0], # paper said 2 * φ(1,3), but this is the same and more consistent with res_3s_strehmel_weiner ] b = [ [0, 6*φ(2) - 18*φ(3), (-3/2)*φ(2) + 9*φ(3)], ] a, b = gen_first_col_exp(a,b,ci,φ) case "res_3s_sunstar": # https://arxiv.org/pdf/2410.00498 pg 5 (tableau 2.7) c1,c2,c3 = 0, 1/3, 2/3 ci = [c1,c2,c3] φ = Phi(h, ci, use_analytic_solution) a = [ [0, 0, 0], [0, 
0, 0], [0, (8/9)*φ(2,3), 0], # paper said 2 * φ(1,3), but this is the same and more consistent with res_3s_strehmel_weiner ] b = [ [0, 0, (3/2)*φ(2)], ] a, b = gen_first_col_exp(a,b,ci,φ) case "res_4s_cox_matthews": # weak 4th order, Cox & Matthews; unresolved issue, see below c1,c2,c3,c4 = 0, 1/2, 1/2, 1 ci = [c1,c2,c3,c4] φ = Phi(h, ci, use_analytic_solution) a2_1 = c2 * φ(1,2) a3_2 = c3 * φ(1,3) a4_1 = (1/2) * φ(1,3) * (φ(0,3) - 1) # φ(0,3) == torch.exp(-h*c3) a4_3 = φ(1,3) b1 = φ(1) - 3*φ(2) + 4*φ(3) b2 = 2*φ(2) - 4*φ(3) b3 = 2*φ(2) - 4*φ(3) b4 = 4*φ(3) - φ(2) a = [ [0, 0,0,0], [a2_1, 0,0,0], [0, a3_2,0,0], [a4_1, 0, a4_3,0], ] b = [ [b1, b2, b3, b4], ] case "res_4s_cfree4": # weak 4th order, Cox & Matthews; unresolved issue, see below c1,c2,c3,c4 = 0, 1/2, 1/2, 1 ci = [c1,c2,c3,c4] φ = Phi(h, ci, use_analytic_solution) a2_1 = c2 * φ(1,2) a3_2 = c3 * φ(1,2) a4_1 = (1/2) * φ(1,2) * (φ(0,2) - 1) # φ(0,3) == torch.exp(-h*c3) a4_3 = φ(1,2) b1 = (1/2)*φ(1) - (1/3)*φ(1,2) b2 = (1/3)*φ(1) b3 = (1/3)*φ(1) b4 = -(1/6)*φ(1) + (1/3)*φ(1,2) a = [ [0, 0,0,0], [a2_1, 0,0,0], [0, a3_2,0,0], [a4_1, 0, a4_3,0], ] b = [ [b1, b2, b3, b4], ] case "res_4s_friedli": # https://ora.ox.ac.uk/objects/uuid:cc001282-4285-4ca2-ad06-31787b540c61/files/m611df1a355ca243beb09824b70e5e774 c1,c2,c3,c4 = 0, 1/2, 1/2, 1 ci = [c1,c2,c3,c4] φ = Phi(h, ci, use_analytic_solution) a3_2 = 2*φ(2,2) a4_2 = -(26/25)*φ(1) + (2/25)*φ(2) a4_3 = (26/25)*φ(1) + (48/25)*φ(2) b2 = 0 b3 = 4*φ(2) - 8*φ(3) b4 = -φ(2) + 4*φ(3) a = [ [0, 0,0,0], [0, 0,0,0], [0, a3_2,0,0], [0, a4_2, a4_3,0], ] b = [ [0, b2, b3, b4], ] a, b = gen_first_col_exp(a,b,ci,φ) case "res_4s_munthe-kaas": # unstable RKMK4t c1,c2,c3,c4 = 0, 1/2, 1/2, 1 ci = [c1,c2,c3,c4] φ = Phi(h, ci, use_analytic_solution) a = [ [0, 0, 0, 0], [c2*φ(1,2), 0, 0, 0], [(h/8)*φ(1,2), (1/2)*(1-h/4)*φ(1,2), 0, 0], [0, 0, φ(1), 0], ] b = [ [ (1/6)*φ(1)*(1+h/2), (1/3)*φ(1), (1/3)*φ(1), (1/6)*φ(1)*(1-h/2) ], ] case "res_4s_krogstad": # weak 4th order, Krogstad 
c1,c2,c3,c4 = 0, 1/2, 1/2, 1 ci = [c1,c2,c3,c4] φ = Phi(h, ci, use_analytic_solution) a = [ [0, 0, 0, 0], [0, 0, 0, 0], [0, φ(2,3), 0, 0], [0, 0, 2*φ(2,4), 0], ] b = [ [ 0, 2*φ(2) - 4*φ(3), 2*φ(2) - 4*φ(3), -φ(2) + 4*φ(3) ], ] #a = [row + [0] * (len(ci) - len(row)) for row in a] a, b = gen_first_col_exp(a,b,ci,φ) case "res_4s_krogstad_alt": # weak 4th order, Krogstad https://ora.ox.ac.uk/objects/uuid:cc001282-4285-4ca2-ad06-31787b540c61/files/m611df1a355ca243beb09824b70e5e774 c1,c2,c3,c4 = 0, 1/2, 1/2, 1 ci = [c1,c2,c3,c4] φ = Phi(h, ci, use_analytic_solution) a = [ [0, 0, 0, 0], [0, 0, 0, 0], [0, 4*φ(2,2), 0, 0], [0, 0, 2*φ(2), 0], ] b = [ [ 0, 2*φ(2) - 4*φ(3), 2*φ(2) - 4*φ(3), -φ(2) + 4*φ(3) ], ] #a = [row + [0] * (len(ci) - len(row)) for row in a] a, b = gen_first_col_exp(a,b,ci,φ) case "res_4s_minchev": # https://ora.ox.ac.uk/objects/uuid:cc001282-4285-4ca2-ad06-31787b540c61/files/m611df1a355ca243beb09824b70e5e774 c1,c2,c3,c4 = 0, 1/2, 1/2, 1 ci = [c1,c2,c3,c4] φ = Phi(h, ci, use_analytic_solution) a3_2 = (4/25)*φ(1,2) + (24/25)*φ(2,2) a4_2 = (21/5)*φ(2) - (108/5)*φ(3) a4_3 = (1/20)*φ(1) - (33/10)*φ(2) + (123/5)*φ(3) b2 = -(1/10)*φ(1) + (1/5)*φ(2) - 4*φ(3) + 12*φ(4) b3 = (1/30)*φ(1) + (23/5)*φ(2) - 8*φ(3) - 4*φ(4) b4 = (1/30)*φ(1) - (7/5)*φ(2) + 6*φ(3) - 4*φ(4) a = [ [0, 0,0,0], [0, 0,0,0], [0, a3_2,0,0], [0, 0, a4_3,0], ] b = [ [0, b2, b3, b4], ] a, b = gen_first_col_exp(a,b,ci,φ) case "res_4s_strehmel_weiner": # weak 4th order, Strehmel & Weiner c1,c2,c3,c4 = 0, 1/2, 1/2, 1 ci = [c1,c2,c3,c4] φ = Phi(h, ci, use_analytic_solution) a = [ [0, 0, 0, 0], [0, 0, 0, 0], [0, c3*φ(2,3), 0, 0], [0, -2*φ(2,4), 4*φ(2,4), 0], ] b = [ [ 0, 0, 4*φ(2) - 8*φ(3), -φ(2) + 4*φ(3) ], ] a, b = gen_first_col_exp(a,b,ci,φ) case "res_4s_strehmel_weiner_alt": # weak 4th order, Strehmel & Weiner https://ora.ox.ac.uk/objects/uuid:cc001282-4285-4ca2-ad06-31787b540c61/files/m611df1a355ca243beb09824b70e5e774 c1,c2,c3,c4 = 0, 1/2, 1/2, 1 ci = [c1,c2,c3,c4] φ = Phi(h, ci, 
use_analytic_solution) a = [ [0, 0, 0, 0], [0, 0, 0, 0], [0, 2*φ(2,2), 0, 0], [0, -2*φ(2), 4*φ(2), 0], ] b = [ [ 0, 0, 4*φ(2) - 8*φ(3), -φ(2) + 4*φ(3) ], ] a, b = gen_first_col_exp(a,b,ci,φ) case "lawson2a_2s": # based on midpoint rule, stiff order 1 https://cds.cern.ch/record/848126/files/cer-002531460.pdf c1,c2 = 0,1/2 ci = [c1, c2] φ = Phi(h, ci, use_analytic_solution) a2_1 = c2 * φ(0,2) b2 = φ(0,2) b1 = 0 a = [ [0,0], [a2_1, 0], ] b = [ [b1, b2], ] case "lawson2b_2s": # based on trapezoidal rule, stiff order 1 https://cds.cern.ch/record/848126/files/cer-002531460.pdf c1,c2 = 0,1 ci = [c1, c2] φ = Phi(h, ci, use_analytic_solution) a2_1 = φ(0) b2 = 1/2 b1 = (1/2)*φ(0) a = [ [0,0], [a2_1, 0], ] b = [ [b1, b2], ] case "lawson4_4s": c1,c2,c3,c4 = 0, 1/2, 1/2, 1 ci = [c1,c2,c3,c4] φ = Phi(h, ci, use_analytic_solution) a2_1 = c2 * φ(0,2) a3_2 = 1/2 a4_3 = φ(0,2) b1 = (1/6) * φ(0) b2 = (1/3) * φ(0,2) b3 = (1/3) * φ(0,2) b4 = 1/6 a = [ [0, 0, 0, 0], [a2_1, 0, 0, 0], [0, a3_2, 0, 0], [0, 0, a4_3, 0], ] b = [ [b1,b2,b3,b4], ] case "lawson41-gen_4s": # GenLawson4 https://ora.ox.ac.uk/objects/uuid:cc001282-4285-4ca2-ad06-31787b540c61/files/m611df1a355ca243beb09824b70e5e774 c1,c2,c3,c4 = 0, 1/2, 1/2, 1 ci = [c1,c2,c3,c4] φ = Phi(h, ci, use_analytic_solution) a3_2 = 1/2 a4_3 = φ(0,2) b2 = (1/3) * φ(0,2) b3 = (1/3) * φ(0,2) b4 = 1/6 a = [ [0, 0, 0, 0], [0, 0, 0, 0], [0, a3_2, 0, 0], [0, 0, a4_3, 0], ] b = [ [0, b2, b3, b4,], ] a, b = gen_first_col_exp(a,b,ci,φ) case "lawson41-gen-mod_4s": # GenLawson4 https://ora.ox.ac.uk/objects/uuid:cc001282-4285-4ca2-ad06-31787b540c61/files/m611df1a355ca243beb09824b70e5e774 c1,c2,c3,c4 = 0, 1/2, 1/2, 1 ci = [c1,c2,c3,c4] φ = Phi(h, ci, use_analytic_solution) a3_2 = 1/2 a4_3 = φ(0,2) b2 = (1/3) * φ(0,2) b3 = (1/3) * φ(0,2) b4 = φ(2) - (1/3)*φ(0,2) a = [ [0, 0, 0, 0], [0, 0, 0, 0], [0, a3_2, 0, 0], [0, 0, a4_3, 0], ] b = [ [0, b2, b3, b4,], ] a, b = gen_first_col_exp(a,b,ci,φ) case "lawson42-gen-mod_1h4s": # GenLawson4 
https://ora.ox.ac.uk/objects/uuid:cc001282-4285-4ca2-ad06-31787b540c61/files/m611df1a355ca243beb09824b70e5e774 c1,c2,c3,c4 = 0, 1/2, 1/2, 1 ci = [c1,c2,c3,c4] φ = Phi(h, ci, use_analytic_solution) a3_2 = 1/2 a4_3 = φ(0,2) b2 = (1/3) * φ(0,2) b3 = (1/3) * φ(0,2) b4 = (1/2)*φ(2) + φ(3) - (1/4)*φ(0,2) a = [ [0, 0, 0, 0], [0, 0, 0, 0], [0, a3_2, 0, 0], [0, 0, a4_3, 0], ] b = [ [0, b2, b3, b4,], ] if extra_options_flag("h_prev_h_h_no_eta", extra_options): φ1 = Phi(h_prev1_no_eta * h/h_no_eta, ci, use_analytic_solution) elif extra_options_flag("h_only", extra_options): φ1 = Phi(h, ci, use_analytic_solution) else: φ1 = Phi(h_prev1_no_eta, ci, use_analytic_solution) u2_1 = -φ1(2,2) u3_1 = -φ1(2,2) + 1/4 u4_1 = -φ1(2) + (1/2)*φ1(0,2) v1 = -(1/2)*φ1(2) + φ1(3) + (1/12)*φ1(0,2) u = [ [ 0, 0, 0, 0], [u2_1, 0, 0, 0], [u3_1, 0, 0, 0], [u4_1, 0, 0, 0], ] v = [ [v1, 0, 0, 0,], ] a, b = gen_first_col_exp_uv(a,b,ci,u,v,φ) case "lawson43-gen-mod_2h4s": # GenLawson4 https://ora.ox.ac.uk/objects/uuid:cc001282-4285-4ca2-ad06-31787b540c61/files/m611df1a355ca243beb09824b70e5e774 c1,c2,c3,c4 = 0, 1/2, 1/2, 1 ci = [c1,c2,c3,c4] φ = Phi(h, ci, use_analytic_solution) a3_2 = 1/2 a4_3 = φ(0,2) b3 = b2 = (1/3) * a4_3 b4 = (1/3)*φ(2) + φ(3) + φ(4) - (5/24)*φ(0,2) a = [ [0, 0, 0, 0], [0, 0, 0, 0], [0, a3_2, 0, 0], [0, 0, a4_3, 0], ] b = [ [0, b2, b3, b4,], ] if extra_options_flag("h_prev_h_h_no_eta", extra_options): φ1 = Phi(h_prev1_no_eta * h/h_no_eta, ci, use_analytic_solution) φ2 = Phi(h_prev2_no_eta * h/h_no_eta, ci, use_analytic_solution) elif extra_options_flag("h_only", extra_options): φ1 = Phi(h, ci, use_analytic_solution) φ2 = Phi(h, ci, use_analytic_solution) else: φ1 = Phi(h_prev1_no_eta, ci, use_analytic_solution) φ2 = Phi(h_prev2_no_eta, ci, use_analytic_solution) u2_1 = -2*φ1(2,2) - 2*φ1(3,2) u3_1 = -2*φ1(2,2) - 2*φ1(3,2) + 5/8 u4_1 = -2*φ1(2) - 2*φ1(3) + (5/4)*φ1(0,2) v1 = -φ1(2) + φ1(3) + 3*φ1(4) + (5/24)*φ1(0,2) u2_2 = -(1/2)*φ2(2,2) + φ2(3,2) u3_2 = (1/2)*φ2(2,2) + φ2(3,2) - 3/16 
u4_2 = (1/2)*φ2(2) + φ2(3) - (3/8)*φ2(0,2) v2 = (1/6)*φ2(2) - φ2(4) - (1/24)*φ2(0,2) u = [ [ 0, 0, 0, 0], [u2_1, u2_2, 0, 0], [u3_1, u3_2, 0, 0], [u4_1, u4_2, 0, 0], ] v = [ [v1, v2, 0, 0,], ] a, b = gen_first_col_exp_uv(a,b,ci,u,v,φ) case "lawson44-gen-mod_3h4s": # GenLawson4 https://ora.ox.ac.uk/objects/uuid:cc001282-4285-4ca2-ad06-31787b540c61/files/m611df1a355ca243beb09824b70e5e774 c1,c2,c3,c4 = 0, 1/2, 1/2, 1 ci = [c1,c2,c3,c4] φ = Phi(h, ci, use_analytic_solution) a3_2 = 1/2 a4_3 = φ(0,2) b3 = b2 = (1/3) * a4_3 b4 = (1/4)*φ(2) + (11/12)*φ(3) + (3/2)*φ(4) + φ(5) - (35/192)*φ(0,2) a = [ [0, 0, 0, 0], [0, 0, 0, 0], [0, a3_2, 0, 0], [0, 0, a4_3, 0], ] b = [ [0, b2, b3, b4,], ] if extra_options_flag("h_prev_h_h_no_eta", extra_options): φ1 = Phi(h_prev1_no_eta * h/h_no_eta, ci, use_analytic_solution) φ2 = Phi(h_prev2_no_eta * h/h_no_eta, ci, use_analytic_solution) φ3 = Phi(h_prev3_no_eta * h/h_no_eta, ci, use_analytic_solution) elif extra_options_flag("h_only", extra_options): φ1 = Phi(h, ci, use_analytic_solution) φ2 = Phi(h, ci, use_analytic_solution) φ3 = Phi(h, ci, use_analytic_solution) else: φ1 = Phi(h_prev1_no_eta, ci, use_analytic_solution) φ2 = Phi(h_prev2_no_eta, ci, use_analytic_solution) φ3 = Phi(h_prev3_no_eta, ci, use_analytic_solution) u2_1 = -3*φ1(2,2) - 5*φ1(3,2) - 3*φ1(4,2) u3_1 = u2_1 + 35/32 u4_1 = -3*φ1(2) - 5*φ1(3) - 3*φ1(4) + (35/16)*φ1(0,2) v1 = -(3/2)*φ1(2) + (1/2)*φ1(3) + 6*φ1(4) + 6*φ1(5) + (35/96)*φ1(0,2) u2_2 = (3/2)*φ2(2,2) + 4*φ2(3,2) + 3*φ2(4,2) u3_2 = u2_2 - 21/32 u4_2 = (3/2)*φ2(2) + 4*φ2(3) + 3*φ2(4) - (21/16)*φ2(0,2) v2 = (1/2)*φ2(2) + (1/3)*φ2(3) - 3*φ2(4) - 4*φ2(5) - (7/48)*φ2(0,2) u2_3 = (-1/3)*φ3(2,2) - φ3(3,2) - φ3(4,2) u3_3 = u2_3 + 5/32 u4_3 = -(1/3)*φ3(2) - φ3(3) - φ3(4) + (5/16)*φ3(0,2) v3 = -(1/12)*φ3(2) - (1/12)*φ3(3) + (1/2)*φ3(4) + φ3(5) + (5/192)*φ3(0,2) u = [ [ 0, 0, 0, 0], [u2_1, u2_2, u2_3, 0], [u3_1, u3_2, u3_3, 0], [u4_1, u4_2, u4_3, 0], ] v = [ [v1, v2, v3, 0,], ] a, b = gen_first_col_exp_uv(a,b,ci,u,v,φ) case 
"lawson45-gen-mod_4h4s": # GenLawson4 https://ora.ox.ac.uk/objects/uuid:cc001282-4285-4ca2-ad06-31787b540c61/files/m611df1a355ca243beb09824b70e5e774 c1,c2,c3,c4 = 0, 1/2, 1/2, 1 ci = [c1,c2,c3,c4] φ = Phi(h, ci, use_analytic_solution) a3_2 = 1/2 a4_3 = φ(0,2) b2 = (1/3) * φ(0,2) b3 = (1/3) * φ(0,2) b4 = (12/59)*φ(2) + (50/59)*φ(3) + (105/59)*φ(4) + (120/59)*φ(5) - (60/59)*φ(6) - (157/944)*φ(0,2) a = [ [0, 0, 0, 0], [0, 0, 0, 0], [0, a3_2, 0, 0], [0, 0, a4_3, 0], ] b = [ [0, b2, b3, b4,], ] if extra_options_flag("h_prev_h_h_no_eta", extra_options): φ1 = Phi(h_prev1_no_eta * h/h_no_eta, ci, use_analytic_solution) φ2 = Phi(h_prev2_no_eta * h/h_no_eta, ci, use_analytic_solution) φ3 = Phi(h_prev3_no_eta * h/h_no_eta, ci, use_analytic_solution) φ4 = Phi(h_prev4_no_eta * h/h_no_eta, ci, use_analytic_solution) elif extra_options_flag("h_only", extra_options): φ1 = Phi(h, ci, use_analytic_solution) φ2 = Phi(h, ci, use_analytic_solution) φ3 = Phi(h, ci, use_analytic_solution) φ4 = Phi(h, ci, use_analytic_solution) else: φ1 = Phi(h_prev1_no_eta, ci, use_analytic_solution) φ2 = Phi(h_prev2_no_eta, ci, use_analytic_solution) φ3 = Phi(h_prev3_no_eta, ci, use_analytic_solution) φ4 = Phi(h_prev4_no_eta, ci, use_analytic_solution) u2_1 = -4*φ1(2,2) - (26/3)*φ1(3,2) - 9*φ1(4,2) - 4*φ1(5,2) u3_1 = u2_1 + 105/64 u4_1 = -4*φ1(2) - (26/3)*φ1(3) - 9*φ1(4) - 4*φ1(5) + (105/32)*φ1(0,2) v1 = -(116/59)*φ1(2) - (34/177)*φ1(3) + (519/59)*φ1(4) + (964/59)*φ1(5) - (600/59)*φ1(6) + (495/944)*φ1(0,2) u2_2 = 3*φ2(2,2) + (19/2)*φ2(3,2) + 12*φ2(4,2) + 6*φ2(5,2) u3_2 = u2_2 - 189/128 u4_2 = 3*φ2(2) + (19/2)*φ2(3) + 12*φ2(4) + 6*φ2(5) - (189/64)*φ2(0,2) v2 = (57/59)*φ2(2) + (121/118)*φ2(3) - (342/59)*φ2(4) - (846/59)*φ2(5) + (600/59)*φ2(6) - (577/1888)*φ2(0,2) u2_3 = -(4/3)*φ3(2,2) - (14/3)*φ3(3,2) - 7*φ3(4,2) - 4*φ3(5,2) u3_3 = u2_3 + 45/64 u4_3 = -(4/3)*φ3(2) - (14/3)*φ3(3) - 7*φ3(4) - 4*φ3(5) +(45/32)*φ3(0,2) v3 = -(56/177)*φ3(2) - (76/177)*φ3(3) + (112/59)*φ3(4) + (364/59)*φ3(5) - (300/59)*φ3(6) + 
(25/236)*φ3(0,2) u2_4 = (1/4)*φ4(2,2) + (88/96)*φ4(3,2) + (3/2)*φ4(4,2) + φ4(5,2) u3_4 = u2_4 - 35/256 u4_4 = (1/4)*φ4(2) + (11/12)*φ4(3) + (3/2)*φ4(4) + φ4(5) - (35/128)*φ4(0,2) v4 = (11/236)*φ4(2) + (49/708)*φ4(3) - (33/118)*φ4(4) - (61/59)*φ4(5) + ( 60/59)*φ4(6) - (181/11328)*φ4(0,2) u = [ [ 0, 0, 0, 0], [u2_1, u2_2, u2_3, u2_4], [u3_1, u3_2, u3_3, u3_4], [u4_1, u4_2, u4_3, u4_4], ] v = [ [v1, v2, v3, v4,], ] a, b = gen_first_col_exp_uv(a,b,ci,u,v,φ) case "etdrk2_2s": # https://arxiv.org/pdf/2402.15142v1 c1,c2 = 0, 1 ci = [c1,c2] φ = Phi(h, ci, use_analytic_solution) a = [ [0, 0], [φ(1), 0], ] b = [ [φ(1)-φ(2), φ(2)], ] case "etdrk3_a_3s": #non-monotonic # https://arxiv.org/pdf/2402.15142v1 c1,c2,c3 = 0, 1, 2/3 ci = [c1,c2,c3] φ = Phi(h, ci, use_analytic_solution) a2_1 = c2*φ(1) a3_2 = (4/9)*φ(2,3) a3_1 = c3*φ(1,3) - a3_2 b2 = φ(2) - (1/2)*φ(1) b3 = (3/4) * φ(1) b1 = φ(1) - b2 - b3 a = [ [0, 0, 0], [a2_1, 0, 0], [a3_1, a3_2, 0 ] ] b = [ [b1, b2, b3], ] case "etdrk3_b_3s": # https://arxiv.org/pdf/2402.15142v1 c1,c2,c3 = 0, 4/9, 2/3 ci = [c1,c2,c3] φ = Phi(h, ci, use_analytic_solution) a2_1 = c2*φ(1,2) a3_2 = φ(2,3) a3_1 = c3*φ(1,3) - a3_2 b2 = 0 b3 = (3/2) * φ(2) b1 = φ(1) - b2 - b3 a = [ [0, 0, 0], [a2_1, 0, 0], [a3_1, a3_2, 0 ] ] b = [ [b1, b2, b3], ] case "etdrk4_4s": # https://ora.ox.ac.uk/objects/uuid:cc001282-4285-4ca2-ad06-31787b540c61/files/m611df1a355ca243beb09824b70e5e774 c1,c2,c3,c4 = 0, 1/2, 1/2, 1 ci = [c1,c2,c3,c4] φ = Phi(h, ci, use_analytic_solution) a3_2 = φ(1,2) a4_3 = 2*φ(1,2) b2 = 2*φ(2) - 4*φ(3) b3 = 2*φ(2) - 4*φ(3) b4 = -φ(2) + 4*φ(3) a = [ [0, 0,0,0], [0, 0,0,0], [0, a3_2,0,0], [0, 0, a4_3,0], ] b = [ [0, b2, b3, b4], ] a, b = gen_first_col_exp(a,b,ci,φ) case "etdrk4_4s_alt": # pg 70 col 1 computed with (4.9) https://ora.ox.ac.uk/objects/uuid:cc001282-4285-4ca2-ad06-31787b540c61/files/m611df1a355ca243beb09824b70e5e774 c1,c2,c3,c4 = 0, 1/2, 1/2, 1 ci = [c1,c2,c3,c4] φ = Phi(h, ci, use_analytic_solution) a2_1 = φ(1,2) #unsure about this, 
looks bad and is pretty different from col #1 implementations for everything else except the other 4s alt and 5s ostermann??? from the link a3_1 = 0 a4_1 = φ(1) - 2*φ(1,2) a3_2 = φ(1,2) a4_3 = 2*φ(1,2) b1 = φ(1) - 3*φ(2) + 4*φ(3) b2 = 2*φ(2) - 4*φ(3) b3 = 2*φ(2) - 4*φ(3) b4 = -φ(2) + 4*φ(3) a = [ [ 0, 0, 0,0], [a2_1, 0, 0,0], [a3_1, a3_2, 0,0], [a4_1, 0, a4_3,0], ] b = [ [0, b2, b3, b4], ] #a, b = gen_first_col_exp(a,b,ci,φ) case "dpmpp_2s": c2 = float(get_extra_options_kv("c2", str(c2), extra_options)) ci = [0,c2] φ = Phi(h, ci, use_analytic_solution) b2 = (1/(2*c2)) * φ(1) a = [ [0, 0], [0, 0], ] b = [ [0, b2], ] a, b = gen_first_col_exp(a,b,ci,φ) case "dpmpp_sde_2s": c2 = 1.0 #hardcoded to 1.0 to more closely emulate the configuration for k-diffusion's implementation ci = [0,c2] φ = Phi(h, ci, use_analytic_solution) b2 = (1/(2*c2)) * φ(1) a = [ [0, 0], [0, 0], ] b = [ [0, b2], ] a, b = gen_first_col_exp(a,b,ci,φ) case "dpmpp_3s": c2 = float(get_extra_options_kv("c2", str(c2), extra_options)) c3 = float(get_extra_options_kv("c3", str(c3), extra_options)) ci = [0,c2,c3] φ = Phi(h, ci, use_analytic_solution) a3_2 = (c3**2 / c2) * φ(2,3) b3 = (1/c3) * φ(2) a = [ [0, 0, 0], [0, 0, 0], [0, a3_2, 0], ] b = [ [0, 0, b3], ] a, b = gen_first_col_exp(a,b,ci,φ) case "res_5s": #non-monotonic #4th order c1, c2, c3, c4, c5 = 0, 1/2, 1/2, 1, 1/2 ci = [c1,c2,c3,c4,c5] φ = Phi(h, ci, use_analytic_solution) a3_2 = φ(2,3) a4_2 = φ(2,4) a5_2 = (1/2)*φ(2,5) - φ(3,4) + (1/4)*φ(2,4) - (1/2)*φ(3,5) a4_3 = a4_2 a5_3 = a5_2 a5_4 = (1/4)*φ(2,5) - a5_2 b4 = -φ(2) + 4*φ(3) b5 = 4*φ(2) - 8*φ(3) a = [ [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, a3_2, 0, 0, 0], [0, a4_2, a4_3, 0, 0], [0, a5_2, a5_3, a5_4, 0], ] b = [ [0, 0, 0, b4, b5], ] a, b = gen_first_col_exp(a,b,ci,φ) case "res_5s_hochbruck-ostermann": #non-monotonic #4th order c1, c2, c3, c4, c5 = 0, 1/2, 1/2, 1, 1/2 ci = [c1,c2,c3,c4,c5] φ = Phi(h, ci, use_analytic_solution) a3_2 = 4*φ(2,2) a4_2 = φ(2) a5_2 = (1/4)*φ(2) - φ(3) + 2*φ(2,2) - 
4*φ(3,2) a4_3 = φ(2) a5_3 = a5_2 a5_4 = φ(2,2) - a5_2 b4 = -φ(2) + 4*φ(3) b5 = 4*φ(2) - 8*φ(3) a = [ [0, 0 , 0 , 0 , 0], [0, 0 , 0 , 0 , 0], [0, a3_2, 0 , 0 , 0], [0, a4_2, a4_3, 0 , 0], [0, a5_2, a5_3, a5_4, 0], ] b = [ [0, 0, 0, b4, b5], ] a, b = gen_first_col_exp(a,b,ci,φ) case "res_6s": #non-monotonic #4th order c1, c2, c3, c4, c5, c6 = 0, 1/2, 1/2, 1/3, 1/3, 5/6 ci = [c1, c2, c3, c4, c5, c6] φ = Phi(h, ci, use_analytic_solution) a2_1 = c2 * φ(1,2) a3_1 = 0 a3_2 = (c3**2 / c2) * φ(2,3) a4_1 = 0 a4_2 = (c4**2 / c2) * φ(2,4) a4_3 = (c4**2 * φ(2,4) - a4_2 * c2) / c3 a5_1 = 0 a5_2 = 0 #zero a5_3 = (-c4 * c5**2 * φ(2,5) + 2*c5**3 * φ(3,5)) / (c3 * (c3 - c4)) a5_4 = (-c3 * c5**2 * φ(2,5) + 2*c5**3 * φ(3,5)) / (c4 * (c4 - c3)) a6_1 = 0 a6_2 = 0 #zero a6_3 = (-c4 * c6**2 * φ(2,6) + 2*c6**3 * φ(3,6)) / (c3 * (c3 - c4)) a6_4 = (-c3 * c6**2 * φ(2,6) + 2*c6**3 * φ(3,6)) / (c4 * (c4 - c3)) a6_5 = (c6**2 * φ(2,6) - a6_3*c3 - a6_4*c4) / c5 #a6_5_alt = (2*c6**3 * φ(3,6) - a6_3*c3**2 - a6_4*c4**2) / c5**2 b1 = 0 b2 = 0 b3 = 0 b4 = 0 b5 = (-c6*φ(2) + 2*φ(3)) / (c5 * (c5 - c6)) b6 = (-c5*φ(2) + 2*φ(3)) / (c6 * (c6 - c5)) a = [ [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, a3_2, 0, 0, 0, 0], [0, a4_2, a4_3, 0, 0, 0], [0, a5_2, a5_3, a5_4, 0, 0], [0, a6_2, a6_3, a6_4, a6_5, 0], ] b = [ [0, b2, b3, b4, b5, b6], ] a, b = gen_first_col_exp(a,b,ci,φ) case "res_8s": #non-monotonic # this is not EXPRK5S8 https://ora.ox.ac.uk/objects/uuid:cc001282-4285-4ca2-ad06-31787b540c61/files/m611df1a355ca243beb09824b70e5e774 c1, c2, c3, c4, c5, c6, c7, c8 = 0, 1/2, 1/2, 1/4, 1/2, 1/5, 2/3, 1 ci = [c1, c2, c3, c4, c5, c6, c7, c8] #φ = Phi(h, ci, analytic_solution=use_analytic_solution) ci = [mpf(c_val) for c_val in ci] c1, c2, c3, c4, c5, c6, c7, c8 = [c_val for c_val in ci] φ = Phi(mpf(h.item()), ci, analytic_solution=use_analytic_solution) a3_2 = (1/2) * φ(2,3) a4_3 = (1/8) * φ(2,4) a5_3 = (-1/2) * φ(2,5) + 2 * φ(3,5) a5_4 = 2 * φ(2,5) - 4 * φ(3,5) a6_4 = (8/25) * φ(2,6) - (32/125) * φ(3,6) a6_5 = 
(2/25) * φ(2,6) - (1/2) * a6_4 a7_4 = (-125/162) * a6_4 a7_5 = (125/1944) * a6_4 - (16/27) * φ(2,7) + (320/81) * φ(3,7) a7_6 = (3125/3888) * a6_4 + (100/27) * φ(2,7) - (800/81) * φ(3,7) Φ = (5/32)*a6_4 - (1/28)*φ(2,6) + (36/175)*φ(2,7) - (48/25)*φ(3,7) + (6/175)*φ(4,6) + (192/35)*φ(4,7) + 6*φ(4,8) a8_5 = (208/3)*φ(3,8) - (16/3) *φ(2,8) - 40*Φ a8_6 = (-250/3)*φ(3,8) + (250/21)*φ(2,8) + (250/7)*Φ a8_7 = -27*φ(3,8) + (27/14)*φ(2,8) + (135/7)*Φ b6 = (125/14)*φ(2) - (625/14)*φ(3) + (1125/14)*φ(4) b7 = (-27/14)*φ(2) + (162/7) *φ(3) - (405/7) *φ(4) b8 = (1/2) *φ(2) - (13/2) *φ(3) + (45/2) *φ(4) b1 = φ(1) - b6 - b7 - b8 a = [ [0 , 0 , 0 , 0 , 0 , 0 , 0 , 0], [0 , 0 , 0 , 0 , 0 , 0 , 0 , 0], [0 , a3_2, 0 , 0 , 0 , 0 , 0 , 0], [0 , 0 , a4_3, 0 , 0 , 0 , 0 , 0], [0 , 0 , a5_3, a5_4, 0 , 0 , 0 , 0], [0 , 0 , 0 , a6_4, a6_5, 0 , 0 , 0], [0 , 0 , 0 , a7_4, a7_5, a7_6, 0 , 0], [0 , 0 , 0 , 0 , a8_5, a8_6, a8_7, 0], ] b = [ [0, 0, 0, 0, 0, b6, b7, b8], ] a, b = gen_first_col_exp(a,b,ci,φ) a = [[float(val) for val in row] for row in a] b = [[float(val) for val in row] for row in b] ci = [c1, c2, c3, c4, c5, c6, c7, c8] case "res_8s_alt": # this is EXPRK5S8 https://ora.ox.ac.uk/objects/uuid:cc001282-4285-4ca2-ad06-31787b540c61/files/m611df1a355ca243beb09824b70e5e774 c1, c2, c3, c4, c5, c6, c7, c8 = 0, 1/2, 1/2, 1/4, 1/2, 1/5, 2/3, 1 #ci = [c1, c2, c3, c4, c5, c6, c7, c8] #φ = Phi(h, ci, analytic_solution=use_analytic_solution) ci = [mpf(c_val) for c_val in ci] c1, c2, c3, c4, c5, c6, c7, c8 = [c_val for c_val in ci] φ = Phi(mpf(h.item()), ci, analytic_solution=use_analytic_solution) a3_2 = 2*φ(2,2) a4_3 = 2*φ(2,4) a5_3 = -2*φ(2,2) + 16*φ(3,2) a5_4 = 8*φ(2,2) - 32*φ(3,2) a6_4 = 8*φ(2,6) - 32*φ(3,6) a6_5 = -2*φ(2,6) + 16*φ(3,6) a7_4 = (-125/162) * a6_4 a7_5 = (125/1944) * a6_4 - (4/3) * φ(2,7) + (40/3)*φ(3,7) a7_6 = (3125/3888) * a6_4 + (25/3) * φ(2,7) - (100/3)*φ(3,7) Φ = (5/32)*a6_4 - (25/28)*φ(2,6) + (81/175)*φ(2,7) - (162/25)*φ(3,7) + (150/7)*φ(4,6) + (972/35)*φ(4,7) + 6*φ(4) a8_5 
= -(16/3)*φ(2) + (208/3)*φ(3) - 40*Φ a8_6 = (250/21)*φ(2) - (250/3)*φ(3) + (250/7)*Φ a8_7 = (27/14)*φ(2) - 27*φ(3) + (135/7)*Φ b6 = (125/14)*φ(2) - (625/14)*φ(3) + (1125/14)*φ(4) b7 = (-27/14)*φ(2) + (162/7) *φ(3) - (405/7) *φ(4) b8 = (1/2) *φ(2) - (13/2) *φ(3) + (45/2) *φ(4) a = [ [0 , 0 , 0 , 0 , 0 , 0 , 0 , 0], [0 , 0 , 0 , 0 , 0 , 0 , 0 , 0], [0 , a3_2, 0 , 0 , 0 , 0 , 0 , 0], [0 , 0 , a4_3, 0 , 0 , 0 , 0 , 0], [0 , 0 , a5_3, a5_4, 0 , 0 , 0 , 0], [0 , 0 , 0 , a6_4, a6_5, 0 , 0 , 0], [0 , 0 , 0 , a7_4, a7_5, a7_6, 0 , 0], [0 , 0 , 0 , 0 , a8_5, a8_6, a8_7, 0], ] b = [ [0, 0, 0, 0, 0, b6, b7, b8], ] a, b = gen_first_col_exp(a,b,ci,φ) a = [[float(val) for val in row] for row in a] b = [[float(val) for val in row] for row in b] ci = [c1, c2, c3, c4, c5, c6, c7, c8] case "res_10s": c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 = 0, 1/2, 1/2, 1/3, 1/2, 1/3, 1/4, 3/10, 3/4, 1 ci = [c1, c2, c3, c4, c5, c6, c7, c8, c9, c10] #φ = Phi(h, ci, analytic_solution=use_analytic_solution) ci = [mpf(c_val) for c_val in ci] c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 = [c_val for c_val in ci] φ = Phi(mpf(h.item()), ci, analytic_solution=use_analytic_solution) a3_2 = (c3**2 / c2) * φ(2,3) a4_2 = (c4**2 / c2) * φ(2,4) b8 = (c9*c10*φ(2) - 2*(c9+c10)*φ(3) + 6*φ(4)) / (c8 * (c8-c9) * (c8-c10)) b9 = (c8*c10*φ(2) - 2*(c8+c10)*φ(3) + 6*φ(4)) / (c9 * (c9-c8) * (c9-c10)) b10 = (c8*c9*φ(2) - 2*(c8+c9) *φ(3) + 6*φ(4)) / (c10 * (c10-c8) * (c10-c9)) a = [ [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, a3_2, 0, 0, 0, 0, 0, 0, 0, 0], [0, a4_2, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], ] b = [ [0, 0, 0, 0, 0, 0, 0, b8, b9, b10], ] # a5_3, a5_4 # a6_3, a6_4 # a7_3, a7_4 for i in range(5, 8): # i=5,6,7 j,k ∈ {3, 4}, j != k jk = [(3, 4), (4, 3)] jk = list(permutations([3, 4], 2)) for j,k in jk: a[i-1][j-1] = 
(-ci[i-1]**2 * ci[k-1] * φ(2,i) + 2*ci[i-1]**3 * φ(3,i)) / (ci[j-1] * (ci[j-1] - ci[k-1])) for i in range(8, 11): # i=8,9,10 j,k,l ∈ {5, 6, 7}, j != k != l [ (5, 6, 7), (5, 7, 6), (6, 5, 7), (6, 7, 5), (7, 5, 6), (7, 6, 5)] 6 total coeff jkl = list(permutations([5, 6, 7], 3)) for j,k,l in jkl: a[i-1][j-1] = (ci[i-1]**2 * ci[k-1] * ci[l-1] * φ(2,i) - 2*ci[i-1]**3 * (ci[k-1] + ci[l-1]) * φ(3,i) + 6*ci[i-1]**4 * φ(4,i)) / (ci[j-1] * (ci[j-1] - ci[k-1]) * (ci[j-1] - ci[l-1])) gen_first_col_exp(a, b, ci, φ) a = [[float(val) for val in row] for row in a] b = [[float(val) for val in row] for row in b] c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 = 0, 1/2, 1/2, 1/3, 1/2, 1/3, 1/4, 3/10, 3/4, 1 ci = [c1, c2, c3, c4, c5, c6, c7, c8, c9, c10] case "res_15s": c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15 = 0, 1/2, 1/2, 1/3, 1/2, 1/5, 1/4, 18/25, 1/3, 3/10, 1/6, 90/103, 1/3, 3/10, 1/5 c1 = 0 c2 = c3 = c5 = 1/2 c4 = c9 = c13 = 1/3 c6 = c15 = 1/5 c7 = 1/4 c8 = 18/25 c10 = c14 = 3/10 c11 = 1/6 c12 = 90/103 c15 = 1/5 ci = [c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15] ci = [mpf(c_val) for c_val in ci] φ = Phi(mpf(h.item()), ci, analytic_solution=use_analytic_solution) a = [[mpf(0) for _ in range(15)] for _ in range(15)] b = [[mpf(0) for _ in range(15)]] for i in range(3, 5): # i=3,4 j=2 j=2 a[i-1][j-1] = (ci[i-1]**2 / ci[j-1]) * φ(j,i) for i in range(5, 8): # i=5,6,7 j,k ∈ {3, 4}, j != k jk = list(permutations([3, 4], 2)) for j,k in jk: a[i-1][j-1] = (-ci[i-1]**2 * ci[k-1] * φ(2,i) + 2*ci[i-1]**3 * φ(3,i)) / prod_diff(ci[j-1], ci[k-1]) for i in range(8, 12): # i=8,9,10,11 j,k,l ∈ {5, 6, 7}, j != k != l [ (5, 6, 7), (5, 7, 6), (6, 5, 7), (6, 7, 5), (7, 5, 6), (7, 6, 5)] 6 total coeff jkl = list(permutations([5, 6, 7], 3)) for j,k,l in jkl: a[i-1][j-1] = (ci[i-1]**2 * ci[k-1] * ci[l-1] * φ(2,i) - 2*ci[i-1]**3 * (ci[k-1] + ci[l-1]) * φ(3,i) + 6*ci[i-1]**4 * φ(4,i)) / (ci[j-1] * (ci[j-1] - ci[k-1]) * (ci[j-1] - ci[l-1])) for i in range(12,16): # i=12,13,14,15 jkld = 
list(permutations([8,9,10,11], 4)) for j,k,l,d in jkld: numerator = -ci[i-1]**2 * ci[d-1]*ci[k-1]*ci[l-1] * φ(2,i) + 2*ci[i-1]**3 * (ci[d-1]*ci[k-1] + ci[d-1]*ci[l-1] + ci[k-1]*ci[l-1]) * φ(3,i) - 6*ci[i-1]**4 * (ci[d-1] + ci[k-1] + ci[l-1]) * φ(4,i) + 24*ci[i-1]**5 * φ(5,i) a[i-1][j-1] = numerator / prod_diff(ci[j-1], ci[k-1], ci[l-1], ci[d-1]) """ijkl = list(permutations([12,13,14,15], 4)) for i,j,k,l in ijkl: #numerator = -ci[j-1]*ci[k-1]*ci[l-1]*φ(2) + 2*(ci[j-1]*ci[k-1] + ci[j-1]*ci[l-1] + ci[k-1]*ci[l-1])*φ(3) - 6*(ci[j-1] + ci[k-1] + ci[l-1])*φ(4) + 24*φ(5) #b[0][i-1] = numerator / prod_diff(ci[i-1], ci[j-1], ci[k-1], ci[l-1]) for jjj in range (2, 6): # 2,3,4,5 b[0][i-1] += mu_numerator(jjj, ci[j-1], ci[i-1], ci[k-1], ci[l-1]) * φ(jjj) b[0][i-1] /= prod_diff(ci[i-1], ci[j-1], ci[k-1], ci[l-1])""" ijkl = list(permutations([12,13,14,15], 4)) for i,j,k,l in ijkl: numerator = 0 for jjj in range(2, 6): # 2, 3, 4, 5 numerator += mu_numerator(jjj, ci[j-1], ci[i-1], ci[k-1], ci[l-1]) * φ(jjj) #print(i,j,k,l) b[0][i-1] = numerator / prod_diff(ci[i-1], ci[j-1], ci[k-1], ci[l-1]) ijkl = list(permutations([12, 13, 14, 15], 4)) selected_permutations = {} sign = 1 for i in range(12, 16): results = [] for j, k, l, d in ijkl: if i != j and i != k and i != l and i != d: numerator = 0 for jjj in range(2, 6): # 2, 3, 4, 5 numerator += mu_numerator(jjj, ci[j-1], ci[i-1], ci[k-1], ci[l-1]) * φ(jjj) theta_value = numerator / prod_diff(ci[i-1], ci[j-1], ci[k-1], ci[l-1]) results.append((theta_value, (i, j, k, l, d))) results.sort(key=lambda x: abs(x[0])) for theta_value, permutation in results: if sign == 1 and theta_value > 0: selected_permutations[i] = (theta_value, permutation) sign *= -1 break elif sign == -1 and theta_value < 0: selected_permutations[i] = (theta_value, permutation) sign *= -1 break for i in range(12, 16): if i in selected_permutations: theta_value, (i, j, k, l, d) = selected_permutations[i] b[0][i-1] = theta_value for i in selected_permutations: theta_value, 
permutation = selected_permutations[i] print(f"i={i}") print(f" Selected Theta: {theta_value:.6f}, Permutation: {permutation}") gen_first_col_exp(a, b, ci, φ) a = [[float(val) for val in row] for row in a] b = [[float(val) for val in row] for row in b] ci = [c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15] case "res_16s": # 6th order without weakened order conditions c1 = 0 c2 = c3 = c5 = c8 = c12 = 1/2 c4 = c11 = c15 = 1/3 c6 = c9 = c13 = 1/5 c7 = c10 = c14 = 1/4 c16 = 1 ci = [c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16] ci = [mpf(c_val) for c_val in ci] φ = Phi(mpf(h.item()), ci, analytic_solution=use_analytic_solution) a3_2 = (1/2) * φ(2,3) a = [[mpf(0) for _ in range(16)] for _ in range(16)] b = [[mpf(0) for _ in range(16)]] for i in range(3, 5): # i=3,4 j=2 j=2 a[i-1][j-1] = (ci[i-1]**2 / ci[j-1]) * φ(j,i) for i in range(5, 8): # i=5,6,7 j,k ∈ {3, 4}, j != k jk = list(permutations([3, 4], 2)) for j,k in jk: a[i-1][j-1] = (-ci[i-1]**2 * ci[k-1] * φ(2,i) + 2*ci[i-1]**3 * φ(3,i)) / prod_diff(ci[j-1], ci[k-1]) for i in range(8, 12): # i=8,9,10,11 j,k,l ∈ {5, 6, 7}, j != k != l [ (5, 6, 7), (5, 7, 6), (6, 5, 7), (6, 7, 5), (7, 5, 6), (7, 6, 5)] 6 total coeff jkl = list(permutations([5, 6, 7], 3)) for j,k,l in jkl: a[i-1][j-1] = (ci[i-1]**2 * ci[k-1] * ci[l-1] * φ(2,i) - 2*ci[i-1]**3 * (ci[k-1] + ci[l-1]) * φ(3,i) + 6*ci[i-1]**4 * φ(4,i)) / (ci[j-1] * (ci[j-1] - ci[k-1]) * (ci[j-1] - ci[l-1])) for i in range(12,17): # i=12,13,14,15,16 jkld = list(permutations([8,9,10,11], 4)) for j,k,l,d in jkld: numerator = -ci[i-1]**2 * ci[d-1]*ci[k-1]*ci[l-1] * φ(2,i) + 2*ci[i-1]**3 * (ci[d-1]*ci[k-1] + ci[d-1]*ci[l-1] + ci[k-1]*ci[l-1]) * φ(3,i) - 6*ci[i-1]**4 * (ci[d-1] + ci[k-1] + ci[l-1]) * φ(4,i) + 24*ci[i-1]**5 * φ(5,i) a[i-1][j-1] = numerator / prod_diff(ci[j-1], ci[k-1], ci[l-1], ci[d-1]) """ijdkl = list(permutations([12,13,14,15,16], 5)) for i,j,d,k,l in ijdkl: #numerator = -ci[j-1]*ci[k-1]*ci[l-1]*φ(2) + 2*(ci[j-1]*ci[k-1] + 
ci[j-1]*ci[l-1] + ci[k-1]*ci[l-1])*φ(3) - 6*(ci[j-1] + ci[k-1] + ci[l-1])*φ(4) + 24*φ(5) b[0][i-1] = theta(2, ci[d-1], ci[i-1], ci[k-1], ci[j-1], ci[l-1]) * φ(2) + theta(3, ci[d-1], ci[i-1], ci[k-1], ci[j-1], ci[l-1])*φ(3) + theta(4, ci[d-1], ci[i-1], ci[k-1], ci[j-1], ci[l-1])*φ(4) + theta(5, ci[d-1], ci[i-1], ci[k-1], ci[j-1], ci[l-1])*φ(5) + theta(6, ci[d-1], ci[i-1], ci[k-1], ci[j-1], ci[l-1]) * φ(6) #b[0][i-1] = numerator / prod_diff(ci[i-1], ci[j-1], ci[k-1], ci[l-1])""" ijdkl = list(permutations([12,13,14,15,16], 5)) for i,j,d,k,l in ijdkl: #numerator = -ci[j-1]*ci[k-1]*ci[l-1]*φ(2) + 2*(ci[j-1]*ci[k-1] + ci[j-1]*ci[l-1] + ci[k-1]*ci[l-1])*φ(3) - 6*(ci[j-1] + ci[k-1] + ci[l-1])*φ(4) + 24*φ(5) #numerator = theta_numerator(2, ci[d-1], ci[i-1], ci[k-1], ci[j-1], ci[l-1]) * φ(2) + theta_numerator(3, ci[d-1], ci[i-1], ci[k-1], ci[j-1], ci[l-1])*φ(3) + theta_numerator(4, ci[d-1], ci[i-1], ci[k-1], ci[j-1], ci[l-1])*φ(4) + theta_numerator(5, ci[d-1], ci[i-1], ci[k-1], ci[j-1], ci[l-1])*φ(5) + theta_numerator(6, ci[d-1], ci[i-1], ci[k-1], ci[j-1], ci[l-1]) * φ(6) #b[0][i-1] = numerator / (ci[i-1] *, ci[d-1], ci[j-1], ci[k-1], ci[l-1]) #b[0][i-1] = numerator / denominator(ci[i-1], ci[d-1], ci[j-1], ci[k-1], ci[l-1]) b[0][i-1] = theta(2, ci[d-1], ci[i-1], ci[k-1], ci[j-1], ci[l-1]) * φ(2) + theta(3, ci[d-1], ci[i-1], ci[k-1], ci[j-1], ci[l-1])*φ(3) + theta(4, ci[d-1], ci[i-1], ci[k-1], ci[j-1], ci[l-1])*φ(4) + theta(5, ci[d-1], ci[i-1], ci[k-1], ci[j-1], ci[l-1])*φ(5) + theta(6, ci[d-1], ci[i-1], ci[k-1], ci[j-1], ci[l-1]) * φ(6) ijdkl = list(permutations([12,13,14,15,16], 5)) for i,j,d,k,l in ijdkl: numerator = 0 for jjj in range(2, 7): # 2, 3, 4, 5, 6 numerator += theta_numerator(jjj, ci[d-1], ci[i-1], ci[k-1], ci[j-1], ci[l-1]) * φ(jjj) #print(i,j,d,k,l) b[0][i-1] = numerator / (ci[i-1] * (ci[i-1] - ci[k-1]) * (ci[i-1] - ci[j-1] * (ci[i-1] - ci[d-1]) * (ci[i-1] - ci[l-1]))) gen_first_col_exp(a, b, ci, φ) a = [[float(val) for val in row] for row in a] b = 
[[float(val) for val in row] for row in b] ci = [c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16] case "irk_exp_diag_2s": c1 = 1/3 c2 = 2/3 c1 = float(get_extra_options_kv("c1", str(c1), extra_options)) c2 = float(get_extra_options_kv("c2", str(c2), extra_options)) lam = (1 - torch.exp(-c1 * h)) / h a2_1 = ( torch.exp(c2*h) - torch.exp(c1*h)) / (h * torch.exp(2*c1*h)) b1 = (1 + c2*h + torch.exp(h) * (-1 + h - c2*h)) / ((c1-c2) * h**2 * torch.exp(c1*h)) b2 = -(1 + c1*h - torch.exp(h) * ( 1 - h + c1*h)) / ((c1-c2) * h**2 * torch.exp(c2*h)) a = [ [lam, 0], [a2_1, lam], ] b = [ [b1, b2], ] ci = [c1, c2] ci = ci[:] #if rk_type.startswith("lob") == False: ci.append(1) if EO("exp2lin_override_coeff") and is_exponential(rk_type): a = scale_all(a, -sigma.item()) b = scale_all(b, -sigma.item()) return a, b, u, v, ci, multistep_stages, hybrid_stages, FSAL def scale_all(data, scalar): if isinstance(data, torch.Tensor): return data * scalar elif isinstance(data, list): return [scale_all(x, scalar) for x in data] elif isinstance(data, (float, int)): return data * scalar else: return data # passthrough unscaled if unknown type... 
def gen_first_col_exp(a, b, c, φ):
    """Fill column 0 of every row of ``a`` and ``b`` in place.

    For row ``i`` of ``a`` the new first entry is
    ``c[i] * φ(1, i + 1) - sum(a[i])``; for every row of ``b`` it is
    ``φ(1) - sum(b[i])``.

    NOTE(review): each row sum includes whatever currently sits in column 0,
    so callers presumably pass rows whose first entry is still zero - confirm.

    Args:
        a: mutable 2-D table (list of lists), at least ``len(c)`` rows.
        b: mutable 2-D table (list of lists).
        c: sequence of node values, one per row of ``a``.
        φ: callable invoked as ``φ(1, stage)`` and ``φ(1)``.

    Returns:
        The same ``a`` and ``b`` objects, after mutation.
    """
    for i, c_i in enumerate(c):
        a[i][0] = c_i * φ(1, i + 1) - sum(a[i])
    for row in b:
        row[0] = φ(1) - sum(row)
    return a, b


def gen_first_col_exp_uv(a, b, c, u, v, φ):
    """Variant of ``gen_first_col_exp`` that also subtracts ``u``/``v`` row sums.

    Row ``i`` of ``a`` receives ``c[i] * φ(1, i + 1) - sum(a[i]) - sum(u[i])``
    and row ``i`` of ``b`` receives ``φ(1) - sum(b[i]) - sum(v[i])``.

    Args:
        a, b: mutable 2-D tables (lists of lists), modified in place.
        c: sequence of node values, one per row of ``a``.
        u, v: 2-D tables indexed like ``a`` and ``b`` respectively.
        φ: callable invoked as ``φ(1, stage)`` and ``φ(1)``.

    Returns:
        The same ``a`` and ``b`` objects, after mutation.
    """
    for i, c_i in enumerate(c):
        a[i][0] = c_i * φ(1, i + 1) - sum(a[i]) - sum(u[i])
    for i, row in enumerate(b):
        row[0] = φ(1) - sum(row) - sum(v[i])
    return a, b


def rho(j, ci, ck, cl):
    """Return the order-``j`` weight (``j`` in 2..4) for nodes ``ci, ck, cl``.

    The value is a symmetric polynomial in ``ck, cl`` divided by
    ``denominator(ci, ck, cl)``.

    Raises:
        ValueError: if ``j`` is not 2, 3 or 4 (previously an
            ``UnboundLocalError`` surfaced instead).
    """
    if j == 2:
        numerator = ck * cl
    elif j == 3:
        numerator = -2 * (ck + cl)
    elif j == 4:
        numerator = 6
    else:
        raise ValueError(f"rho: unsupported order j={j!r}; expected 2, 3 or 4")
    return numerator / denominator(ci, ck, cl)


def mu_numerator(j, cd, ci, ck, cl):
    """Return only the numerator of ``mu`` for order ``j`` (``j`` in 2..5).

    ``ci`` is accepted for signature parity with ``mu`` but is not used.

    Raises:
        ValueError: if ``j`` is not in 2..5.
    """
    if j == 2:
        return -cd * ck * cl
    if j == 3:
        return 2 * (cd * ck + cd * cl + ck * cl)
    if j == 4:
        return -6 * (cd + ck + cl)
    if j == 5:
        return 24
    raise ValueError(f"mu: unsupported order j={j!r}; expected 2..5")


def mu(j, cd, ci, ck, cl):
    """Return ``mu_numerator(j, ...) / denominator(ci, cd, ck, cl)``.

    Raises:
        ValueError: if ``j`` is not in 2..5.
    """
    return mu_numerator(j, cd, ci, ck, cl) / denominator(ci, cd, ck, cl)


def theta_numerator(j, cd, ci, ck, cj, cl):
    """Return only the numerator of ``theta`` for order ``j`` (``j`` in 2..6).

    ``ci`` is accepted for signature parity with ``theta`` but is not used.

    Raises:
        ValueError: if ``j`` is not in 2..6.
    """
    if j == 2:
        return -cj * cd * ck * cl
    if j == 3:
        return 2 * (cj * ck * cd + cj*ck*cl + ck*cd*cl + cd*cl*cj)
    if j == 4:
        return -6*(cj*ck + cj*cd + cj*cl + ck*cd + ck*cl + cd*cl)
    if j == 5:
        return 24 * (cj + ck + cl + cd)
    if j == 6:
        return -120
    raise ValueError(f"theta: unsupported order j={j!r}; expected 2..6")


def theta(j, cd, ci, ck, cj, cl):
    """Return ``theta_numerator(j, ...) / denominator(ci, cj, ck, cl, cd)``.

    The divisor is ``ci * (ci - cj) * (ci - ck) * (ci - cl) * (ci - cd)``,
    multiplied in exactly that order (same as the previous inline product).

    Raises:
        ValueError: if ``j`` is not in 2..6.
    """
    return theta_numerator(j, cd, ci, ck, cj, cl) / denominator(ci, cj, ck, cl, cd)


def prod_diff(cj, ck, cl=None, cd=None):
    """Return ``cj * (cj - ck)``, extended by ``(cj - cl)`` and ``(cj - cd)``.

    With only ``ck`` the two-factor product is returned; with ``cl`` the
    three-factor one; with ``cd`` given the four-factor one (which then also
    requires ``cl``).
    """
    if cl is None and cd is None:
        return cj * (cj - ck)
    if cd is None:
        return cj * (cj - ck) * (cj - cl)
    return cj * (cj - ck) * (cj - cl) * (cj - cd)


def denominator(ci, *args):
    """Return ``ci * (ci - args[0]) * (ci - args[1]) * ...``.

    The product is built out-of-place: the earlier ``result *= ...`` form
    aliased ``ci`` and silently overwrote the caller's value whenever ``ci``
    was a mutable array/tensor.
    """
    result = ci
    for arg in args:
        result = result * (ci - arg)
    return result


def check_condition_4_2(nodes, tol=1e-6):
    """Check whether four nodes satisfy the alternating-sum condition.

    With ``s1..s4`` the elementary symmetric sums of the four nodes, returns
    whether ``s1/5 - s2/4 + s3/3 - s4/2`` lies within ``tol`` of ``1/6``.

    Args:
        nodes: iterable of exactly four node values.
        tol: absolute tolerance (defaults to the previously hard-coded 1e-6).
    """
    c12, c13, c14, c15 = nodes

    term_1 = (1 / 5) * (c12 + c13 + c14 + c15)
    term_2 = (1 / 4) * (c12 * c13 + c12 * c14 + c12 * c15 + c13 * c14 + c13 * c15 + c14 * c15)
    term_3 = (1 / 3) * (c12 * c13 * c14 + c12 * c13 * c15 + c12 * c14 * c15 + c13 * c14 * c15)
    term_4 = (1 / 2) * (c12 * c13 * c14 * c15)

    result = term_1 - term_2 + term_3 - term_4

    return abs(result - (1 / 6)) < tol
def __init__(self,
            model,
            sigmas               : Tensor,
            UNSAMPLE             : bool,
            VE_MODEL             : bool,
            LGW_MASK_RESCALE_MIN : bool,
            extra_options        : str,
            device               : str         = 'cpu',
            dtype                : torch.dtype = torch.float64,
            frame_weights_mgr    : Optional[FrameWeightsManager] = None,
            ):
    """Store the sampling configuration and reset all guide state to inactive defaults.

    Args:
        model: wrapper exposing ``model_sampling`` either at
            ``model.model.model_sampling`` or at
            ``model.inner_model.inner_model.model_sampling``.
        sigmas: sigma schedule; its first two entries are compared to set
            ``self.SAMPLE``, so it must hold at least two values.
        UNSAMPLE, VE_MODEL, LGW_MASK_RESCALE_MIN: flags stored verbatim.
        extra_options: raw options string, also wrapped in ``ExtraOptions``.
        device, dtype: placement used for ``sigma_min``, ``sigma_max`` and ``sigmas``.
        frame_weights_mgr: optional frame-weights manager, stored verbatim.

    Raises:
        AttributeError: if ``model`` has neither a ``model`` nor an
            ``inner_model`` attribute (previously this surfaced as an
            ``UnboundLocalError`` on the following line).
    """
    self.dtype                    = dtype
    self.device                   = device
    self.model                    = model

    # Locate model_sampling on whichever wrapper layout this model uses.
    if hasattr(model, "model"):
        model_sampling = model.model.model_sampling
    elif hasattr(model, "inner_model"):
        model_sampling = model.inner_model.inner_model.model_sampling
    else:
        raise AttributeError(
            f"{type(model).__name__} has neither 'model' nor 'inner_model'; cannot locate model_sampling"
        )

    self.sigma_min                = model_sampling.sigma_min.to(dtype=dtype, device=device)
    self.sigma_max                = model_sampling.sigma_max.to(dtype=dtype, device=device)
    self.sigmas                   = sigmas.to(dtype=dtype, device=device)
    self.UNSAMPLE                 = UNSAMPLE
    self.VE_MODEL                 = VE_MODEL
    self.VIDEO                    = is_video_model(model)
    self.SAMPLE                   = (sigmas[0] > sigmas[1])    # torch.bool: True when the schedule starts by decreasing

    # Guide latents; all unset until init_guides() supplies them.
    self.y0                       = None
    self.y0_inv                   = None
    self.y0_mean                  = None
    self.y0_adain                 = None
    self.y0_attninj               = None
    self.y0_style_pos             = None
    self.y0_style_neg             = None

    self.guide_mode               = ""
    self.max_steps                = MAX_STEPS

    # Masks, one per guide channel.
    self.mask                     = None
    self.mask_inv                 = None
    self.mask_sync                = None
    self.mask_drift_x             = None
    self.mask_drift_y             = None
    self.mask_lure_x              = None
    self.mask_lure_y              = None
    self.mask_mean                = None
    self.mask_adain               = None
    self.mask_attninj             = None
    self.mask_style_pos           = None
    self.mask_style_neg           = None

    self.x_lying_                 = None
    self.s_lying_                 = None

    self.LGW_MASK_RESCALE_MIN     = LGW_MASK_RESCALE_MIN

    self.HAS_LATENT_GUIDE           = False
    self.HAS_LATENT_GUIDE_INV       = False
    self.HAS_LATENT_GUIDE_MEAN      = False
    self.HAS_LATENT_GUIDE_ADAIN     = False
    self.HAS_LATENT_GUIDE_ATTNINJ   = False
    self.HAS_LATENT_GUIDE_STYLE_POS = False
    self.HAS_LATENT_GUIDE_STYLE_NEG = False

    # Per-step weights start at zero; each attribute gets its own tensor so
    # later in-place edits to one cannot leak into another.
    self.lgw                      = torch.full_like(sigmas, 0., dtype=dtype)
    self.lgw_inv                  = torch.full_like(sigmas, 0., dtype=dtype)
    self.lgw_mean                 = torch.full_like(sigmas, 0., dtype=dtype)
    self.lgw_adain                = torch.full_like(sigmas, 0., dtype=dtype)
    self.lgw_attninj              = torch.full_like(sigmas, 0., dtype=dtype)
    self.lgw_style_pos            = torch.full_like(sigmas, 0., dtype=dtype)
    self.lgw_style_neg            = torch.full_like(sigmas, 0., dtype=dtype)

    self.cossim_tgt               = torch.full_like(sigmas, 0., dtype=dtype)
    self.cossim_tgt_inv           = torch.full_like(sigmas, 0., dtype=dtype)

    self.guide_cossim_cutoff_           = 1.0
    self.guide_bkg_cossim_cutoff_       = 1.0
    self.guide_mean_cossim_cutoff_      = 1.0
    self.guide_adain_cossim_cutoff_     = 1.0
    self.guide_attninj_cossim_cutoff_   = 1.0
    self.guide_style_pos_cossim_cutoff_ = 1.0
    self.guide_style_neg_cossim_cutoff_ = 1.0

    self.frame_weights_mgr        = frame_weights_mgr
    self.frame_weights            = None
    self.frame_weights_inv        = None

    self.extra_options            = extra_options
    self.EO                       = ExtraOptions(extra_options)
latent_guide_weights_lure_x_inv = torch.zeros_like(self.sigmas, dtype=self.dtype, device=self.device) latent_guide_weights_lure_y = torch.zeros_like(self.sigmas, dtype=self.dtype, device=self.device) latent_guide_weights_lure_y_inv = torch.zeros_like(self.sigmas, dtype=self.dtype, device=self.device) latent_guide_weights_mean = torch.zeros_like(self.sigmas, dtype=self.dtype, device=self.device) latent_guide_weights_adain = torch.zeros_like(self.sigmas, dtype=self.dtype, device=self.device) latent_guide_weights_attninj = torch.zeros_like(self.sigmas, dtype=self.dtype, device=self.device) latent_guide_weights_style_pos = torch.zeros_like(self.sigmas, dtype=self.dtype, device=self.device) latent_guide_weights_style_neg = torch.zeros_like(self.sigmas, dtype=self.dtype, device=self.device) latent_guide = None latent_guide_inv = None latent_guide_mean = None latent_guide_adain = None latent_guide_attninj = None latent_guide_style_pos = None latent_guide_style_neg = None self.drift_x_data = 0.0 self.drift_x_sync = 0.0 self.drift_y_data = 0.0 self.drift_y_sync = 0.0 self.drift_y_guide = 0.0 if guides is not None: self.guide_mode = guides.get("guide_mode", "none") if self.guide_mode.startswith("inversion"): self.guide_mode = self.guide_mode.replace("inversion", "epsilon", 1) else: self.SAMPLE = True self.UNSAMPLE = False latent_guide_weight = guides.get("weight_masked", 0.) latent_guide_weight_inv = guides.get("weight_unmasked", 0.) latent_guide_weight_sync = guides.get("weight_masked_sync", 0.) latent_guide_weight_sync_inv = guides.get("weight_unmasked_sync", 0.) latent_guide_weight_drift_x = guides.get("weight_masked_drift_x", 0.) latent_guide_weight_drift_x_inv = guides.get("weight_unmasked_drift_x", 0.) latent_guide_weight_drift_y = guides.get("weight_masked_drift_y", 0.) latent_guide_weight_drift_y_inv = guides.get("weight_unmasked_drift_y", 0.) latent_guide_weight_lure_x = guides.get("weight_masked_lure_x", 0.) 
latent_guide_weight_lure_x_inv = guides.get("weight_unmasked_lure_x", 0.) latent_guide_weight_lure_y = guides.get("weight_masked_lure_y", 0.) latent_guide_weight_lure_y_inv = guides.get("weight_unmasked_lure_y", 0.) latent_guide_weight_mean = guides.get("weight_mean", 0.) latent_guide_weight_adain = guides.get("weight_adain", 0.) latent_guide_weight_attninj = guides.get("weight_attninj", 0.) latent_guide_weight_style_pos = guides.get("weight_style_pos", 0.) latent_guide_weight_style_neg = guides.get("weight_style_neg", 0.) #latent_guide_synweight_style_pos = guides.get("synweight_style_pos", 0.) #latent_guide_synweight_style_neg = guides.get("synweight_style_neg", 0.) self.drift_x_data = guides.get("drift_x_data", 0.) self.drift_x_sync = guides.get("drift_x_sync", 0.) self.drift_y_data = guides.get("drift_y_data", 0.) self.drift_y_sync = guides.get("drift_y_sync", 0.) self.drift_y_guide = guides.get("drift_y_guide", 0.) latent_guide_weights = guides.get("weights_masked") latent_guide_weights_inv = guides.get("weights_unmasked") latent_guide_weights_sync = guides.get("weights_masked_sync") latent_guide_weights_sync_inv = guides.get("weights_unmasked_sync") latent_guide_weights_drift_x = guides.get("weights_masked_drift_x") latent_guide_weights_drift_x_inv = guides.get("weights_unmasked_drift_x") latent_guide_weights_drift_y = guides.get("weights_masked_drift_y") latent_guide_weights_drift_y_inv = guides.get("weights_unmasked_drift_y") latent_guide_weights_lure_x = guides.get("weights_masked_lure_x") latent_guide_weights_lure_x_inv = guides.get("weights_unmasked_lure_x") latent_guide_weights_lure_y = guides.get("weights_masked_lure_y") latent_guide_weights_lure_y_inv = guides.get("weights_unmasked_lure_y") latent_guide_weights_mean = guides.get("weights_mean") latent_guide_weights_adain = guides.get("weights_adain") latent_guide_weights_attninj = guides.get("weights_attninj") latent_guide_weights_style_pos = guides.get("weights_style_pos") 
latent_guide_weights_style_neg = guides.get("weights_style_neg") #latent_guide_synweights_style_p os = guides.get("synweights_style_pos") #latent_guide_synweights_style_neg = guides.get("synweights_style_neg") latent_guide = guides.get("guide_masked") latent_guide_inv = guides.get("guide_unmasked") latent_guide_mean = guides.get("guide_mean") latent_guide_adain = guides.get("guide_adain") latent_guide_attninj = guides.get("guide_attninj") latent_guide_style_pos = guides.get("guide_style_pos") latent_guide_style_neg = guides.get("guide_style_neg") self.mask = guides.get("mask") self.mask_inv = guides.get("unmask") self.mask_sync = guides.get("mask_sync") self.mask_drift_x = guides.get("mask_drift_x") self.mask_drift_y = guides.get("mask_drift_y") self.mask_lure_x = guides.get("mask_lure_x") self.mask_lure_y = guides.get("mask_lure_y") self.mask_mean = guides.get("mask_mean") self.mask_adain = guides.get("mask_adain") self.mask_attninj = guides.get("mask_attninj") self.mask_style_pos = guides.get("mask_style_pos") self.mask_style_neg = guides.get("mask_style_neg") scheduler_ = guides.get("weight_scheduler_masked") scheduler_inv_ = guides.get("weight_scheduler_unmasked") scheduler_sync_ = guides.get("weight_scheduler_masked_sync") scheduler_sync_inv_ = guides.get("weight_scheduler_unmasked_sync") scheduler_drift_x_ = guides.get("weight_scheduler_masked_drift_x") scheduler_drift_x_inv_ = guides.get("weight_scheduler_unmasked_drift_x") scheduler_drift_y_ = guides.get("weight_scheduler_masked_drift_y") scheduler_drift_y_inv_ = guides.get("weight_scheduler_unmasked_drift_y") scheduler_lure_x_ = guides.get("weight_scheduler_masked_lure_x") scheduler_lure_x_inv_ = guides.get("weight_scheduler_unmasked_lure_x") scheduler_lure_y_ = guides.get("weight_scheduler_masked_lure_y") scheduler_lure_y_inv_ = guides.get("weight_scheduler_unmasked_lure_y") scheduler_mean_ = guides.get("weight_scheduler_mean") scheduler_adain_ = guides.get("weight_scheduler_adain") scheduler_attninj_ = 
guides.get("weight_scheduler_attninj") scheduler_style_pos_ = guides.get("weight_scheduler_style_pos") scheduler_style_neg_ = guides.get("weight_scheduler_style_neg") start_steps_ = guides.get("start_step_masked", 0) start_steps_inv_ = guides.get("start_step_unmasked", 0) start_steps_sync_ = guides.get("start_step_masked_sync", 0) start_steps_sync_inv_ = guides.get("start_step_unmasked_sync", 0) start_steps_drift_x_ = guides.get("start_step_masked_drift_x", 0) start_steps_drift_x_inv_ = guides.get("start_step_unmasked_drift_x", 0) start_steps_drift_y_ = guides.get("start_step_masked_drift_y", 0) start_steps_drift_y_inv_ = guides.get("start_step_unmasked_drift_y", 0) start_steps_lure_x_ = guides.get("start_step_masked_lure_x", 0) start_steps_lure_x_inv_ = guides.get("start_step_unmasked_lure_x", 0) start_steps_lure_y_ = guides.get("start_step_masked_lure_y", 0) start_steps_lure_y_inv_ = guides.get("start_step_unmasked_lure_y", 0) start_steps_mean_ = guides.get("start_step_mean", 0) start_steps_adain_ = guides.get("start_step_adain", 0) start_steps_attninj_ = guides.get("start_step_attninj", 0) start_steps_style_pos_ = guides.get("start_step_style_pos", 0) start_steps_style_neg_ = guides.get("start_step_style_neg", 0) steps_ = guides.get("end_step_masked", 1) steps_inv_ = guides.get("end_step_unmasked", 1) steps_sync_ = guides.get("end_step_masked_sync", 1) steps_sync_inv_ = guides.get("end_step_unmasked_sync", 1) steps_drift_x_ = guides.get("end_step_masked_drift_x", 1) steps_drift_x_inv_ = guides.get("end_step_unmasked_drift_x", 1) steps_drift_y_ = guides.get("end_step_masked_drift_y", 1) steps_drift_y_inv_ = guides.get("end_step_unmasked_drift_y", 1) steps_lure_x_ = guides.get("end_step_masked_lure_x", 1) steps_lure_x_inv_ = guides.get("end_step_unmasked_lure_x", 1) steps_lure_y_ = guides.get("end_step_masked_lure_y", 1) steps_lure_y_inv_ = guides.get("end_step_unmasked_lure_y", 1) steps_mean_ = guides.get("end_step_mean", 1) steps_adain_ = 
guides.get("end_step_adain", 1) steps_attninj_ = guides.get("end_step_attninj", 1) steps_style_pos_ = guides.get("end_step_style_pos", 1) steps_style_neg_ = guides.get("end_step_style_neg", 1) self.guide_cossim_cutoff_ = guides.get("cutoff_masked", 1.) self.guide_bkg_cossim_cutoff_ = guides.get("cutoff_unmasked", 1.) self.guide_mean_cossim_cutoff_ = guides.get("cutoff_mean", 1.) self.guide_adain_cossim_cutoff_ = guides.get("cutoff_adain", 1.) self.guide_attninj_cossim_cutoff_ = guides.get("cutoff_attninj", 1.) self.guide_style_pos_cossim_cutoff_ = guides.get("cutoff_style_pos", 1.) self.guide_style_neg_cossim_cutoff_ = guides.get("cutoff_style_neg", 1.) self.sync_lure_iter = guides.get("sync_lure_iter", 0) self.sync_lure_sequence = guides.get("sync_lure_sequence") #self.SYNC_SEPARATE = False #if scheduler_sync_ is not None: # self.SYNC_SEPARATE = True self.SYNC_SEPARATE = True if scheduler_sync_ is None and scheduler_ is not None: latent_guide_weight_sync = latent_guide_weight latent_guide_weight_sync_inv = latent_guide_weight_inv latent_guide_weights_sync = latent_guide_weights latent_guide_weights_sync_inv = latent_guide_weights_inv scheduler_sync_ = scheduler_ scheduler_sync_inv_ = scheduler_inv_ start_steps_sync_ = start_steps_ start_steps_sync_inv_ = start_steps_inv_ steps_sync_ = steps_ steps_sync_inv_ = steps_inv_ self.SYNC_drift_X = True if scheduler_drift_x_ is None and scheduler_ is not None: self.SYNC_drift_X = False latent_guide_weight_drift_x = latent_guide_weight latent_guide_weight_drift_x_inv = latent_guide_weight_inv latent_guide_weights_drift_x = latent_guide_weights latent_guide_weights_drift_x_inv = latent_guide_weights_inv scheduler_drift_x_ = scheduler_ scheduler_drift_x_inv_ = scheduler_inv_ start_steps_drift_x_ = start_steps_ start_steps_drift_x_inv_ = start_steps_inv_ steps_drift_x_ = steps_ steps_drift_x_inv_ = steps_inv_ self.SYNC_drift_Y = True if scheduler_drift_y_ is None and scheduler_ is not None: self.SYNC_drift_Y = False 
latent_guide_weight_drift_y = latent_guide_weight latent_guide_weight_drift_y_inv = latent_guide_weight_inv latent_guide_weights_drift_y = latent_guide_weights latent_guide_weights_drift_y_inv = latent_guide_weights_inv scheduler_drift_y_ = scheduler_ scheduler_drift_y_inv_ = scheduler_inv_ start_steps_drift_y_ = start_steps_ start_steps_drift_y_inv_ = start_steps_inv_ steps_drift_y_ = steps_ steps_drift_y_inv_ = steps_inv_ self.SYNC_LURE_X = True if scheduler_lure_x_ is None and scheduler_ is not None: self.SYNC_LURE_X = False latent_guide_weight_lure_x = latent_guide_weight latent_guide_weight_lure_x_inv = latent_guide_weight_inv latent_guide_weights_lure_x = latent_guide_weights latent_guide_weights_lure_x_inv = latent_guide_weights_inv scheduler_lure_x_ = scheduler_ scheduler_lure_x_inv_ = scheduler_inv_ start_steps_lure_x_ = start_steps_ start_steps_lure_x_inv_ = start_steps_inv_ steps_lure_x_ = steps_ steps_lure_x_inv_ = steps_inv_ self.SYNC_LURE_Y = True if scheduler_lure_y_ is None and scheduler_ is not None: self.SYNC_LURE_Y = False latent_guide_weight_lure_y = latent_guide_weight latent_guide_weight_lure_y_inv = latent_guide_weight_inv latent_guide_weights_lure_y = latent_guide_weights latent_guide_weights_lure_y_inv = latent_guide_weights_inv scheduler_lure_y_ = scheduler_ scheduler_lure_y_inv_ = scheduler_inv_ start_steps_lure_y_ = start_steps_ start_steps_lure_y_inv_ = start_steps_inv_ steps_lure_y_ = steps_ steps_lure_y_inv_ = steps_inv_ if self.mask is not None and self.mask.shape [0] > 1 and self.VIDEO is False: self.mask = self.mask [batch_num].unsqueeze(0) if self.mask_inv is not None and self.mask_inv.shape[0] > 1 and self.VIDEO is False: self.mask_inv = self.mask_inv[batch_num].unsqueeze(0) if self.mask_sync is not None and self.mask_sync.shape[0] > 1 and self.VIDEO is False: self.mask_sync = self.mask_sync[batch_num].unsqueeze(0) if self.mask_drift_x is not None and self.mask_drift_x.shape[0] > 1 and self.VIDEO is False: self.mask_drift_x = 
self.mask_drift_x[batch_num].unsqueeze(0) if self.mask_drift_y is not None and self.mask_drift_y.shape[0] > 1 and self.VIDEO is False: self.mask_drift_y = self.mask_drift_y[batch_num].unsqueeze(0) if self.mask_lure_x is not None and self.mask_lure_x.shape[0] > 1 and self.VIDEO is False: self.mask_lure_x = self.mask_lure_x[batch_num].unsqueeze(0) if self.mask_lure_y is not None and self.mask_lure_y.shape[0] > 1 and self.VIDEO is False: self.mask_lure_y = self.mask_lure_y[batch_num].unsqueeze(0) if self.guide_mode.startswith("fully_") and not RK_IMPLICIT: self.guide_mode = self.guide_mode[6:] # fully_pseudoimplicit is only supported for implicit samplers, default back to pseudoimplicit guide_sigma_shift = self.EO("guide_sigma_shift", 0.0) # effectively hardcoding shift to 0 !!!!!! if latent_guide_weights is None and scheduler_ is not None: total_steps = steps_ - start_steps_ latent_guide_weights = get_sigmas(self.model, scheduler_, total_steps, 1.0, shift=guide_sigma_shift).to(dtype=self.dtype, device=self.device) / self.sigma_max prepend = torch.zeros(start_steps_, dtype=self.dtype, device=self.device) latent_guide_weights = torch.cat((prepend, latent_guide_weights.to(self.device)), dim=0) if latent_guide_weights_inv is None and scheduler_inv_ is not None: total_steps = steps_inv_ - start_steps_inv_ latent_guide_weights_inv = get_sigmas(self.model, scheduler_inv_, total_steps, 1.0, shift=guide_sigma_shift).to(dtype=self.dtype, device=self.device) / self.sigma_max prepend = torch.zeros(start_steps_inv_, dtype=self.dtype, device=self.device) latent_guide_weights_inv = torch.cat((prepend, latent_guide_weights_inv.to(self.device)), dim=0) if latent_guide_weights_sync is None and scheduler_sync_ is not None: total_steps = steps_sync_ - start_steps_sync_ latent_guide_weights_sync = get_sigmas(self.model, scheduler_sync_, total_steps, 1.0, shift=guide_sigma_shift).to(dtype=self.dtype, device=self.device) / self.sigma_max prepend = torch.zeros(start_steps_sync_, 
dtype=self.dtype, device=self.device) latent_guide_weights_sync = torch.cat((prepend, latent_guide_weights_sync.to(self.device)), dim=0) if latent_guide_weights_sync_inv is None and scheduler_sync_inv_ is not None: total_steps = steps_sync_inv_ - start_steps_sync_inv_ latent_guide_weights_sync_inv = get_sigmas(self.model, scheduler_sync_inv_, total_steps, 1.0, shift=guide_sigma_shift).to(dtype=self.dtype, device=self.device) / self.sigma_max prepend = torch.zeros(start_steps_sync_inv_, dtype=self.dtype, device=self.device) latent_guide_weights_sync_inv = torch.cat((prepend, latent_guide_weights_sync_inv.to(self.device)), dim=0) if latent_guide_weights_drift_x is None and scheduler_drift_x_ is not None: total_steps = steps_drift_x_ - start_steps_drift_x_ latent_guide_weights_drift_x = get_sigmas(self.model, scheduler_drift_x_, total_steps, 1.0, shift=guide_sigma_shift).to(dtype=self.dtype, device=self.device) / self.sigma_max prepend = torch.zeros(start_steps_drift_x_, dtype=self.dtype, device=self.device) latent_guide_weights_drift_x = torch.cat((prepend, latent_guide_weights_drift_x.to(self.device)), dim=0) if latent_guide_weights_drift_x_inv is None and scheduler_drift_x_inv_ is not None: total_steps = steps_drift_x_inv_ - start_steps_drift_x_inv_ latent_guide_weights_drift_x_inv = get_sigmas(self.model, scheduler_drift_x_inv_, total_steps, 1.0, shift=guide_sigma_shift).to(dtype=self.dtype, device=self.device) / self.sigma_max prepend = torch.zeros(start_steps_drift_x_inv_, dtype=self.dtype, device=self.device) latent_guide_weights_drift_x_inv = torch.cat((prepend, latent_guide_weights_drift_x_inv.to(self.device)), dim=0) if latent_guide_weights_drift_y is None and scheduler_drift_y_ is not None: total_steps = steps_drift_y_ - start_steps_drift_y_ latent_guide_weights_drift_y = get_sigmas(self.model, scheduler_drift_y_, total_steps, 1.0, shift=guide_sigma_shift).to(dtype=self.dtype, device=self.device) / self.sigma_max prepend = torch.zeros(start_steps_drift_y_, 
dtype=self.dtype, device=self.device) latent_guide_weights_drift_y = torch.cat((prepend, latent_guide_weights_drift_y.to(self.device)), dim=0) if latent_guide_weights_drift_y_inv is None and scheduler_drift_y_inv_ is not None: total_steps = steps_drift_y_inv_ - start_steps_drift_y_inv_ latent_guide_weights_drift_y_inv = get_sigmas(self.model, scheduler_drift_y_inv_, total_steps, 1.0, shift=guide_sigma_shift).to(dtype=self.dtype, device=self.device) / self.sigma_max prepend = torch.zeros(start_steps_drift_y_inv_, dtype=self.dtype, device=self.device) latent_guide_weights_drift_y_inv = torch.cat((prepend, latent_guide_weights_drift_y_inv.to(self.device)), dim=0) if latent_guide_weights_lure_x is None and scheduler_lure_x_ is not None: total_steps = steps_lure_x_ - start_steps_lure_x_ latent_guide_weights_lure_x = get_sigmas(self.model, scheduler_lure_x_, total_steps, 1.0, shift=guide_sigma_shift).to(dtype=self.dtype, device=self.device) / self.sigma_max prepend = torch.zeros(start_steps_lure_x_, dtype=self.dtype, device=self.device) latent_guide_weights_lure_x = torch.cat((prepend, latent_guide_weights_lure_x.to(self.device)), dim=0) if latent_guide_weights_lure_x_inv is None and scheduler_lure_x_inv_ is not None: total_steps = steps_lure_x_inv_ - start_steps_lure_x_inv_ latent_guide_weights_lure_x_inv = get_sigmas(self.model, scheduler_lure_x_inv_, total_steps, 1.0, shift=guide_sigma_shift).to(dtype=self.dtype, device=self.device) / self.sigma_max prepend = torch.zeros(start_steps_lure_x_inv_, dtype=self.dtype, device=self.device) latent_guide_weights_lure_x_inv = torch.cat((prepend, latent_guide_weights_lure_x_inv.to(self.device)), dim=0) if latent_guide_weights_lure_y is None and scheduler_lure_y_ is not None: total_steps = steps_lure_y_ - start_steps_lure_y_ latent_guide_weights_lure_y = get_sigmas(self.model, scheduler_lure_y_, total_steps, 1.0, shift=guide_sigma_shift).to(dtype=self.dtype, device=self.device) / self.sigma_max prepend = 
torch.zeros(start_steps_lure_y_, dtype=self.dtype, device=self.device) latent_guide_weights_lure_y = torch.cat((prepend, latent_guide_weights_lure_y.to(self.device)), dim=0) if latent_guide_weights_lure_y_inv is None and scheduler_lure_y_inv_ is not None: total_steps = steps_lure_y_inv_ - start_steps_lure_y_inv_ latent_guide_weights_lure_y_inv = get_sigmas(self.model, scheduler_lure_y_inv_, total_steps, 1.0, shift=guide_sigma_shift).to(dtype=self.dtype, device=self.device) / self.sigma_max prepend = torch.zeros(start_steps_lure_y_inv_, dtype=self.dtype, device=self.device) latent_guide_weights_lure_y_inv = torch.cat((prepend, latent_guide_weights_lure_y_inv.to(self.device)), dim=0) if latent_guide_weights_mean is None and scheduler_mean_ is not None: total_steps = steps_mean_ - start_steps_mean_ latent_guide_weights_mean = get_sigmas(self.model, scheduler_mean_, total_steps, 1.0, shift=guide_sigma_shift).to(dtype=self.dtype, device=self.device) / self.sigma_max prepend = torch.zeros(start_steps_mean_, dtype=self.dtype, device=self.device) latent_guide_weights_mean = torch.cat((prepend, latent_guide_weights_mean.to(self.device)), dim=0) if latent_guide_weights_adain is None and scheduler_adain_ is not None: total_steps = steps_adain_ - start_steps_adain_ latent_guide_weights_adain = get_sigmas(self.model, scheduler_adain_, total_steps, 1.0, shift=guide_sigma_shift).to(dtype=self.dtype, device=self.device) / self.sigma_max prepend = torch.zeros(start_steps_adain_, dtype=self.dtype, device=self.device) latent_guide_weights_adain = torch.cat((prepend, latent_guide_weights_adain.to(self.device)), dim=0) if latent_guide_weights_attninj is None and scheduler_attninj_ is not None: total_steps = steps_attninj_ - start_steps_attninj_ latent_guide_weights_attninj = get_sigmas(self.model, scheduler_attninj_, total_steps, 1.0, shift=guide_sigma_shift).to(dtype=self.dtype, device=self.device) / self.sigma_max prepend = torch.zeros(start_steps_attninj_, dtype=self.dtype, 
device=self.device) latent_guide_weights_attninj = torch.cat((prepend, latent_guide_weights_attninj.to(self.device)), dim=0) if latent_guide_weights_style_pos is None and scheduler_style_pos_ is not None: total_steps = steps_style_pos_ - start_steps_style_pos_ latent_guide_weights_style_pos = get_sigmas(self.model, scheduler_style_pos_, total_steps, 1.0, shift=guide_sigma_shift).to(dtype=self.dtype, device=self.device) / self.sigma_max prepend = torch.zeros(start_steps_style_pos_, dtype=self.dtype, device=self.device) latent_guide_weights_style_pos = torch.cat((prepend, latent_guide_weights_style_pos.to(self.device)), dim=0) if latent_guide_weights_style_neg is None and scheduler_style_neg_ is not None: total_steps = steps_style_neg_ - start_steps_style_neg_ latent_guide_weights_style_neg = get_sigmas(self.model, scheduler_style_neg_, total_steps, 1.0, shift=guide_sigma_shift).to(dtype=self.dtype, device=self.device) / self.sigma_max prepend = torch.zeros(start_steps_style_neg_, dtype=self.dtype, device=self.device) latent_guide_weights_style_neg = torch.cat((prepend, latent_guide_weights_style_neg.to(self.device)), dim=0) if scheduler_ != "constant": latent_guide_weights = initialize_or_scale(latent_guide_weights, latent_guide_weight, self.max_steps) if scheduler_inv_ != "constant": latent_guide_weights_inv = initialize_or_scale(latent_guide_weights_inv, latent_guide_weight_inv, self.max_steps) if scheduler_sync_ != "constant": latent_guide_weights_sync = initialize_or_scale(latent_guide_weights_sync, latent_guide_weight_sync, self.max_steps) if scheduler_sync_inv_ != "constant": latent_guide_weights_sync_inv = initialize_or_scale(latent_guide_weights_sync_inv, latent_guide_weight_sync_inv, self.max_steps) latent_guide_weights_sync = 1 - latent_guide_weights_sync if latent_guide_weights_sync is not None else latent_guide_weights latent_guide_weights_sync_inv = 1 - latent_guide_weights_sync_inv if latent_guide_weights_sync_inv is not None else 
latent_guide_weights_inv latent_guide_weight_sync = 1 - latent_guide_weight_sync latent_guide_weight_sync_inv = 1 - latent_guide_weight_sync_inv# these are more intuitive to use if these are reversed... so that sync weight = 1.0 means "maximum guide strength" if scheduler_drift_x_ != "constant": latent_guide_weights_drift_x = initialize_or_scale(latent_guide_weights_drift_x, latent_guide_weight_drift_x, self.max_steps) if scheduler_drift_x_inv_ != "constant": latent_guide_weights_drift_x_inv = initialize_or_scale(latent_guide_weights_drift_x_inv, latent_guide_weight_drift_x_inv, self.max_steps) if scheduler_drift_y_ != "constant": latent_guide_weights_drift_y = initialize_or_scale(latent_guide_weights_drift_y, latent_guide_weight_drift_y, self.max_steps) if scheduler_drift_y_inv_ != "constant": latent_guide_weights_drift_y_inv = initialize_or_scale(latent_guide_weights_drift_y_inv, latent_guide_weight_drift_y_inv, self.max_steps) if scheduler_lure_x_ != "constant": latent_guide_weights_lure_x = initialize_or_scale(latent_guide_weights_lure_x, latent_guide_weight_lure_x, self.max_steps) if scheduler_lure_x_inv_ != "constant": latent_guide_weights_lure_x_inv = initialize_or_scale(latent_guide_weights_lure_x_inv, latent_guide_weight_lure_x_inv, self.max_steps) if scheduler_lure_y_ != "constant": latent_guide_weights_lure_y = initialize_or_scale(latent_guide_weights_lure_y, latent_guide_weight_lure_y, self.max_steps) if scheduler_lure_y_inv_ != "constant": latent_guide_weights_lure_y_inv = initialize_or_scale(latent_guide_weights_lure_y_inv, latent_guide_weight_lure_y_inv, self.max_steps) if scheduler_mean_ != "constant": latent_guide_weights_mean = initialize_or_scale(latent_guide_weights_mean, latent_guide_weight_mean, self.max_steps) if scheduler_adain_ != "constant": latent_guide_weights_adain = initialize_or_scale(latent_guide_weights_adain, latent_guide_weight_adain, self.max_steps) if scheduler_attninj_ != "constant": latent_guide_weights_attninj = 
initialize_or_scale(latent_guide_weights_attninj, latent_guide_weight_attninj, self.max_steps) if scheduler_style_pos_ != "constant": latent_guide_weights_style_pos = initialize_or_scale(latent_guide_weights_style_pos, latent_guide_weight_style_pos, self.max_steps) if scheduler_style_neg_ != "constant": latent_guide_weights_style_neg = initialize_or_scale(latent_guide_weights_style_neg, latent_guide_weight_style_neg, self.max_steps) latent_guide_weights [steps_ :] = 0 latent_guide_weights_inv [steps_inv_ :] = 0 latent_guide_weights_sync [steps_sync_ :] = 1 #one latent_guide_weights_sync_inv [steps_sync_inv_ :] = 1 #one latent_guide_weights_drift_x [steps_drift_x_ :] = 0 latent_guide_weights_drift_x_inv[steps_drift_x_inv_:] = 0 latent_guide_weights_drift_y [steps_drift_y_ :] = 0 latent_guide_weights_drift_y_inv[steps_drift_y_inv_:] = 0 latent_guide_weights_lure_x [steps_lure_x_ :] = 0 latent_guide_weights_lure_x_inv [steps_lure_x_inv_ :] = 0 latent_guide_weights_lure_y [steps_lure_y_ :] = 0 latent_guide_weights_lure_y_inv [steps_lure_y_inv_ :] = 0 latent_guide_weights_mean [steps_mean_ :] = 0 latent_guide_weights_adain [steps_adain_ :] = 0 latent_guide_weights_attninj [steps_attninj_ :] = 0 latent_guide_weights_style_pos [steps_style_pos_ :] = 0 latent_guide_weights_style_neg [steps_style_neg_ :] = 0 self.lgw = F.pad(latent_guide_weights, (0, self.max_steps), value=0.0) self.lgw_inv = F.pad(latent_guide_weights_inv, (0, self.max_steps), value=0.0) self.lgw_sync = F.pad(latent_guide_weights_sync, (0, self.max_steps), value=1.0) #one self.lgw_sync_inv = F.pad(latent_guide_weights_sync_inv, (0, self.max_steps), value=1.0) #one self.lgw_drift_x = F.pad(latent_guide_weights_drift_x, (0, self.max_steps), value=0.0) self.lgw_drift_x_inv = F.pad(latent_guide_weights_drift_x_inv, (0, self.max_steps), value=0.0) self.lgw_drift_y = F.pad(latent_guide_weights_drift_y, (0, self.max_steps), value=0.0) self.lgw_drift_y_inv = F.pad(latent_guide_weights_drift_y_inv, (0, 
self.max_steps), value=0.0) self.lgw_lure_x = F.pad(latent_guide_weights_lure_x, (0, self.max_steps), value=0.0) self.lgw_lure_x_inv = F.pad(latent_guide_weights_lure_x_inv, (0, self.max_steps), value=0.0) self.lgw_lure_y = F.pad(latent_guide_weights_lure_y, (0, self.max_steps), value=0.0) self.lgw_lure_y_inv = F.pad(latent_guide_weights_lure_y_inv, (0, self.max_steps), value=0.0) self.lgw_mean = F.pad(latent_guide_weights_mean, (0, self.max_steps), value=0.0) self.lgw_adain = F.pad(latent_guide_weights_adain, (0, self.max_steps), value=0.0) self.lgw_attninj = F.pad(latent_guide_weights_attninj, (0, self.max_steps), value=0.0) self.lgw_style_pos = F.pad(latent_guide_weights_style_pos, (0, self.max_steps), value=0.0) self.lgw_style_neg = F.pad(latent_guide_weights_style_neg, (0, self.max_steps), value=0.0) mask, self.LGW_MASK_RESCALE_MIN = prepare_mask(x, self.mask, self.LGW_MASK_RESCALE_MIN) self.mask = mask.to(dtype=self.dtype, device=self.device) if self.mask_inv is not None: mask_inv, self.LGW_MASK_RESCALE_MIN = prepare_mask(x, self.mask_inv, self.LGW_MASK_RESCALE_MIN) self.mask_inv = mask_inv.to(dtype=self.dtype, device=self.device) else: self.mask_inv = (1-self.mask) if self.mask_sync is not None: mask_sync, self.LGW_MASK_RESCALE_MIN = prepare_mask(x, self.mask_sync, self.LGW_MASK_RESCALE_MIN) self.mask_sync = mask_sync.to(dtype=self.dtype, device=self.device) else: self.mask_sync = self.mask if self.mask_drift_x is not None: mask_drift_x, self.LGW_MASK_RESCALE_MIN = prepare_mask(x, self.mask_drift_x, self.LGW_MASK_RESCALE_MIN) self.mask_drift_x = mask_drift_x.to(dtype=self.dtype, device=self.device) else: self.mask_drift_x = self.mask if self.mask_drift_y is not None: mask_drift_y, self.LGW_MASK_RESCALE_MIN = prepare_mask(x, self.mask_drift_y, self.LGW_MASK_RESCALE_MIN) self.mask_drift_y = mask_drift_y.to(dtype=self.dtype, device=self.device) else: self.mask_drift_y = self.mask if self.mask_lure_x is not None: mask_lure_x, self.LGW_MASK_RESCALE_MIN = 
prepare_mask(x, self.mask_lure_x, self.LGW_MASK_RESCALE_MIN) self.mask_lure_x = mask_lure_x.to(dtype=self.dtype, device=self.device) else: self.mask_lure_x = self.mask if self.mask_lure_y is not None: mask_lure_y, self.LGW_MASK_RESCALE_MIN = prepare_mask(x, self.mask_lure_y, self.LGW_MASK_RESCALE_MIN) self.mask_lure_y = mask_lure_y.to(dtype=self.dtype, device=self.device) else: self.mask_lure_y = self.mask mask_style_pos, self.LGW_MASK_RESCALE_MIN = prepare_mask(x, self.mask_style_pos, self.LGW_MASK_RESCALE_MIN) self.mask_style_pos = mask_style_pos.to(dtype=self.dtype, device=self.device) mask_style_neg, self.LGW_MASK_RESCALE_MIN = prepare_mask(x, self.mask_style_neg, self.LGW_MASK_RESCALE_MIN) self.mask_style_neg = mask_style_neg.to(dtype=self.dtype, device=self.device) if latent_guide is not None: self.HAS_LATENT_GUIDE = True if type(latent_guide) is dict: if latent_guide ['samples'].shape[0] > 1: latent_guide['samples'] = latent_guide ['samples'][batch_num].unsqueeze(0) latent_guide_samples = self.model.inner_model.inner_model.process_latent_in(latent_guide['samples']).clone().to(dtype=self.dtype, device=self.device) elif type(latent_guide) is torch.Tensor: latent_guide_samples = latent_guide.to(dtype=self.dtype, device=self.device) else: raise ValueError(f"Invalid latent type: {type(latent_guide)}") if self.VIDEO and latent_guide_samples.shape[2] == 1: latent_guide_samples = latent_guide_samples.repeat(1, 1, x.shape[2], 1, 1) if self.SAMPLE: self.y0 = latent_guide_samples elif sigma_init != 0.0: pass elif self.UNSAMPLE: # and self.mask is not None: mask = self.mask.to(x.device) x = (1-mask) * x + mask * latent_guide_samples.to(x.device) else: x = latent_guide_samples.to(x.device) else: self.y0 = torch.zeros_like(x, dtype=self.dtype, device=self.device) if latent_guide_inv is not None: self.HAS_LATENT_GUIDE_INV = True if type(latent_guide_inv) is dict: if latent_guide_inv['samples'].shape[0] > 1: latent_guide_inv['samples'] = 
latent_guide_inv['samples'][batch_num].unsqueeze(0) latent_guide_inv_samples = self.model.inner_model.inner_model.process_latent_in(latent_guide_inv['samples']).clone().to(dtype=self.dtype, device=self.device) elif type(latent_guide_inv) is torch.Tensor: latent_guide_inv_samples = latent_guide_inv.to(dtype=self.dtype, device=self.device) else: raise ValueError(f"Invalid latent type: {type(latent_guide_inv)}") if self.VIDEO and latent_guide_inv_samples.shape[2] == 1: latent_guide_inv_samples = latent_guide_inv_samples.repeat(1, 1, x.shape[2], 1, 1) if self.SAMPLE: self.y0_inv = latent_guide_inv_samples elif sigma_init != 0.0: pass elif self.UNSAMPLE: # and self.mask is not None: mask_inv = self.mask_inv.to(x.device) x = (1-mask_inv) * x + mask_inv * latent_guide_inv_samples.to(x.device) #fixed old approach, which was mask, (1-mask) else: x = latent_guide_inv_samples.to(x.device) #THIS COULD LEAD TO WEIRD BEHAVIOR! OVERWRITING X WITH LG_INV AFTER SETTING TO LG above! else: self.y0_inv = torch.zeros_like(x, dtype=self.dtype, device=self.device) if latent_guide_mean is not None: self.HAS_LATENT_GUIDE_MEAN = True if type(latent_guide_mean) is dict: if latent_guide_mean['samples'].shape[0] > 1: latent_guide_mean['samples'] = latent_guide_mean['samples'][batch_num].unsqueeze(0) latent_guide_mean_samples = self.model.inner_model.inner_model.process_latent_in(latent_guide_mean['samples']).clone().to(dtype=self.dtype, device=self.device) elif type(latent_guide_mean) is torch.Tensor: latent_guide_mean_samples = latent_guide_mean.to(dtype=self.dtype, device=self.device) else: raise ValueError(f"Invalid latent type: {type(latent_guide_mean)}") if self.VIDEO and latent_guide_mean_samples.shape[2] == 1: latent_guide_mean_samples = latent_guide_mean_samples.repeat(1, 1, x.shape[2], 1, 1) self.y0_mean = latent_guide_mean_samples """if self.SAMPLE: self.y0_mean = latent_guide_mean_samples elif self.UNSAMPLE: # and self.mask is not None: mask_mean = self.mask_mean.to(x.device) x = 
(1-mask_mean) * x + mask_mean * latent_guide_mean_samples.to(x.device) #fixed old approach, which was mask, (1-mask) # NECESSARY? else: x = latent_guide_mean_samples.to(x.device) #THIS COULD LEAD TO WEIRD BEHAVIOR! OVERWRITING X WITH LG_MEAN AFTER SETTING TO LG above!""" else: self.y0_mean = torch.zeros_like(x, dtype=self.dtype, device=self.device) if latent_guide_adain is not None: self.HAS_LATENT_GUIDE_ADAIN = True if type(latent_guide_adain) is dict: if latent_guide_adain['samples'].shape[0] > 1: latent_guide_adain['samples'] = latent_guide_adain['samples'][batch_num].unsqueeze(0) latent_guide_adain_samples = self.model.inner_model.inner_model.process_latent_in(latent_guide_adain['samples']).clone().to(dtype=self.dtype, device=self.device) elif type(latent_guide_adain) is torch.Tensor: latent_guide_adain_samples = latent_guide_adain.to(dtype=self.dtype, device=self.device) else: raise ValueError(f"Invalid latent type: {type(latent_guide_adain)}") if self.VIDEO and latent_guide_adain_samples.shape[2] == 1: latent_guide_adain_samples = latent_guide_adain_samples.repeat(1, 1, x.shape[2], 1, 1) self.y0_adain = latent_guide_adain_samples """if self.SAMPLE: self.y0_adain = latent_guide_adain_samples elif self.UNSAMPLE: # and self.mask is not None: if self.mask_adain is not None: mask_adain = self.mask_adain.to(x.device) x = (1-mask_adain) * x + mask_adain * latent_guide_adain_samples.to(x.device) #fixed old approach, which was mask, (1-mask) # NECESSARY? else: x = latent_guide_adain_samples.to(x.device) else: x = latent_guide_adain_samples.to(x.device) #THIS COULD LEAD TO WEIRD BEHAVIOR! 
OVERWRITING X WITH LG_ADAIN AFTER SETTING TO LG above!""" else: self.y0_adain = torch.zeros_like(x, dtype=self.dtype, device=self.device) if latent_guide_attninj is not None: self.HAS_LATENT_GUIDE_ATTNINJ = True if type(latent_guide_attninj) is dict: if latent_guide_attninj['samples'].shape[0] > 1: latent_guide_attninj['samples'] = latent_guide_attninj['samples'][batch_num].unsqueeze(0) latent_guide_attninj_samples = self.model.inner_model.inner_model.process_latent_in(latent_guide_attninj['samples']).clone().to(dtype=self.dtype, device=self.device) elif type(latent_guide_attninj) is torch.Tensor: latent_guide_attninj_samples = latent_guide_attninj.to(dtype=self.dtype, device=self.device) else: raise ValueError(f"Invalid latent type: {type(latent_guide_attninj)}") if self.VIDEO and latent_guide_attninj_samples.shape[2] == 1: latent_guide_attninj_samples = latent_guide_attninj_samples.repeat(1, 1, x.shape[2], 1, 1) self.y0_attninj = latent_guide_attninj_samples """if self.SAMPLE: self.y0_attninj = latent_guide_attninj_samples elif self.UNSAMPLE: # and self.mask is not None: if self.mask_attninj is not None: mask_attninj = self.mask_attninj.to(x.device) x = (1-mask_attninj) * x + mask_attninj * latent_guide_attninj_samples.to(x.device) #fixed old approach, which was mask, (1-mask) # NECESSARY? else: x = latent_guide_attninj_samples.to(x.device) else: x = latent_guide_attninj_samples.to(x.device) #THIS COULD LEAD TO WEIRD BEHAVIOR! 
OVERWRITING X WITH LG_ADAIN AFTER SETTING TO LG above!""" else: self.y0_attninj = torch.zeros_like(x, dtype=self.dtype, device=self.device) if latent_guide_style_pos is not None: self.HAS_LATENT_GUIDE_STYLE_POS = True if type(latent_guide_style_pos) is dict: if latent_guide_style_pos['samples'].shape[0] > 1: latent_guide_style_pos['samples'] = latent_guide_style_pos['samples'][batch_num].unsqueeze(0) latent_guide_style_pos_samples = self.model.inner_model.inner_model.process_latent_in(latent_guide_style_pos['samples']).clone().to(dtype=self.dtype, device=self.device) elif type(latent_guide_style_pos) is torch.Tensor: latent_guide_style_pos_samples = latent_guide_style_pos.to(dtype=self.dtype, device=self.device) else: raise ValueError(f"Invalid latent type: {type(latent_guide_style_pos)}") if self.VIDEO and latent_guide_style_pos_samples.shape[2] == 1: latent_guide_style_pos_samples = latent_guide_style_pos_samples.repeat(1, 1, x.shape[2], 1, 1) self.y0_style_pos = latent_guide_style_pos_samples """if self.SAMPLE: self.y0_style_pos = latent_guide_style_pos_samples elif self.UNSAMPLE: # and self.mask is not None: if self.mask_style_pos is not None: mask_style_pos = self.mask_style_pos.to(x.device) x = (1-mask_style_pos) * x + mask_style_pos * latent_guide_style_pos_samples.to(x.device) #fixed old approach, which was mask, (1-mask) # NECESSARY? else: x = latent_guide_style_pos_samples.to(x.device) else: x = latent_guide_style_pos_samples.to(x.device) #THIS COULD LEAD TO WEIRD BEHAVIOR! 
OVERWRITING X WITH LG_ADAIN AFTER SETTING TO LG above!""" else: self.y0_style_pos = torch.zeros_like(x, dtype=self.dtype, device=self.device) if latent_guide_style_neg is not None: self.HAS_LATENT_GUIDE_STYLE_NEG = True if type(latent_guide_style_neg) is dict: if latent_guide_style_neg['samples'].shape[0] > 1: latent_guide_style_neg['samples'] = latent_guide_style_neg['samples'][batch_num].unsqueeze(0) latent_guide_style_neg_samples = self.model.inner_model.inner_model.process_latent_in(latent_guide_style_neg['samples']).clone().to(dtype=self.dtype, device=self.device) elif type(latent_guide_style_neg) is torch.Tensor: latent_guide_style_neg_samples = latent_guide_style_neg.to(dtype=self.dtype, device=self.device) else: raise ValueError(f"Invalid latent type: {type(latent_guide_style_neg)}") if self.VIDEO and latent_guide_style_neg_samples.shape[2] == 1: latent_guide_style_neg_samples = latent_guide_style_neg_samples.repeat(1, 1, x.shape[2], 1, 1) self.y0_style_neg = latent_guide_style_neg_samples """if self.SAMPLE: self.y0_style_neg = latent_guide_style_neg_samples elif self.UNSAMPLE: # and self.mask is not None: if self.mask_style_neg is not None: mask_style_neg = self.mask_style_neg.to(x.device) x = (1-mask_style_neg) * x + mask_style_neg * latent_guide_style_neg_samples.to(x.device) #fixed old approach, which was mask, (1-mask) # NECESSARY? else: x = latent_guide_style_neg_samples.to(x.device) else: x = latent_guide_style_neg_samples.to(x.device) #THIS COULD LEAD TO WEIRD BEHAVIOR! 
OVERWRITING X WITH LG_ADAIN AFTER SETTING TO LG above!""" else: self.y0_style_neg = torch.zeros_like(x, dtype=self.dtype, device=self.device) if self.UNSAMPLE and not self.SAMPLE: #sigma_next > sigma: # TODO: VERIFY APPROACH FOR INVERSION if guide_inversion_y0 is not None: self.y0 = guide_inversion_y0 else: self.y0 = noise_sampler(sigma=self.sigma_max, sigma_next=self.sigma_min).to(dtype=self.dtype, device=self.device) self.y0 = normalize_zscore(self.y0, channelwise=True, inplace=True) self.y0 *= self.sigma_max if guide_inversion_y0_inv is not None: self.y0_inv = guide_inversion_y0_inv else: self.y0_inv = noise_sampler(sigma=self.sigma_max, sigma_next=self.sigma_min).to(dtype=self.dtype, device=self.device) self.y0_inv = normalize_zscore(self.y0_inv, channelwise=True, inplace=True) self.y0_inv*= self.sigma_max if self.VIDEO and self.frame_weights_mgr is not None: num_frames = x.shape[2] self.frame_weights = self.frame_weights_mgr.get_frame_weights_by_name('frame_weights', num_frames) self.frame_weights_inv = self.frame_weights_mgr.get_frame_weights_by_name('frame_weights_inv', num_frames) x, self.y0, self.y0_inv = self.normalize_inputs(x, self.y0, self.y0_inv) # ??? 
def prepare_weighted_masks(self, step:int, lgw_type="default") -> Tuple[Tensor, Tensor]:
    """
    Build the guide-weighted mask pair for a single step.

    step     : index into the per-step weight schedules (self.lgw*, self.lgw*_inv).
    lgw_type : "sync", "drift_x", "drift_y", "lure_x" or "lure_y" select the schedule and
               mask carrying that suffix; any other value falls back to self.lgw /
               self.lgw_inv together with self.mask / self.mask_inv.

    Returns (lgw_mask, lgw_mask_inv).
    """
    if lgw_type in ("sync", "drift_x", "drift_y", "lure_x", "lure_y"):
        weight     = getattr(self, f"lgw_{lgw_type}")    [step]
        weight_inv = getattr(self, f"lgw_{lgw_type}_inv")[step]
        typed_mask = getattr(self, f"mask_{lgw_type}")
        if typed_mask is None:
            # no dedicated mask: guide covers everything, inverse guide covers nothing
            mask     = torch.ones_like (self.y0)
            mask_inv = torch.zeros_like(self.y0)
        else:
            mask     = typed_mask
            mask_inv = 1 - typed_mask
    else:
        weight     = self.lgw    [step]
        weight_inv = self.lgw_inv[step]
        mask       = self.mask     if self.mask     is not None else torch.ones_like (self.y0)
        mask_inv   = self.mask_inv if self.mask_inv is not None else torch.zeros_like(self.y0)

    if self.LGW_MASK_RESCALE_MIN:
        # the weight acts as the floor of the mask instead of as a multiplier
        rescaled     =      mask  * (1 - weight)     + weight
        rescaled_inv = (1 - mask) * (1 - weight_inv) + weight_inv
        return rescaled, rescaled_inv

    lgw_mask = mask * weight if self.HAS_LATENT_GUIDE else torch.zeros_like(mask)

    if not self.HAS_LATENT_GUIDE_INV:
        lgw_mask_inv = torch.zeros_like(mask)
    elif mask_inv is not None:
        lgw_mask_inv = torch.minimum(mask_inv, (1 - mask) * weight_inv)
    else:
        lgw_mask_inv = (1 - mask) * weight_inv

    return lgw_mask, lgw_mask_inv


def get_masks_for_step(self, step:int, lgw_type="default") -> Tuple[Tensor, Tensor]:
    """
    Return the weighted masks for `step`, moved to self.device.

    For video runs with a frame-weights manager, apply_frame_weights() is called on each
    mask whose guide is present, using the manager's schedule for this step.
    """
    guide_mask, guide_mask_inv = self.prepare_weighted_masks(step, lgw_type=lgw_type)

    normalize     = self.EO("normalize_frame_weights_per_step")
    normalize_inv = self.EO("normalize_frame_weights_per_step_inv")

    if self.VIDEO and self.frame_weights_mgr:
        frame_count = guide_mask.shape[2]
        schedule = (
            ("HAS_LATENT_GUIDE",     'frame_weights',     guide_mask,     normalize),
            ("HAS_LATENT_GUIDE_INV", 'frame_weights_inv', guide_mask_inv, normalize_inv),
        )
        for flag_name, weights_name, target_mask, normalize_flag in schedule:
            if getattr(self, flag_name):
                weights = self.frame_weights_mgr.get_frame_weights_by_name(weights_name, frame_count, step)
                apply_frame_weights(target_mask, weights, normalize_flag)

    return guide_mask.to(self.device), guide_mask_inv.to(self.device)


def get_cossim_adjusted_lgw_masks(self, data:Tensor, step:int) -> Tuple[Tensor, Tensor, Tensor, Tensor]:
    """
    Fetch the guides and their weighted masks for `step`, zeroing a mask when `data` is
    already at least as similar to its guide as the configured cutoff.

    Returns (y0, y0_inv, lgw_mask, lgw_mask_inv). A missing guide is replaced by zeros
    shaped like `data` and is given a similarity of 1.0.
    """
    y0     = self.y0.clone()     if self.HAS_LATENT_GUIDE     else torch.zeros_like(data)
    y0_inv = self.y0_inv.clone() if self.HAS_LATENT_GUIDE_INV else torch.zeros_like(data)

    guide_count = y0.shape[0]
    if guide_count > 1:                                    # this is for changing the guide on a per-step basis
        y0 = y0[min(step, guide_count - 1)].unsqueeze(0)

    lgw_mask, lgw_mask_inv = self.get_masks_for_step(step)

    similarity     = get_pearson_similarity(data, y0,     mask=lgw_mask)     if self.HAS_LATENT_GUIDE     else 1.0
    similarity_inv = get_pearson_similarity(data, y0_inv, mask=lgw_mask_inv) if self.HAS_LATENT_GUIDE_INV else 1.0

    # in-place zeroing switches the corresponding guide off for this step
    if similarity >= self.guide_cossim_cutoff_:
        lgw_mask *= 0
    if similarity_inv >= self.guide_bkg_cossim_cutoff_:
        lgw_mask_inv *= 0

    return y0, y0_inv, lgw_mask, lgw_mask_inv
NS.s_[row], NS.sigma_down, None) if self.HAS_LATENT_GUIDE_INV: eps_substep_guide_inv = RK.get_guide_epsilon(x_0, x_[row], y0_inv, sigma, NS.s_[row], NS.sigma_down, None) if self.guide_mode in {"pseudoimplicit", "pseudoimplicit_cw", "pseudoimplicit_projection", "pseudoimplicit_projection_cw"}: maxmin_ratio = (NS.sub_sigma - RK.sigma_min) / NS.sub_sigma if self.EO("guide_pseudoimplicit_power_substep_flip_maxmin_scaling"): maxmin_ratio *= (RK.rows-row) / RK.rows elif self.EO("guide_pseudoimplicit_power_substep_maxmin_scaling"): maxmin_ratio *= row / RK.rows sub_sigma_2 = NS.sub_sigma - maxmin_ratio * (NS.sub_sigma * pseudoimplicit_row_weights[row] * pseudoimplicit_step_weights[full_iter] * self.lgw[step_sched]) eps_tmp_ = eps_.clone() eps_ = self.process_channelwise(x_0, eps_, data_, row, eps_substep_guide, eps_substep_guide_inv, y0, y0_inv, lgw_mask, lgw_mask_inv, use_projection = self.guide_mode in {"pseudoimplicit_projection", "pseudoimplicit_projection_cw"}, channelwise = self.guide_mode in {"pseudoimplicit_cw", "pseudoimplicit_projection_cw"}, ) x_row_tmp = x_[row] + RK.h_fn(sub_sigma_2, NS.sub_sigma) * eps_[row] eps_ = eps_tmp_ x_row_pseudoimplicit = x_row_tmp sub_sigma_pseudoimplicit = sub_sigma_2 if RK.IMPLICIT and BONGMATH and step < sigmas.shape[0]-1 and not self.EO("disable_pseudobongmath"): x_[row] = NS.sigma_from_to(x_0, x_row_pseudoimplicit, sigma, sub_sigma_pseudoimplicit, NS.s_[row]) x_0, x_, eps_ = RK.bong_iter(x_0, x_, eps_, eps_prev_, data_, sigma, NS.s_, row, RK.row_offset, NS.h, step, step_sched, ) return x_0, x_, eps_, x_row_pseudoimplicit, sub_sigma_pseudoimplicit @torch.no_grad def prepare_fully_pseudoimplicit_guides_substep(self, x_0, x_, eps_, eps_prev_, data_, denoised_prev, row, step, step_sched, sigmas, eta_substep, overshoot_substep, s_noise_substep, NS, RK, pseudoimplicit_row_weights, pseudoimplicit_step_weights, full_iter, BONGMATH, ): if "fully_pseudoimplicit" not in self.guide_mode or (self.lgw[step_sched] == 0 and 
self.lgw_inv[step_sched] == 0): return x_0, x_, eps_ sigma = sigmas[step] y0, y0_inv, lgw_mask, lgw_mask_inv = self.get_cossim_adjusted_lgw_masks(denoised_prev, step_sched) if not (lgw_mask.any() != 0 or lgw_mask_inv.any() != 0): # cossim score too similar! deactivate guide for this step return x_0, x_, eps_ # PREPARE FULLY PSEUDOIMPLICIT GUIDES if self.guide_mode in {"fully_pseudoimplicit", "fully_pseudoimplicit_cw", "fully_pseudoimplicit_projection", "fully_pseudoimplicit_projection_cw"} and (self.lgw[step_sched] > 0 or self.lgw_inv[step_sched] > 0): x_lying_ = x_.clone() eps_lying_ = eps_.clone() s_lying_ = [] for r in range(RK.rows): NS.set_sde_substep(r, RK.multistep_stages, eta_substep, overshoot_substep, s_noise_substep) maxmin_ratio = (NS.sub_sigma - RK.sigma_min) / NS.sub_sigma fully_sub_sigma_2 = NS.sub_sigma - maxmin_ratio * (NS.sub_sigma * pseudoimplicit_row_weights[r] * pseudoimplicit_step_weights[full_iter] * self.lgw[step_sched]) s_lying_.append(fully_sub_sigma_2) if RK.IMPLICIT: x_ = RK.update_substep(x_0, x_, eps_, eps_prev_, r, RK.row_offset, NS.h_new, NS.h_new_orig, ) x_[r] = NS.rebound_overshoot_substep(x_0, x_[r]) if r > 0: x_[r] = NS.swap_noise_substep(x_0, x_[r]) if BONGMATH and step < sigmas.shape[0]-1 and not self.EO("disable_fully_pseudoimplicit_bongmath"): x_0, x_, eps_ = RK.bong_iter(x_0, x_, eps_, eps_prev_, data_, sigma, NS.s_, r, RK.row_offset, NS.h, step, step_sched, ) if self.EO("fully_pseudoimplicit_denoised_prev"): eps_[r] = RK.get_epsilon(x_0, x_[r], denoised_prev, sigma, NS.s_[r]) eps_substep_guide = torch.zeros_like(x_0) eps_substep_guide_inv = torch.zeros_like(x_0) if self.HAS_LATENT_GUIDE: eps_substep_guide = RK.get_guide_epsilon(x_0, x_[r], y0, sigma, NS.s_[r], NS.sigma_down, None) if self.HAS_LATENT_GUIDE_INV: eps_substep_guide_inv = RK.get_guide_epsilon(x_0, x_[r], y0_inv, sigma, NS.s_[r], NS.sigma_down, None) eps_ = self.process_channelwise(x_0, eps_, data_, row, eps_substep_guide, eps_substep_guide_inv, y0, y0_inv, 
lgw_mask, lgw_mask_inv, use_projection = self.guide_mode in {"fully_pseudoimplicit_projection", "fully_pseudoimplicit_projection_cw"}, channelwise = self.guide_mode in {"fully_pseudoimplicit_cw", "fully_pseudoimplicit_projection_cw"}, ) x_lying_[r] = x_[r] + RK.h_fn(fully_sub_sigma_2, NS.sub_sigma) * eps_[r] data_lying = x_[r] + RK.h_fn(0, NS.s_[r]) * eps_[r] eps_lying_[r] = RK.get_epsilon(x_0, x_[r], data_lying, sigma, NS.s_[r]) if not self.EO("pseudoimplicit_disable_eps_lying"): eps_ = eps_lying_ if not self.EO("pseudoimplicit_disable_newton_iter"): x_, eps_ = RK.newton_iter(x_0, x_, eps_, eps_prev_, data_, NS.s_, 0, NS.h, sigmas, step, "lying", ) self.x_lying_ = x_lying_ self.s_lying_ = s_lying_ return x_0, x_, eps_ @torch.no_grad def process_guides_data_substep(self, x_row : Tensor, data_row : Tensor, step : int, sigma_row : Tensor, frame_targets : Optional[Tensor] = None, ): if not self.HAS_LATENT_GUIDE and not self.HAS_LATENT_GUIDE_INV: return x_row y0, y0_inv, lgw_mask, lgw_mask_inv = self.get_cossim_adjusted_lgw_masks(data_row, step) if not (lgw_mask.any() != 0 or lgw_mask_inv.any() != 0): # cossim score too similar! 
deactivate guide for this step return x_row if self.VIDEO and self.frame_weights_mgr is not None and frame_targets is None: num_frames = data_row.shape[2] frame_targets = self.frame_weights_mgr.get_frame_weights_by_name('frame_targets', num_frames, step) if frame_targets is None: frame_targets = torch.tensor(self.EO("frame_targets", [1.0])) frame_targets = torch.clamp(frame_targets, 0.0, 1.0).to(self.device) if self.guide_mode in {"data", "data_projection", "lure", "lure_projection"}: if frame_targets is None: x_row = self.get_data_substep(x_row, data_row, y0, y0_inv, lgw_mask, lgw_mask_inv, step, sigma_row) else: t_dim = x_row.shape[-3] for t in range(t_dim): #temporal dimension frame_target = float(frame_targets[t] if len(frame_targets) > t else frame_targets[-1]) x_row[...,t:t+1,:,:] = self.get_data_substep( x_row [...,t:t+1,:,:], data_row [...,t:t+1,:,:], y0 [...,t:t+1,:,:], y0_inv [...,t:t+1,:,:], lgw_mask [...,t:t+1,:,:], lgw_mask_inv[...,t:t+1,:,:], step, sigma_row, frame_target) return x_row @torch.no_grad def get_data_substep(self, x_row : Tensor, data_row : Tensor, y0 : Tensor, y0_inv : Tensor, lgw_mask : Tensor, lgw_mask_inv : Tensor, step : int, sigma_row : Tensor, frame_target : float = 1.0, ): if not self.HAS_LATENT_GUIDE and not self.HAS_LATENT_GUIDE_INV: return x_row if self.guide_mode in {"data", "data_projection", "lure", "lure_projection"}: data_targets = self.EO("data_targets", [1.0]) step_target = step if len(data_targets) > step else len(data_targets)-1 cossim_target = frame_target * data_targets[step_target] if self.HAS_LATENT_GUIDE: if self.guide_mode.endswith("projection"): d_collinear_d_lerp = get_collinear(data_row, y0) d_lerp_ortho_d = get_orthogonal(y0, data_row) y0 = d_collinear_d_lerp + d_lerp_ortho_d if cossim_target == 1.0: d_slerped = y0 elif cossim_target == 0.0: d_slerped = data_row else: y0_pearsim = get_pearson_similarity(data_row, y0, mask=self.mask) slerp_weight = get_slerp_weight_for_cossim(y0_pearsim.item(), cossim_target) 
d_slerped = slerp_tensor(slerp_weight, data_row, y0) # lgw_mask * slerp_weight same as using mask below """if self.guide_mode == "data_projection": d_collinear_d_lerp = get_collinear(data_row, d_slerped) d_lerp_ortho_d = get_orthogonal(d_slerped, data_row) d_slerped = d_collinear_d_lerp + d_lerp_ortho_d""" if self.VE_MODEL: x_row = x_row + lgw_mask * (d_slerped - data_row) else: x_row = x_row + lgw_mask * (self.sigma_max - sigma_row) * (d_slerped - data_row) if self.HAS_LATENT_GUIDE_INV: if self.guide_mode.endswith("projection"): d_collinear_d_lerp = get_collinear(data_row, y0_inv) d_lerp_ortho_d = get_orthogonal(y0_inv, data_row) y0_inv = d_collinear_d_lerp + d_lerp_ortho_d if cossim_target == 1.0: d_slerped_inv = y0_inv elif cossim_target == 0.0: d_slerped_inv = data_row else: y0_pearsim = get_pearson_similarity(data_row, y0_inv, mask=self.mask_inv) slerp_weight = get_slerp_weight_for_cossim(y0_pearsim.item(), cossim_target) d_slerped_inv = slerp_tensor(slerp_weight, data_row, y0_inv) """if self.guide_mode == "data_projection": d_collinear_d_lerp = get_collinear(data_row, d_slerped_inv) d_lerp_ortho_d = get_orthogonal(d_slerped_inv, data_row) d_slerped_inv = d_collinear_d_lerp + d_lerp_ortho_d""" if self.VE_MODEL: x_row = x_row + lgw_mask_inv * (d_slerped_inv - data_row) else: x_row = x_row + lgw_mask_inv * (self.sigma_max - sigma_row) * (d_slerped_inv - data_row) return x_row @torch.no_grad def swap_data(self, x : Tensor, data : Tensor, y : Tensor, sigma : Tensor, mask : Optional[Tensor] = None, ): mask = 1.0 if mask is None else mask if self.VE_MODEL: return x + mask * (y - data) else: return x + mask * (self.sigma_max - sigma) * (y - data) @torch.no_grad def process_guides_eps_substep(self, x_0 : Tensor, x_row : Tensor, data_row : Tensor, eps_row : Tensor, step : int, sigma : Tensor, sigma_down : Tensor, sigma_row : Tensor, frame_targets : Optional[Tensor] = None, RK=None, ): if not self.HAS_LATENT_GUIDE and not self.HAS_LATENT_GUIDE_INV: return eps_row y0, 
y0_inv, lgw_mask, lgw_mask_inv = self.get_cossim_adjusted_lgw_masks(data_row, step) if not (lgw_mask.any() != 0 or lgw_mask_inv.any() != 0): # cossim score too similar! deactivate guide for this step return eps_row if self.VIDEO and data_row.ndim == 5 and frame_targets is None: num_frames = data_row.shape[2] frame_targets = self.frame_weights_mgr.get_frame_weights_by_name('frame_targets', num_frames, step) if frame_targets is None: frame_targets = self.EO("frame_targets", [1.0]) frame_targets = torch.clamp(frame_targets, 0.0, 1.0) eps_y0 = torch.zeros_like(x_0) eps_y0_inv = torch.zeros_like(x_0) if self.HAS_LATENT_GUIDE: eps_y0 = RK.get_guide_epsilon(x_0, x_row, y0, sigma, sigma_row, sigma_down, None) if self.HAS_LATENT_GUIDE_INV: eps_y0_inv = RK.get_guide_epsilon(x_0, x_row, y0_inv, sigma, sigma_row, sigma_down, None) if self.guide_mode in {"epsilon", "epsilon_projection"}: if frame_targets is None: eps_row = self.get_eps_substep(eps_row, eps_y0, eps_y0_inv, lgw_mask, lgw_mask_inv, step, sigma_row) else: t_dim = x_row.shape[-3] for t in range(t_dim): #temporal dimension frame_target = float(frame_targets[t] if len(frame_targets) > t else frame_targets[-1]) eps_row[...,t:t+1,:,:] = self.get_eps_substep( eps_row [...,t:t+1,:,:], eps_y0 [...,t:t+1,:,:], eps_y0_inv [...,t:t+1,:,:], lgw_mask [...,t:t+1,:,:], lgw_mask_inv[...,t:t+1,:,:], step, sigma_row, frame_target) return eps_row @torch.no_grad def get_eps_substep(self, eps_row : Tensor, eps_y0 : Tensor, eps_y0_inv : Tensor, lgw_mask : Tensor, lgw_mask_inv : Tensor, step : int, sigma_row : Tensor, frame_target : float = 1.0, ): if not self.HAS_LATENT_GUIDE and not self.HAS_LATENT_GUIDE_INV: return eps_row if self.guide_mode in {"epsilon", "epsilon_projection"}: eps_targets = self.EO("eps_targets", [1.0]) step_target = step if len(eps_targets) > step else len(eps_targets)-1 cossim_target = frame_target * eps_targets[step_target] if self.HAS_LATENT_GUIDE: if self.guide_mode == "epsilon_projection": d_collinear_d_lerp = 
get_collinear(eps_row, eps_y0) d_lerp_ortho_d = get_orthogonal(eps_y0, eps_row) eps_y0 = d_collinear_d_lerp + d_lerp_ortho_d if cossim_target == 1.0: d_slerped = eps_y0 elif cossim_target == 0.0: d_slerped = eps_row else: y0_pearsim = get_pearson_similarity(eps_row, eps_y0, mask=self.mask) slerp_weight = get_slerp_weight_for_cossim(y0_pearsim.item(), cossim_target) d_slerped = slerp_tensor(slerp_weight, eps_row, eps_y0) # lgw_mask * slerp_weight same as using mask below """if self.guide_mode == "data_projection": d_collinear_d_lerp = get_collinear(data_row, d_slerped) d_lerp_ortho_d = get_orthogonal(d_slerped, data_row) d_slerped = d_collinear_d_lerp + d_lerp_ortho_d""" eps_row = eps_row + lgw_mask * (d_slerped - eps_row) if self.HAS_LATENT_GUIDE_INV: if self.guide_mode == "epsilon_projection": d_collinear_d_lerp = get_collinear(eps_row, eps_y0_inv) d_lerp_ortho_d = get_orthogonal(eps_y0_inv, eps_row) eps_y0_inv = d_collinear_d_lerp + d_lerp_ortho_d if cossim_target == 1.0: d_slerped_inv = eps_y0_inv elif cossim_target == 0.0: d_slerped_inv = eps_row else: y0_pearsim = get_pearson_similarity(eps_row, eps_y0_inv, mask=self.mask_inv) slerp_weight = get_slerp_weight_for_cossim(y0_pearsim.item(), cossim_target) d_slerped_inv = slerp_tensor(slerp_weight, eps_row, eps_y0_inv) """if self.guide_mode == "data_projection": d_collinear_d_lerp = get_collinear(data_row, d_slerped_inv) d_lerp_ortho_d = get_orthogonal(d_slerped_inv, data_row) d_slerped_inv = d_collinear_d_lerp + d_lerp_ortho_d""" eps_row = eps_row + lgw_mask_inv * (d_slerped_inv - eps_row) return eps_row @torch.no_grad def process_guides_substep(self, x_0 : Tensor, x_ : Tensor, eps_ : Tensor, data_ : Tensor, row : int, step_sched : int, sigma : Tensor, sigma_next : Tensor, sigma_down : Tensor, s_ : Tensor, epsilon_scale : float, RK, ): if not self.HAS_LATENT_GUIDE and not self.HAS_LATENT_GUIDE_INV: return eps_, x_ y0, y0_inv, lgw_mask, lgw_mask_inv = self.get_cossim_adjusted_lgw_masks(data_[row], step_sched) if 
not (lgw_mask.any() != 0 or lgw_mask_inv.any() != 0): # cossim score too similar! deactivate guide for this step return eps_, x_ if self.EO(["substep_eps_ch_mean_std", "substep_eps_ch_mean", "substep_eps_ch_std", "substep_eps_mean_std", "substep_eps_mean", "substep_eps_std"]): eps_orig = eps_.clone() if self.EO("dynamic_guides_mean_std"): y_shift, y_inv_shift = normalize_latent([y0, y0_inv], [data_, data_]) y0 = y_shift if self.EO("dynamic_guides_inv"): y0_inv = y_inv_shift if self.EO("dynamic_guides_mean"): y_shift, y_inv_shift = normalize_latent([y0, y0_inv], [data_, data_], std=False) y0 = y_shift if self.EO("dynamic_guides_inv"): y0_inv = y_inv_shift if "data_old" == self.guide_mode: y0_tmp = y0.clone() if self.HAS_LATENT_GUIDE: y0_tmp = (1-lgw_mask) * data_[row] + lgw_mask * y0 y0_tmp = (1-lgw_mask_inv) * y0_tmp + lgw_mask_inv * y0_inv x_[row+1] = y0_tmp + eps_[row] if self.guide_mode == "data_old_projection": d_lerp = data_[row] + lgw_mask * (y0-data_[row]) + lgw_mask_inv * (y0_inv-data_[row]) d_collinear_d_lerp = get_collinear(data_[row], d_lerp) d_lerp_ortho_d = get_orthogonal(d_lerp, data_[row]) data_[row] = d_collinear_d_lerp + d_lerp_ortho_d x_[row+1] = data_[row] + eps_[row] * sigma #elif (self.UNSAMPLE or self.guide_mode in {"epsilon", "epsilon_cw", "epsilon_projection", "epsilon_projection_cw"}) and (self.lgw[step] > 0 or self.lgw_inv[step] > 0): elif self.guide_mode in {"epsilon", "epsilon_cw", "epsilon_projection", "epsilon_projection_cw"} and (self.lgw[step_sched] > 0 or self.lgw_inv[step_sched] > 0): if sigma_down < sigma or s_[row] < RK.sigma_max: eps_substep_guide = torch.zeros_like(x_0) eps_substep_guide_inv = torch.zeros_like(x_0) if self.HAS_LATENT_GUIDE: eps_substep_guide = RK.get_guide_epsilon(x_0, x_[row], y0, sigma, s_[row], sigma_down, epsilon_scale) if self.HAS_LATENT_GUIDE_INV: eps_substep_guide_inv = RK.get_guide_epsilon(x_0, x_[row], y0_inv, sigma, s_[row], sigma_down, epsilon_scale) tol_value = self.EO("tol", -1.0) if tol_value >= 
0: for b, c in itertools.product(range(x_0.shape[0]), range(x_0.shape[1])): current_diff = torch.norm(data_[row][b][c] - y0 [b][c]) current_diff_inv = torch.norm(data_[row][b][c] - y0_inv[b][c]) lgw_scaled = torch.nan_to_num(1-(tol_value/current_diff), 0) lgw_scaled_inv = torch.nan_to_num(1-(tol_value/current_diff_inv), 0) lgw_tmp = min(self.lgw[step_sched] , lgw_scaled) lgw_tmp_inv = min(self.lgw_inv[step_sched], lgw_scaled_inv) lgw_mask_clamp = torch.clamp(lgw_mask, max=lgw_tmp) lgw_mask_clamp_inv = torch.clamp(lgw_mask_inv, max=lgw_tmp_inv) eps_[row][b][c] = eps_[row][b][c] + lgw_mask_clamp[b][0] * (eps_substep_guide[b][c] - eps_[row][b][c]) + lgw_mask_clamp_inv[b][0] * (eps_substep_guide_inv[b][c] - eps_[row][b][c]) elif self.guide_mode in {"epsilon"}: #eps_[row] = slerp(lgw_mask.mean().item(), eps_[row], eps_substep_guide) if self.EO("slerp_epsilon_guide"): if eps_substep_guide.sum() != 0: eps_[row] = slerp_tensor(lgw_mask, eps_[row], eps_substep_guide) if eps_substep_guide_inv.sum() != 0: eps_[row] = slerp_tensor(lgw_mask_inv, eps_[row], eps_substep_guide_inv) else: eps_[row] = eps_[row] + lgw_mask * (eps_substep_guide - eps_[row]) + lgw_mask_inv * (eps_substep_guide_inv - eps_[row]) #eps_[row] = slerp_barycentric(eps_[row].norm(), eps_substep_guide.norm(), eps_substep_guide_inv.norm(), 1-lgw_mask-lgw_mask_inv, lgw_mask, lgw_mask_inv) elif self.guide_mode in {"epsilon_projection"}: if self.EO("slerp_epsilon_guide"): if eps_substep_guide.sum() != 0: eps_row_slerp = slerp_tensor(self.mask, eps_[row], eps_substep_guide) if eps_substep_guide_inv.sum() != 0: eps_row_slerp = slerp_tensor((1-self.mask), eps_row_slerp, eps_substep_guide_inv) eps_collinear_eps_slerp = get_collinear(eps_[row], eps_row_slerp) eps_slerp_ortho_eps = get_orthogonal(eps_row_slerp, eps_[row]) eps_sum = eps_collinear_eps_slerp + eps_slerp_ortho_eps eps_[row] = slerp_tensor(lgw_mask, eps_[row] , eps_sum) eps_[row] = slerp_tensor(lgw_mask_inv, eps_[row], eps_sum) else: eps_row_lerp = eps_[row] 
+ self.mask * (eps_substep_guide-eps_[row]) + (1-self.mask) * (eps_substep_guide_inv-eps_[row]) eps_collinear_eps_lerp = get_collinear(eps_[row], eps_row_lerp) eps_lerp_ortho_eps = get_orthogonal(eps_row_lerp, eps_[row]) eps_sum = eps_collinear_eps_lerp + eps_lerp_ortho_eps eps_[row] = eps_[row] + lgw_mask * (eps_sum - eps_[row]) + lgw_mask_inv * (eps_sum - eps_[row]) #eps_row_slerp = eps_[row] + self.mask * (eps_substep_guide-eps_[row]) + (1-self.mask) * (eps_substep_guide_inv-eps_[row]) elif self.guide_mode in {"epsilon_cw", "epsilon_projection_cw"}: eps_ = self.process_channelwise(x_0, eps_, data_, row, eps_substep_guide, eps_substep_guide_inv, y0, y0_inv, lgw_mask, lgw_mask_inv, use_projection = self.guide_mode == "epsilon_projection_cw", channelwise = True ) temporal_smoothing = self.EO("temporal_smoothing", 0.0) if temporal_smoothing > 0: eps_[row] = apply_temporal_smoothing(eps_[row], temporal_smoothing) if self.EO("substep_eps_ch_mean_std"): eps_[row] = normalize_latent(eps_[row], eps_orig[row]) if self.EO("substep_eps_ch_mean"): eps_[row] = normalize_latent(eps_[row], eps_orig[row], std=False) if self.EO("substep_eps_ch_std"): eps_[row] = normalize_latent(eps_[row], eps_orig[row], mean=False) if self.EO("substep_eps_mean_std"): eps_[row] = normalize_latent(eps_[row], eps_orig[row], channelwise=False) if self.EO("substep_eps_mean"): eps_[row] = normalize_latent(eps_[row], eps_orig[row], std=False, channelwise=False) if self.EO("substep_eps_std"): eps_[row] = normalize_latent(eps_[row], eps_orig[row], mean=False, channelwise=False) return eps_, x_ def process_channelwise(self, x_0 : Tensor, eps_ : Tensor, data_ : Tensor, row : int, eps_substep_guide : Tensor, eps_substep_guide_inv : Tensor, y0 : Tensor, y0_inv : Tensor, lgw_mask : Tensor, lgw_mask_inv : Tensor, use_projection : bool = False, channelwise : bool = False ): avg, avg_inv = 0, 0 for b, c in itertools.product(range(x_0.shape[0]), range(x_0.shape[1])): avg += torch.norm(lgw_mask [b][0] * 
data_[row][b][c] - lgw_mask [b][0] * y0 [b][c]) avg_inv += torch.norm(lgw_mask_inv[b][0] * data_[row][b][c] - lgw_mask_inv[b][0] * y0_inv[b][c]) avg /= x_0.shape[1] avg_inv /= x_0.shape[1] for b, c in itertools.product(range(x_0.shape[0]), range(x_0.shape[1])): if channelwise: ratio = torch.nan_to_num(torch.norm(lgw_mask [b][0] * data_[row][b][c] - lgw_mask [b][0] * y0 [b][c]) / avg, 0) ratio_inv = torch.nan_to_num(torch.norm(lgw_mask_inv[b][0] * data_[row][b][c] - lgw_mask_inv[b][0] * y0_inv[b][c]) / avg_inv, 0) else: ratio = 1. ratio_inv = 1. if self.EO("slerp_epsilon_guide"): if eps_substep_guide[b][c].sum() != 0: eps_[row][b][c] = slerp_tensor(ratio * lgw_mask[b][0], eps_[row][b][c], eps_substep_guide[b][c]) if eps_substep_guide_inv[b][c].sum() != 0: eps_[row][b][c] = slerp_tensor(ratio_inv * lgw_mask_inv[b][0], eps_[row][b][c], eps_substep_guide_inv[b][c]) else: eps_[row][b][c] = eps_[row][b][c] + ratio * lgw_mask[b][0] * (eps_substep_guide[b][c] - eps_[row][b][c]) + ratio_inv * lgw_mask_inv[b][0] * (eps_substep_guide_inv[b][c] - eps_[row][b][c]) if use_projection: if self.EO("slerp_epsilon_guide"): if eps_substep_guide[b][c].sum() != 0: eps_row_lerp = slerp_tensor(self.mask[b][0], eps_[row][b][c], eps_substep_guide[b][c]) if eps_substep_guide_inv[b][c].sum() != 0: eps_row_lerp = slerp_tensor((1-self.mask[b][0]), eps_[row][b][c], eps_substep_guide_inv[b][c]) else: eps_row_lerp = eps_[row][b][c] + self.mask[b][0] * (eps_substep_guide[b][c] - eps_[row][b][c]) + (1-self.mask[b][0]) * (eps_substep_guide_inv[b][c] - eps_[row][b][c]) # should this ever be self.mask_inv? 
eps_collinear_eps_lerp = get_collinear (eps_[row][b][c], eps_row_lerp) eps_lerp_ortho_eps = get_orthogonal(eps_row_lerp , eps_[row][b][c]) eps_sum = eps_collinear_eps_lerp + eps_lerp_ortho_eps if self.EO("slerp_epsilon_guide"): if eps_substep_guide[b][c].sum() != 0: eps_[row][b][c] = slerp_tensor(ratio * lgw_mask[b][0], eps_[row][b][c], eps_sum) if eps_substep_guide_inv[b][c].sum() != 0: eps_[row][b][c] = slerp_tensor(ratio_inv * lgw_mask_inv[b][0], eps_[row][b][c], eps_sum) else: eps_[row][b][c] = eps_[row][b][c] + ratio * lgw_mask[b][0] * (eps_sum - eps_[row][b][c]) + ratio_inv * lgw_mask_inv[b][0] * (eps_sum - eps_[row][b][c]) else: if self.EO("slerp_epsilon_guide"): if eps_substep_guide[b][c].sum() != 0: eps_[row][b][c] = slerp_tensor(ratio * lgw_mask[b][0], eps_[row][b][c], eps_substep_guide[b][c]) if eps_substep_guide_inv[b][c].sum() != 0: eps_[row][b][c] = slerp_tensor(ratio_inv * lgw_mask_inv[b][0], eps_[row][b][c], eps_substep_guide_inv[b][c]) else: eps_[row][b][c] = eps_[row][b][c] + ratio * lgw_mask[b][0] * (eps_substep_guide[b][c] - eps_[row][b][c]) + ratio_inv * lgw_mask_inv[b][0] * (eps_substep_guide_inv[b][c] - eps_[row][b][c]) return eps_ def normalize_inputs(self, x:Tensor, y0:Tensor, y0_inv:Tensor): """ Modifies and returns 'x' by matching its mean and/or std to y0 and/or y0_inv. Controlled by extra_options. 
def apply_frame_weights(mask, frame_weights, normalize=False):
    """Scale each frame of ``mask`` in place by its per-frame weight.

    Args:
        mask: tensor with the frame axis at dim 2 (the slice is taken on
            dim -3, so the two coincide for 5-D masks). Modified in place.
        frame_weights: sequence or 1-D tensor of weights. ``None`` or an empty
            sequence leaves the mask untouched. If there are fewer weights
            than frames, the last weight is reused for the remaining frames
            (same convention as the ``frame_targets`` lookup in this file).
        normalize: when True, rescale the weighted mask so its overall mean
            matches the mean it had before weighting.

    Returns:
        None; the mask is updated in place.
    """
    if frame_weights is None or len(frame_weights) == 0:
        return

    original_mask_mean = mask.mean()

    last = len(frame_weights) - 1
    for f in range(mask.shape[2]):
        # clamp the lookup instead of raising IndexError on short weight lists
        mask[..., f:f+1, :, :] *= frame_weights[min(f, last)]

    if normalize:
        mask_mean = mask.mean()
        # an all-zero weighted mask has nothing to rescale; dividing would
        # fill it with NaN/inf
        if mask_mean != 0:
            mask *= (original_mask_mean / mask_mean)
def apply_temporal_smoothing(tensor, temporal_smoothing):
    """Smooth a 5-D tensor along its frame axis with a fixed 5-tap kernel.

    Args:
        tensor: input laid out as (b, c, f, h, w). Anything that is not 5-D is
            returned unchanged.
        temporal_smoothing: blend factor. Values <= 0 disable smoothing and
            return the input object itself. The kernel is
            ``[0.1, 0.2, 0.4, 0.2, 0.1] * temporal_smoothing`` with
            ``1 - temporal_smoothing`` added to the centre tap.

    Returns:
        A tensor of the same shape, smoothed over frames.

    The sequence is extended at both ends by repeating the edge frame before
    the convolution. Zero padding would bleed zeros into the first and last
    two frames and shrink their magnitude even for a temporally constant
    input.
    """
    if temporal_smoothing <= 0 or tensor.ndim != 5:
        return tensor

    kernel_size = 5
    padding = kernel_size // 2
    temporal_kernel = torch.tensor(
        [0.1, 0.2, 0.4, 0.2, 0.1],
        device=tensor.device, dtype=tensor.dtype
    ) * temporal_smoothing
    temporal_kernel[padding] += (1 - temporal_smoothing)
    temporal_kernel = temporal_kernel / temporal_kernel.sum()

    # move frames last and flatten everything else into the conv1d batch axis
    b, c, f, h, w = tensor.shape
    data_flat = tensor.permute(0, 1, 3, 4, 2).reshape(-1, 1, f)

    data_flat = F.pad(data_flat, (padding, padding), mode="replicate")
    data_smooth = F.conv1d(data_flat, temporal_kernel.view(1, 1, -1))

    return data_smooth.view(b, c, h, w, f).permute(0, 1, 4, 2, 3)
def get_guide_epsilon(x_0, x_, y0, sigma, rk_type, b=None, c=None):
    """Return the epsilon that points from the current state toward guide ``y0``.

    Args:
        x_0: state at the start of the step; also supplies batch size, dtype
            and device for the per-sample sigma vector.
        x_: current state, used by the non-exponential formula.
        y0: guide latent.
        sigma: noise level; multiplied by a ones vector of length batch, so a
            scalar or a per-sample vector both work.
        rk_type: sampler name, passed to ``RK_Method_Beta.is_exponential``.
        b, c: optional batch / channel indices. With ``b`` only, the result is
            for ``tensor[b]``; with both, for ``tensor[b, c]``. ``c`` without
            ``b`` is ignored.

    Returns:
        ``y0 - x_0`` for exponential samplers, ``(x_ - y0) / sigma`` otherwise
        (restricted to the selected index when one is given).
    """
    if b is not None and c is not None:
        index = (b, c)
    elif b is not None:
        index = (b,)
    else:
        index = ()

    if RK_Method_Beta.is_exponential(rk_type):
        return y0[index] - x_0[index]

    sigma_b = sigma * x_0.new_ones([x_0.shape[0]])    # one sigma per sample, shape [B]
    if index:
        # the batch axis has been indexed away; use that sample's scalar sigma
        denom = sigma_b[index[0]]
    else:
        # shape [B, 1, 1, ...] so sigma lines up with the batch axis; a bare
        # [B] vector would broadcast against the last (width) axis instead
        denom = sigma_b.view(-1, *([1] * (x_.ndim - 1)))

    return (x_[index] - y0[index]) / denom
@torch.no_grad
def noise_cossim_eps_tiled(x_list, eps, noise_list, cossim_mode="forward", tile_size=2, step=0):
    """Assemble a latent tile by tile, picking each tile from the candidate whose noise best fits ``eps``.

    Every input is cut into a ``tile_size`` x ``tile_size`` grid of tiles. For
    each tile position the cosine similarity between every candidate's noise
    tile and the ``eps`` tile is computed, one candidate is selected according
    to ``cossim_mode``, and that candidate's ``x`` tile is copied to the output.
    Only batch element 0 of each input is used; the result has batch size 1.

    cossim_mode:
        "forward" / "reverse" / "orthogonal": highest, lowest, or
            closest-to-zero similarity.
        "orthogonal_pos" / "orthogonal_neg": smallest positive / largest
            negative similarity.
        "orthogonal_posneg", "orthogonal_negpos", "forward_reverse",
        "reverse_forward", "orthogonal_reverse", "reverse_orthogonal":
            alternate between two of the rules above depending on the parity
            of ``step``.
        anything else: parsed with ``float()`` and used as the target
            similarity (so an unknown name raises ValueError).
    """
    def to_tiles(t):
        return rearrange(t, "b c (h t1) (w t2) -> b (t1 t2) c h w", t1=tile_size, t2=tile_size)

    eps_tiles   = to_tiles(eps)
    x_tiles     = [to_tiles(x) for x in x_list]
    noise_stack = torch.stack([to_tiles(noise)[0] for noise in noise_list])        # [n_x, n_tiles, c, h, w]

    eps_vec   = eps_tiles[0].view(eps_tiles.shape[1], -1).unsqueeze(0)              # [1, n_tiles, c*h*w]
    noise_vec = noise_stack.view(noise_stack.size(0), noise_stack.size(1), -1)      # [n_x, n_tiles, c*h*w]
    scores    = F.cosine_similarity(noise_vec, eps_vec, dim=-1)                     # [n_x, n_tiles]

    def most_aligned():
        return scores.argmax(dim=0)

    def least_aligned():
        return scores.argmin(dim=0)

    def nearest_zero():
        return torch.abs(scores).argmin(dim=0)

    def smallest_positive():
        only_pos = torch.where(scores > 0, scores, torch.full_like(scores, float('inf')))
        return only_pos.argmin(dim=0)

    def largest_negative():
        only_neg = torch.where(scores < 0, scores, torch.full_like(scores, float('-inf')))
        return only_neg.argmax(dim=0)

    fixed_rules = {
        "forward"        : most_aligned,
        "reverse"        : least_aligned,
        "orthogonal"     : nearest_zero,
        "orthogonal_pos" : smallest_positive,
        "orthogonal_neg" : largest_negative,
    }
    # mode -> (parity, rule when step % 2 == parity, rule otherwise)
    alternating_rules = {
        "orthogonal_posneg"  : (0, smallest_positive, largest_negative),
        "orthogonal_negpos"  : (1, smallest_positive, largest_negative),
        "forward_reverse"    : (0, most_aligned,      least_aligned),
        "reverse_forward"    : (1, most_aligned,      least_aligned),
        "orthogonal_reverse" : (0, nearest_zero,      least_aligned),
        "reverse_orthogonal" : (1, nearest_zero,      least_aligned),
    }

    if cossim_mode in fixed_rules:
        indices = fixed_rules[cossim_mode]()
    elif cossim_mode in alternating_rules:
        parity, on_match, otherwise = alternating_rules[cossim_mode]
        indices = on_match() if step % 2 == parity else otherwise()
    else:
        indices = torch.abs(scores - float(cossim_mode)).argmin(dim=0)

    x_stack = torch.stack([tiles[0] for tiles in x_tiles])                          # [n_x, n_tiles, c, h, w]
    picked  = x_stack[indices, torch.arange(indices.size(0))]                       # [n_tiles, c, h, w]
    picked  = picked.unsqueeze(0)                                                   # restore batch dim

    return rearrange(picked, "b (t1 t2) c h w -> b c (h t1) (w t2)", t1=tile_size, t2=tile_size)
tile_size=2, step=0, sigma=None, rk_type=None): x_tiled_stack = torch.stack([ rearrange(x, "b c (h t1) (w t2) -> b (t1 t2) c h w", t1=tile_size, t2=tile_size)[0] for x in x_list ]) # [n_x, n_tiles, c, h, w] eps_guide_stack = torch.stack([ rearrange(x - y0, "b c (h t1) (w t2) -> b (t1 t2) c h w", t1=tile_size, t2=tile_size)[0] for x in x_list ]) # [n_x, n_tiles, c, h, w] del x_list noise_tiled_stack = torch.stack([ rearrange(noise, "b c (h t1) (w t2) -> b (t1 t2) c h w", t1=tile_size, t2=tile_size)[0] for noise in noise_list ]) # [n_x, n_tiles, c, h, w] del noise_list noise_flat = noise_tiled_stack.view(noise_tiled_stack.size(0), noise_tiled_stack.size(1), -1) # [n_x, n_tiles, c*h*w] eps_guide_flat = eps_guide_stack.view(eps_guide_stack.size(0), eps_guide_stack.size(1), -1) # [n_x, n_tiles, c*h*w] cossim_tmp_all = F.cosine_similarity(noise_flat, eps_guide_flat, dim=-1) # [n_x, n_tiles] del noise_tiled_stack, noise_flat, eps_guide_stack, eps_guide_flat if cossim_mode == "forward": indices = cossim_tmp_all.argmax(dim=0) elif cossim_mode == "reverse": indices = cossim_tmp_all.argmin(dim=0) elif cossim_mode == "orthogonal": indices = torch.abs(cossim_tmp_all).argmin(dim=0) elif cossim_mode == "orthogonal_pos": positive_mask = cossim_tmp_all > 0 positive_tmp = torch.where(positive_mask, cossim_tmp_all, torch.full_like(cossim_tmp_all, float('inf'))) indices = positive_tmp.argmin(dim=0) elif cossim_mode == "orthogonal_neg": negative_mask = cossim_tmp_all < 0 negative_tmp = torch.where(negative_mask, cossim_tmp_all, torch.full_like(cossim_tmp_all, float('-inf'))) indices = negative_tmp.argmax(dim=0) elif cossim_mode == "orthogonal_posneg": if step % 2 == 0: positive_mask = cossim_tmp_all > 0 positive_tmp = torch.where(positive_mask, cossim_tmp_all, torch.full_like(cossim_tmp_all, float('inf'))) indices = positive_tmp.argmin(dim=0) else: negative_mask = cossim_tmp_all < 0 negative_tmp = torch.where(negative_mask, cossim_tmp_all, torch.full_like(cossim_tmp_all, 
class NoiseStepHandlerOSDE:
    """Holds the tensors a step's noise can be orthogonalised against.

    ``noise_cossim_map`` maps a source name to a parameter list whose first
    slot is reserved for the noise and whose remaining slots are the reference
    tensors captured at construction time.
    """

    # source name -> attribute names (in call order) used as references
    _COSSIM_SOURCE_ATTRS = {
        "eps_orthogonal"              : ("eps",),
        "eps_data_orthogonal"         : ("eps", "data"),
        "data_orthogonal"             : ("data",),
        "xinit_orthogonal"            : ("x_init",),
        "x_orthogonal"                : ("x",),
        "x_data_orthogonal"           : ("x", "data"),
        "x_eps_orthogonal"            : ("x", "eps"),
        "x_eps_data_orthogonal"       : ("x", "eps", "data"),
        "x_eps_data_xinit_orthogonal" : ("x", "eps", "data", "x_init"),
        "x_eps_guide_orthogonal"      : ("x", "eps", "guide"),
        "x_eps_guide_bkg_orthogonal"  : ("x", "eps", "guide_bkg"),
        "noise_orthogonal"            : ("x_init",),
        "guide_orthogonal"            : ("guide",),
        "guide_bkg_orthogonal"        : ("guide_bkg",),
    }

    def __init__(self, x, eps=None, data=None, x_init=None, guide=None, guide_bkg=None):
        self.noise     = None
        self.x         = x
        self.eps       = eps
        self.data      = data
        self.x_init    = x_init
        self.guide     = guide
        self.guide_bkg = guide_bkg

        self.eps_list  = None

        # slot 0 starts as self.noise (None) and is filled in by get_ortho_noise
        self.noise_cossim_map = {
            source: [self.noise, *(getattr(self, attr) for attr in attrs)]
            for source, attrs in self._COSSIM_SOURCE_ATTRS.items()
        }

    def check_cossim_source(self, source):
        """Return True when ``source`` is a known key of ``noise_cossim_map``."""
        return source in self.noise_cossim_map

    def get_ortho_noise(self, noise, prev_noises=None, max_iter=100, max_score=1e-7, NOISE_COSSIM_SOURCE="eps_orthogonal"):
        """Orthogonalise ``noise`` against the references registered for the given source.

        Stores ``noise`` in slot 0 of that source's parameter list, then passes
        the whole list to ``get_orthogonal_noise_from_channelwise``.
        ``prev_noises`` is accepted but not used. Raises ValueError for an
        unknown source.
        """
        if not self.check_cossim_source(NOISE_COSSIM_SOURCE):
            raise ValueError(f"Invalid NOISE_COSSIM_SOURCE: {NOISE_COSSIM_SOURCE}")

        params = self.noise_cossim_map[NOISE_COSSIM_SOURCE]
        params[0] = noise

        return get_orthogonal_noise_from_channelwise(*params, max_iter=max_iter, max_score=max_score)
def handle_tiled_etc_noise_steps(
        x_0, x, x_prenoise, x_init, eps, denoised, y0, y0_inv, step,
        rk_type, RK, NS, SUBSTEP,
        sigma_up, sigma, sigma_next, alpha_ratio, s_noise, noise_mode, SDE_NOISE_EXTERNAL, sde_noise_t,
        NOISE_COSSIM_SOURCE, NOISE_COSSIM_MODE, noise_cossim_tile_size, noise_cossim_iterations,
        extra_options):
    """Draw several noised versions of ``x`` and keep the one selected by a cosine-similarity rule.

    ``NS.add_noise_post`` is called ``noise_cossim_iterations`` times. What
    each candidate is scored against depends on ``NOISE_COSSIM_SOURCE``:

    * "eps", "eps_ch", "data", "latent", "x_prenoise", "x", "x_data",
      "x_init_vs_noise", "mom", "guide", "guide_bkg": one score per candidate;
      ``NOISE_COSSIM_MODE`` "forward" / "reverse" / "orthogonal" then keeps the
      candidate with the highest / lowest / closest-to-zero score (first one on
      ties). Any other mode keeps the first candidate.
    * "eps_tiled", "guide_epsilon_tiled", "guide_bkg_epsilon_tiled",
      "guide_tiled", "guide_bkg_tiled": selection is delegated per tile to the
      ``noise_cossim_*_tiled`` helpers.

    Extra options read from ``extra_options``:
        noise_cossim_end_step, and past it the noise_cossim_takeover_source /
        _mode / _tile / _iterations overrides; noise_cossim_start_step (before
        it the first candidate is used as-is); noise_noise_zscore_norm[_cw] and
        noise_eps_zscore_norm[_cw] (z-score normalisation of the candidate
        noise and of ``eps``, the latter in place on the caller's tensor).

    ``RK`` and ``SUBSTEP`` are accepted but not used in this function.

    Returns the selected candidate, or ``x`` unchanged when no candidate was
    generated.
    """
    EO = ExtraOptions(extra_options)

    if step > EO("noise_cossim_end_step", MAX_STEPS):
        NOISE_COSSIM_SOURCE     = EO("noise_cossim_takeover_source"    , "eps")
        NOISE_COSSIM_MODE       = EO("noise_cossim_takeover_mode"      , "forward")
        noise_cossim_tile_size  = EO("noise_cossim_takeover_tile"      , noise_cossim_tile_size)
        noise_cossim_iterations = EO("noise_cossim_takeover_iterations", noise_cossim_iterations)

    x_tmp          = []
    cossim_tmp     = []
    noise_tmp_list = []

    for i in range(noise_cossim_iterations):
        x_tmp.append(NS.add_noise_post(x, sigma_up, sigma, sigma_next, alpha_ratio, s_noise, noise_mode, SDE_NOISE_EXTERNAL, sde_noise_t))
        noise_tmp = x_tmp[i] - x

        if EO("noise_noise_zscore_norm"):
            noise_tmp = normalize_zscore(noise_tmp, channelwise=False, inplace=True)
        if EO("noise_noise_zscore_norm_cw"):
            noise_tmp = normalize_zscore(noise_tmp, channelwise=True,  inplace=True)
        if EO("noise_eps_zscore_norm"):
            eps = normalize_zscore(eps, channelwise=False, inplace=True)
        if EO("noise_eps_zscore_norm_cw"):
            eps = normalize_zscore(eps, channelwise=True,  inplace=True)

        if NOISE_COSSIM_SOURCE in ("eps_tiled", "guide_epsilon_tiled", "guide_bkg_epsilon_tiled", "iig_tiled"):
            noise_tmp_list.append(noise_tmp)

        if NOISE_COSSIM_SOURCE == "eps":
            cossim_tmp.append(get_cosine_similarity(eps, noise_tmp))
        elif NOISE_COSSIM_SOURCE == "eps_ch":
            # sum of per-channel similarities for batch element 0
            cossim_total = torch.zeros_like(eps[0][0][0][0])
            for ch in range(eps.shape[1]):
                cossim_total += get_cosine_similarity(eps[0][ch], noise_tmp[0][ch])
            cossim_tmp.append(cossim_total)
        elif NOISE_COSSIM_SOURCE == "data":
            cossim_tmp.append(get_cosine_similarity(denoised, noise_tmp))
        elif NOISE_COSSIM_SOURCE == "latent":
            cossim_tmp.append(get_cosine_similarity(x_prenoise, noise_tmp))
        elif NOISE_COSSIM_SOURCE == "x_prenoise":
            cossim_tmp.append(get_cosine_similarity(x_prenoise, x_tmp[i]))
        elif NOISE_COSSIM_SOURCE == "x":
            cossim_tmp.append(get_cosine_similarity(x, x_tmp[i]))
        elif NOISE_COSSIM_SOURCE == "x_data":
            cossim_tmp.append(get_cosine_similarity(denoised, x_tmp[i]))
        elif NOISE_COSSIM_SOURCE == "x_init_vs_noise":
            cossim_tmp.append(get_cosine_similarity(x_init, noise_tmp))
        elif NOISE_COSSIM_SOURCE == "mom":
            cossim_tmp.append(get_cosine_similarity(denoised, x + sigma_next*noise_tmp))
        elif NOISE_COSSIM_SOURCE == "guide":
            cossim_tmp.append(get_cosine_similarity(y0, x_tmp[i]))
        elif NOISE_COSSIM_SOURCE == "guide_bkg":
            cossim_tmp.append(get_cosine_similarity(y0_inv, x_tmp[i]))

    if not x_tmp:
        # zero iterations requested: nothing to choose from
        return x

    if step < EO("noise_cossim_start_step", 0):
        return x_tmp[0]

    tiled_kwargs = dict(cossim_mode=NOISE_COSSIM_MODE, tile_size=noise_cossim_tile_size, step=step)

    if NOISE_COSSIM_SOURCE == "eps_tiled":
        return noise_cossim_eps_tiled(x_tmp, eps, noise_tmp_list, **tiled_kwargs)
    if NOISE_COSSIM_SOURCE == "guide_epsilon_tiled":
        return noise_cossim_guide_eps_tiled(x_0, x_tmp, y0,     noise_tmp_list, sigma=sigma, rk_type=rk_type, **tiled_kwargs)
    if NOISE_COSSIM_SOURCE == "guide_bkg_epsilon_tiled":
        return noise_cossim_guide_eps_tiled(x_0, x_tmp, y0_inv, noise_tmp_list, sigma=sigma, rk_type=rk_type, **tiled_kwargs)
    if NOISE_COSSIM_SOURCE == "guide_tiled":
        return noise_cossim_guide_tiled(x_tmp, y0,     **tiled_kwargs)
    if NOISE_COSSIM_SOURCE == "guide_bkg_tiled":
        # step is forwarded here like in every other tiled branch, so the
        # step-alternating modes do not stay pinned to step 0
        return noise_cossim_guide_tiled(x_tmp, y0_inv, **tiled_kwargs)

    # Whole-latent selection. Fall back to the first candidate when the mode
    # is not one of the three ranking modes, or when the source produced no
    # scores (e.g. an unrecognised source name).
    if NOISE_COSSIM_MODE not in ("forward", "reverse", "orthogonal") or len(cossim_tmp) != len(x_tmp):
        return x_tmp[0]

    candidates = range(len(x_tmp))
    if NOISE_COSSIM_MODE == "forward":
        best = max(candidates, key=lambda i: cossim_tmp[i])
    elif NOISE_COSSIM_MODE == "reverse":
        best = min(candidates, key=lambda i: cossim_tmp[i])
    else:
        best = min(candidates, key=lambda i: abs(cossim_tmp[i]))

    return x_tmp[best]
noise_boost_normalize : bool = True, model_device : str = 'cuda', work_device : str = 'cpu', dtype : torch.dtype = torch.float64, extra_options : str = "" ): self.work_device = work_device self.model_device = model_device self.dtype : torch.dtype = dtype self.model = model if hasattr(model, "model"): model_sampling = model.model.model_sampling elif hasattr(model, "inner_model"): model_sampling = model.inner_model.inner_model.model_sampling self.sigma_min : Tensor = model_sampling.sigma_min.to(dtype=dtype, device=work_device) self.sigma_max : Tensor = model_sampling.sigma_max.to(dtype=dtype, device=work_device) self.rk_type : str = rk_type self.IMPLICIT : str = rk_type in get_implicit_sampler_name_list(nameOnly=True) self.EXPONENTIAL : bool = RK_Method_Beta.is_exponential(rk_type) self.VE_MODEL : bool = VE_MODEL self.SYNC_SUBSTEP_MEAN_CW : bool = noise_boost_normalize self.A : Optional[Tensor] = None self.B : Optional[Tensor] = None self.U : Optional[Tensor] = None self.V : Optional[Tensor] = None self.rows : int = 0 self.cols : int = 0 self.denoised : Optional[Tensor] = None self.uncond : Optional[Tensor] = None self.y0 : Optional[Tensor] = None self.y0_inv : Optional[Tensor] = None self.multistep_stages : int = 0 self.row_offset : Optional[int] = None self.cfg_cw : float = 1.0 self.extra_args : Optional[Dict[str, Any]] = None self.extra_options : str = extra_options self.EO : ExtraOptions = ExtraOptions(extra_options) self.reorder_tableau_indices : list[int] = self.EO("reorder_tableau_indices", [-1]) self.LINEAR_ANCHOR_X_0 : float = noise_anchor self.tile_sizes : Optional[List[Tuple[int,int]]] = None self.tile_cnt : int = 0 self.latent_compression_ratio : int = 8 @staticmethod def is_exponential(rk_type:str) -> bool: if rk_type.startswith(( "res", "dpmpp", "ddim", "pec", "etdrk", "lawson", "abnorsett", )): return True else: return False @staticmethod def create(model, rk_type : str, VE_MODEL : bool, noise_anchor : float = 1.0, noise_boost_normalize : bool = True, 
def model_epsilon(self, x:Tensor, sigma:Tensor, **extra_args) -> Tuple[Tensor, Tensor]:
    """Run the model and return ``(eps, denoised)``.

    ``self.model`` is called with ``x`` and a per-sample sigma vector; its
    output is passed through ``self.calc_cfg_channelwise`` and epsilon is then
    ``(x - denoised) / sigma``.

    NOTE: an earlier comment on this method states that it only works with the
    model sampling patch applied.
    """
    s_in     = x.new_ones([x.shape[0]])
    sigma_b  = sigma * s_in
    denoised = self.model(x, sigma_b, **extra_args)
    denoised = self.calc_cfg_channelwise(denoised)
    # Broadcast sigma over every non-batch axis. Sizing the view from x.ndim
    # (rather than a fixed four axes) keeps this valid for 5-D video latents.
    eps = (x - denoised) / sigma_b.view(x.shape[0], *([1] * (x.ndim - 1)))
    return eps, denoised
== -1: tile_h = x.shape[-2] tile_h_full = tile_h * self.latent_compression_ratio else: tile_h = tile_h_full // self.latent_compression_ratio if tile_w_full == -1: tile_w = x.shape[-1] tile_w_full = tile_w * self.latent_compression_ratio else: tile_w = tile_w_full // self.latent_compression_ratio #tile_h = tile_h_full // self.latent_compression_ratio #tile_w = tile_w_full // self.latent_compression_ratio self.tile_cnt += 1 #if len(self.tile_sizes) == 1 and self.tile_cnt % 2 == 1: # tile_h, tile_w = tile_w, tile_h # tile_h_full, tile_w_full = tile_w_full, tile_h_full if (self.tile_cnt // len(self.tile_sizes)) % 2 == 1 and self.EO("tiles_autorotate"): tile_h, tile_w = tile_w, tile_h tile_h_full, tile_w_full = tile_w_full, tile_h_full xt_negative = self.model.inner_model.conds.get('xt_negative', self.model.inner_model.conds.get('negative')) negative_control = xt_negative[0].get('control') if negative_control is not None and hasattr(negative_control, 'cond_hint_original'): negative_cond_hint_init = negative_control.cond_hint.clone() if negative_control.cond_hint is not None else None xt_positive = self.model.inner_model.conds.get('xt_positive', self.model.inner_model.conds.get('positive')) positive_control = xt_positive[0].get('control') if positive_control is not None and hasattr(positive_control, 'cond_hint_original'): positive_cond_hint_init = positive_control.cond_hint.clone() if positive_control.cond_hint is not None else None if positive_control.cond_hint_original.shape[-1] != x.shape[-2] * self.latent_compression_ratio or positive_control.cond_hint_original.shape[-2] != x.shape[-1] * self.latent_compression_ratio: positive_control_pretile = comfy.utils.bislerp(positive_control.cond_hint_original.clone().to(torch.float16).to('cuda'), x.shape[-1] * self.latent_compression_ratio, x.shape[-2] * self.latent_compression_ratio) positive_control.cond_hint_original = positive_control_pretile.to(positive_control.cond_hint_original) positive_control_pretile = 
positive_control.cond_hint_original.clone().to(torch.float16).to('cuda') control_tiles, control_orig_shape, control_grid, control_strides = tile_latent(positive_control_pretile, tile_size=(tile_h_full,tile_w_full)) control_tiles = control_tiles denoised_tiles = [] tiles, orig_shape, grid, strides = tile_latent(x, tile_size=(tile_h,tile_w)) if y0_style_pos is not None: y0_style_pos_tiles, _, _, _ = tile_latent(y0_style_pos, tile_size=(tile_h,tile_w)) if y0_style_neg is not None: y0_style_neg_tiles, _, _, _ = tile_latent(y0_style_neg, tile_size=(tile_h,tile_w)) for i in range(tiles.shape[0]): tile = tiles[i].unsqueeze(0) self.extra_args['model_options']['transformer_options']['x_tmp'] = tile if control_tiles is not None: positive_control.cond_hint = control_tiles[i].unsqueeze(0).to(positive_control.cond_hint) if negative_control is not None: negative_control.cond_hint = control_tiles[i].unsqueeze(0).to(positive_control.cond_hint) if y0_style_pos is not None: self.extra_args['model_options']['transformer_options']['y0_style_pos'] = y0_style_pos_tiles[i].unsqueeze(0) if y0_style_neg is not None: self.extra_args['model_options']['transformer_options']['y0_style_neg'] = y0_style_neg_tiles[i].unsqueeze(0) denoised_tile = self.model(tile, sigma * s_in, **extra_args) denoised_tiles.append(denoised_tile) denoised_tiles = torch.cat(denoised_tiles, dim=0) denoised = untile_latent(denoised_tiles, orig_shape, grid, strides) else: denoised = self.model(x, sigma * s_in, **extra_args) if control_tiles is not None: positive_control.cond_hint = positive_cond_hint_init if negative_control is not None: negative_control.cond_hint = negative_cond_hint_init if y0_style_pos is not None: self.extra_args['model_options']['transformer_options']['y0_style_pos'] = y0_style_pos if y0_style_neg is not None: self.extra_args['model_options']['transformer_options']['y0_style_neg'] = y0_style_neg denoised = self.calc_cfg_channelwise(denoised) return denoised def update_transformer_options(self, 
def set_coeff(self,
            rk_type    : str,
            h          : Tensor,
            c1         : float = 0.0,
            c2         : float = 0.5,
            c3         : float = 1.0,
            step       : int   = 0,
            sigmas     : Optional[Tensor] = None,
            sigma_down : Optional[Tensor] = None,
            ) -> None:
    """Fetch the tableau for ``rk_type`` at ``step`` and cache it on the instance.

    Updates ``rk_type`` / ``IMPLICIT`` / ``EXPONENTIAL``, asks
    ``get_rk_methods_beta`` for the coefficients and stores them as tensors
    (``A``, ``B``, ``C`` and, when provided, ``U`` and ``V``) using the dtype
    and device of ``h``.  Also records the tableau dimensions, the
    multistep / hybrid stage counts and ``row_offset``, and applies the
    configured tableau reordering for implicit methods.

    ``sigmas`` must support indexing at ``step`` and ``step + 1``.
    """
    self.rk_type     = rk_type
    self.IMPLICIT    = rk_type in get_implicit_sampler_name_list(nameOnly=True)
    self.EXPONENTIAL = RK_Method_Beta.is_exponential(rk_type)

    sigma_now  = sigmas[step]
    sigma_next = sigmas[step+1]
    no_history = []   # h_prev: no previous step sizes are supplied from here

    a, b, u, v, ci, stages_multistep, stages_hybrid, _fsal = get_rk_methods_beta(
        rk_type, h, c1, c2, c3, no_history, step, sigmas,
        sigma_now, sigma_next, sigma_down, self.extra_options,
    )

    self.multistep_stages = stages_multistep
    self.hybrid_stages    = stages_hybrid

    def as_tensor(values):
        return torch.tensor(values, dtype=h.dtype, device=h.device)

    self.A = as_tensor(a)
    self.B = as_tensor(b)
    self.C = as_tensor(ci)
    self.U = None if u is None else as_tensor(u)
    self.V = None if v is None else as_tensor(v)

    self.rows, self.cols = self.A.shape[0], self.A.shape[1]

    # Explicit tableaus whose first row of A is all zero use an offset of one.
    if self.IMPLICIT:
        self.row_offset = 0
    elif self.A[0].sum() == 0:
        self.row_offset = 1
    else:
        self.row_offset = 0

    if self.IMPLICIT and self.reorder_tableau_indices[0] != -1:
        self.reorder_tableau(self.reorder_tableau_indices)
def zum2(self, row:int, k:Tensor, k_prev:Tensor=None, h_new:Tensor=None, sigma:Tensor=None) -> Tensor:
    """Dispatch a weighted sum over ``k`` to the A-rows or the B-rows.

    Row indices below ``self.rows`` address the A matrix and go to
    ``a_k_einsum2``; indices at or beyond ``self.rows`` address the B matrix
    (after subtracting ``self.rows``) and go to ``b_k_einsum2``.
    ``k_prev`` is accepted but not used.
    """
    b_row = row - self.rows
    if b_row >= 0:
        return self.b_k_einsum2(b_row, k, h_new, sigma)
    return self.a_k_einsum2(row, k, h_new, sigma)
def calc_cfg_channelwise(self, denoised:Tensor) -> Tensor:
    """Rescale the CFG direction per channel, relative to the mean channel norm.

    ``self.uncond[0]`` holds the unconditional prediction captured by the
    post-CFG hook installed in ``init_cfg_channelwise``.  For every
    (batch, channel) slice, the norm of ``denoised - uncond`` is divided by
    the mean of those norms over the channels of the same batch item; the
    difference for that channel is then scaled by ``ratio * self.cfg_cw``
    and added back onto ``uncond``.

    Each channel is scaled by its own ratio, and the mean is taken per batch
    item so that batched inputs give the same result as processing the items
    one at a time.

    Args:
        denoised: Model output with at least three dimensions, laid out as
                  (batch, channel, ...).

    Returns:
        ``denoised`` itself when ``self.cfg_cw == 1.0``, otherwise the
        rescaled tensor.  Channels whose ratio is undefined (0/0) get a ratio
        of 0, i.e. they fall back to ``uncond``.
    """
    if self.cfg_cw == 1.0:
        return denoised

    uncond = self.uncond[0]
    delta  = denoised - uncond

    # One norm per (batch, channel) slice: shape (B, C).
    norms = delta.flatten(start_dim=2).norm(dim=-1)
    avg   = norms.mean(dim=1, keepdim=True)
    ratio = torch.nan_to_num(norms / avg, 0)

    # Reshape to (B, C, 1, 1, ...) so the ratio broadcasts over each slice.
    ratio = ratio.view(*ratio.shape, *([1] * (delta.ndim - 2)))
    return uncond + ratio * self.cfg_cw * delta
def bong_iter(self,
            x_0        : Tensor,
            x_         : Tensor,
            eps_       : Tensor,
            eps_prev_  : Tensor,
            data_      : Tensor,
            sigma      : Tensor,
            s_         : Tensor,
            row        : int,
            row_offset : int,
            h          : Tensor,
            step       : int,
            step_sched : int,
            BONGMATH_Y : bool = False,
            y0_bongflow : Optional[Tensor] = None,
            noise_sync : Optional[Tensor] = None,
            eps_x_     : Optional[Tensor] = None,
            eps_y_     : Optional[Tensor] = None,
            #eps_x2y_  : Optional[Tensor] = None,
            data_x_    : Optional[Tensor] = None,
            data_y_    : Optional[Tensor] = None,
            #yt_       : Optional[Tensor] = None,
            #yt_0      : Optional[Tensor] = None,
            LG = None,
            ) -> Tuple[Tensor, Tensor, Tensor]:
    """Iteratively re-derive ``x_0``, the earlier substep states and their epsilons.

    Starting from the state at ``x_[row+row_offset]``, a fixed number of
    iterations (100) repeat three steps:

    1. recompute ``x_0`` by stepping back from ``x_[row+row_offset]`` with the
       current tableau sum (``self.zum``),
    2. recompute every earlier row ``x_[rr]`` forward from that ``x_0``,
    3. recompute the epsilons, either per row with ``self.get_epsilon`` or,
       when ``BONGMATH_Y`` is active, as a masked blend of the x / y
       epsilons.

    The loop is skipped entirely (inputs returned unchanged) when ``step`` is
    outside the ``bong_start_step`` .. ``bong_stop_step`` window, or when
    ``self.unsample_bongmath`` is False and ``s_[-1] > s_[0]``.  It is also
    not entered when ``row`` is at or beyond the iteration limit, or when
    ``self.multistep_stages`` is non-zero.

    Extra options read here: ``bong_start_step``, ``bong_stop_step``,
    ``bong_iter_max_row_full``, ``bong_iter_lock_x_0_ch_means``,
    ``bong_iter_lock_x_row_ch_means``, ``bong_strength``, ``zonkytar``,
    ``disable_bongmath_y``, ``sync_x2y``.

    Note: ``x_`` and ``eps_`` are written to in place (row assignment) during
    the iterations.  ``LG`` is only used when ``BONGMATH_Y`` is True and must
    then provide ``get_masks_for_step`` and ``SYNC_SEPARATE``.

    Returns:
        ``(x_0, x_, eps_)`` after iteration (or the unchanged inputs when
        skipped).
    """
    # Dimensions over which channel means are taken.
    # NOTE(review): norm_dim stays undefined for any ndim other than 4 or 5;
    # it is only read when one of the "lock ... ch_means" options is enabled.
    if x_0.ndim == 4:
        norm_dim = (-2,-1)
    elif x_0.ndim == 5:
        norm_dim = (-4,-2,-1)

    if BONGMATH_Y:
        lgw_mask_,      lgw_mask_inv_      = LG.get_masks_for_step(step_sched)
        lgw_mask_sync_, lgw_mask_sync_inv_ = LG.get_masks_for_step(step_sched, lgw_type="sync")

        weight_mask = lgw_mask_+lgw_mask_inv_
        if LG.SYNC_SEPARATE:
            sync_mask = lgw_mask_sync_+lgw_mask_sync_inv_
        else:
            sync_mask = 1.

    # Early out: outside the configured step window, or stepping toward higher
    # sigma while unsample_bongmath is disabled.
    if self.EO("bong_start_step", 0) > step or step > self.EO("bong_stop_step", 10000) or (self.unsample_bongmath == False and s_[-1] > s_[0]):
        return x_0, x_, eps_

    # Rows at or beyond this limit are not iterated.
    bong_iter_max_row = self.rows - row_offset
    if self.EO("bong_iter_max_row_full"):
        bong_iter_max_row = self.rows

    # Optionally remember the means so they can be re-imposed every iteration.
    if self.EO("bong_iter_lock_x_0_ch_means"):
        x_0_ch_means = x_0.mean(dim=norm_dim, keepdim=True)

    if self.EO("bong_iter_lock_x_row_ch_means"):
        x_row_means = []
        for rr in range(row+row_offset):
            x_row_mean = x_[rr].mean(dim=norm_dim, keepdim=True)
            x_row_means.append(x_row_mean)

    if row < bong_iter_max_row   and   self.multistep_stages == 0:
        bong_strength = self.EO("bong_strength", 1.0)

        # Keep copies of the starting values so the result can be blended
        # back toward them when bong_strength is not 1.
        if bong_strength != 1.0:
            x_0_tmp  = x_0 .clone()
            x_tmp_   = x_  .clone()
            eps_tmp_ = eps_.clone()

        for i in range(100):     #bongmath for eps_prev_ not implemented?
            # Step back from the current row to obtain x_0.
            x_0 = x_[row+row_offset] - h * self.zum(row+row_offset, eps_, eps_prev_)

            if self.EO("bong_iter_lock_x_0_ch_means"):
                x_0 = x_0 - x_0.mean(dim=norm_dim, keepdim=True) + x_0_ch_means

            # Rebuild every earlier row from the new x_0.
            for rr in range(row+row_offset):
                x_[rr] = x_0 + h * self.zum(rr, eps_, eps_prev_)

            if self.EO("bong_iter_lock_x_row_ch_means"):
                for rr in range(row+row_offset):
                    x_[rr] = x_[rr] - x_[rr].mean(dim=norm_dim, keepdim=True) + x_row_means[rr]

            # Recompute the epsilons for the rebuilt rows.
            for rr in range(row+row_offset):
                if self.EO("zonkytar"):
                    #eps_[rr] = self.get_unsample_epsilon(x_[rr], x_0, data_[rr], sigma, s_[rr])
                    # Same call as the default branch below, but with x_[rr] and x_0 swapped.
                    eps_[rr] = self.get_epsilon(x_[rr], x_0, data_[rr], sigma, s_[rr])
                else:
                    if BONGMATH_Y and not self.EO("disable_bongmath_y"):
                        # NOTE: these branches rebind eps_ as a whole (not eps_[rr]),
                        # and do so on every pass of the rr loop.
                        if self.EXPONENTIAL:
                            eps_x_   = data_x_ - x_0
                            eps_x2y_ = data_y_ - x_0
                            if self.VE_MODEL:
                                eps_ = sync_mask * eps_x_   +   (1-sync_mask) * eps_x2y_   +   weight_mask * (-eps_y_+sigma*(-noise_sync))
                                if self.EO("sync_x2y"):
                                    eps_ = sync_mask * eps_x_   +   (1-sync_mask) * eps_x2y_   +   weight_mask * (-eps_x2y_+sigma*(-noise_sync))
                            else:
                                eps_ = sync_mask * eps_x_   +   (1-sync_mask) * eps_x2y_   +   weight_mask * (-eps_y_+sigma*(y0_bongflow-noise_sync))
                                if self.EO("sync_x2y"):
                                    eps_ = sync_mask * eps_x_   +   (1-sync_mask) * eps_x2y_   +   weight_mask * (-eps_x2y_+sigma*(y0_bongflow-noise_sync))
                        else:
                            # The view(-1,1,1,1,1) makes s_ broadcast as a 5-D tensor.
                            eps_x_  [:s_.shape[0]] = (x_[:s_.shape[0]] - data_x_[:s_.shape[0]]) / s_.view(-1,1,1,1,1)    # or should it be vs x_0???
                            eps_x2y_ = torch.zeros_like(eps_x_)
                            eps_x2y_[:s_.shape[0]] = (x_[:s_.shape[0]] - data_y_[:s_.shape[0]]) / s_.view(-1,1,1,1,1)    # or should it be vs x_0???
                            if self.VE_MODEL:
                                eps_ = sync_mask * eps_x_   +   (1-sync_mask) * eps_x2y_   +   weight_mask * (noise_sync-eps_y_)
                                if self.EO("sync_x2y"):
                                    eps_ = sync_mask * eps_x_   +   (1-sync_mask) * eps_x2y_   +   weight_mask * (noise_sync-eps_x2y_)
                            else:
                                eps_ = sync_mask * eps_x_   +   (1-sync_mask) * eps_x2y_   +   weight_mask * (noise_sync-eps_y_-y0_bongflow)
                                if self.EO("sync_x2y"):
                                    eps_ = sync_mask * eps_x_   +   (1-sync_mask) * eps_x2y_   +   weight_mask * (noise_sync-eps_x2y_-y0_bongflow)
                    else:
                        eps_[rr] = self.get_epsilon(x_0, x_[rr], data_[rr], sigma, s_[rr])

        # Blend between the starting values and the iterated result.
        if bong_strength != 1.0:
            x_0  = x_0_tmp  + bong_strength * (x_0  - x_0_tmp)
            x_   = x_tmp_   + bong_strength * (x_   - x_tmp_)
            eps_ = eps_tmp_ + bong_strength * (eps_ - eps_tmp_)

    return x_0, x_, eps_ #, yt_0, yt_
0: eps_anchor = eps_[newton_iter_anchor].clone() if newton_iter_anchor_x_all: x_orig_ = x_.clone() for n_iter in range(newton_iter): for r in range(start, stop): if newton_iter_anchor >= 0: eps_[newton_iter_anchor] = eps_anchor.clone() if newton_iter_anchor_x_all: x_ = x_orig_.clone() x_tmp, eps_tmp = x_[r].clone(), eps_[r].clone() seq_start, seq_stop = r, r+1 if newton_iter_sequence == "double": seq_start, seq_stop = start, stop for r_ in range(seq_start, seq_stop): x_[r_] = x_0 + h * self.zum(r_, eps_, eps_prev_) for r_ in range(seq_start, seq_stop): if newton_iter_type == "from_data": data_[r_] = get_data_from_step(x_0, x_[r_], sigma, s_[r_]) eps_ [r_] = self.get_epsilon(x_0, x_[r_], data_[r_], sigma, s_[r_]) elif newton_iter_type == "from_step": eps_ [r_] = get_epsilon_from_step(x_0, x_[r_], sigma, s_[r_]) elif newton_iter_type == "from_alt": eps_ [r_] = x_0/sigma - x_[r_]/s_[r_] elif newton_iter_type == "from_epsilon": eps_ [r_] = self.get_epsilon(x_0, x_[r_], data_[r_], sigma, s_[r_]) if self.EO(newton_iter_name + "_opt"): opt_timing, opt_type, opt_subtype = self.EO(newton_iter_name+"_opt", [str]) opt_start, opt_stop = 0, self.rows+row_b_offset if opt_timing == "early": opt_stop = row + 1 elif opt_timing == "late": opt_start = row + 1 for r2 in range(opt_start, opt_stop): if r_ != r2: if opt_subtype == "a": eps_a = eps_[r2] eps_b = eps_[r_] elif opt_subtype == "b": eps_a = eps_[r_] eps_b = eps_[r2] if opt_type == "ortho": eps_ [r_] = get_orthogonal(eps_a, eps_b) elif opt_type == "collin": eps_ [r_] = get_collinear (eps_a, eps_b) elif opt_type == "proj": eps_ [r_] = get_collinear (eps_a, eps_b) + get_orthogonal(eps_b, eps_a) x_ [r_] = x_tmp + newton_iter_mixing_rate * (x_ [r_] - x_tmp) eps_[r_] = eps_tmp + newton_iter_mixing_rate * (eps_[r_] - eps_tmp) if newton_iter_sequence == "double": break return x_, eps_ class RK_Method_Exponential(RK_Method_Beta): def __init__(self, model, rk_type : str, VE_MODEL : bool, noise_anchor : float, noise_boost_normalize : 
bool, model_device : str = 'cuda', work_device : str = 'cpu', dtype : torch.dtype = torch.float64, extra_options : str = "", ): super().__init__(model, rk_type, VE_MODEL, noise_anchor, noise_boost_normalize, model_device = model_device, work_device = work_device, dtype = dtype, extra_options = extra_options, ) @staticmethod def alpha_fn(neg_h:Tensor) -> Tensor: return torch.exp(neg_h) @staticmethod def sigma_fn(t:Tensor) -> Tensor: #return 1/(torch.exp(-t)+1) return t.neg().exp() @staticmethod def t_fn(sigma:Tensor) -> Tensor: #return -torch.log((1.-sigma)/sigma) return sigma.log().neg() @staticmethod def h_fn(sigma_down:Tensor, sigma:Tensor) -> Tensor: #return (-torch.log((1.-sigma_down)/sigma_down)) - (-torch.log((1.-sigma)/sigma)) return -torch.log(sigma_down/sigma) def __call__(self, x : Tensor, sub_sigma : Tensor, x_0 : Optional[Tensor] = None, sigma : Optional[Tensor] = None, transformer_options : Optional[dict] = None, ) -> Tuple[Tensor, Tensor]: x_0 = x if x_0 is None else x_0 sigma = sub_sigma if sigma is None else sigma if transformer_options is not None: self.extra_args.setdefault("model_options", {}).setdefault("transformer_options", {}).update(transformer_options) denoised = self.model_denoised(x.to(self.model_device), sub_sigma.to(self.model_device), **self.extra_args).to(sigma.device) eps_anchored = (x_0 - denoised) / sigma eps_unmoored = (x - denoised) / sub_sigma eps = eps_unmoored + self.LINEAR_ANCHOR_X_0 * (eps_anchored - eps_unmoored) denoised = x_0 - sigma * eps epsilon = denoised - x_0 #epsilon = denoised - x if self.EO("exp2lin_override"): epsilon = (x_0 - denoised) / sigma return epsilon, denoised def get_eps(self, *args): if len(args) == 3: x, denoised, sigma = args return denoised - x elif len(args) == 5: x_0, x, denoised, sigma, sub_sigma = args eps_anchored = (x_0 - denoised) / sigma eps_unmoored = (x - denoised) / sub_sigma eps = eps_unmoored + self.LINEAR_ANCHOR_X_0 * (eps_anchored - eps_unmoored) denoised = x_0 - sigma * eps eps_out = 
class RK_Method_Linear(RK_Method_Beta):
    """RK method whose time variable is sigma itself.

    ``t_fn`` and ``sigma_fn`` are identities, the step size is the plain
    difference ``sigma_down - sigma`` and ``alpha_fn`` is constantly one.
    Epsilons are expressed as ``(x - denoised) / sigma``.
    """

    def __init__(self,
                model,
                rk_type               : str,
                VE_MODEL              : bool,
                noise_anchor          : float,
                noise_boost_normalize : bool,
                model_device          : str         = 'cuda',
                work_device           : str         = 'cpu',
                dtype                 : torch.dtype = torch.float64,
                extra_options         : str         = "",
                ):
        """Forward all arguments unchanged to ``RK_Method_Beta.__init__``."""
        super().__init__(model,
                        rk_type,
                        VE_MODEL,
                        noise_anchor,
                        noise_boost_normalize,
                        model_device  = model_device,
                        work_device   = work_device,
                        dtype         = dtype,
                        extra_options = extra_options,
                        )

    @staticmethod
    def alpha_fn(neg_h:Tensor) -> Tensor:
        """Return a tensor of ones shaped like ``neg_h``."""
        return torch.ones_like(neg_h)

    @staticmethod
    def sigma_fn(t:Tensor) -> Tensor:
        """Map time to sigma (identity)."""
        return t

    @staticmethod
    def t_fn(sigma:Tensor) -> Tensor:
        """Map sigma to time (identity)."""
        return sigma

    @staticmethod
    def h_fn(sigma_down:Tensor, sigma:Tensor) -> Tensor:
        """Return the step size ``sigma_down - sigma``."""
        return sigma_down - sigma

    def __call__(self,
                x                   : Tensor,
                sub_sigma           : Tensor,
                x_0                 : Optional[Tensor] = None,
                sigma               : Optional[Tensor] = None,
                transformer_options : Optional[dict]   = None,
                ) -> Tuple[Tensor, Tensor]:
        """Evaluate the model at ``(x, sub_sigma)`` and return ``(epsilon, denoised)``.

        ``x_0`` / ``sigma`` default to ``x`` / ``sub_sigma``.  The returned
        epsilon interpolates between the epsilon measured from ``x`` at
        ``sub_sigma`` ("unmoored") and the one measured from ``x_0`` at
        ``sigma`` ("anchored"), weighted by ``self.LINEAR_ANCHOR_X_0``.
        When given, ``transformer_options`` is merged into
        ``self.extra_args["model_options"]["transformer_options"]`` first.
        """
        x_0   = x         if x_0   is None else x_0
        sigma = sub_sigma if sigma is None else sigma

        if transformer_options is not None:
            self.extra_args.setdefault("model_options", {}).setdefault("transformer_options", {}).update(transformer_options)

        # The model runs on model_device; the result is moved back to sigma's device.
        denoised = self.model_denoised(x.to(self.model_device), sub_sigma.to(self.model_device), **self.extra_args).to(sigma.device)

        epsilon_anchor   = (x_0 - denoised) / sigma
        epsilon_unmoored = (x   - denoised) / sub_sigma

        epsilon = epsilon_unmoored + self.LINEAR_ANCHOR_X_0 * (epsilon_anchor - epsilon_unmoored)

        return epsilon, denoised

    def get_eps(self, *args):
        """Compute epsilon from either 3 or 5 positional arguments.

        * ``(x, denoised, sigma)`` -> ``(x - denoised) / sigma``
        * ``(x_0, x, denoised, sigma, sub_sigma)`` -> the anchored/unmoored
          blend, identical to ``get_epsilon``.

        Raises:
            ValueError: for any other number of arguments.
        """
        if len(args) == 3:
            x, denoised, sigma = args
            return (x - denoised) / sigma

        elif len(args) == 5:
            x_0, x, denoised, sigma, sub_sigma = args
            eps_anchor   = (x_0 - denoised) / sigma
            eps_unmoored = (x   - denoised) / sub_sigma
            return eps_unmoored + self.LINEAR_ANCHOR_X_0 * (eps_anchor - eps_unmoored)

        else:
            raise ValueError(f"get_eps expected 3 or 5 arguments, got {len(args)}")

    def get_epsilon(self,
                    x_0       : Tensor,
                    x         : Tensor,
                    denoised  : Tensor,
                    sigma     : Tensor,
                    sub_sigma : Tensor,
                    ) -> Tensor:
        """Blend the anchored (``x_0``, ``sigma``) and unmoored (``x``, ``sub_sigma``) epsilons.

        The blend weight is ``self.LINEAR_ANCHOR_X_0`` (1.0 = fully anchored).
        """
        eps_anchor   = (x_0 - denoised) / sigma
        eps_unmoored = (x   - denoised) / sub_sigma

        return eps_unmoored + self.LINEAR_ANCHOR_X_0 * (eps_anchor - eps_unmoored)

    def get_epsilon_anchored(self, x_0:Tensor, denoised:Tensor, sigma:Tensor) -> Tensor:
        """Return ``(x_0 - denoised) / sigma``."""
        return (x_0 - denoised) / sigma

    def get_guide_epsilon(self,
                            x_0           : Tensor,
                            x             : Tensor,
                            y             : Tensor,
                            sigma         : Tensor,
                            sigma_cur     : Tensor,
                            sigma_down    : Optional[Tensor] = None,
                            epsilon_scale : Optional[Tensor] = None,
                            ) -> Tensor:
        """Return the epsilon that points from ``x`` relative to the guide ``y``.

        When ``sigma_down > sigma`` the sign is flipped (``y - x``) and the
        default divisor is ``self.sigma_max - sigma_cur``; otherwise the
        result is ``(x - y)`` divided by ``sigma_cur``.  ``epsilon_scale``,
        when given, replaces the divisor in both cases.  A ``sigma_down`` of
        ``None`` is treated like ``sigma_down <= sigma``.  ``x_0`` is accepted
        but not used.
        """
        # Guard the comparison so the documented default of None is usable.
        stepping_up = sigma_down is not None and sigma_down > sigma

        if epsilon_scale is not None:
            sigma_ratio = epsilon_scale
        elif stepping_up:
            sigma_ratio = self.sigma_max - sigma_cur.clone()
        else:
            sigma_ratio = sigma_cur.clone()

        if stepping_up:
            return (y - x) / sigma_ratio
        return (x - y) / sigma_ratio
def get_data_from_step(x, x_next, sigma, sigma_next):
    """Extrapolate the straight line through two states to its sigma = 0 end.

    Given ``x`` at ``sigma`` and ``x_next`` at ``sigma_next``, returns
    ``(sigma_next * x - sigma * x_next) / (sigma_next - sigma)``.
    Assumes a 100% linear trajectory between the two states.
    """
    step_size = sigma_next - sigma
    weighted_difference = sigma_next * x - sigma * x_next
    return weighted_difference / step_size

def get_epsilon_from_step(x, x_next, sigma, sigma_next):
    """Return ``(x - x_next) / (sigma_next - sigma)`` for one step."""
    step_size = sigma_next - sigma
    return (x - x_next) / step_size
def init_noise_samplers(self,
                        x                      : Tensor,
                        noise_seed             : int,
                        noise_seed_substep     : int,
                        noise_sampler_type     : str,
                        noise_sampler_type2    : str,
                        noise_mode_sde         : str,
                        noise_mode_sde_substep : str,
                        overshoot_mode         : str,
                        overshoot_mode_substep : str,
                        noise_boost_step       : float,
                        noise_boost_substep    : float,
                        alpha                  : float,
                        alpha2                 : float,
                        k                      : float = 1.0,
                        k2                     : float = 1.0,
                        scale                  : float = 0.1,
                        scale2                 : float = 0.1,
                        last_rng                       = None,
                        last_rng_substep               = None,
                        ) -> None:
    """Store the noise settings and build the step and substep noise samplers.

    Seed selection for the step sampler:

    * ``noise_seed < 0`` and no ``last_rng``: ``torch.initial_seed() + 1``
    * ``noise_seed < 0`` with a ``last_rng``:  ``torch.initial_seed()``
    * otherwise: ``noise_seed`` as given

    The substep sampler is always seeded with ``noise_seed_substep``.

    Each sampler is configured independently: a type of ``"fractal"`` is
    looked up in ``NOISE_GENERATOR_CLASSES`` and receives its ``alpha`` /
    ``k`` / ``scale`` values; every other type is looked up in
    ``NOISE_GENERATOR_CLASSES_SIMPLE``.

    When ``last_rng`` is given, the generator state of the step sampler is
    restored from it and that of the substep sampler from
    ``last_rng_substep``.
    """
    self.noise_sampler_type     = noise_sampler_type
    self.noise_sampler_type2    = noise_sampler_type2
    self.noise_mode_sde         = noise_mode_sde
    self.noise_mode_sde_substep = noise_mode_sde_substep
    self.overshoot_mode         = overshoot_mode
    self.overshoot_mode_substep = overshoot_mode_substep
    self.noise_boost_step       = noise_boost_step
    self.noise_boost_substep    = noise_boost_substep

    self.s_in = x.new_ones([1], dtype=self.dtype, device=self.device)

    # The three cases are mutually exclusive, so a derived seed is never
    # replaced by the negative placeholder value.
    if noise_seed < 0 and last_rng is None:
        seed = torch.initial_seed()+1
        RESplain("SDE noise seed: ", seed, " (set via torch.initial_seed()+1)", debug=True)
    elif noise_seed < 0:
        seed = torch.initial_seed()
        RESplain("SDE noise seed: ", seed, " (set via torch.initial_seed())", debug=True)
    else:
        seed = noise_seed
        RESplain("SDE noise seed: ", seed, debug=True)

    #seed2 = seed + MAX_STEPS #for substep noise generation. offset needed to ensure seeds are not reused

    if noise_sampler_type == "fractal":
        self.noise_sampler        = NOISE_GENERATOR_CLASSES.get(noise_sampler_type )(x=x, seed=seed,               sigma_min=self.sigma_min, sigma_max=self.sigma_max)
        self.noise_sampler.alpha  = alpha
        self.noise_sampler.k      = k
        self.noise_sampler.scale  = scale
    else:
        self.noise_sampler        = NOISE_GENERATOR_CLASSES_SIMPLE.get(noise_sampler_type )(x=x, seed=seed,               sigma_min=self.sigma_min, sigma_max=self.sigma_max)

    if noise_sampler_type2 == "fractal":
        self.noise_sampler2       = NOISE_GENERATOR_CLASSES.get(noise_sampler_type2)(x=x, seed=noise_seed_substep, sigma_min=self.sigma_min, sigma_max=self.sigma_max)
        self.noise_sampler2.alpha = alpha2
        self.noise_sampler2.k     = k2
        self.noise_sampler2.scale = scale2
    else:
        self.noise_sampler2       = NOISE_GENERATOR_CLASSES_SIMPLE.get(noise_sampler_type2)(x=x, seed=noise_seed_substep, sigma_min=self.sigma_min, sigma_max=self.sigma_max)

    if last_rng is not None:
        self.noise_sampler .generator.set_state(last_rng)
        self.noise_sampler2.generator.set_state(last_rng_substep)
eta >= 1: sigma_up = sigma_next * 0.9999 #avoid sqrt(neg_num) later else: sigma_up = sigma_next * eta if VP_OVERRIDE is not None: sigma_signal = 1 - sigma_next else: sigma_signal = self.sigma_max - sigma_next sigma_residual = (sigma_next ** 2 - sigma_up ** 2) ** .5 alpha_ratio = sigma_signal + sigma_residual sigma_down = sigma_residual / alpha_ratio else: alpha_ratio = torch.ones_like(sigma_next) if sigma_down is not None: sigma_up = (sigma_next ** 2 - sigma_down ** 2) ** .5 # not sure this is correct #TODO: CHECK THIS elif sigma_up is not None: sigma_down = (sigma_next ** 2 - sigma_up ** 2) ** .5 return alpha_ratio, sigma_down, sigma_up def set_sde_step(self, sigma:Tensor, sigma_next:Tensor, eta:float, overshoot:float, s_noise:float) -> None: self.sigma_0 = sigma self.sigma_next = sigma_next self.s_noise = s_noise self.eta = eta self.overshoot = overshoot self.sigma_up_eta, self.sigma_eta, self.sigma_down_eta, self.alpha_ratio_eta \ = self.get_sde_step(sigma, sigma_next, eta, self.noise_mode_sde, self.DOWN_STEP, SUBSTEP=False) self.sigma_up, self.sigma, self.sigma_down, self.alpha_ratio \ = self.get_sde_step(sigma, sigma_next, overshoot, self.overshoot_mode, self.DOWN_STEP, SUBSTEP=False) self.h = self.h_fn(self.sigma_down, self.sigma) self.h_no_eta = self.h_fn(self.sigma_next, self.sigma) self.h = self.h + self.noise_boost_step * (self.h_no_eta - self.h) def set_sde_substep(self, row : int, multistep_stages : int, eta_substep : float, overshoot_substep : float, s_noise_substep : float, full_iter : int = 0, diag_iter : int = 0, implicit_steps_full : int = 0, implicit_steps_diag : int = 0 ) -> None: # start with stepsizes for no overshoot/noise addition/noise swapping self.sub_sigma_up_eta = self.sub_sigma_up = 0.0 self.sub_sigma_eta = self.sub_sigma = self.s_[row] self.sub_sigma_down_eta = self.sub_sigma_down = self.sub_sigma_next = self.s_[row+self.row_offset+multistep_stages] self.sub_alpha_ratio_eta = self.sub_alpha_ratio = 1.0 self.s_noise_substep = 
s_noise_substep self.eta_substep = eta_substep self.overshoot_substep = overshoot_substep if row < self.rows and self.s_[row+self.row_offset+multistep_stages] > 0: if diag_iter > 0 and diag_iter == implicit_steps_diag and self.EO("implicit_substep_skip_final_eta"): pass elif diag_iter > 0 and self.EO("implicit_substep_only_first_eta"): pass elif full_iter > 0 and full_iter == implicit_steps_full and self.EO("implicit_step_skip_final_eta"): pass elif full_iter > 0 and self.EO("implicit_step_only_first_eta"): pass elif (full_iter > 0 or diag_iter > 0) and self.noise_sampler_type2 == "brownian": pass # brownian noise does not increment its seed when generated, deactivate on implicit repeats to avoid burn elif full_iter > 0 and self.EO("implicit_step_only_first_all_eta"): self.sigma_down_eta = self.sigma_next self.sigma_up_eta *= 0 self.alpha_ratio_eta /= self.alpha_ratio_eta self.sigma_down = self.sigma_next self.sigma_up *= 0 self.alpha_ratio /= self.alpha_ratio self.h_new = self.h = self.h_no_eta elif (row < self.rows-self.row_offset-multistep_stages or diag_iter < implicit_steps_diag) or self.EO("substep_eta_use_final"): self.sub_sigma_up, self.sub_sigma, self.sub_sigma_down, self.sub_alpha_ratio = self.get_sde_substep(sigma = self.s_[row], sigma_next = self.s_[row+self.row_offset+multistep_stages], eta = overshoot_substep, noise_mode_override = self.overshoot_mode_substep, DOWN = self.DOWN_SUBSTEP) self.sub_sigma_up_eta, self.sub_sigma_eta, self.sub_sigma_down_eta, self.sub_alpha_ratio_eta = self.get_sde_substep(sigma = self.s_[row], sigma_next = self.s_[row+self.row_offset+multistep_stages], eta = eta_substep, noise_mode_override = self.noise_mode_sde_substep, DOWN = self.DOWN_SUBSTEP) if self.h_fn(self.sub_sigma_next, self.sigma) != 0: self.h_new = self.h * self.h_fn(self.sub_sigma_down, self.sigma) / self.h_fn(self.sub_sigma_next, self.sigma) self.h_eta = self.h * self.h_fn(self.sub_sigma_down_eta, self.sigma) / self.h_fn(self.sub_sigma_next, self.sigma) 
self.h_new_orig = self.h_new.clone() self.h_new = self.h_new + self.noise_boost_substep * (self.h - self.h_eta) else: self.h_new = self.h_eta = self.h self.h_new_orig = self.h_new.clone() def get_sde_substep(self, sigma :Tensor, sigma_next :Tensor, eta :float = 0.0 , noise_mode_override :Optional[str] = None , DOWN :bool = False, ) -> Tuple[Tensor,Tensor,Tensor,Tensor]: return self.get_sde_step(sigma=sigma, sigma_next=sigma_next, eta=eta, noise_mode_override=noise_mode_override, DOWN=DOWN, SUBSTEP=True,) def get_sde_step(self, sigma :Tensor, sigma_next :Tensor, eta :float = 0.0 , noise_mode_override :Optional[str] = None , DOWN :bool = False, SUBSTEP :bool = False, VP_OVERRIDE = None, ) -> Tuple[Tensor,Tensor,Tensor,Tensor]: VARIANCE_PRESERVING = VP_OVERRIDE if VP_OVERRIDE is not None else self.VARIANCE_PRESERVING if noise_mode_override is not None: noise_mode = noise_mode_override elif SUBSTEP: noise_mode = self.noise_mode_sde_substep else: noise_mode = self.noise_mode_sde if DOWN: #calculates noise level by first scaling sigma_down from sigma_next, instead of sigma_up from sigma_next eta_fn = lambda eta_scale: 1-eta_scale sud_fn = lambda sd: (sd, None) else: eta_fn = lambda eta_scale: eta_scale sud_fn = lambda su: (None, su) su, sd, sud = None, None, None eta_ratio = None sigma_base = sigma_next sigmax = self.sigma_max if VP_OVERRIDE is None else 1 match noise_mode: case "hard": eta_ratio = eta case "exp": h = -(sigma_next/sigma).log() eta_ratio = (1 - (-2*eta*h).exp())**.5 case "soft": eta_ratio = 1-(1 - eta) + eta * ((sigma_next) / sigma) case "softer": eta_ratio = 1-torch.sqrt(1 - (eta**2 * (sigma**2 - sigma_next**2)) / sigma**2) case "soft-linear": eta_ratio = 1-eta * (sigma_next - sigma) case "sinusoidal": eta_ratio = eta * torch.sin(torch.pi * (sigma_next / sigmax)) ** 2 case "eps": eta_ratio = eta * torch.sqrt((sigma_next/sigma) ** 2 * (sigma ** 2 - sigma_next ** 2) ) case "lorentzian": eta_ratio = eta alpha = 1 / ((sigma_next.to(sigma.dtype))**2 + 1) 
sigma_base = ((1 - alpha) ** 0.5).to(sigma.dtype) case "hard_var": sigma_var = (-1 + torch.sqrt(1 + 4 * sigma)) / 2 if sigma_next > sigma_var: eta_ratio = 0 sigma_base = sigma_next else: eta_ratio = eta sigma_base = torch.sqrt((sigma - sigma_next).abs() + 1e-10) case "hard_sq": sigma_hat = sigma * (1 + eta) su = (sigma_hat ** 2 - sigma ** 2) ** .5 #su if VARIANCE_PRESERVING: alpha_ratio, sd, su = self.get_sde_coeff(sigma_next, None, su, eta, VARIANCE_PRESERVING) else: sd = sigma_next sigma = sigma_hat alpha_ratio = torch.ones_like(sigma) case "vpsde": alpha_ratio, sd, su = self.get_vpsde_step_RF(sigma, sigma_next, eta) case "er4": #def noise_scaler(sigma): # return sigma * ((sigma ** 0.3).exp() + 10.0) noise_scaler = lambda sigma: sigma * ((sigma ** eta).exp() + 10.0) alpha_ratio = noise_scaler(sigma_next) / noise_scaler(sigma) sigma_up = (sigma_next ** 2 - sigma ** 2 * alpha_ratio ** 2) ** 0.5 eta_ratio = sigma_up / sigma_next if eta_ratio is not None: sud = sigma_base * eta_fn(eta_ratio) alpha_ratio, sd, su = self.get_sde_coeff(sigma_next, *sud_fn(sud), eta, VARIANCE_PRESERVING) su = torch.nan_to_num(su, 0.0) sd = torch.nan_to_num(sd, float(sigma_next)) alpha_ratio = torch.nan_to_num(alpha_ratio, 1.0) return su, sigma, sd, alpha_ratio def get_vpsde_step_RF(self, sigma:Tensor, sigma_next:Tensor, eta:float) -> Tuple[Tensor,Tensor,Tensor]: dt = sigma - sigma_next sigma_up = eta * sigma * dt**0.5 alpha_ratio = 1 - dt * (eta**2/4) * (1 + sigma) sigma_down = sigma_next - (eta/4)*sigma*(1-sigma)*(sigma - sigma_next) return sigma_up, sigma_down, alpha_ratio def linear_noise_init(self, y:Tensor, sigma_curr:Tensor, x_base:Optional[Tensor]=None, x_curr:Optional[Tensor]=None, mask:Optional[Tensor]=None) -> Tensor: y_noised = (self.sigma_max - sigma_curr) * y + sigma_curr * self.init_noise if x_curr is not None: x_curr = x_curr + sigma_curr * (self.init_noise - y) x_base = x_base + self.sigma * (self.init_noise - y) return y_noised, x_base, x_curr if mask is not None: 
y_noised = mask * y_noised + (1-mask) * y return y_noised def linear_noise_step(self, y:Tensor, sigma_curr:Optional[Tensor]=None, x_base:Optional[Tensor]=None, x_curr:Optional[Tensor]=None, brownian_sigma:Optional[Tensor]=None, brownian_sigma_next:Optional[Tensor]=None, mask:Optional[Tensor]=None) -> Tensor: if self.sigma_up_eta == 0 or self.sigma_next == 0: return y, x_base, x_curr sigma_curr = self.sub_sigma if sigma_curr is None else sigma_curr brownian_sigma = sigma_curr if brownian_sigma is None else brownian_sigma brownian_sigma_next = self.sigma_next.clone() if brownian_sigma_next is None else brownian_sigma_next if brownian_sigma == brownian_sigma_next: brownian_sigma_next *= 0.999 if brownian_sigma_next > brownian_sigma and not self.EO("disable_brownian_swap"): # should this really be done? brownian_sigma, brownian_sigma_next = brownian_sigma_next, brownian_sigma noise = self.noise_sampler(sigma=brownian_sigma, sigma_next=brownian_sigma_next) noise = normalize_zscore(noise, channelwise=True, inplace=True) y_noised = (self.sigma_max - sigma_curr) * y + sigma_curr * noise if x_curr is not None: x_curr = x_curr + sigma_curr * (noise - y) x_base = x_base + self.sigma * (noise - y) return y_noised, x_base, x_curr if mask is not None: y_noised = mask * y_noised + (1-mask) * y return y_noised def linear_noise_substep(self, y:Tensor, sigma_curr:Optional[Tensor]=None, x_base:Optional[Tensor]=None, x_curr:Optional[Tensor]=None, brownian_sigma:Optional[Tensor]=None, brownian_sigma_next:Optional[Tensor]=None, mask:Optional[Tensor]=None) -> Tensor: if self.sub_sigma_up_eta == 0 or self.sub_sigma_next == 0: return y, x_base, x_curr sigma_curr = self.sub_sigma if sigma_curr is None else sigma_curr brownian_sigma = sigma_curr if brownian_sigma is None else brownian_sigma brownian_sigma_next = self.sub_sigma_next.clone() if brownian_sigma_next is None else brownian_sigma_next if brownian_sigma == brownian_sigma_next: brownian_sigma_next *= 0.999 if brownian_sigma_next > 
brownian_sigma and not self.EO("disable_brownian_swap"): # should this really be done? brownian_sigma, brownian_sigma_next = brownian_sigma_next, brownian_sigma noise = self.noise_sampler2(sigma=brownian_sigma, sigma_next=brownian_sigma_next) noise = normalize_zscore(noise, channelwise=True, inplace=True) y_noised = (self.sigma_max - sigma_curr) * y + sigma_curr * noise if x_curr is not None: x_curr = x_curr + sigma_curr * (noise - y) x_base = x_base + self.sigma * (noise - y) return y_noised, x_base, x_curr if mask is not None: y_noised = mask * y_noised + (1-mask) * y return y_noised def swap_noise_step(self, x_0:Tensor, x_next:Tensor, brownian_sigma:Optional[Tensor]=None, brownian_sigma_next:Optional[Tensor]=None, mask:Optional[Tensor]=None) -> Tensor: if self.sigma_up_eta == 0 or self.sigma_next == 0: return x_next brownian_sigma = self.sigma.clone() if brownian_sigma is None else brownian_sigma brownian_sigma_next = self.sigma_next.clone() if brownian_sigma_next is None else brownian_sigma_next if brownian_sigma == brownian_sigma_next: brownian_sigma_next *= 0.999 eps_next = (x_0 - x_next) / (self.sigma - self.sigma_next) denoised_next = x_0 - self.sigma * eps_next if brownian_sigma_next > brownian_sigma and not self.EO("disable_brownian_swap"): # should this really be done? 
brownian_sigma, brownian_sigma_next = brownian_sigma_next, brownian_sigma noise = self.noise_sampler(sigma=brownian_sigma, sigma_next=brownian_sigma_next) noise = normalize_zscore(noise, channelwise=True, inplace=True) x_noised = self.alpha_ratio_eta * (denoised_next + self.sigma_down_eta * eps_next) + self.sigma_up_eta * noise * self.s_noise if mask is not None: x = mask * x_noised + (1-mask) * x_next else: x = x_noised return x def swap_noise_substep(self, x_0:Tensor, x_next:Tensor, brownian_sigma:Optional[Tensor]=None, brownian_sigma_next:Optional[Tensor]=None, mask:Optional[Tensor]=None, guide:Optional[Tensor]=None) -> Tensor: if self.sub_sigma_up_eta == 0 or self.sub_sigma_next == 0: return x_next brownian_sigma = self.sub_sigma.clone() if brownian_sigma is None else brownian_sigma brownian_sigma_next = self.sub_sigma_next.clone() if brownian_sigma_next is None else brownian_sigma_next if brownian_sigma == brownian_sigma_next: brownian_sigma_next *= 0.999 eps_next = (x_0 - x_next) / (self.sigma - self.sub_sigma_next) denoised_next = x_0 - self.sigma * eps_next if brownian_sigma_next > brownian_sigma and not self.EO("disable_brownian_swap"): # should this really be done? 
brownian_sigma, brownian_sigma_next = brownian_sigma_next, brownian_sigma noise = self.noise_sampler2(sigma=brownian_sigma, sigma_next=brownian_sigma_next) noise = normalize_zscore(noise, channelwise=True, inplace=True) x_noised = self.sub_alpha_ratio_eta * (denoised_next + self.sub_sigma_down_eta * eps_next) + self.sub_sigma_up_eta * noise * self.s_noise_substep if mask is not None: x = mask * x_noised + (1-mask) * x_next else: x = x_noised return x def swap_noise_inv_substep(self, x_0:Tensor, x_next:Tensor, eta_substep:float, row:int, row_offset_multistep_stages:int, brownian_sigma:Optional[Tensor]=None, brownian_sigma_next:Optional[Tensor]=None, mask:Optional[Tensor]=None, guide:Optional[Tensor]=None) -> Tensor: if self.sub_sigma_up_eta == 0 or self.sub_sigma_next == 0: return x_next brownian_sigma = self.sub_sigma.clone() if brownian_sigma is None else brownian_sigma brownian_sigma_next = self.sub_sigma_next.clone() if brownian_sigma_next is None else brownian_sigma_next if brownian_sigma == brownian_sigma_next: brownian_sigma_next *= 0.999 eps_next = (x_0 - x_next) / ((1-self.sigma) - (1-self.sub_sigma_next)) denoised_next = x_0 - (1-self.sigma) * eps_next if brownian_sigma_next > brownian_sigma and not self.EO("disable_brownian_swap"): # should this really be done? 
brownian_sigma, brownian_sigma_next = brownian_sigma_next, brownian_sigma noise = self.noise_sampler2(sigma=brownian_sigma, sigma_next=brownian_sigma_next) noise = normalize_zscore(noise, channelwise=True, inplace=True) sub_sigma_up, sub_sigma, sub_sigma_down, sub_alpha_ratio = self.get_sde_substep(sigma = 1-self.s_[row], sigma_next = 1-self.s_[row_offset_multistep_stages], eta = eta_substep, noise_mode_override = self.noise_mode_sde_substep, DOWN = self.DOWN_SUBSTEP) x_noised = sub_alpha_ratio * (denoised_next + sub_sigma_down * eps_next) + sub_sigma_up * noise * self.s_noise_substep if mask is not None: x = mask * x_noised + (1-mask) * x_next else: x = x_noised return x def swap_noise(self, x_0 :Tensor, x_next :Tensor, sigma_0 :Tensor, sigma :Tensor, sigma_next :Tensor, sigma_down :Tensor, sigma_up :Tensor, alpha_ratio :Tensor, s_noise :float, SUBSTEP :bool = False, brownian_sigma :Optional[Tensor] = None, brownian_sigma_next :Optional[Tensor] = None, ) -> Tensor: if sigma_up == 0: return x_next if brownian_sigma is None: brownian_sigma = sigma.clone() if brownian_sigma_next is None: brownian_sigma_next = sigma_next.clone() if sigma_next == 0: return x_next if brownian_sigma == brownian_sigma_next: brownian_sigma_next *= 0.999 eps_next = (x_0 - x_next) / (sigma_0 - sigma_next) denoised_next = x_0 - sigma_0 * eps_next if brownian_sigma_next > brownian_sigma: s_tmp = brownian_sigma brownian_sigma = brownian_sigma_next brownian_sigma_next = s_tmp if not SUBSTEP: noise = self.noise_sampler(sigma=brownian_sigma, sigma_next=brownian_sigma_next) else: noise = self.noise_sampler2(sigma=brownian_sigma, sigma_next=brownian_sigma_next) noise = normalize_zscore(noise, channelwise=True, inplace=True) x = alpha_ratio * (denoised_next + sigma_down * eps_next) + sigma_up * noise * s_noise return x # not used. WARNING: some parameters have a different order than swap_noise! 
def add_noise_pre(self, x_0 :Tensor, x :Tensor, sigma_up :Tensor, sigma_0 :Tensor, sigma :Tensor, sigma_next :Tensor, real_sigma_down :Tensor, alpha_ratio :Tensor, s_noise :float, noise_mode :str, SDE_NOISE_EXTERNAL :bool = False, sde_noise_t :Optional[Tensor] = None, SUBSTEP :bool = False, ) -> Tensor: if not self.CONST and noise_mode == "hard_sq": if self.LOCK_H_SCALE: x = self.swap_noise(x_0 = x_0, x = x, sigma = sigma, sigma_0 = sigma_0, sigma_next = sigma_next, real_sigma_down = real_sigma_down, sigma_up = sigma_up, alpha_ratio = alpha_ratio, s_noise = s_noise, SUBSTEP = SUBSTEP, ) else: x = self.add_noise( x = x, sigma_up = sigma_up, sigma = sigma, sigma_next = sigma_next, alpha_ratio = alpha_ratio, s_noise = s_noise, SDE_NOISE_EXTERNAL = SDE_NOISE_EXTERNAL, sde_noise_t = sde_noise_t, SUBSTEP = SUBSTEP, ) return x # only used for handle_tiled_etc_noise_steps() in rk_guide_func_beta.py def add_noise_post(self, x_0 :Tensor, x :Tensor, sigma_up :Tensor, sigma_0 :Tensor, sigma :Tensor, sigma_next :Tensor, real_sigma_down :Tensor, alpha_ratio :Tensor, s_noise :float, noise_mode :str, SDE_NOISE_EXTERNAL :bool = False, sde_noise_t :Optional[Tensor] = None, SUBSTEP :bool = False, ) -> Tensor: if self.CONST or (not self.CONST and noise_mode != "hard_sq"): if self.LOCK_H_SCALE: x = self.swap_noise(x_0 = x_0, x = x, sigma = sigma, sigma_0 = sigma_0, sigma_next = sigma_next, real_sigma_down = real_sigma_down, sigma_up = sigma_up, alpha_ratio = alpha_ratio, s_noise = s_noise, SUBSTEP = SUBSTEP, ) else: x = self.add_noise( x = x, sigma_up = sigma_up, sigma = sigma, sigma_next = sigma_next, alpha_ratio = alpha_ratio, s_noise = s_noise, SDE_NOISE_EXTERNAL = SDE_NOISE_EXTERNAL, sde_noise_t = sde_noise_t, SUBSTEP = SUBSTEP, ) return x def add_noise(self, x :Tensor, sigma_up :Tensor, sigma :Tensor, sigma_next :Tensor, alpha_ratio :Tensor, s_noise :float, SDE_NOISE_EXTERNAL :bool = False, sde_noise_t :Optional[Tensor] = None, SUBSTEP :bool = False, ) -> Tensor: if sigma_next > 
0.0 and sigma_up > 0.0: if sigma_next > sigma: sigma, sigma_next = sigma_next, sigma if sigma == sigma_next: sigma_next = sigma * 0.9999 if not SUBSTEP: noise = self.noise_sampler (sigma=sigma, sigma_next=sigma_next) else: noise = self.noise_sampler2(sigma=sigma, sigma_next=sigma_next) #noise_ortho = get_orthogonal(noise, x) #noise_ortho = noise_ortho / noise_ortho.std()model, noise = normalize_zscore(noise, channelwise=True, inplace=True) if SDE_NOISE_EXTERNAL: noise = (1-s_noise) * noise + s_noise * sde_noise_t x_next = alpha_ratio * x + noise * sigma_up * s_noise return x_next else: return x def sigma_from_to(self, x_0 : Tensor, x_down : Tensor, sigma : Tensor, sigma_down : Tensor, sigma_next : Tensor) -> Tensor: #sigma, sigma_from, sigma_to eps = (x_0 - x_down) / (sigma - sigma_down) denoised = x_0 - sigma * eps x_next = denoised + sigma_next * eps # VESDE vs VPSDE equiv.? return x_next def rebound_overshoot_step(self, x_0:Tensor, x:Tensor) -> Tensor: eps = (x_0 - x) / (self.sigma - self.sigma_down) denoised = x_0 - self.sigma * eps x = denoised + self.sigma_next * eps return x def rebound_overshoot_substep(self, x_0:Tensor, x:Tensor) -> Tensor: if self.sigma - self.sub_sigma_down > 0: sub_eps = (x_0 - x) / (self.sigma - self.sub_sigma_down) sub_denoised = x_0 - self.sigma * sub_eps x = sub_denoised + self.sub_sigma_next * sub_eps return x def prepare_sigmas(self, sigmas : Tensor, sigmas_override : Tensor, d_noise : float, d_noise_start_step : int, sampler_mode : str) -> Tuple[Tensor,bool]: #SIGMA_MIN = torch.full_like(self.sigma_min, 0.00227896) if self.sigma_min < 0.00227896 else self.sigma_min # prevent black image with unsampling flux, which has a sigma_min of 0.0002 SIGMA_MIN = self.sigma_min #torch.full_like(self.sigma_min, max(0.01, self.sigma_min.item())) if sigmas_override is not None: sigmas = sigmas_override.clone().to(sigmas.device).to(sigmas.dtype) if d_noise_start_step == 0: sigmas = sigmas.clone() * d_noise UNSAMPLE_FROM_ZERO = False if sigmas[0] 
== 0.0: #remove padding used to prevent comfy from adding noise to the latent (for unsampling, etc.) UNSAMPLE = True if sigmas[-1] == 0.0: UNSAMPLE_FROM_ZERO = True #sigmas = sigmas[1:-1] # was cleaving off 1.0 at the end when restart looping sigmas = sigmas[1:] if sigmas[-1] == 0.0: sigmas = sigmas[:-1] else: UNSAMPLE = False if hasattr(self.model, "sigmas"): self.model.sigmas = sigmas if sampler_mode == "standard": UNSAMPLE = False consecutive_duplicate_mask = torch.cat((torch.tensor([True], device=sigmas.device), torch.diff(sigmas) != 0)) sigmas = sigmas[consecutive_duplicate_mask] if sigmas[-1] == 0: if sigmas[-2] < SIGMA_MIN: sigmas[-2] = SIGMA_MIN elif (sigmas[-2] - SIGMA_MIN).abs() > 1e-4: sigmas = torch.cat((sigmas[:-1], SIGMA_MIN.unsqueeze(0), sigmas[-1:])) elif UNSAMPLE_FROM_ZERO and not torch.isclose(sigmas[0], SIGMA_MIN): sigmas = torch.cat([SIGMA_MIN.unsqueeze(0), sigmas]) self.sigmas = sigmas self.UNSAMPLE = UNSAMPLE self.d_noise = d_noise self.sampler_mode = sampler_mode return sigmas, UNSAMPLE def extract_latent_swap_noise(self, x:Tensor, x_noise_swapped:Tensor, sigma:Tensor, old_noise:Tensor) -> Tensor: return (x - x_noise_swapped) / sigma + old_noise def update_latent_swap_noise(self, x:Tensor, sigma:Tensor, old_noise:Tensor, new_noise:Tensor) -> Tensor: return x + sigma * (new_noise - old_noise) ================================================ FILE: beta/rk_sampler_beta.py ================================================ import torch from torch import Tensor import torch.nn.functional as F from tqdm.auto import trange import gc from typing import Optional, Callable, Tuple, List, Dict, Any, Union import math import copy from comfy.model_sampling import EPS import comfy from ..res4lyf import RESplain from ..helper import ExtraOptions, FrameWeightsManager from ..latents import lagrange_interpolation, get_collinear, get_orthogonal, get_cosine_similarity, get_pearson_similarity, get_slerp_weight_for_cossim, get_slerp_ratio, slerp_tensor, 
def init_implicit_sampling(
        RK             : RK_Method_Beta,
        x_0            : Tensor,
        x_             : Tensor,
        eps_           : Tensor,
        eps_prev_      : Tensor,
        data_          : Tensor,
        eps            : Tensor,
        denoised       : Tensor,
        denoised_prev2 : Tensor,
        step           : int,
        sigmas         : Tensor,
        h              : Tensor,
        s_             : Tensor,
        EO             : ExtraOptions,
        SYNC_GUIDE_ACTIVE,
        ):
    """Seed the per-row ``eps_`` / ``data_`` buffers before an implicit RK step.

    The extra options queried through ``EO`` choose how the first guess is
    produced: reusing or extrapolating the previous ``denoised`` results
    (the ``*_skip_model_call_at_start`` options), a Lagrange-interpolated
    guess for ``x_[0]`` (``implicit_lagrange_init``), or a model call
    ``RK(x_[0], sigma, x_0, sigma)`` when none of those apply. The reuse
    options only take effect when ``denoised.sum() + eps.sum() != 0``.

    Returns:
        ``(x_, eps_, data_)`` after ``RK.newton_iter(..., "init", ...)``.

    NOTE(review): the nesting below was reconstructed from whitespace-collapsed
    source; confirm the placement of the ``_0_only`` block, the row-broadcast
    block and the ``newton_iter`` call against the upstream file.
    """

    sigma = sigmas[step]

    if EO("implicit_skip_model_call_at_start") and denoised.sum() + eps.sum() != 0:
        if denoised_prev2.sum() == 0:
            # NOTE(review): this first eps_[0] store is overwritten two lines below.
            eps_ [0] = eps.clone()
            data_[0] = denoised.clone()
            eps_ [0] = RK.get_epsilon_anchored(x_0, denoised, sigma)
        else:
            # Extrapolate the denoised estimate along (denoised - denoised_prev2).
            sratio = sigma - s_[0]
            data_[0] = denoised + sratio * (denoised - denoised_prev2)

    elif EO("implicit_full_skip_model_call_at_start") and denoised.sum() + eps.sum() != 0:
        if denoised_prev2.sum() == 0:
            eps_ [0] = eps.clone()
            data_[0] = denoised.clone()
            eps_ [0] = RK.get_epsilon_anchored(x_0, denoised, sigma)
        else:
            # Same extrapolation, applied to every row with a matching epsilon.
            for r in range(RK.rows):
                sratio = sigma - s_[r]
                data_[r] = denoised + sratio * (denoised - denoised_prev2)
                eps_ [r] = RK.get_epsilon_anchored(x_0, data_[r], s_[r])

    elif EO("implicit_lagrange_skip_model_call_at_start") and denoised.sum() + eps.sum() != 0:
        if denoised_prev2.sum() == 0:
            eps_ [0] = eps.clone()
            data_[0] = denoised.clone()
            eps_ [0] = RK.get_epsilon_anchored(x_0, denoised, sigma)
        else:
            sigma_prev    = sigmas[step-1]
            h_prev        = sigma - sigma_prev
            w             = h / h_prev          # current step size relative to the previous one
            substeps_prev = len(RK.C[:-1])      # assigned but not read in this branch

            for r in range(RK.rows):
                sratio = sigma - s_[r]          # assigned but not read in this branch
                data_[r] = lagrange_interpolation([0,1], [denoised_prev2, denoised], 1 + w*RK.C[r]).squeeze(0) + denoised_prev2 - denoised
                eps_ [r] = RK.get_epsilon_anchored(x_0, data_[r], s_[r])

        if EO("implicit_lagrange_skip_model_call_at_start_0_only"):
            # Overwrite every row with a rescaled copy of row 0.
            for r in range(RK.rows):
                eps_ [r] = eps_ [0].clone() * s_[0] / s_[r]
                data_[r] = denoised.clone()

    elif EO("implicit_lagrange_init") and denoised.sum() + eps.sum() != 0:
        sigma_prev    = sigmas[step-1]
        h_prev        = sigma - sigma_prev
        w             = h / h_prev
        substeps_prev = len(RK.C[:-1])

        z_prev_ = eps_.clone()
        for r in range (substeps_prev):
            z_prev_[r] = h * RK.zum(r, eps_) # u,v not implemented for lagrange guess for implicit
        zi_1 = lagrange_interpolation(RK.C[:-1], z_prev_[:substeps_prev], RK.C[0]).squeeze(0) # + x_prev - x_0
        x_[0] = x_0 + zi_1

    else:
        eps_[0], data_[0] = RK(x_[0], sigma, x_0, sigma)

    # Broadcast row 0 to all rows unless one of the listed options is active.
    if not EO(("implicit_lagrange_init", "radaucycle", "implicit_full_skip_model_call_at_start", "implicit_lagrange_skip_model_call_at_start")):
        for r in range(RK.rows):
            eps_ [r] = eps_ [0].clone() * sigma / s_[r]
            data_[r] = data_[0].clone()

    x_, eps_ = RK.newton_iter(x_0, x_, eps_, eps_prev_, data_, s_, 0, h, sigmas, step, "init", SYNC_GUIDE_ACTIVE)

    return x_, eps_, data_
s_noise : float = 1.0, s_noise_substep : float = 1.0, d_noise : float = 1.0, d_noise_start_step : int = 0, d_noise_inv : float = 1.0, d_noise_inv_start_step : int = 0, alpha : float = -1.0, alpha_substep : float = -1.0, k : float = 1.0, k_substep : float = 1.0, momentum : float = 0.0, overshoot_mode : str = "hard", overshoot_mode_substep : str = "hard", overshoot : float = 0.0, overshoot_substep : float = 0.0, implicit_type : str = "predictor-corrector", implicit_type_substeps : str = "predictor-corrector", implicit_steps_diag : int = 0, implicit_steps_full : int = 0, etas : Optional[Tensor] = None, etas_substep : Optional[Tensor] = None, s_noises : Optional[Tensor] = None, s_noises_substep : Optional[Tensor] = None, momentums : Optional[Tensor] = None, regional_conditioning_weights : Optional[Tensor] = None, regional_conditioning_floors : Optional[Tensor] = None, narcissism_start_step : int = 0, narcissism_end_step : int = 5, LGW_MASK_RESCALE_MIN : bool = True, guides : Optional[Tuple[Any, ...]] = None, epsilon_scales : Optional[Tensor] = None, frame_weights_mgr : Optional[FrameWeightsManager] = None, sde_noise : list [Tensor] = [], noise_seed : int = -1, noise_initial : Optional[Tensor] = None, image_initial : Optional[Tensor] = None, cfgpp : float = 0.0, cfg_cw : float = 1.0, BONGMATH : bool = True, unsample_bongmath = None, state_info : Optional[dict[str, Any]] = None, state_info_out : Optional[dict[str, Any]] = None, rk_swap_type : str = "", rk_swap_step : int = MAX_STEPS, rk_swap_threshold : float = 0.0, rk_swap_print : bool = False, steps_to_run : int = -1, start_at_step : int = -1, tile_sizes : Optional[List[Tuple[int,int]]] = None, flow_sync_eps : float = 0.0, sde_mask : Optional[Tensor] = None, batch_num : int = 0, extra_options : str = "", AttnMask = None, RegContext = None, RegParam = None, AttnMask_neg = None, RegContext_neg = None, RegParam_neg = None, ): if sampler_mode == "NULL": return x EO = ExtraOptions(extra_options) default_dtype = 
EO("default_dtype", torch.float64) extra_args = {} if extra_args is None else extra_args model_device = model.inner_model.inner_model.device #x.device work_device = 'cpu' if EO("work_device_cpu") else model_device state_info = {} if state_info is None else state_info state_info_out = {} if state_info_out is None else state_info_out VE_MODEL = isinstance(model.inner_model.inner_model.model_sampling, EPS) RENOISE = False if 'raw_x' in state_info and sampler_mode in {"resample", "unsample"}: if x.shape == state_info['raw_x'].shape: x = state_info['raw_x'].to(work_device) #clone() else: denoised = comfy.utils.bislerp(state_info['denoised'], x.shape[-1], x.shape[-2]) x = denoised.to(x) RENOISE = True RESplain("Continuing from raw latent from previous sampler.", debug=False) start_step = 0 if 'end_step' in state_info and (sampler_mode == "resample" or sampler_mode == "unsample"): if state_info['completed'] != True and state_info['end_step'] != 0 and state_info['end_step'] != -1 and state_info['end_step'] < len(state_info['sigmas'])-1 : #incomplete run in previous sampler node if state_info['sampler_mode'] in {"standard","resample"} and sampler_mode == "unsample" and sigmas[2] < sigmas[1]: sigmas = torch.flip(state_info['sigmas'], dims=[0]) start_step = (len(sigmas)-1) - (state_info['end_step']) #-1) #removed -1 at the end here. correct? 
if state_info['sampler_mode'] == "unsample" and sampler_mode == "resample" and sigmas[2] > sigmas[1]: sigmas = torch.flip(state_info['sigmas'], dims=[0]) start_step = (len(sigmas)-1) - state_info['end_step'] #-1) elif state_info['sampler_mode'] == "unsample" and sampler_mode == "resample": start_step = 0 if state_info['sampler_mode'] in {"standard", "resample"} and sampler_mode == "resample": start_step = state_info['end_step'] if state_info['end_step'] != -1 else 0 if start_step > 0: sigmas = state_info['sigmas'].clone() if sde_mask is not None: from .rk_guide_func_beta import prepare_mask sde_mask, _ = prepare_mask(x, sde_mask, LGW_MASK_RESCALE_MIN) sde_mask = sde_mask.to(x.device).to(x.dtype) x = x .to(dtype=default_dtype, device=work_device) sigmas = sigmas.to(dtype=default_dtype, device=work_device) c1 = EO("c1" , c1) c2 = EO("c2" , c2) c3 = EO("c3" , c3) cfg_cw = EO("cfg_cw" , cfg_cw) noise_seed = EO("noise_seed" , noise_seed) noise_seed_substep = EO("noise_seed_substep" , noise_seed + MAX_STEPS) pseudoimplicit_row_weights = EO("pseudoimplicit_row_weights" , [1. for _ in range(100)]) pseudoimplicit_step_weights = EO("pseudoimplicit_step_weights", [1. 
for _ in range(max(implicit_steps_diag, implicit_steps_full)+1)]) noise_scaling_cycles = EO("noise_scaling_cycles", 1) noise_boost_step = EO("noise_boost_step", 0.0) noise_boost_substep = EO("noise_boost_substep", 0.0) # SETUP SAMPLER if implicit_sampler_name not in ("use_explicit", "none"): rk_type = implicit_sampler_name RESplain("rk_type:", rk_type) if implicit_sampler_name == "none": implicit_steps_diag = implicit_steps_full = 0 RK = RK_Method_Beta.create(model, rk_type, VE_MODEL, noise_anchor, noise_boost_normalize, model_device=model_device, work_device=work_device, dtype=default_dtype, extra_options=extra_options) RK.extra_args = RK.init_cfg_channelwise(x, cfg_cw, **extra_args) RK.tile_sizes = tile_sizes RK.extra_args['model_options']['transformer_options']['regional_conditioning_weight'] = 0.0 RK.extra_args['model_options']['transformer_options']['regional_conditioning_floor'] = 0.0 RK.unsample_bongmath = BONGMATH if unsample_bongmath is None else unsample_bongmath # allow turning off bongmath for unsampling with cycles # SETUP SIGMAS sigmas_orig = sigmas.clone() NS = RK_NoiseSampler(RK, model, device=work_device, dtype=default_dtype, extra_options=extra_options) sigmas, UNSAMPLE = NS.prepare_sigmas(sigmas, sigmas_override, d_noise, d_noise_start_step, sampler_mode) if UNSAMPLE and sigmas_orig[0] == 0.0 and sigmas_orig[0] != sigmas[0] and sigmas[1] < sigmas[2]: sigmas = torch.cat([torch.full_like(sigmas[0], 0.0).unsqueeze(0), sigmas]) if start_step == 0: start_step = 1 else: start_step -= 1 if sampler_mode in {"resample", "unsample"}: state_info_sigma_next = state_info.get('sigma_next', -1) state_info_start_step = (sigmas == state_info_sigma_next).nonzero().flatten() if state_info_start_step.shape[0] > 0: start_step = state_info_start_step.item() start_step = start_at_step if start_at_step >= 0 else start_step SDE_NOISE_EXTERNAL = False if sde_noise is not None: if len(sde_noise) > 0 and sigmas[1] > sigmas[2]: SDE_NOISE_EXTERNAL = True sigma_up_total = 
torch.zeros_like(sigmas[0]) for i in range(len(sde_noise)-1): sigma_up_total += sigmas[i+1] etas = torch.full_like(sigmas, eta / sigma_up_total) if 'last_rng' in state_info and sampler_mode in {"resample", "unsample"}: last_rng = state_info['last_rng'].clone() last_rng_substep = state_info['last_rng_substep'].clone() else: last_rng = None last_rng_substep = None NS.init_noise_samplers(x, noise_seed, noise_seed_substep, noise_sampler_type, noise_sampler_type_substep, noise_mode_sde, noise_mode_sde_substep, \ overshoot_mode, overshoot_mode_substep, noise_boost_step, noise_boost_substep, alpha, alpha_substep, k, k_substep, \ last_rng=last_rng, last_rng_substep=last_rng_substep,) data_ = None eps_ = None eps = torch.zeros_like(x, dtype=default_dtype, device=work_device) denoised = torch.zeros_like(x, dtype=default_dtype, device=work_device) denoised_prev = torch.zeros_like(x, dtype=default_dtype, device=work_device) denoised_prev2 = torch.zeros_like(x, dtype=default_dtype, device=work_device) x_ = None eps_prev_ = None denoised_data_prev = None denoised_data_prev2 = None h_prev = None eps_y2x_ = None eps_x2y_ = None eps_y_ = None eps_prev_y_ = None data_y_ = None yt_ = None yt_0 = None eps_yt_ = None eps_x_ = None data_y_ = None data_x_ = None z_ = None # for tracking residual noise for model scattersort/synchronized diffusion y0_bongflow = state_info.get('y0_bongflow') y0_bongflow_orig = state_info.get('y0_bongflow_orig') noise_bongflow = state_info.get('noise_bongflow') y0_standard_guide = state_info.get('y0_standard_guide') y0_inv_standard_guide = state_info.get('y0_inv_standard_guide') data_prev_y_ = state_info.get('data_prev_y_') data_prev_x_ = state_info.get('data_prev_x_') data_prev_x2y_ = state_info.get('data_prev_x2y_') # BEGIN SAMPLING LOOP num_steps = len(sigmas[start_step:])-2 if sigmas[-1] == 0 else len(sigmas[start_step:])-1 if steps_to_run >= 0: current_steps = min(num_steps, steps_to_run) num_steps = start_step + min(num_steps, steps_to_run) else: 
current_steps = num_steps num_steps = start_step + num_steps #current_steps = current_steps + 1 if sigmas[-1] == 0 and steps_to_run < 0 and UNSAMPLE else current_steps INIT_SAMPLE_LOOP = True step = start_step sigma, sigma_next, data_prev_ = None, None, None if (num_steps-1) == len(sigmas)-2 and sigmas[-1] == 0 and sigmas[-2] == NS.sigma_min: progress_bar = trange(current_steps+1, disable=disable) else: progress_bar = trange(current_steps, disable=disable) # SETUP GUIDES LG = LatentGuide(model, sigmas, UNSAMPLE, VE_MODEL, LGW_MASK_RESCALE_MIN, extra_options, device=work_device, dtype=default_dtype, frame_weights_mgr=frame_weights_mgr) guide_inversion_y0 = state_info.get('guide_inversion_y0') guide_inversion_y0_inv = state_info.get('guide_inversion_y0_inv') x = LG.init_guides(x, RK.IMPLICIT, guides, NS.noise_sampler, batch_num, sigmas[step], guide_inversion_y0, guide_inversion_y0_inv) LG.y0 = y0_standard_guide if y0_standard_guide is not None else LG.y0 LG.y0_inv = y0_inv_standard_guide if y0_inv_standard_guide is not None else LG.y0_inv if (LG.mask != 1.0).any() and ((LG.y0 == 0).all() or (LG.y0_inv == 0).all()) : # and not LG.guide_mode.startswith("flow"): # (LG.y0.sum() == 0 or LG.y0_inv.sum() == 0): SKIP_PSEUDO = True RESplain("skipping pseudo...") if LG.y0 .sum() == 0: SKIP_PSEUDO_Y = "y0" elif LG.y0_inv.sum() == 0: SKIP_PSEUDO_Y = "y0_inv" else: SKIP_PSEUDO = False if guides is not None and guides.get('guide_mode', '') != "inversion" or sampler_mode != "unsample": #do not set denoised_prev to noise guide with inversion! 
if LG.y0.sum() != 0 and LG.y0_inv.sum() != 0: denoised_prev = LG.mask * LG.y0 + (1-LG.mask) * LG.y0_inv elif LG.y0.sum() != 0: denoised_prev = LG.y0 elif LG.y0_inv.sum() != 0: denoised_prev = LG.y0_inv data_cached = None if EO("pseudo_mix_strength"): orig_y0 = LG.y0.clone() orig_y0_inv = LG.y0_inv.clone() #gc.collect() BASE_STARTED = False INV_STARTED = False FLOW_STARTED = False FLOW_STOPPED = False noise_xt, noise_yt = None, None FLOW_RESUMED = False if state_info.get('FLOW_STARTED', False) and not state_info.get('FLOW_STOPPED', False): FLOW_RESUMED = True y0 = state_info['y0'].to(work_device) data_cached = state_info['data_cached'].to(work_device) data_x_prev_ = state_info['data_x_prev_'].to(work_device) if noise_initial is not None: x_init = noise_initial.to(x) RK.update_transformer_options({'x_init': x_init._copy() if hasattr(x_init, 'is_nested') and x_init.is_nested else x_init.clone()}) #progress_bar = trange(len(sigmas)-1-start_step, disable=disable) #if EO("eps_adain") or EO("x_init_to_model"): if AttnMask is not None: RK.update_transformer_options({'AttnMask' : AttnMask}) RK.update_transformer_options({'RegContext': RegContext}) if AttnMask_neg is not None: RK.update_transformer_options({'AttnMask_neg' : AttnMask_neg}) RK.update_transformer_options({'RegContext_neg': RegContext_neg}) if EO("y0_to_transformer_options"): RK.update_transformer_options({'y0': LG.y0.clone()}) if EO("y0_inv_to_transformer_options"): RK.update_transformer_options({'y0_inv': LG.y0_inv.clone()}) for block in model.inner_model.inner_model.diffusion_model.double_stream_blocks: for attr in ["txt_q_cache", "txt_k_cache", "txt_v_cache", "img_q_cache", "img_k_cache", "img_v_cache"]: if hasattr(block.block.attn1, attr): delattr(block.block.attn1, attr) for block in model.inner_model.inner_model.diffusion_model.single_stream_blocks: block.block.attn1.EO = EO for attr in ["txt_q_cache", "txt_k_cache", "txt_v_cache", "img_q_cache", "img_k_cache", "img_v_cache"]: if 
hasattr(block.block.attn1, attr): delattr(block.block.attn1, attr) RK.update_transformer_options({'ExtraOptions': copy.deepcopy(EO)}) if EO("update_cross_attn"): update_cross_attn = { 'src_llama_start': EO('src_llama_start', 0), 'src_llama_end': EO('src_llama_end', 0), 'src_t5_start': EO('src_t5_start', 0), 'src_t5_end': EO('src_t5_end', 0), 'tgt_llama_start': EO('tgt_llama_start', 0), 'tgt_llama_end': EO('tgt_llama_end', 0), 'tgt_t5_start': EO('tgt_t5_start', 0), 'tgt_t5_end': EO('tgt_t5_end', 0), 'skip_cross_attn': EO('skip_cross_attn', False), 'update_q': EO('update_q', False), 'update_k': EO('update_k', True), 'update_v': EO('update_v', True), 'lamb': EO('lamb', 0.01), 'erase': EO('erase', 10.0), } RK.update_transformer_options({'update_cross_attn': update_cross_attn}) else: RK.update_transformer_options({'update_cross_attn': None}) if LG.HAS_LATENT_GUIDE_ADAIN: RK.update_transformer_options({'blocks_adain_cache': []}) if LG.HAS_LATENT_GUIDE_ATTNINJ: RK.update_transformer_options({'blocks_attninj_cache': []}) if LG.HAS_LATENT_GUIDE_STYLE_POS: if LG.HAS_LATENT_GUIDE and y0_standard_guide is None: y0_cache = LG.y0.clone().cpu() RK.update_transformer_options({'y0_standard_guide': LG.y0}) sigmas_scheduled = sigmas.clone() # store for return in state_info_out if EO("sigma_restarts"): sigma_restarts = 1 + EO("sigma_restarts", 0) sigmas = sigmas[step:num_steps+1].repeat(sigma_restarts) step = 0 num_steps = 2 * sigma_restarts - 1 if RENOISE: # TODO: adapt for noise inversion somehow if VE_MODEL: x = x + sigmas[step] * NS.noise_sampler(sigma=sigmas[step], sigma_next=sigmas[step+1]) else: x = (1 - sigmas[step]) * x + sigmas[step] * NS.noise_sampler(sigma=sigmas[step], sigma_next=sigmas[step+1]) LG.ADAIN_NOISE_MODE = "" StyleMMDiT = None if guides is not None: RK.update_transformer_options({"freqsep_lowpass_method": guides.get("freqsep_lowpass_method")}) RK.update_transformer_options({"freqsep_sigma": guides.get("freqsep_sigma")}) 
RK.update_transformer_options({"freqsep_kernel_size": guides.get("freqsep_kernel_size")}) RK.update_transformer_options({"freqsep_inner_kernel_size": guides.get("freqsep_inner_kernel_size")}) RK.update_transformer_options({"freqsep_stride": guides.get("freqsep_stride")}) RK.update_transformer_options({"freqsep_lowpass_weight": guides.get("freqsep_lowpass_weight")}) RK.update_transformer_options({"freqsep_highpass_weight":guides.get("freqsep_highpass_weight")}) RK.update_transformer_options({"freqsep_mask": guides.get("freqsep_mask")}) StyleMMDiT = guides.get('StyleMMDiT') if StyleMMDiT is not None: StyleMMDiT.init_guides(model) LG.ADAIN_NOISE_MODE = StyleMMDiT.noise_mode if EO("mycoshock"): StyleMMDiT.Retrojector = model.inner_model.inner_model.diffusion_model.Retrojector image_initial_shock = StyleMMDiT.apply_data_shock(image_initial.to(x)) if VE_MODEL: x = image_initial_shock.to(x) + sigmas[0] * noise_initial.to(x) else: x = (1 - sigmas[0]) * image_initial_shock.to(x) + sigmas[0] * noise_initial.to(x) RK.update_transformer_options({"model_sampling": model.inner_model.inner_model.model_sampling}) # BEGIN SAMPLING LOOP while step < num_steps: sigma, sigma_next = sigmas[step], sigmas[step+1] if sigma_next > sigma: step_sched = torch.where(torch.flip(sigmas, dims=[0]) == sigma)[0][0].item() else: step_sched = step SYNC_GUIDE_ACTIVE = LG.guide_mode.startswith("sync") and (LG.lgw[step_sched] != 0 or LG.lgw_inv[step_sched] != 0 or LG.lgw_sync[step_sched] != 0 or LG.lgw_sync_inv[step_sched] != 0) if StyleMMDiT is not None: RK.update_transformer_options({'StyleMMDiT': StyleMMDiT}) else: if LG.HAS_LATENT_GUIDE_ADAIN: if LG.lgw_adain[step_sched] == 0.0: RK.update_transformer_options({'y0_adain': None}) RK.update_transformer_options({'blocks_adain': {}}) RK.update_transformer_options({'sort_and_scatter': {}}) else: RK.update_transformer_options({'y0_adain': LG.y0_adain.clone()}) if 'blocks_adain_mmdit' in guides: blocks_adain = { "double_weights": [val * 
LG.lgw_adain[step_sched] for val in guides['blocks_adain_mmdit']['double_weights']], "single_weights": [val * LG.lgw_adain[step_sched] for val in guides['blocks_adain_mmdit']['single_weights']], "double_blocks" : guides['blocks_adain_mmdit']['double_blocks'], "single_blocks" : guides['blocks_adain_mmdit']['single_blocks'], } RK.update_transformer_options({'blocks_adain': blocks_adain}) RK.update_transformer_options({'sort_and_scatter': guides['sort_and_scatter']}) RK.update_transformer_options({'noise_mode_adain': guides['sort_and_scatter']['noise_mode']}) if LG.HAS_LATENT_GUIDE_ATTNINJ: if LG.lgw_attninj[step_sched] == 0.0: RK.update_transformer_options({'y0_attninj': None}) RK.update_transformer_options({'blocks_attninj' : {}}) RK.update_transformer_options({'blocks_attninj_qkv': {}}) else: RK.update_transformer_options({'y0_attninj': LG.y0_attninj.clone()}) if 'blocks_attninj_mmdit' in guides: blocks_attninj = { "double_weights": [val * LG.lgw_attninj[step_sched] for val in guides['blocks_attninj_mmdit']['double_weights']], "single_weights": [val * LG.lgw_attninj[step_sched] for val in guides['blocks_attninj_mmdit']['single_weights']], "double_blocks" : guides['blocks_attninj_mmdit']['double_blocks'], "single_blocks" : guides['blocks_attninj_mmdit']['single_blocks'], } RK.update_transformer_options({'blocks_attninj' : blocks_attninj}) RK.update_transformer_options({'blocks_attninj_qkv': guides['blocks_attninj_qkv']}) if LG.HAS_LATENT_GUIDE_STYLE_POS: if LG.lgw_style_pos[step_sched] == 0.0: RK.update_transformer_options({'y0_style_pos': None}) RK.update_transformer_options({'y0_style_pos_weight': 0.0}) RK.update_transformer_options({'y0_style_pos_synweight': 0.0}) RK.update_transformer_options({'y0_style_pos_mask': None}) else: RK.update_transformer_options({'y0_style_pos': LG.y0_style_pos.clone()}) RK.update_transformer_options({'y0_style_pos_weight': LG.lgw_style_pos[step_sched]}) RK.update_transformer_options({'y0_style_pos_synweight': 
guides['synweight_style_pos']}) RK.update_transformer_options({'y0_style_pos_mask': LG.mask_style_pos}) RK.update_transformer_options({'y0_style_pos_mask_edge': guides.get('mask_edge_style_pos')}) RK.update_transformer_options({'y0_style_method': guides['style_method']}) RK.update_transformer_options({'y0_style_tile_height': guides.get('style_tile_height')}) RK.update_transformer_options({'y0_style_tile_width': guides.get('style_tile_width')}) RK.update_transformer_options({'y0_style_tile_padding': guides.get('style_tile_padding')}) if EO("style_edge_width"): RK.update_transformer #if LG.HAS_LATENT_GUIDE: # y0_cache = LG.y0.clone().cpu() # RK.update_transformer_options({'y0_standard_guide': LG.y0}) if LG.HAS_LATENT_GUIDE_INV and y0_inv_standard_guide is None: y0_inv_cache = LG.y0_inv.clone().cpu() RK.update_transformer_options({'y0_inv_standard_guide': LG.y0_inv}) if LG.HAS_LATENT_GUIDE_STYLE_NEG: if LG.lgw_style_neg[step_sched] == 0.0: RK.update_transformer_options({'y0_style_neg': None}) RK.update_transformer_options({'y0_style_neg_weight': 0.0}) RK.update_transformer_options({'y0_style_neg_synweight': 0.0}) RK.update_transformer_options({'y0_style_neg_mask': None}) else: RK.update_transformer_options({'y0_style_neg': LG.y0_style_neg.clone()}) RK.update_transformer_options({'y0_style_neg_weight': LG.lgw_style_neg[step_sched]}) RK.update_transformer_options({'y0_style_neg_synweight': guides['synweight_style_neg']}) RK.update_transformer_options({'y0_style_neg_mask': LG.mask_style_neg}) RK.update_transformer_options({'y0_style_neg_mask_edge': guides.get('mask_edge_style_neg')}) RK.update_transformer_options({'y0_style_method': guides['style_method']}) RK.update_transformer_options({'y0_style_tile_height': guides.get('style_tile_height')}) RK.update_transformer_options({'y0_style_tile_width': guides.get('style_tile_width')}) RK.update_transformer_options({'y0_style_tile_padding': guides.get('style_tile_padding')}) if AttnMask_neg is not None: 
RK.update_transformer_options({'regional_conditioning_weight_neg': RegParam_neg.weights[step_sched]}) RK.update_transformer_options({'regional_conditioning_floor_neg': RegParam_neg.floors[step_sched]}) if AttnMask is not None: RK.update_transformer_options({'regional_conditioning_weight': RegParam.weights[step_sched]}) RK.update_transformer_options({'regional_conditioning_floor': RegParam.floors[step_sched]}) elif regional_conditioning_weights is not None: RK.extra_args['model_options']['transformer_options']['regional_conditioning_weight'] = regional_conditioning_weights[step_sched] RK.extra_args['model_options']['transformer_options']['regional_conditioning_floor'] = regional_conditioning_floors [step_sched] epsilon_scale = float(epsilon_scales [step_sched]) if epsilon_scales is not None else None eta = etas [step_sched].to(x) if etas is not None else eta eta_substep = etas_substep [step_sched].to(x) if etas_substep is not None else eta_substep s_noise = s_noises [step_sched].to(x) if s_noises is not None else s_noise s_noise_substep = s_noises_substep [step_sched].to(x) if s_noises_substep is not None else s_noise_substep noise_scaling_eta = noise_scaling_etas [step_sched].to(x) if noise_scaling_etas is not None else noise_scaling_eta noise_scaling_weight = noise_scaling_weights[step_sched].to(x) if noise_scaling_weights is not None else noise_scaling_weight NS.set_sde_step(sigma, sigma_next, eta, overshoot, s_noise) RK.set_coeff(rk_type, NS.h, c1, c2, c3, step, sigmas, NS.sigma_down) NS.set_substep_list(RK) if (noise_scaling_eta > 0 or noise_scaling_weight != 0) and noise_scaling_type != "model_d": if noise_scaling_type == "model_alpha": VP_OVERRIDE=True else: VP_OVERRIDE=None if noise_scaling_type in {"sampler", "model", "model_alpha"}: if noise_scaling_type == "model_alpha": sigma_divisor = NS.sigma_max else: sigma_divisor = 1.0 if RK.multistep_stages > 0: # hardcoded s_[1] for multistep samplers, which are never multistage lying_su, lying_sigma, lying_sd, 
lying_alpha_ratio = NS.get_sde_step(NS.s_[1]/sigma_divisor, NS.s_[0]/sigma_divisor, noise_scaling_eta, noise_scaling_mode, VP_OVERRIDE=VP_OVERRIDE) else: lying_su, lying_sigma, lying_sd, lying_alpha_ratio = NS.get_sde_step(sigma/sigma_divisor, NS.sigma_down/sigma_divisor, noise_scaling_eta, noise_scaling_mode, VP_OVERRIDE=VP_OVERRIDE) for _ in range(noise_scaling_cycles-1): lying_su, lying_sigma, lying_sd, lying_alpha_ratio = NS.get_sde_step(sigma/sigma_divisor, lying_sd/sigma_divisor, noise_scaling_eta, noise_scaling_mode, VP_OVERRIDE=VP_OVERRIDE) lying_s_ = NS.get_substep_list(RK, sigma, RK.h_fn(lying_sd, lying_sigma)) lying_s_ = NS.s_ + noise_scaling_weight * (lying_s_ - NS.s_) else: lying_s_ = NS.s_.clone() rk_swap_stages = 3 if rk_swap_type != "" else 0 data_prev_len = len(data_prev_)-1 if data_prev_ is not None else 3 recycled_stages = max(rk_swap_stages, RK.multistep_stages, RK.hybrid_stages, data_prev_len) if INIT_SAMPLE_LOOP: INIT_SAMPLE_LOOP = False x_, data_, eps_, eps_prev_ = (torch.zeros(RK.rows+2, *x.shape, dtype=default_dtype, device=work_device) for _ in range(4)) if LG.ADAIN_NOISE_MODE == "smart": z_ = torch.zeros(RK.rows+2, *x.shape, dtype=default_dtype, device=work_device) z_[0] = noise_initial.clone() RK.update_transformer_options({'z_' : z_}) if sampler_mode in {"unsample", "resample"}: data_prev_ = state_info.get('data_prev_') if data_prev_ is not None: if x.shape == state_info['raw_x'].shape: data_prev_ = state_info['data_prev_'].clone().to(dtype=default_dtype, device=work_device) else: data_prev_ = torch.stack([comfy.utils.bislerp(data_prev_item, x.shape[-1], x.shape[-2]) for data_prev_item in state_info['data_prev_']]) data_prev_ = data_prev_.to(x) else: data_prev_ = torch.zeros(4, *x.shape, dtype=default_dtype, device=work_device) # multistep max is 4m... so 4 needed else: data_prev_ = torch.zeros(4, *x.shape, dtype=default_dtype, device=work_device) # multistep max is 4m... 
so 4 needed recycled_stages = len(data_prev_)-1 if RK.rows+2 > x_.shape[0]: row_gap = RK.rows+2 - x_.shape[0] x_gap_, data_gap_, eps_gap_, eps_prev_gap_ = (torch.zeros(row_gap, *x.shape, dtype=default_dtype, device=work_device) for _ in range(4)) x_ = torch.cat((x_ ,x_gap_) , dim=0) data_ = torch.cat((data_ ,data_gap_) , dim=0) eps_ = torch.cat((eps_ ,eps_gap_) , dim=0) eps_prev_ = torch.cat((eps_prev_,eps_prev_gap_), dim=0) if LG.ADAIN_NOISE_MODE == "smart": z_gap_ = torch.zeros(row_gap, *x.shape, dtype=default_dtype, device=work_device) z_ = torch.cat((z_ ,z_gap_) , dim=0) RK.update_transformer_options({'z_' : z_}) sde_noise_t = None if SDE_NOISE_EXTERNAL: if step >= len(sde_noise): SDE_NOISE_EXTERNAL=False else: sde_noise_t = sde_noise[step] x_[0] = x.clone() # PRENOISE METHOD HERE! x_0 = x_[0].clone() if EO("guide_step_cutoff") or EO("guide_step_min"): x_0_orig = x_0.clone() # RECYCLE STAGES FOR MULTISTEP if RK.multistep_stages > 0 or RK.hybrid_stages > 0: if SYNC_GUIDE_ACTIVE: lgw_mask_, lgw_mask_inv_ = LG.get_masks_for_step(step) lgw_mask_sync_, lgw_mask_sync_inv_ = LG.get_masks_for_step(step, lgw_type="sync") weight_mask = lgw_mask_+lgw_mask_inv_ if LG.SYNC_SEPARATE: sync_mask = lgw_mask_sync_+lgw_mask_sync_inv_ else: sync_mask = 1. 
if VE_MODEL: yt_0 = y0_bongflow + sigma * noise_bongflow else: yt_0 = (1-sigma) * y0_bongflow + sigma * noise_bongflow for ms in range(min(len(data_prev_), len(eps_))): eps_x = RK.get_epsilon_anchored(x_0, data_prev_x_[ms], sigma) eps_y = RK.get_epsilon_anchored(yt_0, data_prev_y_[ms], sigma) eps_x2y = RK.get_epsilon_anchored(yt_0, data_prev_y_[ms], sigma) if RK.EXPONENTIAL: if VE_MODEL: eps_[ms] = sync_mask * eps_x + (1-sync_mask) * eps_x2y + weight_mask * (-eps_y + sigma*(-noise_bongflow)) if EO("sync_x2y"): eps_[ms] = sync_mask * eps_x + (1-sync_mask) * eps_x2y + weight_mask * (-eps_x2y + sigma*(-noise_bongflow)) else: eps_[ms] = sync_mask * eps_x + (1-sync_mask) * eps_x2y + weight_mask * (-eps_y + sigma*(y0_bongflow-noise_bongflow)) if EO("sync_x2y"): eps_[ms] = sync_mask * eps_x + (1-sync_mask) * eps_x2y + weight_mask * (-eps_x2y + sigma*(y0_bongflow-noise_bongflow)) else: if VE_MODEL: eps_[ms] = sync_mask * eps_x + (1-sync_mask) * eps_x2y + weight_mask * (-eps_y + (noise_bongflow)) if EO("sync_x2y"): eps_[ms] = sync_mask * eps_x + (1-sync_mask) * eps_x2y + weight_mask * (-eps_x2y + (noise_bongflow)) else: eps_[ms] = sync_mask * eps_x + (1-sync_mask) * eps_x2y + weight_mask * (-eps_y + (noise_bongflow-y0_bongflow)) if EO("sync_x2y"): eps_[ms] = sync_mask * eps_x + (1-sync_mask) * eps_x2y + weight_mask * (-eps_x2y + (noise_bongflow-y0_bongflow)) #if RK.EXPONENTIAL: # if VE_MODEL: # eps_[ms] = sync_mask * weight_mask_inv * (eps_x - weight_mask * eps_y) + weight_mask * sigma*(-noise_bongflow) # else: # #eps_[ms] = (lgw_mask_sync_+lgw_mask_sync_inv_) * (1-(lgw_mask_+lgw_mask_inv_)) * (eps_x - (lgw_mask_+lgw_mask_inv_) * eps_y) + (lgw_mask_+lgw_mask_inv_) * sigma*(y0_bongflow-noise_bongflow) # eps_[ms] = sync_mask * weight_mask_inv * (eps_x - weight_mask * eps_y) + weight_mask * sigma*(y0_bongflow-noise_bongflow) #else: # if VE_MODEL: # eps_[ms] = sync_mask * weight_mask_inv * (eps_x - weight_mask * eps_y) + weight_mask * (noise_bongflow) # else: # #eps_[ms] = 
(lgw_mask_sync_+lgw_mask_sync_inv_) * (1-(lgw_mask_+lgw_mask_inv_)) * (eps_x - (lgw_mask_+lgw_mask_inv_) * eps_y) + (lgw_mask_+lgw_mask_inv_) * (noise_bongflow-y0_bongflow) # eps_[ms] = sync_mask * weight_mask_inv * (eps_x - weight_mask * eps_y) + weight_mask * (noise_bongflow-y0_bongflow) eps_prev_ = eps_.clone() else: for ms in range(min(len(data_prev_), len(eps_))): eps_[ms] = RK.get_epsilon_anchored(x_0, data_prev_[ms], sigma) eps_prev_ = eps_.clone() # INITIALIZE IMPLICIT SAMPLING if RK.IMPLICIT: x_, eps_, data_ = init_implicit_sampling(RK, x_0, x_, eps_, eps_prev_, data_, eps, denoised, denoised_prev2, step, sigmas, NS.h, NS.s_, EO, SYNC_GUIDE_ACTIVE) implicit_steps_total = (implicit_steps_full + 1) * (implicit_steps_diag + 1) # BEGIN FULLY IMPLICIT LOOP cossim_counter = 0 adaptive_lgw = LG.lgw.clone() full_iter = 0 while full_iter < implicit_steps_full+1: if RK.IMPLICIT: x_, eps_ = RK.newton_iter(x_0, x_, eps_, eps_prev_, data_, NS.s_, 0, NS.h, sigmas, step, "init", SYNC_GUIDE_ACTIVE) # PREPARE FULLY PSEUDOIMPLICIT GUIDES if step > 0 or not SKIP_PSEUDO: if full_iter > 0 and EO("fully_implicit_reupdate_x"): x_[0] = NS.sigma_from_to(x_0, x, sigma, sigma_next, NS.s_[0]) x_0 = NS.sigma_from_to(x_0, x, sigma, sigma_next, sigma) if EO("fully_pseudo_init") and full_iter == 0: guide_mode_tmp = LG.guide_mode LG.guide_mode = "fully_" + LG.guide_mode x_0, x_, eps_ = LG.prepare_fully_pseudoimplicit_guides_substep(x_0, x_, eps_, eps_prev_, data_, denoised_prev, 0, step, step_sched, sigmas, eta_substep, overshoot_substep, s_noise_substep, \ NS, RK, pseudoimplicit_row_weights, pseudoimplicit_step_weights, full_iter, BONGMATH) if EO("fully_pseudo_init") and full_iter == 0: LG.guide_mode = guide_mode_tmp # TABLEAU LOOP for row in range(RK.rows - RK.multistep_stages - RK.row_offset + 1): diag_iter = 0 while diag_iter < implicit_steps_diag+1: if noise_sampler_type_substep == "brownian" and (full_iter > 0 or diag_iter > 0): eta_substep = 0. 
NS.set_sde_substep(row, RK.multistep_stages, eta_substep, overshoot_substep, s_noise_substep, full_iter, diag_iter, implicit_steps_full, implicit_steps_diag) # PRENOISE METHOD HERE! # A-TABLEAU if row < RK.rows: # PREPARE PSEUDOIMPLICIT GUIDES if step > 0 or not SKIP_PSEUDO: x_0, x_, eps_, x_row_pseudoimplicit, sub_sigma_pseudoimplicit = LG.process_pseudoimplicit_guides_substep(x_0, x_, eps_, eps_prev_, data_, denoised_prev, row, step, step_sched, sigmas, NS, RK, \ pseudoimplicit_row_weights, pseudoimplicit_step_weights, full_iter, BONGMATH) # PREPARE MODEL CALL if LG.guide_mode in GUIDE_MODE_NAMES_PSEUDOIMPLICIT and (step > 0 or not SKIP_PSEUDO) and (LG.lgw[step_sched] > 0 or LG.lgw_inv[step_sched] > 0) and x_row_pseudoimplicit is not None: x_tmp = x_row_pseudoimplicit s_tmp = sub_sigma_pseudoimplicit # Fully implicit iteration (explicit only) # or... Fully implicit iteration (implicit only... not standard) elif (full_iter > 0 and RK.row_offset == 1 and row == 0) or (full_iter > 0 and RK.row_offset == 0 and row == 0 and EO("fully_implicit_update_x")): if EO("fully_explicit_pogostick_eta"): super_alpha_ratio, super_sigma_down, super_sigma_up = NS.get_sde_coeff(sigma, sigma_next, None, eta) x = super_alpha_ratio * x + super_sigma_up * NS.noise_sampler(sigma=sigma_next, sigma_next=sigma) x_tmp = x s_tmp = sigma elif EO("enable_fully_explicit_lagrange_rebound1"): substeps_prev = len(RK.C[:-1]) x_tmp = lagrange_interpolation(RK.C[1:-1], x_[1:substeps_prev], RK.C[0]).squeeze(0) elif EO("enable_fully_explicit_lagrange_rebound2"): substeps_prev = len(RK.C[:-1]) x_tmp = lagrange_interpolation(RK.C[1:], x_[1:substeps_prev+1], RK.C[0]).squeeze(0) elif EO("enable_fully_explicit_rebound1"): # 17630, faded dots, just crap eps_tmp, denoised_tmp = RK(x, sigma_next, x, sigma_next) eps_tmp = (x - denoised_tmp) / sigma_next x_[0] = denoised_tmp + sigma * eps_tmp x_0 = x_[0] x_tmp = x_[0] s_tmp = sigma elif implicit_type == "rebound": # TODO: ADAPT REBOUND IMPLICIT TO WORK WITH FLOW 
GUIDE MODE eps_tmp, denoised_tmp = RK(x, sigma_next, x_0, sigma) eps_tmp = (x - denoised_tmp) / sigma_next x = denoised_tmp + sigma * eps_tmp x_tmp = x s_tmp = sigma elif implicit_type == "retro-eta" and (NS.sub_sigma_up > 0 or NS.sub_sigma_up_eta > 0): x_tmp = NS.sigma_from_to(x_0, x, sigma, sigma_next, sigma) s_tmp = sigma elif implicit_type == "bongmath" and (NS.sub_sigma_up > 0 or NS.sub_sigma_up_eta > 0): if BONGMATH: x_tmp = x_[row] s_tmp = NS.s_[row] else: x_tmp = NS.sigma_from_to(x_0, x, sigma, sigma_next, sigma) s_tmp = sigma else: x_tmp = x s_tmp = sigma_next # All others else: # three potential toggle options: force rebound/model call, force PC style, force pogostick style if diag_iter > 0: # Diagonally implicit iteration (explicit or implicit) if EO("diag_explicit_pogostick_eta"): super_alpha_ratio, super_sigma_down, super_sigma_up = NS.get_sde_coeff(NS.s_[row], NS.s_[row+RK.row_offset+RK.multistep_stages], None, eta) x_[row+RK.row_offset] = super_alpha_ratio * x_[row+RK.row_offset] + super_sigma_up * NS.noise_sampler(sigma=NS.s_[row+RK.row_offset+RK.multistep_stages], sigma_next=NS.s_[row]) x_tmp = x_[row+RK.row_offset] s_tmp = sigma elif implicit_type_substeps == "rebound": eps_[row], data_[row] = RK(x_[row+RK.row_offset], NS.s_[row+RK.row_offset+RK.multistep_stages], x_0, sigma) x_ = RK.update_substep(x_0, x_, eps_, eps_prev_, row, RK.row_offset, NS.h_new, NS.h_new_orig) x_[row+RK.row_offset] = NS.rebound_overshoot_substep(x_0, x_[row+RK.row_offset]) x_[row+RK.row_offset] = NS.sigma_from_to(x_0, x_[row+RK.row_offset], sigma, NS.s_[row+RK.row_offset+RK.multistep_stages], NS.s_[row]) x_tmp = x_[row+RK.row_offset] s_tmp = NS.s_[row] elif implicit_type_substeps == "retro-eta" and (NS.sub_sigma_up > 0 or NS.sub_sigma_up_eta > 0): x_tmp = NS.sigma_from_to(x_0, x_[row+RK.row_offset], sigma, NS.s_[row+RK.row_offset+RK.multistep_stages], NS.s_[row]) s_tmp = NS.s_[row] elif implicit_type_substeps == "bongmath" and (NS.sub_sigma_up > 0 or NS.sub_sigma_up_eta > 
0) and not EO("disable_diag_explicit_bongmath_rebound"): if BONGMATH: x_tmp = x_[row] s_tmp = NS.s_[row] else: x_tmp = NS.sigma_from_to(x_0, x_[row+RK.row_offset], sigma, NS.s_[row+RK.row_offset+RK.multistep_stages], NS.s_[row]) s_tmp = NS.s_[row] else: x_tmp = x_[row+RK.row_offset] s_tmp = NS.s_[row+RK.row_offset+RK.multistep_stages] else: x_tmp = x_[row] s_tmp = NS.sub_sigma if RK.IMPLICIT: if not EO("disable_implicit_guide_preproc"): eps_, x_ = LG.process_guides_substep(x_0, x_, eps_, data_, row, step_sched, sigma, sigma_next, NS.sigma_down, NS.s_, epsilon_scale, RK) eps_prev_, x_ = LG.process_guides_substep(x_0, x_, eps_prev_, data_, row, step_sched, sigma, sigma_next, NS.sigma_down, NS.s_, epsilon_scale, RK) if row == 0 and (EO("implicit_lagrange_init") or EO("radaucycle")): pass else: x_[row+RK.row_offset] = x_0 + NS.h_new * RK.zum(row+RK.row_offset, eps_, eps_prev_) x_[row+RK.row_offset] = NS.rebound_overshoot_substep(x_0, x_[row+RK.row_offset]) if row > 0: if not LG.guide_mode.startswith("flow") or (LG.lgw[step_sched] == 0 and LG.lgw[step+1] == 0 and LG.lgw_inv[step_sched] == 0 and LG.lgw_inv[step+1] == 0): x_row_tmp = NS.swap_noise_substep(x_0, x_[row+RK.row_offset], mask=sde_mask, guide=LG.y0) if LG.ADAIN_NOISE_MODE == "smart": #_smartnoise_implicit"): data_next = denoised + NS.h_new * RK.zum(row+RK.row_offset+RK.multistep_stages, data_, data_prev_) if VE_MODEL: z_[row+RK.row_offset] = (x_row_tmp - data_next) / s_tmp else: z_[row+RK.row_offset] = (x_row_tmp - (NS.sigma_max-s_tmp)*data_next) / s_tmp RK.update_transformer_options({'z_' : z_}) if SYNC_GUIDE_ACTIVE: noise_bongflow_new = (x_row_tmp - x_[row+RK.row_offset]) / s_tmp + noise_bongflow yt_[row+RK.row_offset] += s_tmp * (noise_bongflow_new - noise_bongflow) x_0 += sigma * (noise_bongflow_new - noise_bongflow) if not EO("disable_i_bong"): for i_bong in range(len(NS.s_)): x_[i_bong] += NS.s_[i_bong] * (noise_bongflow_new - noise_bongflow) noise_bongflow = noise_bongflow_new x_[row+RK.row_offset] = 
x_row_tmp if SYNC_GUIDE_ACTIVE: if VE_MODEL: yt_[:NS.s_.shape[0], 0] = y0_bongflow + NS.s_.view(-1, *[1]*(x.ndim-1)) * (noise_bongflow) yt_0 = y0_bongflow + sigma * (noise_bongflow) else: yt_[:NS.s_.shape[0], 0] = y0_bongflow + NS.s_.view(-1, *[1]*(x.ndim-1)) * (noise_bongflow - y0_bongflow) yt_0 = y0_bongflow + sigma * (noise_bongflow - y0_bongflow) if RK.EXPONENTIAL: eps_y_ = data_y_ - yt_0 # yt_ # watch out for fuckery with size of tableau being smaller later in a chained sampler else: if BONGMATH: eps_y_[:NS.s_.shape[0]] = (yt_[:NS.s_.shape[0]] - data_y_[:NS.s_.shape[0]]) / NS.s_.view(-1,*[1]*(x_.ndim-1)) else: eps_y_[:NS.s_.shape[0]] = (yt_0.repeat(NS.s_.shape[0], *[1]*(x_.ndim-1)) - data_y_[:NS.s_.shape[0]]) / sigma # calc exact to c0 node if not BONGMATH: if RK.EXPONENTIAL: eps_x_ = data_x_ - x_0 else: eps_x_ = (x_0 - data_x_) / sigma weight_mask = lgw_mask_+lgw_mask_inv_ if LG.SYNC_SEPARATE: sync_mask = lgw_mask_sync_+lgw_mask_sync_inv_ else: sync_mask = 1. for ms in range(len(eps_)): if RK.EXPONENTIAL: if VE_MODEL: # ZERO IS THIS # ONE IS THIS eps_[ms] = sync_mask * eps_x_[ms] + (1-sync_mask) * eps_x2y_[ms] + weight_mask * (-eps_y_[ms] + sigma*(-noise_bongflow)) if EO("sync_x2y"): eps_[ms] = sync_mask * eps_x_[ms] + (1-sync_mask) * eps_x2y_[ms] + weight_mask * (-eps_x2y_[ms] + sigma*(-noise_bongflow)) else: eps_[ms] = sync_mask * eps_x_[ms] + (1-sync_mask) * eps_x2y_[ms] + weight_mask * (-eps_y_[ms] + sigma*(y0_bongflow-noise_bongflow)) if EO("sync_x2y"): eps_[ms] = sync_mask * eps_x_[ms] + (1-sync_mask) * eps_x2y_[ms] + weight_mask * (-eps_x2y_[ms] + sigma*(y0_bongflow-noise_bongflow)) else: if VE_MODEL: eps_[ms] = sync_mask * eps_x_[ms] + (1-sync_mask) * eps_x2y_[ms] + weight_mask * (-eps_y_[ms] + (noise_bongflow)) if EO("sync_x2y"): eps_[ms] = sync_mask * eps_x_[ms] + (1-sync_mask) * eps_x2y_[ms] + weight_mask * (-eps_x2y_[ms] + (noise_bongflow)) else: eps_[ms] = sync_mask * eps_x_[ms] + (1-sync_mask) * eps_x2y_[ms] + weight_mask * (-eps_y_[ms] + 
(noise_bongflow-y0_bongflow)) if EO("sync_x2y"): eps_[ms] = sync_mask * eps_x_[ms] + (1-sync_mask) * eps_x2y_[ms] + weight_mask * (-eps_x2y_[ms] + (noise_bongflow-y0_bongflow)) if BONGMATH and step < sigmas.shape[0]-1 and sigma > 0.03 and not EO("disable_implicit_prebong"): BONGMATH_Y = SYNC_GUIDE_ACTIVE x_0, x_, eps_ = RK.bong_iter(x_0, x_, eps_, eps_prev_, data_, sigma, NS.s_, row, RK.row_offset, NS.h, step, step_sched, BONGMATH_Y, y0_bongflow, noise_bongflow, eps_x_, eps_y_, data_x_, data_y_, LG) # TRY WITH h_new ?? # BONGMATH_Y, y0_bongflow, noise_bongflow, eps_x_, eps_y_, eps_x2y_, data_x_, LG) # TRY WITH h_new ?? #if EO("eps_adain_smartnoise_bongmath"): if LG.ADAIN_NOISE_MODE == "smart": if VE_MODEL: z_[:NS.s_.shape[0], ...] = (x_ - data_)[:NS.s_.shape[0], ...] / NS.s_.view(-1,*[1]*(x_.ndim-1)) else: z_[:NS.s_.shape[0], ...] = (x_[:NS.s_.shape[0], ...] - (NS.sigma_max - NS.s_.view(-1,*[1]*(x_.ndim-1)))*data_[:NS.s_.shape[0], ...])[:NS.s_.shape[0], ...] / NS.s_.view(-1,*[1]*(x_.ndim-1)) RK.update_transformer_options({'z_' : z_}) x_tmp = x_[row+RK.row_offset] lying_eps_row_factor = 1.0 # MODEL CALL MODEL CALL MODEL CALL MODEL CALL MODEL CALL MODEL CALL MODEL CALL MODEL CALL MODEL CALL MODEL CALL MODEL CALL MODEL CALL MODEL CALL MODEL CALL MODEL CALL MODEL CALL MODEL CALL MODEL CALL if RK.IMPLICIT and row == 0 and (EO("implicit_lazy_recycle_first_model_call_at_start") or EO("radaucycle") or RK.C[0] == 0.0): pass else: if s_tmp == 0: break x_, eps_ = RK.newton_iter(x_0, x_, eps_, eps_prev_, data_, NS.s_, row, NS.h, sigmas, step, "pre", SYNC_GUIDE_ACTIVE) # will this do anything? 
not x_tmp # DETAIL BOOST if noise_scaling_type == "model_alpha" and noise_scaling_weight != 0 and noise_scaling_eta > 0: s_tmp = s_tmp + noise_scaling_weight * (s_tmp * lying_alpha_ratio - s_tmp) if noise_scaling_type == "model" and noise_scaling_weight != 0 and noise_scaling_eta > 0: s_tmp = lying_s_[row] if RK.multistep_stages > 0: s_tmp = lying_sd # SYNC GUIDE --------------------------- if LG.guide_mode.startswith("sync") and (LG.lgw[step_sched] == 0 and LG.lgw_inv[step_sched] == 0 and LG.lgw_sync[step_sched] == 0 and LG.lgw_sync_inv[step_sched] == 0): data_cached = None elif SYNC_GUIDE_ACTIVE: lgw_mask_, lgw_mask_inv_ = LG.get_masks_for_step(step_sched) lgw_mask_sync_, lgw_mask_sync_inv_ = LG.get_masks_for_step(step_sched, lgw_type="sync") lgw_mask_drift_x_, lgw_mask_drift_x_inv_ = LG.get_masks_for_step(step_sched, lgw_type="drift_x") lgw_mask_drift_y_, lgw_mask_drift_y_inv_ = LG.get_masks_for_step(step_sched, lgw_type="drift_y") lgw_mask_lure_x_, lgw_mask_lure_x_inv_ = LG.get_masks_for_step(step_sched, lgw_type="lure_x") lgw_mask_lure_y_, lgw_mask_lure_y_inv_ = LG.get_masks_for_step(step_sched, lgw_type="lure_y") weight_mask = lgw_mask_ + lgw_mask_inv_ sync_mask = lgw_mask_sync_ + lgw_mask_sync_inv_ drift_x_mask = lgw_mask_drift_x_ + lgw_mask_drift_x_inv_ drift_y_mask = lgw_mask_drift_y_ + lgw_mask_drift_y_inv_ lure_x_mask = lgw_mask_lure_x_ + lgw_mask_lure_x_inv_ lure_y_mask = lgw_mask_lure_y_ + lgw_mask_lure_y_inv_ if eps_x_ is None: eps_x_ = torch.zeros(RK.rows+2, *x.shape, dtype=default_dtype, device=work_device) data_x_ = torch.zeros(RK.rows+2, *x.shape, dtype=default_dtype, device=work_device) eps_y2x_ = torch.zeros(RK.rows+2, *x.shape, dtype=default_dtype, device=work_device) eps_x2y_ = torch.zeros(RK.rows+2, *x.shape, dtype=default_dtype, device=work_device) eps_yt_ = torch.zeros(RK.rows+2, *x.shape, dtype=default_dtype, device=work_device) eps_y_ = torch.zeros(RK.rows+2, *x.shape, dtype=default_dtype, device=work_device) eps_prev_y_ = 
torch.zeros(RK.rows+2, *x.shape, dtype=default_dtype, device=work_device) data_y_ = torch.zeros(RK.rows+2, *x.shape, dtype=default_dtype, device=work_device) yt_ = torch.zeros(RK.rows+2, *x.shape, dtype=default_dtype, device=work_device) RUN_X_0_COPY = False if noise_bongflow is None: RUN_X_0_COPY = True data_prev_x_ = torch.zeros(4, *x.shape, dtype=default_dtype, device=work_device) data_prev_y_ = torch.zeros(4, *x.shape, dtype=default_dtype, device=work_device) noise_bongflow = normalize_zscore(NS.noise_sampler(sigma=sigma, sigma_next=NS.sigma_min), channelwise=True, inplace=True) _, _ = RK(noise_bongflow, s_tmp/s_tmp, noise_bongflow, sigma/sigma, transformer_options={'latent_type': 'xt'}) if RK.extra_args['model_options']['transformer_options'].get('y0_standard_guide') is not None: if hasattr(model.inner_model.inner_model.diffusion_model, "y0_standard_guide"): LG.y0 = y0_standard_guide = model.inner_model.inner_model.diffusion_model.y0_standard_guide.clone() del model.inner_model.inner_model.diffusion_model.y0_standard_guide RK.extra_args['model_options']['transformer_options']['y0_standard_guide'] = None if RK.extra_args['model_options']['transformer_options'].get('y0_inv_standard_guide') is not None: if hasattr(model.inner_model.inner_model.diffusion_model, "y0_inv_standard_guide"): LG.y0_inv = y0_inv_standard_guide = model.inner_model.inner_model.diffusion_model.y0_inv_standard_guide.clone() # RK.extra_args['model_options']['transformer_options'].get('y0_standard_guide') del model.inner_model.inner_model.diffusion_model.y0_inv_standard_guide RK.extra_args['model_options']['transformer_options']['y0_inv_standard_guide'] = None y0_bongflow = LG.HAS_LATENT_GUIDE * LG.mask * LG.y0 + LG.HAS_LATENT_GUIDE_INV * LG.mask_inv * LG.y0_inv #LG.y0.clone() if VE_MODEL: yt_0 = y0_bongflow + sigma * noise_bongflow yt = y0_bongflow + s_tmp * noise_bongflow else: yt_0 = (1-sigma) * y0_bongflow + sigma * noise_bongflow yt = (1-s_tmp) * y0_bongflow + s_tmp * noise_bongflow 
yt_[row] = yt if RUN_X_0_COPY: x_0 = yt_0.clone() x_tmp = x_[row] = yt.clone() else: y0_bongflow_orig = y0_bongflow.clone() if y0_bongflow_orig is None else y0_bongflow_orig y0_bongflow = y0_bongflow + LG.drift_x_data * drift_x_mask * (data_x - y0_bongflow) \ + LG.drift_x_sync * drift_x_mask * (data_barf - y0_bongflow) \ + LG.drift_y_data * drift_y_mask * (data_y - y0_bongflow) \ + LG.drift_y_sync * drift_y_mask * (data_barf_y - y0_bongflow) \ + LG.drift_y_guide * drift_y_mask * (y0_bongflow_orig - y0_bongflow) if torch.norm(y0_bongflow_orig - y0_bongflow) != 0 and EO("enable_y0_bongflow_update"): RK.update_transformer_options({'y0_style_pos': y0_bongflow.clone()}) if not EO("skip_yt"): yt_0 = RK.get_x(y0_bongflow, noise_bongflow, sigma) yt = RK.get_x(y0_bongflow, noise_bongflow, s_tmp) yt_[row] = yt if ((LG.lgw[step_sched].item() in {1,0} and LG.lgw_inv[step_sched].item() in {1,0} and LG.lgw[step_sched] == 1-LG.lgw_sync[step_sched] and LG.lgw_inv[step_sched] == 1-LG.lgw_sync_inv[step_sched]) or EO("sync_speed_mode")) and not EO("disable_sync_speed_mode"): data_y = y0_bongflow.clone() eps_y = RK.get_eps(yt_0, yt_[row], data_y, sigma, s_tmp) else: eps_y, data_y = RK(yt_[row], s_tmp, yt_0, sigma, transformer_options={'latent_type': 'yt'}) eps_x, data_x = RK(x_tmp, s_tmp, x_0, sigma, transformer_options={'latent_type': 'xt', 'row': row, "x_tmp": x_tmp}) #if hasattr(model.inner_model.inner_model.diffusion_model, "eps_out"): for sync_lure_iter in range(LG.sync_lure_iter): if LG.sync_lure_sequence == "x -> y": if lure_x_mask.abs().sum() > 0: x_tmp = LG.swap_data(x_tmp, data_x, data_y, s_tmp, lure_x_mask) eps_x_lure, data_x_lure = RK(x_tmp, s_tmp, x_0, sigma, transformer_options={'latent_type': 'xt'}) eps_x = eps_x + lure_x_mask * (eps_x_lure - eps_x) data_x = data_x + lure_x_mask * (data_x_lure - data_x) if lure_y_mask.abs().sum() > 0: y_tmp = yt_[row].clone() y_tmp = LG.swap_data(y_tmp, data_y, data_x, s_tmp, lure_y_mask) eps_y_lure, data_y_lure = RK(y_tmp, s_tmp, yt_0, 
sigma, transformer_options={'latent_type': 'yt'}) eps_y = eps_y + lure_y_mask * (eps_y_lure - eps_y) data_y = data_y + lure_y_mask * (data_y_lure - data_y) elif LG.sync_lure_sequence == "y -> x": if lure_y_mask.abs().sum() > 0: y_tmp = yt_[row].clone() y_tmp = LG.swap_data(y_tmp, data_y, data_x, s_tmp, lure_y_mask) eps_y_lure, data_y_lure = RK(y_tmp, s_tmp, yt_0, sigma, transformer_options={'latent_type': 'yt'}) eps_y = eps_y + lure_y_mask * (eps_y_lure - eps_y) data_y = data_y + lure_y_mask * (data_y_lure - data_y) if lure_x_mask.abs().sum() > 0: x_tmp = LG.swap_data(x_tmp, data_x, data_y, s_tmp, lure_x_mask) eps_x_lure, data_x_lure = RK(x_tmp, s_tmp, x_0, sigma, transformer_options={'latent_type': 'xt'}) eps_x = eps_x + lure_x_mask * (eps_x_lure - eps_x) data_x = data_x + lure_x_mask * (data_x_lure - data_x) elif LG.sync_lure_sequence == "xy -> xy": data_x_orig, data_y_orig = data_x.clone(), data_y.clone() if lure_x_mask.abs().sum() > 0: x_tmp = LG.swap_data(x_tmp, data_x_orig, data_y_orig, s_tmp, lure_x_mask) eps_x_lure, data_x_lure = RK(x_tmp, s_tmp, x_0, sigma, transformer_options={'latent_type': 'xt'}) eps_x = eps_x + lure_x_mask * (eps_x_lure - eps_x) data_x = data_x + lure_x_mask * (data_x_lure - data_x) if lure_y_mask.abs().sum() > 0: y_tmp = yt_[row].clone() y_tmp = LG.swap_data(y_tmp, data_y_orig, data_x_orig, s_tmp, lure_y_mask) eps_y_lure, data_y_lure = RK(y_tmp, s_tmp, yt_0, sigma, transformer_options={'latent_type': 'yt'}) eps_y = eps_y + lure_y_mask * (eps_y_lure - eps_y) data_y = data_y + lure_y_mask * (data_y_lure - data_y) if EO("sync_proj_y"): d_collinear_d_lerp = get_collinear(eps_x, eps_y) d_lerp_ortho_d = get_orthogonal(eps_y, eps_x) eps_y = d_collinear_d_lerp + d_lerp_ortho_d if EO("sync_proj_y2"): d_collinear_d_lerp = get_collinear(eps_y, eps_x) d_lerp_ortho_d = get_orthogonal(eps_x, eps_y) eps_y = d_collinear_d_lerp + d_lerp_ortho_d if EO("sync_proj_x"): d_collinear_d_lerp = get_collinear(eps_y, eps_x) d_lerp_ortho_d = 
get_orthogonal(eps_x, eps_y) eps_x = d_collinear_d_lerp + d_lerp_ortho_d if EO("sync_proj_x2"): d_collinear_d_lerp = get_collinear(eps_x, eps_y) d_lerp_ortho_d = get_orthogonal(eps_y, eps_x) eps_x = d_collinear_d_lerp + d_lerp_ortho_d eps_x2y = RK.get_eps(x_0, x_[row], data_y, sigma, s_tmp) eps_x2y_[row] = eps_x2y eps_y2x = RK.get_eps(x_0, x_[row], data_y, sigma, s_tmp) eps_y2x_[row] = eps_y2x if RK.EXPONENTIAL: if VE_MODEL: # ZERO IS THIS # ONE IS THIS eps_[row] = sync_mask * eps_x + (1-sync_mask) * eps_x2y + weight_mask * (-eps_y + sigma*(-noise_bongflow)) if EO("sync_x2y"): eps_[row] = sync_mask * eps_x + (1-sync_mask) * eps_x2y + weight_mask * (-eps_x2y + sigma*(-noise_bongflow)) else: eps_[row] = sync_mask * eps_x + (1-sync_mask) * eps_x2y + weight_mask * (-eps_y + sigma*(y0_bongflow-noise_bongflow)) #+ lure_x_mask * sigma*(data_y - data_x) if EO("sync_x2y"): eps_[row] = sync_mask * eps_x - (1-sync_mask) * eps_x2y + weight_mask * (-eps_x2y + sigma*(y0_bongflow-noise_bongflow)) eps_yt_[row] = sync_mask * eps_y + (1-sync_mask) * eps_y2x + weight_mask * (-eps_x + sigma*(y0_bongflow-noise_bongflow)) # differentiate guide as well toward the x pred? else: if VE_MODEL: eps_[row] = sync_mask * eps_x + (1-sync_mask) * eps_x2y + weight_mask * (noise_bongflow - eps_y) if EO("sync_x2y"): eps_[row] = sync_mask * eps_x + (1-sync_mask) * eps_x2y + weight_mask * (noise_bongflow - eps_x2y) else: eps_[row] = sync_mask * eps_x + (1-sync_mask) * eps_x2y + weight_mask * (noise_bongflow - eps_y - y0_bongflow) if EO("sync_x2y"): eps_[row] = sync_mask * eps_x + (1-sync_mask) * eps_x2y + weight_mask * (noise_bongflow - eps_x2y - y0_bongflow) eps_yt_[row] = sync_mask * eps_y + (1-sync_mask) * eps_y2x + weight_mask * (noise_bongflow - eps_x - y0_bongflow) # differentiate guide as well toward the x pred? 
if VE_MODEL: data_[row] = x_0 + sync_mask * NS.h * eps_x + (1-sync_mask) * NS.h * eps_x2y - weight_mask * (sigma*(eps_y + noise_bongflow)) # - lure_x_mask * (sigma*(eps_y + eps_x)) data_barf_y = yt_0 + sync_mask * NS.h * eps_y + (1-sync_mask) * NS.h * eps_y2x - weight_mask * (sigma*(eps_x + noise_bongflow)) if EO("sync_x2y"): data_[row] = x_0 + sync_mask * NS.h * eps_x + (1-sync_mask) * NS.h * eps_x2y - weight_mask * (sigma*(eps_x2y + noise_bongflow)) else: data_[row] = x_0 + sync_mask * NS.h * eps_x + (1-sync_mask) * NS.h * eps_x2y - weight_mask * (NS.h * eps_y + sigma*(noise_bongflow-y0_bongflow)) data_barf_y = yt_0 + sync_mask * NS.h * eps_y + (1-sync_mask) * NS.h * eps_y2x - weight_mask * (NS.h * eps_x + sigma*(noise_bongflow-y0_bongflow)) if EO("sync_x2y"): data_[row] = x_0 + sync_mask * NS.h * eps_x + (1-sync_mask) * NS.h * eps_x2y - weight_mask * (NS.h * eps_x2y + sigma*(noise_bongflow-y0_bongflow)) if EO("data_is_y0_with_lure_x_mask"): data_[row] = data_[row] + lure_x_mask * (y0_bongflow - data_[row]) if EO("eps_is_y0_with_lure_x_mask"): if RK.EXPONENTIAL: eps_[row] = eps_[row] + lure_x_mask * ((y0_bongflow - x_0) - eps_[row]) else: eps_[row] = eps_[row] + lure_x_mask * (((x_0 - y0_bongflow) / sigma) - eps_[row]) data_barf = data_[row] data_cached = data_x eps_x_ [row] = eps_x data_x_[row] = data_x eps_y_ [row] = eps_y data_y_[row] = data_y if EO("sync_use_fake_eps_y"): if RK.EXPONENTIAL: if VE_MODEL: eps_y_ [row] = sigma * ( - noise_bongflow) else: eps_y_ [row] = sigma * (y0_bongflow - noise_bongflow) else: if VE_MODEL: eps_y_ [row] = noise_bongflow else: eps_y_ [row] = noise_bongflow - y0_bongflow if EO("sync_use_fake_data_y"): data_y_[row] = y0_bongflow elif LG.guide_mode.startswith("flow") and (LG.lgw[step_sched] > 0 or LG.lgw_inv[step_sched] > 0) and not FLOW_STOPPED and not EO("flow_sync") : lgw_mask_, lgw_mask_inv_ = LG.get_masks_for_step(step) if not FLOW_STARTED and not FLOW_RESUMED: FLOW_STARTED = True data_x_prev_ = torch.zeros_like(data_prev_) 
y0 = LG.HAS_LATENT_GUIDE * LG.mask * LG.y0 + LG.HAS_LATENT_GUIDE_INV * LG.mask_inv * LG.y0_inv yx0 = y0.clone() if EO("flow_slerp"): y0_inv = LG.HAS_LATENT_GUIDE * LG.mask * LG.y0_inv + LG.HAS_LATENT_GUIDE_INV * LG.mask_inv * LG.y0 y0 = LG.y0.clone() y0_inv = LG.y0_inv.clone() flow_slerp_guide_ratio = EO("flow_slerp_guide_ratio", 0.5) y_slerp = slerp_tensor(flow_slerp_guide_ratio, y0, y0_inv) yx0 = y_slerp.clone() x_[row], x_0 = yx0.clone(), yx0.clone() if EO("guide_step_cutoff") or EO("guide_step_min"): x_0_orig = yx0.clone() if EO("flow_yx0_init_y0_inv"): yx0 = LG.HAS_LATENT_GUIDE * LG.mask * LG.y0_inv + LG.HAS_LATENT_GUIDE_INV * LG.mask_inv * LG.y0 if step > 0: if EO("flow_manual_masks"): y0 = (1 - (LG.HAS_LATENT_GUIDE * LG.lgw[step_sched] * LG.mask + LG.HAS_LATENT_GUIDE_INV * LG.lgw_inv[step_sched] * LG.mask_inv)) * denoised + LG.HAS_LATENT_GUIDE * LG.lgw[step_sched] * LG.mask * LG.y0 + LG.HAS_LATENT_GUIDE_INV * LG.lgw_inv[step_sched] * LG.mask_inv * LG.y0_inv else: y0 = (1 - (lgw_mask_ + lgw_mask_inv_)) * denoised + lgw_mask_ * LG.y0 + lgw_mask_inv_ * LG.y0_inv yx0 = y0.clone() if EO("flow_slerp"): if EO("flow_manual_masks"): y0_inv = (1 - (LG.HAS_LATENT_GUIDE * LG.lgw[step_sched] * LG.mask + LG.HAS_LATENT_GUIDE_INV * LG.lgw_inv[step_sched] * LG.mask_inv)) * denoised + LG.HAS_LATENT_GUIDE * LG.lgw[step_sched] * LG.mask * LG.y0_inv + LG.HAS_LATENT_GUIDE_INV * LG.lgw_inv[step_sched] * LG.mask_inv * LG.y0 else: y0_inv = (1 - (lgw_mask_ + lgw_mask_inv_)) * denoised + lgw_mask_ * LG.y0_inv + lgw_mask_inv_ * LG.y0 flow_slerp_guide_ratio = EO("flow_slerp_guide_ratio", 0.5) y_slerp = slerp_tensor(flow_slerp_guide_ratio, y0, y0_inv) yx0 = y_slerp.clone() else: yx0_prev = data_cached if EO("flow_manual_masks"): yx0 = (1 - (LG.HAS_LATENT_GUIDE * LG.lgw[step_sched] * LG.mask + LG.HAS_LATENT_GUIDE_INV * LG.lgw_inv[step_sched] * LG.mask_inv)) * yx0_prev + LG.HAS_LATENT_GUIDE * LG.lgw[step_sched] * LG.mask * x_tmp + LG.HAS_LATENT_GUIDE_INV * LG.lgw_inv[step_sched] * 
LG.mask_inv * x_tmp else: yx0 = (1 - (lgw_mask_ + lgw_mask_inv_)) * yx0_prev + (lgw_mask_ + lgw_mask_inv_) * x_tmp if not EO("flow_static_guides"): if EO("flow_manual_masks"): y0 = (1 - (LG.HAS_LATENT_GUIDE * LG.lgw[step_sched] * LG.mask + LG.HAS_LATENT_GUIDE_INV * LG.lgw_inv[step_sched] * LG.mask_inv)) * yx0_prev + LG.HAS_LATENT_GUIDE * LG.lgw[step_sched] * LG.mask * LG.y0 + LG.HAS_LATENT_GUIDE_INV * LG.lgw_inv[step_sched] * LG.mask_inv * LG.y0_inv else: y0 = (1 - (lgw_mask_ + lgw_mask_inv_)) * yx0_prev + lgw_mask_ * LG.y0 + lgw_mask_inv_ * LG.y0_inv if EO("flow_slerp"): if EO("flow_manual_masks"): y0_inv = (1 - (LG.HAS_LATENT_GUIDE * LG.lgw[step_sched] * LG.mask + LG.HAS_LATENT_GUIDE_INV * LG.lgw_inv[step_sched] * LG.mask_inv)) * yx0_prev + LG.HAS_LATENT_GUIDE * LG.lgw[step_sched] * LG.mask * LG.y0_inv + LG.HAS_LATENT_GUIDE_INV * LG.lgw_inv[step_sched] * LG.mask_inv * LG.y0 else: y0_inv = (1 - (lgw_mask_ + lgw_mask_inv_)) * yx0_prev + lgw_mask_ * LG.y0_inv + lgw_mask_inv_ * LG.y0 y0_orig = y0.clone() if EO("flow_proj_xy"): d_collinear_d_lerp = get_collinear(yx0, y0_orig) d_lerp_ortho_d = get_orthogonal(y0_orig, yx0) y0 = d_collinear_d_lerp + d_lerp_ortho_d if EO("flow_proj_yx"): d_collinear_d_lerp = get_collinear(y0_orig, yx0) d_lerp_ortho_d = get_orthogonal(yx0, y0_orig) yx0 = d_collinear_d_lerp + d_lerp_ortho_d y0_inv_orig = None if EO("flow_proj_xy_inv"): y0_inv_orig = y0_inv.clone() d_collinear_d_lerp = get_collinear(yx0, y0_inv) d_lerp_ortho_d = get_orthogonal(y0_inv, yx0) y0_inv = d_collinear_d_lerp + d_lerp_ortho_d if EO("flow_proj_yx_inv"): y0_inv_orig = y0_inv if y0_inv_orig is None else y0_inv_orig d_collinear_d_lerp = get_collinear(y0_inv_orig, yx0) d_lerp_ortho_d = get_orthogonal(yx0, y0_inv_orig) yx0 = d_collinear_d_lerp + d_lerp_ortho_d del y0_orig flow_cossim_iter = EO("flow_cossim_iter", 1) if step == 0: noise_yt = noise_fn(y0, sigma, sigma_next, NS.noise_sampler, flow_cossim_iter) # normalize_zscore(NS.noise_sampler(sigma=sigma, 
sigma_next=sigma_next), channelwise=True, inplace=True) if not EO("flow_disable_renoise_y0"): if noise_yt is None: noise_yt = noise_fn(x_0, sigma, sigma_next, NS.noise_sampler, flow_cossim_iter) else: noise_yt = (1-eta) * noise_yt + eta * noise_fn(x_0, sigma, sigma_next, NS.noise_sampler, flow_cossim_iter) if VE_MODEL: yt = y0 + s_tmp * noise_yt else: yt = (NS.sigma_max-s_tmp) * y0 + (s_tmp/NS.sigma_max) * noise_yt if not EO("flow_disable_doublenoise_y0"): if noise_yt is None: noise_yt = noise_fn(x_0, sigma, sigma_next, NS.noise_sampler, flow_cossim_iter) else: noise_yt = (1-eta) * noise_yt + eta * noise_fn(x_0, sigma, sigma_next, NS.noise_sampler, flow_cossim_iter) if VE_MODEL: y0_noised = y0 + sigma * noise_yt else: y0_noised = (NS.sigma_max-sigma) * y0 + sigma * noise_yt if EO("flow_slerp"): noise = noise_fn(y0_inv, sigma, sigma_next, NS.noise_sampler, flow_cossim_iter) yt_inv = (NS.sigma_max-s_tmp) * y0_inv + (s_tmp/NS.sigma_max) * noise if not EO("flow_disable_doublenoise_y0_inv"): noise = noise_fn(y0_inv, sigma, sigma_next, NS.noise_sampler, flow_cossim_iter) y0_noised_inv = (NS.sigma_max-sigma) * y0_inv + sigma * noise if step == 0: noise_xt = noise_fn(yx0, sigma, sigma_next, NS.noise_sampler, flow_cossim_iter) if EO("flow_slerp"): xt = yx0 + (s_tmp/NS.sigma_max) * (noise - y_slerp) if not EO("flow_disable_doublenoise_x_0"): noise = noise_fn(x_0, sigma, sigma_next, NS.noise_sampler, flow_cossim_iter) x_0_noised = x_0 + sigma * (noise - y_slerp) else: if not EO("flow_disable_renoise_x_0"): if noise_xt is None: noise_xt = noise_fn(x_0, sigma, sigma_next, NS.noise_sampler, flow_cossim_iter) else: noise_xt = (1-eta_substep) * noise_xt + eta_substep * noise_fn(x_0, sigma, sigma_next, NS.noise_sampler, flow_cossim_iter) if VE_MODEL: xt = yx0 + (s_tmp) * yx0 + (s_tmp) * (noise_xt - y0) else: xt = yx0 + (s_tmp/NS.sigma_max) * (noise_xt - y0) if not EO("flow_disable_doublenoise_x_0"): if noise_xt is None: noise_xt = noise_fn(x_0, sigma, sigma_next, NS.noise_sampler, 
flow_cossim_iter) else: noise_xt = (1-eta_substep) * noise_xt + eta_substep * noise_fn(x_0, sigma, sigma_next, NS.noise_sampler, flow_cossim_iter) if VE_MODEL: x_0_noised = x_0 + (sigma) * x_0 + (sigma) * (noise_xt - y0) else: x_0_noised = x_0 + (sigma/NS.sigma_max) * (noise_xt - y0) # just lerp noise add, (1-sigma)*y0 + sigma*noise assuming x_0 == y0, which is true initially... eps_y, data_y = RK(yt, s_tmp, y0_noised, sigma, transformer_options={'latent_type': 'yt'}) eps_x, data_x = RK(xt, s_tmp, x_0_noised, sigma, transformer_options={'latent_type': 'xt'}) if EO("flow_slerp"): eps_y_inv, data_y_inv = RK(yt_inv, s_tmp, y0_noised_inv, sigma, transformer_options={'latent_type': 'yt_inv'}) if LG.lgw[step+1] == 0 and LG.lgw_inv[step+1] == 0: # break out of differentiating x0 and return to differentiating eps/velocity field if EO("flow_shit_out_yx0"): eps_ [row] = eps_x - eps_y data_[row] = yx0 if row == 0: x_[row] = x_0 = xt else: x_[row] = xt if not EO("flow_shit_out_new"): eps_ [row] = eps_x data_[row] = data_x if row == 0: x_[row] = x_0 = xt else: x_[row] = xt else: eps_ [row] = (1 - (lgw_mask_ + lgw_mask_inv_)) * eps_x + (lgw_mask_ + lgw_mask_inv_) * eps_y data_[row] = (1 - (lgw_mask_ + lgw_mask_inv_)) * data_x + (lgw_mask_ + lgw_mask_inv_) * data_y if row == 0: x_[row] = x_0 = (1 - (lgw_mask_ + lgw_mask_inv_)) * xt + (lgw_mask_ + lgw_mask_inv_) * yt else: x_[row] = (1 - (lgw_mask_ + lgw_mask_inv_)) * xt + (lgw_mask_ + lgw_mask_inv_) * yt FLOW_STOPPED = True else: if not EO("flow_slerp"): if RK.EXPONENTIAL: eps_y_alt = data_y - x_0 eps_x_alt = data_x - x_0 else: eps_y_alt = (x_0 - data_y) / sigma eps_x_alt = (x_0 - data_x) / sigma if EO("flow_y_zero"): eps_y_alt *= LG.mask eps_[row] = eps_yx = (eps_y_alt - eps_x_alt) eps_y_lin = (x_0 - data_y) / sigma if EO("flow_y_zero"): eps_y_lin *= LG.mask eps_x_lin = (x_0 - data_x) / sigma eps_yx_lin = (eps_y_lin - eps_x_lin) data_[row] = (1 - (lgw_mask_ + lgw_mask_inv_)) * data_x + (lgw_mask_ + lgw_mask_inv_) * data_y if 
EO("flow_reverse_data_masks"): data_[row] = (1 - (lgw_mask_ + lgw_mask_inv_)) * data_y + (lgw_mask_ + lgw_mask_inv_) * data_x if flow_sync_eps != 0.0: if RK.EXPONENTIAL: eps_[row] = (1-flow_sync_eps) * eps_[row] + flow_sync_eps * (data_[row] - x_0) else: eps_[row] = (1-flow_sync_eps) * eps_[row] + flow_sync_eps * (x_0 - data_[row]) / sigma if EO("flow_sync_eps_mask"): flow_sync_eps = EO("flow_sync_eps_mask", 1.0) if RK.EXPONENTIAL: eps_[row] = (lgw_mask_ + lgw_mask_inv_) * (1-flow_sync_eps) * eps_[row] + (1 - (lgw_mask_ + lgw_mask_inv_)) * flow_sync_eps * (data_[row] - x_0) else: eps_[row] = (lgw_mask_ + lgw_mask_inv_) * (1-flow_sync_eps) * eps_[row] + (1 - (lgw_mask_ + lgw_mask_inv_)) * flow_sync_eps * (x_0 - data_[row]) / sigma if EO("flow_sync_eps_revmask"): flow_sync_eps = EO("flow_sync_eps_revmask", 1.0) if RK.EXPONENTIAL: eps_[row] = (1 - (lgw_mask_ + lgw_mask_inv_)) * (1-flow_sync_eps) * eps_[row] + (lgw_mask_ + lgw_mask_inv_) * flow_sync_eps * (data_[row] - x_0) else: eps_[row] = (1 - (lgw_mask_ + lgw_mask_inv_)) * (1-flow_sync_eps) * eps_[row] + (lgw_mask_ + lgw_mask_inv_) * flow_sync_eps * (x_0 - data_[row]) / sigma if EO("flow_sync_eps_maskonly"): flow_sync_eps = EO("flow_sync_eps_maskonly", 1.0) if RK.EXPONENTIAL: eps_[row] = (lgw_mask_ + lgw_mask_inv_) * eps_[row] + (1 - (lgw_mask_ + lgw_mask_inv_)) * (data_[row] - x_0) else: eps_[row] = (lgw_mask_ + lgw_mask_inv_) * eps_[row] + (1 - (lgw_mask_ + lgw_mask_inv_)) * (x_0 - data_[row]) / sigma if EO("flow_sync_eps_revmaskonly"): flow_sync_eps = EO("flow_sync_eps_revmaskonly", 1.0) if RK.EXPONENTIAL: eps_[row] = (1 - (lgw_mask_ + lgw_mask_inv_)) * eps_[row] + (lgw_mask_ + lgw_mask_inv_) * (data_[row] - x_0) else: eps_[row] = (1 - (lgw_mask_ + lgw_mask_inv_)) * eps_[row] + (lgw_mask_ + lgw_mask_inv_) * (x_0 - data_[row]) / sigma if EO("flow_slerp"): if RK.EXPONENTIAL: eps_y_alt = data_y - x_0 eps_y_alt_inv = data_y_inv - x_0 eps_x_alt = data_x - x_0 else: eps_y_alt = (x_0 - data_y) / sigma eps_y_alt_inv = 
(x_0 - data_y_inv) / sigma eps_x_alt = (x_0 - data_x) / sigma flow_slerp_ratio2 = EO("flow_slerp_ratio2", 0.5) eps_yx = (eps_y_alt - eps_x_alt) eps_y_lin = (x_0 - data_y) / sigma eps_x_lin = (x_0 - data_x) / sigma eps_yx_lin = (eps_y_lin - eps_x_lin) eps_yx_inv = (eps_y_alt_inv - eps_x_alt) eps_y_lin_inv = (x_0 - data_y_inv) / sigma eps_x_lin = (x_0 - data_x) / sigma eps_yx_lin_inv = (eps_y_lin_inv - eps_x_lin) data_row = x_0 - sigma * eps_yx_lin data_row_inv = x_0 - sigma * eps_yx_lin_inv if EO("flow_slerp_similarity_ratio"): flow_slerp_similarity_ratio = EO("flow_slerp_similarity_ratio", 1.0) flow_slerp_ratio2 = find_slerp_ratio_grid(data_row, data_row_inv, LG.y0.clone(), LG.y0_inv.clone(), flow_slerp_similarity_ratio) eps_ [row] = slerp_tensor(flow_slerp_ratio2, eps_yx, eps_yx_inv) data_[row] = slerp_tensor(flow_slerp_ratio2, data_row, data_row_inv) if EO("flow_slerp_autoalter"): data_row_slerp = slerp_tensor(0.5, data_row, data_row_inv) y0_pearsim = get_pearson_similarity(data_row_slerp, y0) y0_pearsim_inv = get_pearson_similarity(data_row_slerp, y0_inv) if y0_pearsim > y0_pearsim_inv: data_[row] = data_row_inv eps_ [row] = (eps_y_alt_inv - eps_x_alt) else: data_[row] = data_row eps_ [row] = (eps_y_alt - eps_x_alt) if EO("flow_slerp_recalc_eps_row"): if RK.EXPONENTIAL: eps_[row] = data_[row] - x_0 else: eps_[row] = (x_0 - data_[row]) / sigma if EO("flow_slerp_recalc_data_row"): if RK.EXPONENTIAL: data_[row] = x_0 + eps_[row] else: data_[row] = x_0 - sigma * eps_[row] data_cached = data_x if step < EO("direct_pre_pseudo_guide", 0) and step > 0: for i_pseudo in range(EO("direct_pre_pseudo_guide_iter", 1)): x_tmp += LG.lgw[step_sched] * LG.mask * (NS.sigma_max - s_tmp) * (LG.y0 - denoised) + LG.lgw_inv[step_sched] * LG.mask_inv * (NS.sigma_max - s_tmp) * (LG.y0_inv - denoised) eps_[row], data_[row] = RK(x_tmp, s_tmp, x_0, sigma) # MODEL CALL MODEL CALL MODEL CALL MODEL CALL MODEL CALL MODEL CALL MODEL CALL MODEL CALL MODEL CALL MODEL CALL MODEL CALL MODEL CALL 
MODEL CALL MODEL CALL MODEL CALL MODEL CALL MODEL CALL if SYNC_GUIDE_ACTIVE: pass elif not ((not LG.guide_mode.startswith("flow")) or FLOW_STOPPED or (LG.guide_mode.startswith("flow") and LG.lgw[step_sched] == 0 and LG.lgw_inv[step_sched] == 0)): #(LG.guide_mode.startswith("flow") and (LG.lgw[step_sched] != 0 or LG.lgw_inv[step_sched] != 0)) or FLOW_STOPPED: pass elif LG.guide_mode.startswith("lure") and (LG.lgw[step_sched] > 0 or LG.lgw_inv[step_sched] > 0): eps_[row], data_[row] = RK(x_tmp, s_tmp, x_0, sigma, transformer_options={'latent_type': 'yt'}) else: if EO("protoshock") and StyleMMDiT is not None and StyleMMDiT.data_shock_start_step <= step_sched < StyleMMDiT.data_shock_end_step: eps_[row], data_[row] = RK(x_tmp, s_tmp, x_0, sigma, transformer_options={'row': row, 'x_tmp': x_tmp, 'sigma_next': sigma_next}) data_wct = StyleMMDiT.apply_data_shock(data_[row]) if VE_MODEL: x_tmp = x_tmp + (data_wct - data_[row]) else: x_tmp = x_tmp + (NS.sigma_max-NS.s_[row]) * (data_wct - data_[row]) #x_[row+RK.row_offset] = x_tmp x_[row] = x_tmp if row == 0: x_0 = x_tmp if EO("preshock"): eps_[row], data_[row] = RK(x_tmp, s_tmp, x_0, sigma, transformer_options={'row': row, 'x_tmp': x_tmp, 'sigma_next': sigma_next}) if VE_MODEL: x_tmp = x_tmp + (data_wct - data_[row]) else: x_tmp = x_tmp + (NS.sigma_max-NS.s_[row]) * (data_wct - data_[row]) x_[row] = x_tmp if row == 0: x_0 = x_tmp eps_[row], data_[row] = RK(x_tmp, s_tmp, x_0, sigma, transformer_options={'row': row, 'x_tmp': x_tmp, 'sigma_next': sigma_next}) #if EO("yoloshock") and StyleMMDiT is not None and StyleMMDiT.data_shock_start_step <= step_sched < StyleMMDiT.data_shock_end_step: if not EO("disable_yoloshock") and StyleMMDiT is not None and StyleMMDiT.data_shock_start_step <= step_sched < StyleMMDiT.data_shock_end_step: data_wct = StyleMMDiT.apply_data_shock(data_[row]) if VE_MODEL: x_tmp = x_tmp + (data_wct - data_[row]) else: x_tmp = x_tmp + (NS.sigma_max-NS.s_[row]) * (data_wct - data_[row]) #x_[row+RK.row_offset] = 
x_tmp x_[row] = x_tmp if row == 0: x_0 = x_tmp data_[row] = data_wct if RK.EXPONENTIAL: eps_[row] = data_[row] - x_0 else: eps_[row] = (x_0 - data_[row]) / sigma if hasattr(model.inner_model.inner_model.diffusion_model, "eps_out"): # fp64 model out override, for testing only eps_out = model.inner_model.inner_model.diffusion_model.eps_out del model.inner_model.inner_model.diffusion_model.eps_out if eps_out.shape[0] == 2: data_cond = x_0 - sigma * eps_out[1] data_uncond = x_0 - sigma * eps_out[0] data_row = data_uncond + model.inner_model.cfg * (data_cond - data_uncond) eps_row = (x_0 - data_row) / sigma else: data_row = x_0 - sigma * eps_out if RK.EXPONENTIAL: eps_row = data_row - x_0 else: eps_row = eps_out if torch.norm(eps_row - eps_[row]) < 0.01 and torch.norm(data_row - data_[row]) < 0.01: # if some other cfg/post-cfg func was used, detect and ignore this eps_[row] = eps_row data_[row] = data_row if RK.extra_args['model_options']['transformer_options'].get('y0_standard_guide') is not None: if hasattr(model.inner_model.inner_model.diffusion_model, "y0_standard_guide"): LG.y0 = model.inner_model.inner_model.diffusion_model.y0_standard_guide.clone() del model.inner_model.inner_model.diffusion_model.y0_standard_guide RK.extra_args['model_options']['transformer_options']['y0_standard_guide'] = None if RK.extra_args['model_options']['transformer_options'].get('y0_inv_standard_guide') is not None: if hasattr(model.inner_model.inner_model.diffusion_model, "y0_inv_standard_guide"): LG.y0_inv = model.inner_model.inner_model.diffusion_model.y0_inv_standard_guide.clone() # RK.extra_args['model_options']['transformer_options'].get('y0_standard_guide') del model.inner_model.inner_model.diffusion_model.y0_inv_standard_guide RK.extra_args['model_options']['transformer_options']['y0_inv_standard_guide'] = None if LG.guide_mode.startswith("lure") and (LG.lgw[step_sched] > 0 or LG.lgw_inv[step_sched] > 0): x_tmp = LG.process_guides_data_substep(x_tmp, data_[row], step_sched, 
s_tmp) eps_[row], data_[row] = RK(x_tmp, s_tmp, x_0, sigma, transformer_options={'latent_type': 'xt'}) if momentum != 0.0: data_[row] = data_[row] - momentum * (data_prev_[0] - data_[row]) #negative! eps_[row] = RK.get_epsilon(x_0, x_tmp, data_[row], sigma, s_tmp) # ... why was this here??? for momentum maybe? if row < RK.rows and noise_scaling_weight != 0 and noise_scaling_type in {"sampler", "sampler_substep"}: if noise_scaling_type == "sampler_substep": sub_lying_su, sub_lying_sigma, sub_lying_sd, sub_lying_alpha_ratio = NS.get_sde_substep(NS.s_[row], NS.s_[row+RK.row_offset+RK.multistep_stages], noise_scaling_eta, noise_scaling_mode) for _ in range(noise_scaling_cycles-1): sub_lying_su, sub_lying_sigma, sub_lying_sd, sub_lying_alpha_ratio = NS.get_sde_substep(NS.s_[row], sub_lying_sd, noise_scaling_eta, noise_scaling_mode) lying_s_[row+1] = sub_lying_sd substep_noise_scaling_ratio = NS.s_[row+1]/lying_s_[row+1] if RK.multistep_stages > 0: substep_noise_scaling_ratio = sigma_next/lying_sd #fails with resample? 
lying_eps_row_factor = (1 - noise_scaling_weight*(substep_noise_scaling_ratio-1)) # GUIDE if not EO("disable_guides_eps_substep"): eps_, x_ = LG.process_guides_substep(x_0, x_, eps_, data_, row, step_sched, NS.sigma, NS.sigma_next, NS.sigma_down, NS.s_, epsilon_scale, RK) if not EO("disable_guides_eps_prev_substep"): eps_prev_, x_ = LG.process_guides_substep(x_0, x_, eps_prev_, data_, row, step_sched, NS.sigma, NS.sigma_next, NS.sigma_down, NS.s_, epsilon_scale, RK) if LG.y0_mean is not None and LG.y0_mean.sum() != 0.0: if EO("guide_mean_scattersort"): data_row_mean = apply_scattersort_spatial(data_[row], LG.y0_mean) eps_row_mean = RK.get_eps(x_0, data_row_mean, s_tmp) else: eps_row_mean = eps_[row] - eps_[row].mean(dim=(-2,-1), keepdim=True) + (LG.y0_mean - x_0).mean(dim=(-2,-1), keepdim=True) if LG.mask_mean is not None: eps_row_mean = LG.mask_mean * eps_row_mean + (1-LG.mask_mean) * eps_[row] eps_[row] = eps_[row] + LG.lgw_mean[step_sched] * (eps_row_mean - eps_[row]) if (full_iter == 0 and diag_iter == 0) or EO("newton_iter_post_use_on_implicit_steps"): x_, eps_ = RK.newton_iter(x_0, x_, eps_, eps_prev_, data_, NS.s_, row, NS.h, sigmas, step, "post", SYNC_GUIDE_ACTIVE) # UPDATE #for row in range(RK.rows - RK.multistep_stages - RK.row_offset + 1): if EO("exp2lin_override") and RK.EXPONENTIAL: x_ = RK.update_substep(x_0, x_, eps_, eps_prev_, row, RK.row_offset, NS.h_new, NS.h_new_orig, lying_eps_row_factor=lying_eps_row_factor, sigma=sigma) #modifies eps_[row] if lying_eps_row_factor != 1.0 #x_ = RK.update_substep(x_0, x_, eps_, eps_prev_, row, RK.row_offset, -sigma*NS.h_new, -sigma*NS.h_new_orig, lying_eps_row_factor=lying_eps_row_factor) #modifies eps_[row] if lying_eps_row_factor != 1.0 else: x_ = RK.update_substep(x_0, x_, eps_, eps_prev_, row, RK.row_offset, NS.h_new, NS.h_new_orig, lying_eps_row_factor=lying_eps_row_factor) #modifies eps_[row] if lying_eps_row_factor != 1.0 x_[row+RK.row_offset] = NS.rebound_overshoot_substep(x_0, x_[row+RK.row_offset]) if 
SYNC_GUIDE_ACTIVE: #yt_ is not None: #yt_ = RK.update_substep(yt_0, yt_, eps_y_, eps_prev_y_, row, RK.row_offset, NS.h_new, NS.h_new_orig, lying_eps_row_factor=lying_eps_row_factor) #modifies eps_[row] if lying_eps_row_factor != 1.0 yt_ = RK.update_substep(yt_0, yt_, eps_yt_, eps_prev_y_, row, RK.row_offset, NS.h_new, NS.h_new_orig, lying_eps_row_factor=lying_eps_row_factor, sigma=sigma) #modifies eps_[row] if lying_eps_row_factor != 1.0 yt_[row+RK.row_offset] = NS.rebound_overshoot_substep(yt_0, yt_[row+RK.row_offset]) if not RK.IMPLICIT and NS.noise_mode_sde_substep != "hard_sq": x_means_per_substep = x_[row+RK.row_offset].mean(dim=(-2,-1), keepdim=True) if not LG.guide_mode.startswith("flow") or (LG.lgw[step_sched] == 0 and LG.lgw[step+1] == 0 and LG.lgw_inv[step_sched] == 0 and LG.lgw_inv[step+1] == 0): #if LG.guide_mode.startswith("sync") and (LG.lgw[step_sched] != 0.0 or LG.lgw_inv[step_sched] != 0.0): # x_row_tmp = x_[row+RK.row_offset].clone() #x_[row+RK.row_offset] = NS.swap_noise_substep(x_0, x_[row+RK.row_offset], mask=sde_mask, guide=LG.y0) x_row_tmp = NS.swap_noise_substep(x_0, x_[row+RK.row_offset], mask=sde_mask, guide=LG.y0) #if EO("eps_adain_smartnoise_substep"): if LG.ADAIN_NOISE_MODE == "smart": #eps_row_next = (x_0 - x_[row+RK.row_offset]) / (sigma - NS.s_[row+RK.row_offset]) #denoised_row_next = x_0 - sigma * eps_row_next # #eps_swapped = (x_row_tmp - denoised_row_next) / NS.s_[row+RK.row_offset] # #noise_row_next = eps_swapped + denoised_row_next #z_[row+RK.row_offset] = noise_row_next #RK.update_transformer_options({'z_' : z_}) data_next = denoised + NS.h_new * RK.zum(row+RK.row_offset+RK.multistep_stages, data_, data_prev_) if VE_MODEL: z_[row+RK.row_offset] = (x_row_tmp - data_next) / NS.s_[row+RK.row_offset] else: z_[row+RK.row_offset] = (x_row_tmp - (NS.sigma_max-NS.s_[row+RK.row_offset])*data_next) / NS.s_[row+RK.row_offset] RK.update_transformer_options({'z_' : z_}) elif LG.ADAIN_NOISE_MODE == "update": #EO("eps_adain"): x_init_new = 
(x_row_tmp - x_[row+RK.row_offset]) / s_tmp + x_init x_0 += sigma * (x_init_new - x_init) x_init = x_init_new RK.update_transformer_options({'x_init' : x_init.clone()}) if SYNC_GUIDE_ACTIVE: noise_bongflow_new = (x_row_tmp - x_[row+RK.row_offset]) / s_tmp + noise_bongflow yt_[row+RK.row_offset] += s_tmp * (noise_bongflow_new - noise_bongflow) x_0 += sigma * (noise_bongflow_new - noise_bongflow) noise_bongflow = noise_bongflow_new x_[row+RK.row_offset] = x_row_tmp elif LG.guide_mode.startswith("flow"): pass if not LG.guide_mode.startswith("lure"): x_[row+RK.row_offset] = LG.process_guides_data_substep(x_[row+RK.row_offset], data_[row], step_sched, NS.s_[row]) if ((not EO("protoshock") and not EO("yoloshock")) or EO("fuckitshock")) and StyleMMDiT is not None and StyleMMDiT.data_shock_start_step <= step_sched < StyleMMDiT.data_shock_end_step: data_wct = StyleMMDiT.apply_data_shock(data_[row]) if VE_MODEL: x_[row+RK.row_offset] = x_[row+RK.row_offset] + (data_wct - data_[row]) else: x_[row+RK.row_offset] = x_[row+RK.row_offset] + (NS.sigma_max-NS.s_[row]) * (data_wct - data_[row]) if SYNC_GUIDE_ACTIVE: # # # # ## # # ## # YIIIIKES --------------------------------------------------------------------------------------------------------- if VE_MODEL: yt_[:NS.s_.shape[0], 0] = y0_bongflow + NS.s_.view(-1, *[1]*(x.ndim-1)) * (noise_bongflow) yt_0 = y0_bongflow + sigma * (noise_bongflow) else: yt_[:NS.s_.shape[0], 0] = y0_bongflow + NS.s_.view(-1, *[1]*(x.ndim-1)) * (noise_bongflow - y0_bongflow) yt_0 = y0_bongflow + sigma * (noise_bongflow - y0_bongflow) if RK.EXPONENTIAL: eps_y_ = data_y_ - yt_0 # yt_ # watch out for fuckery with size of tableau being smaller later in a chained sampler else: if BONGMATH: eps_y_[:NS.s_.shape[0]] = (yt_[:NS.s_.shape[0]] - data_y_[:NS.s_.shape[0]]) / NS.s_.view(-1,*[1]*(x_.ndim-1)) else: eps_y_[:NS.s_.shape[0]] = (yt_0.repeat(NS.s_.shape[0], *[1]*(x_.ndim-1)) - data_y_[:NS.s_.shape[0]]) / sigma # calc exact to c0 node if not BONGMATH and (eta 
!= 0 or eta_substep != 0): if RK.EXPONENTIAL: eps_x_ = data_x_ - x_0 else: eps_x_ = (x_0 - data_x_) / sigma weight_mask = lgw_mask_+lgw_mask_inv_ if LG.SYNC_SEPARATE: sync_mask = lgw_mask_sync_+lgw_mask_sync_inv_ else: sync_mask = 1. for ms in range(len(eps_)): if RK.EXPONENTIAL: if VE_MODEL: eps_[ms] = sync_mask * eps_x_[ms] + (1-sync_mask) * eps_x2y_[ms] + weight_mask * (-eps_y_[ms] + sigma*(-noise_bongflow)) if EO("sync_x2y"): eps_[ms] = sync_mask * eps_x_[ms] + (1-sync_mask) * eps_x2y_[ms] + weight_mask * (-eps_x2y_[ms] + sigma*(-noise_bongflow)) else: eps_[ms] = sync_mask * eps_x_[ms] + (1-sync_mask) * eps_x2y_[ms] + weight_mask * (-eps_y_[ms] + sigma*(y0_bongflow-noise_bongflow)) if EO("sync_x2y"): eps_[ms] = sync_mask * eps_x_[ms] + (1-sync_mask) * eps_x2y_[ms] + weight_mask * (-eps_x2y_[ms] + sigma*(y0_bongflow-noise_bongflow)) else: if VE_MODEL: eps_[ms] = sync_mask * eps_x_[ms] + (1-sync_mask) * eps_x2y_[ms] + weight_mask * (-eps_y_[ms] + (noise_bongflow)) if EO("sync_x2y"): eps_[ms] = sync_mask * eps_x_[ms] + (1-sync_mask) * eps_x2y_[ms] + weight_mask * (-eps_x2y_[ms] + (noise_bongflow)) else: eps_[ms] = sync_mask * eps_x_[ms] + (1-sync_mask) * eps_x2y_[ms] + weight_mask * (-eps_y_[ms] + (noise_bongflow-y0_bongflow)) if EO("sync_x2y"): eps_[ms] = sync_mask * eps_x_[ms] + (1-sync_mask) * eps_x2y_[ms] + weight_mask * (-eps_x2y_[ms] + (noise_bongflow-y0_bongflow)) if BONGMATH and NS.s_[row] > RK.sigma_min and NS.h < RK.sigma_max/2 and (diag_iter == implicit_steps_diag or EO("enable_diag_explicit_bongmath_all")) and not EO("disable_terminal_bongmath"): if step == 0 and UNSAMPLE: pass elif full_iter == implicit_steps_full or not EO("disable_fully_explicit_bongmath_except_final"): if sigma > 0.03: BONGMATH_Y = SYNC_GUIDE_ACTIVE x_0, x_, eps_ = RK.bong_iter(x_0, x_, eps_, eps_prev_, data_, sigma, NS.s_, row, RK.row_offset, NS.h, step, step_sched, BONGMATH_Y, y0_bongflow, noise_bongflow, eps_x_, eps_y_, data_x_, data_y_, LG) # BONGMATH_Y, y0_bongflow, 
noise_bongflow, eps_x_, eps_y_, eps_x2y_, data_x_, LG) #if EO("eps_adain_smartnoise_bongmath"): if LG.ADAIN_NOISE_MODE == "smart": if VE_MODEL: z_[:NS.s_.shape[0], ...] = (x_ - data_)[:NS.s_.shape[0], ...] / NS.s_.view(-1,*[1]*(x_.ndim-1)) else: z_[:NS.s_.shape[0], ...] = (x_[:NS.s_.shape[0], ...] - (NS.sigma_max - NS.s_.view(-1,*[1]*(x_.ndim-1)))*data_[:NS.s_.shape[0], ...])[:NS.s_.shape[0], ...] / NS.s_.view(-1,*[1]*(x_.ndim-1)) RK.update_transformer_options({'z_' : z_}) diag_iter += 1 #progress_bar.update( round(1 / implicit_steps_total, 2) ) #step_update = round(1 / implicit_steps_total, 2) #progress_bar.update(float(f"{step_update:.2f}")) x_next = x_[RK.rows - RK.multistep_stages - RK.row_offset + 1] x_next = NS.rebound_overshoot_step(x_0, x_next) if SYNC_GUIDE_ACTIVE: # YT_NEXT UPDATE STEP -------------------------------------- yt_next = yt_[RK.rows - RK.multistep_stages - RK.row_offset + 1] yt_next = NS.rebound_overshoot_step(yt_0, yt_next) eps = (x_0 - x_next) / (sigma - sigma_next) denoised = x_0 - sigma * eps if EO("postshock") and step < EO("postshock", 10): eps_row, data_row = RK(x_next, sigma_next, x_next, sigma_next, transformer_options={'row': row, 'x_tmp': x_next, 'sigma_next': sigma_next}) if VE_MODEL: x_next = x_next + (data_row - denoised) else: x_next = x_next + (NS.sigma_max-sigma_next) * (data_row - denoised) eps = (x_0 - x_next) / (sigma - sigma_next) denoised = x_0 - sigma * eps if EO("data_sampler") and step > EO("data_sampler_start_step", 0) and step < EO("data_sampler_end_step", 5): data_sampler_weight = EO("data_sampler_weight", 1.0) denoised_step = RK.zum(row+RK.row_offset+RK.multistep_stages, data_, data_prev_) x_next = LG.swap_data(x_next, denoised, denoised_step, data_sampler_weight * sigma_next) eps = (x_0 - x_next) / (sigma - sigma_next) denoised = x_0 - sigma * eps x_0_prev = x_0.clone() x_means_per_step = x_next.mean(dim=(-2,-1), keepdim=True) if eta == 0.0: x = x_next if SYNC_GUIDE_ACTIVE: yt_0 = yt_[0] = yt_next #elif 
LG.guide_mode.startswith("sync") and (LG.lgw[step_sched] != 0.0 or LG.lgw_inv[step_sched] != 0.0): # noise_sync_new = NS.noise_sampler(sigma=sigma, sigma_next=sigma_next) # x = x_next + sigma * eta * (noise_sync_new - noise_bongflow) # noise_bongflow += eta * (noise_sync_new - noise_bongflow) elif not LG.guide_mode.startswith("flow") or (LG.lgw[step_sched] == 0 and LG.lgw[step+1] == 0 and LG.lgw_inv[step_sched] == 0 and LG.lgw_inv[step+1] == 0): x = NS.swap_noise_step(x_0, x_next, mask=sde_mask) #if EO("eps_adain_smartnoise"): if LG.ADAIN_NOISE_MODE == "smart": #noise_next = eps + denoised #eps_swapped = (x - denoised) / sigma_next # #noise_next = eps_swapped + denoised #z_[0] = noise_next #RK.update_transformer_options({'z_' : z_}) if full_iter+1 < implicit_steps_full+1: # are we to loop for full iter after this? if VE_MODEL: #z_[row+RK.row_offset] = (x - denoised) / sigma_next z_[0] = (x_0 - denoised) / sigma else: #z_[row+RK.row_offset] = (x - (NS.sigma_max-sigma_next) * denoised) / sigma_next z_[0] = (x_0 - (NS.sigma_max-sigma) * denoised) / sigma else: #we're advancing to next step, x is x_next if VE_MODEL: #z_[row+RK.row_offset] = (x - denoised) / sigma_next z_[0] = (x - denoised) / sigma_next else: #z_[row+RK.row_offset] = (x - (NS.sigma_max-sigma_next) * denoised) / sigma_next z_[0] = (x - (NS.sigma_max-sigma_next) * denoised) / sigma_next RK.update_transformer_options({'z_' : z_}) elif LG.ADAIN_NOISE_MODE == "update": #EO("eps_adain"): x_init_new = (x - x_next) / sigma_next + x_init x_0 += sigma * (x_init_new - x_init) x_init = x_init_new RK.update_transformer_options({'x_init' : x_init.clone()}) if SYNC_GUIDE_ACTIVE: noise_bongflow_new = (x - x_next) / sigma_next + noise_bongflow yt_next += sigma_next * (noise_bongflow_new - noise_bongflow) x_0 += sigma * (noise_bongflow_new - noise_bongflow) if not EO("disable_i_bong"): for i_bong in range(len(NS.s_)): x_[i_bong] += NS.s_[i_bong] * (noise_bongflow_new - noise_bongflow) #x_[0] += sigma * 
(noise_bongflow_new - noise_bongflow) yt_0 = yt_[0] = yt_next noise_bongflow = noise_bongflow_new else: x = x_next if EO("keep_step_means"): x = x - x.mean(dim=(-2,-1), keepdim=True) + x_means_per_step callback_step = len(sigmas)-1 - step if sampler_mode == "unsample" else step preview_callback(x, eps, denoised, x_, eps_, data_, callback_step, sigma, sigma_next, callback, EO, preview_override=data_cached, FLOW_STOPPED=FLOW_STOPPED) h_prev = NS.h x_prev = x_0 denoised_prev2 = denoised_prev denoised_prev = denoised full_iter += 1 if LG.lgw[step_sched] > 0 and step >= EO("guide_cutoff_start_step", 0) and cossim_counter < EO("guide_cutoff_max_iter", 10) and (EO("guide_cutoff") or EO("guide_min")): guide_cutoff = EO("guide_cutoff", 1.0) denoised_norm = data_[0] - data_[0].mean(dim=(-2,-1), keepdim=True) y0_norm = LG.y0 - LG.y0 .mean(dim=(-2,-1), keepdim=True) y0_cossim = get_cosine_similarity(denoised_norm, y0_norm) if y0_cossim > guide_cutoff and LG.lgw[step_sched] > EO("guide_cutoff_floor", 0.0): if not EO("guide_cutoff_fast"): LG.lgw[step_sched] *= EO("guide_cutoff_factor", 0.9) else: LG.lgw *= EO("guide_cutoff_factor", 0.9) full_iter -= 1 if y0_cossim < EO("guide_min", 0.0) and LG.lgw[step_sched] < EO("guide_min_ceiling", 1.0): if not EO("guide_cutoff_fast"): LG.lgw[step_sched] *= EO("guide_min_factor", 1.1) else: LG.lgw *= EO("guide_min_factor", 1.1) full_iter -= 1 #if EO("smartnoise"): #TODO: determine if this was useful # z_[0] = z_next if FLOW_STARTED and FLOW_STOPPED: data_prev_ = data_x_prev_ if FLOW_STARTED and not FLOW_STOPPED: data_x_prev_[0] = data_cached # data_cached is data_x from flow mode. this allows multistep to resume seamlessly. 
for ms in range(recycled_stages): data_x_prev_[recycled_stages - ms] = data_x_prev_[recycled_stages - ms - 1] #if LG.guide_mode.startswith("sync") and (LG.lgw[step_sched] != 0.0 or LG.lgw_inv[step_sched] != 0.0): # data_prev_[0] = x_0 - sigma * eps_[0] #else: data_prev_[0] = data_[0] # with flow mode, this will be the differentiated guide/"denoised" for ms in range(recycled_stages): data_prev_[recycled_stages - ms] = data_prev_[recycled_stages - ms - 1] # TODO: verify that this does not run on every substep... if SYNC_GUIDE_ACTIVE: data_prev_x_[0] = data_x for ms in range(recycled_stages): data_prev_x_[recycled_stages - ms] = data_prev_x_[recycled_stages - ms - 1] data_prev_y_[0] = data_y for ms in range(recycled_stages): data_prev_y_[recycled_stages - ms] = data_prev_y_[recycled_stages - ms - 1] rk_type = RK.swap_rk_type_at_step_or_threshold(x_0, data_prev_, NS, sigmas, step, rk_swap_step, rk_swap_threshold, rk_swap_type, rk_swap_print) if step > rk_swap_step: implicit_steps_full = 0 implicit_steps_diag = 0 if EO("bong2m") or EO("bong3m"): denoised_data_prev2 = denoised_data_prev denoised_data_prev = data_[0] if SKIP_PSEUDO and not LG.guide_mode.startswith("flow"): if SKIP_PSEUDO_Y == "y0": LG.y0 = denoised LG.HAS_LATENT_GUIDE = True else: LG.y0_inv = denoised LG.HAS_LATENT_GUIDE_INV = True if EO("pseudo_mix_strength"): pseudo_mix_strength = EO("pseudo_mix_strength", 0.0) LG.y0 = orig_y0 + pseudo_mix_strength * (denoised - orig_y0) LG.y0_inv = orig_y0_inv + pseudo_mix_strength * (denoised - orig_y0_inv) #if sampler_mode == "unsample": # progress_bar.n -= 1 # progress_bar.refresh() #else: # progress_bar.update(1) progress_bar.update(1) #THIS WAS HERE step += 1 if EO("skip_step", -1) == step: step += 1 if d_noise_start_step == step: sigmas = sigmas.clone() * d_noise if sigmas.max() > NS.sigma_max: sigmas = sigmas / NS.sigma_max if d_noise_inv_start_step == step: sigmas = sigmas.clone() / d_noise_inv if sigmas.max() > NS.sigma_max: sigmas = sigmas / NS.sigma_max if 
LG.lgw[step_sched] > 0 and step >= EO("guide_step_cutoff_start_step", 0) and cossim_counter < EO("guide_step_cutoff_max_iter", 10) and (EO("guide_step_cutoff") or EO("guide_step_min")): guide_cutoff = EO("guide_step_cutoff", 1.0) eps_trash, data_trash = RK(x, sigma_next, x_0, sigma) denoised_norm = data_trash - data_trash.mean(dim=(-2,-1), keepdim=True) y0_norm = LG.y0 - LG.y0 .mean(dim=(-2,-1), keepdim=True) y0_cossim = get_cosine_similarity(denoised_norm, y0_norm) if y0_cossim > guide_cutoff and LG.lgw[step_sched] > EO("guide_step_cutoff_floor", 0.0): if not EO("guide_step_cutoff_fast"): LG.lgw[step_sched] *= EO("guide_step_cutoff_factor", 0.9) else: LG.lgw *= EO("guide_step_cutoff_factor", 0.9) step -= 1 x_0 = x = x_[0] = x_0_orig.clone() if y0_cossim < EO("guide_step_min", 0.0) and LG.lgw[step_sched] < EO("guide_step_min_ceiling", 1.0): if not EO("guide_step_cutoff_fast"): LG.lgw[step_sched] *= EO("guide_step_min_factor", 1.1) else: LG.lgw *= EO("guide_step_min_factor", 1.1) step -= 1 x_0 = x = x_[0] = x_0_orig.clone() # END SAMPLING LOOP --------------------------------------------------------------------------------------------------- #progress_bar.close() RK.update_transformer_options({'update_cross_attn': None}) if step == len(sigmas)-2 and sigmas[-1] == 0 and sigmas[-2] == NS.sigma_min and not INIT_SAMPLE_LOOP: if EO("skip_final_model_call"): sigma_min = NS.sigma_min.view((1,) * x.ndim).to(x) denoised = model.inner_model.inner_model.model_sampling.calculate_denoised(sigma_min, eps, x) x = denoised else: eps, denoised = RK(x, NS.sigma_min, x, NS.sigma_min) x = denoised #progress_bar.update(1) eps = eps .to(model_device) denoised = denoised.to(model_device) x = x .to(model_device) progress_bar.close() if not (UNSAMPLE and sigmas[1] > sigmas[0]) and not EO("preview_last_step_always") and sigma is not None and not (FLOW_STARTED and not FLOW_STOPPED): callback_step = len(sigmas)-1 - step if sampler_mode == "unsample" else step preview_callback(x, eps, denoised, 
x_, eps_, data_, callback_step, sigma, sigma_next, callback, EO, preview_override=data_cached, FLOW_STOPPED=FLOW_STOPPED) if INIT_SAMPLE_LOOP: state_info_out = state_info else: if guides is not None and guides.get('guide_mode', "") == 'inversion': guide_inversion_y0 = state_info.get('guide_inversion_y0') guide_inversion_y0_inv = state_info.get('guide_inversion_y0_inv') if sampler_mode == "unsample" and guide_inversion_y0 is None: guide_inversion_y0 = LG.y0.clone() if sampler_mode == "unsample" and guide_inversion_y0_inv is None: guide_inversion_y0_inv = LG.y0_inv.clone() if sampler_mode in {"standard", "resample"} and guide_inversion_y0 is None: guide_inversion_y0 = NS.noise_sampler(sigma=NS.sigma_max, sigma_next=NS.sigma_min).to(x) guide_inversion_y0 = normalize_zscore(guide_inversion_y0, channelwise=True, inplace=True) if sampler_mode in {"standard", "resample"} and guide_inversion_y0_inv is None: guide_inversion_y0_inv = NS.noise_sampler(sigma=NS.sigma_max, sigma_next=NS.sigma_min).to(x) guide_inversion_y0_inv = normalize_zscore(guide_inversion_y0_inv, channelwise=True, inplace=True) state_info_out['guide_inversion_y0'] = guide_inversion_y0 state_info_out['guide_inversion_y0_inv'] = guide_inversion_y0_inv state_info_out['raw_x'] = x.to('cpu') state_info_out['denoised'] = denoised.to('cpu') state_info_out['data_prev_'] = data_prev_.to('cpu') state_info_out['end_step'] = step state_info_out['sigma_next'] = sigma_next.clone() state_info_out['sigmas'] = sigmas_scheduled.clone() state_info_out['sampler_mode'] = sampler_mode state_info_out['last_rng'] = NS.noise_sampler .generator.get_state().clone() state_info_out['last_rng_substep'] = NS.noise_sampler2.generator.get_state().clone() state_info_out['completed'] = step == len(sigmas)-2 and sigmas[-1] == 0 and sigmas[-2] == NS.sigma_min state_info_out['FLOW_STARTED'] = FLOW_STARTED state_info_out['FLOW_STOPPED'] = FLOW_STOPPED state_info_out['noise_bongflow'] = noise_bongflow state_info_out['y0_bongflow'] = y0_bongflow 
def noise_fn(x, sigma, sigma_next, noise_sampler, cossim_iter=1):
    """Draw z-scored noise, keeping the draw that scores highest against ``x``.

    One sample is always drawn. ``cossim_iter`` additional samples are then
    drawn, and a candidate replaces the current best only when
    ``get_pearson_similarity(x, candidate)`` is strictly greater than the best
    score seen so far.

    Args:
        x: Reference tensor the noise is scored against.
        sigma: Forwarded to ``noise_sampler`` as ``sigma``.
        sigma_next: Forwarded to ``noise_sampler`` as ``sigma_next``.
        noise_sampler: Callable accepting ``sigma`` and ``sigma_next`` keywords.
        cossim_iter: Number of extra candidates to draw after the first one.

    Returns:
        The retained noise sample, normalized via ``normalize_zscore`` with
        ``channelwise=True``.
    """
    def _draw():
        # Every candidate goes through the same in-place channelwise z-scoring.
        sample = noise_sampler(sigma=sigma, sigma_next=sigma_next)
        return normalize_zscore(sample, channelwise=True, inplace=True)

    best_noise = _draw()
    best_score = get_pearson_similarity(x, best_noise)

    for _ in range(cossim_iter):
        candidate = _draw()
        candidate_score = get_pearson_similarity(x, candidate)
        if candidate_score > best_score:
            best_noise, best_score = candidate, candidate_score

    return best_noise
def _copy_cond_entries(entries):
    """Return a copy of a flat list of ``[embedding, options]`` pairs.

    The embedding and every tensor-valued option are cloned. All other option
    values are shared by reference, so large non-tensor payloads (such as
    control objects) are never duplicated.
    """
    return [
        [
            embedding.clone(),
            {
                key: value.clone() if isinstance(value, torch.Tensor) else value
                for key, value in options.items()
            },
        ]
        for embedding, options in entries
    ]


def copy_cond(conditioning):
    """Copy a conditioning structure without duplicating non-tensor payloads.

    Two layouts are accepted:

    * flat: ``[[embedding, options], ...]``
    * grouped: a list of flat lists, ``[[[embedding, options], ...], ...]``

    The layout is detected from the first element: if its first item is a
    ``list`` the input is treated as grouped, otherwise as flat.

    Args:
        conditioning: Flat or grouped conditioning list. ``options`` must be a
            mapping and ``embedding`` must provide ``clone()``.

    Returns:
        A new list with the same layout. Embeddings and tensor-valued options
        are cloned; every other option value is the same object as in the
        input.
    """
    # An empty conditioning has nothing to inspect or copy; previously this
    # raised IndexError while probing conditioning[0][0].
    if len(conditioning) == 0:
        return []

    first = conditioning[0]
    # A flat entry always holds an embedding and an options mapping, so an
    # empty first element can only be an empty group of a grouped layout.
    is_grouped = len(first) == 0 or isinstance(first[0], list)
    if is_grouped:
        return [_copy_cond_entries(group) for group in conditioning]
    return _copy_cond_entries(conditioning)
def generate_init_noise(x, seed, noise_type_init, noise_stdev, noise_mean, noise_normalize, sigma_max, sigma_min, alpha_init=None, k_init=None, EO=None):
    """Build the initial noise tensor for a latent shaped like ``x``.

    Args:
        x: Tensor whose shape/dtype/device the noise follows.
        seed: Seed handed to the noise generator.
        noise_type_init: Key into ``NOISE_GENERATOR_CLASSES_SIMPLE``; the value
            ``"none"`` disables noise entirely.
        noise_stdev: Scale applied to the noise; ``0.0`` disables noise.
        noise_mean: Value added after the noise has been mean-centred.
        noise_normalize: When truthy (and the sample has non-zero std), the
            sample is z-scored with ``normalize_zscore`` first.
        sigma_max: Passed to the generator; also scales the ``sigma`` argument
            of the draw (``sigma_max * noise_stdev``).
        sigma_min: Passed to the generator and used as ``sigma_next``.
        alpha_init: Assigned to the generator's ``alpha`` for ``"fractal"``.
        k_init: Assigned to the generator's ``k`` for ``"fractal"``.
        EO: Optional callable queried for ``"init_noise_normalize_channelwise"``
            (default ``"true"``); when falsy, channelwise normalization is used.

    Returns:
        ``torch.zeros_like(x)`` when noise is disabled, otherwise the scaled,
        re-centred noise tensor.
    """
    no_noise_requested = noise_type_init == "none" or noise_stdev == 0.0
    if no_noise_requested:
        return torch.zeros_like(x)

    generator_cls = NOISE_GENERATOR_CLASSES_SIMPLE.get(noise_type_init)
    generator = generator_cls(x=x, seed=seed, sigma_max=sigma_max, sigma_min=sigma_min)

    if noise_type_init == "fractal":
        generator.alpha = alpha_init
        generator.k = k_init
        generator.scale = 0.1

    noise = generator(sigma=sigma_max * noise_stdev, sigma_next=sigma_min)

    if noise_normalize and noise.std() > 0:
        if EO:
            channelwise_option = EO("init_noise_normalize_channelwise", "true")
        else:
            channelwise_option = "true"
        # Only the exact string "true" selects per-channel normalization.
        noise = normalize_zscore(noise, channelwise=(channelwise_option == "true"), inplace=True)

    # NOTE(review): the visible source is whitespace-flattened; scaling and
    # re-centring are assumed to run whether or not normalization happened.
    # Confirm against the unflattened file.
    noise *= noise_stdev
    return (noise - noise.mean()) + noise_mean
self.conds.get('yt_negative') return sampling_function(self.inner_model, x, timestep, negative, positive, cfg, model_options=model_options, seed=seed) class SharkSampler: @classmethod def INPUT_TYPES(cls): return { "required": { "noise_type_init": (NOISE_GENERATOR_NAMES_SIMPLE, {"default": "gaussian"}), "noise_stdev": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step":0.01, "round": False, }), "noise_seed": ("INT", {"default": 0, "min": -1, "max": 0xffffffffffffffff}), "sampler_mode": (['unsample', 'standard', 'resample'], {"default": "standard"}), "scheduler": (get_res4lyf_scheduler_list(), {"default": "beta57"},), "steps": ("INT", {"default": 30, "min": 1, "max": 10000.0}), "denoise": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step":0.01}), "denoise_alt": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step":0.01}), "cfg": ("FLOAT", {"default": 5.5, "min": -10000.0, "max": 10000.0, "step":0.01, "round": False, "tooltip": "Negative values use channelwise CFG." 
}), }, "optional": { "model": ("MODEL",), "positive": ("CONDITIONING", ), "negative": ("CONDITIONING", ), "sampler": ("SAMPLER", ), "sigmas": ("SIGMAS", ), "latent_image": ("LATENT", ), "extra_options": ("STRING", {"default": "", "multiline": True}), "options": ("OPTIONS", ), } } RETURN_TYPES = ("LATENT", "LATENT", "LATENT",) RETURN_NAMES = ("output", "denoised", "sde_noise",) FUNCTION = "main" CATEGORY = "RES4LYF/samplers" EXPERIMENTAL = True def main(self, model = None, cfg : float = 5.5, scheduler : str = "beta57", steps : int = 30, steps_to_run : int = -1, sampler_mode : str = "standard", denoise : float = 1.0, denoise_alt : float = 1.0, noise_type_init : str = "gaussian", latent_image : Optional[dict[Tensor]] = None, positive = None, negative = None, sampler = None, sigmas : Optional[Tensor] = None, noise_stdev : float = 1.0, noise_mean : float = 0.0, noise_normalize : bool = True, d_noise : float = 1.0, alpha_init : float = -1.0, k_init : float = 1.0, cfgpp : float = 0.0, noise_seed : int = -1, options = None, sde_noise = None, sde_noise_steps : int = 1, rebounds : int = 0, unsample_cfg : float = 1.0, unsample_eta : float = 0.5, unsampler_name : str = "none", unsample_steps_to_run : int = -1, eta_decay_scale : float = 1.0, #ultracascade_stage : str = "stage_UP", ultracascade_latent_image : Optional[dict[str,Any]] = None, ultracascade_guide_weights: Optional[Tuple] = None, ultracascade_latent_width : int = 0, ultracascade_latent_height: int = 0, extra_options : str = "", **kwargs, ): disable_pbar = not comfy.utils.PROGRESS_BAR_ENABLED # INIT EXTENDABLE OPTIONS INPUTS options_mgr = OptionsManager(options, **kwargs) extra_options += "\n" + options_mgr.get('extra_options', "") EO = ExtraOptions(extra_options) default_dtype = EO("default_dtype", torch.float64) default_device = EO("work_device", "cuda" if torch.cuda.is_available() else "cpu") noise_stdev = options_mgr.get('noise_init_stdev', noise_stdev) noise_mean = options_mgr.get('noise_init_mean', noise_mean) 
noise_type_init = options_mgr.get('noise_type_init', noise_type_init) d_noise = options_mgr.get('d_noise', d_noise) alpha_init = options_mgr.get('alpha_init', alpha_init) k_init = options_mgr.get('k_init', k_init) sde_noise = options_mgr.get('sde_noise', sde_noise) sde_noise_steps = options_mgr.get('sde_noise_steps', sde_noise_steps) rebounds = options_mgr.get('rebounds', rebounds) unsample_cfg = options_mgr.get('unsample_cfg', unsample_cfg) unsample_eta = options_mgr.get('unsample_eta', unsample_eta) unsampler_name = options_mgr.get('unsampler_name', unsampler_name) unsample_steps_to_run = options_mgr.get('unsample_steps_to_run', unsample_steps_to_run) eta_decay_scale = options_mgr.get('eta_decay_scale', eta_decay_scale) start_at_step = options_mgr.get('start_at_step', -1) tile_sizes = options_mgr.get('tile_sizes', None) flow_sync_eps = options_mgr.get('flow_sync_eps', 0.0) unsampler_name, _ = process_sampler_name(unsampler_name) #ultracascade_stage = options_mgr.get('ultracascade_stage', ultracascade_stage) ultracascade_latent_image = options_mgr.get('ultracascade_latent_image', ultracascade_latent_image) ultracascade_latent_width = options_mgr.get('ultracascade_latent_width', ultracascade_latent_width) ultracascade_latent_height = options_mgr.get('ultracascade_latent_height', ultracascade_latent_height) if 'BONGMATH' in sampler.extra_options: sampler.extra_options['start_at_step'] = start_at_step sampler.extra_options['tile_sizes'] = tile_sizes sampler.extra_options['unsample_bongmath'] = options_mgr.get('unsample_bongmath', sampler.extra_options['BONGMATH']) # allow turning off bongmath for unsampling with cycles sampler.extra_options['flow_sync_eps'] = flow_sync_eps is_chained = False if latent_image is not None: if 'positive' in latent_image and positive is None: positive = copy_cond(latent_image['positive']) if positive is not None and 'control' in positive[0][1]: for i in range(len(positive)): positive[i][1]['control'] = 
latent_image['positive'][i][1]['control'] if hasattr(latent_image['positive'][i][1]['control'], 'base'): positive[i][1]['control'].base = latent_image['positive'][i][1]['control'].base is_chained = True if 'negative' in latent_image and negative is None: negative = copy_cond(latent_image['negative']) if negative is not None and 'control' in negative[0][1]: for i in range(len(negative)): negative[i][1]['control'] = latent_image['negative'][i][1]['control'] if hasattr(latent_image['negative'][i][1]['control'], 'base'): negative[i][1]['control'].base = latent_image['negative'][i][1]['control'].base is_chained = True if 'sampler' in latent_image and sampler is None: sampler = copy_cond(latent_image['sampler']) #.clone() is_chained = True if 'steps_to_run' in sampler.extra_options: sampler.extra_options['steps_to_run'] = steps_to_run guider_input = options_mgr.get('guider', None) if guider_input is not None and is_chained is False: guider = guider_input work_model = guider.model_patcher RESplain("Shark: Using model from ClownOptions_GuiderInput: ", guider.model_patcher.model.diffusion_model.__class__.__name__) RESplain("SharkWarning: \"flow\" guide mode does not work with ClownOptions_GuiderInput") if hasattr(guider, 'cfg') and guider.cfg is not None: cfg = guider.cfg RESplain("Shark: Using cfg from ClownOptions_GuiderInput: ", cfg) if hasattr(guider, 'original_conds') and guider.original_conds is not None: if 'positive' in guider.original_conds: first_ = guider.original_conds['positive'][0]['cross_attn'] second_ = {k: v for k, v in guider.original_conds['positive'][0].items() if k != 'cross_attn'} positive = [[first_, second_],] RESplain("Shark: Using positive cond from ClownOptions_GuiderInput") if 'negative' in guider.original_conds: first_ = guider.original_conds['negative'][0]['cross_attn'] second_ = {k: v for k, v in guider.original_conds['negative'][0].items() if k != 'cross_attn'} negative = [[first_, second_],] RESplain("Shark: Using negative cond from 
ClownOptions_GuiderInput") else: guider = None work_model = model#.clone() if latent_image is not None: latent_image['samples'] = comfy.sample.fix_empty_latent_channels(work_model, latent_image['samples']) if positive is None or negative is None: from ..conditioning import EmptyConditioningGenerator EmptyCondGen = EmptyConditioningGenerator(work_model) positive, negative = EmptyCondGen.zero_none_conditionings_([positive, negative]) if cfg < 0: sampler.extra_options['cfg_cw'] = -cfg cfg = 1.0 else: sampler.extra_options.pop("cfg_cw", None) is_nested_input = latent_image is not None and 'samples' in latent_image and isinstance(latent_image['samples'], comfy.nested_tensor.NestedTensor) if not EO("disable_dummy_sampler_init") and not is_nested_input: sampler_null = comfy.samplers.ksampler("rk_beta", { "sampler_mode": "NULL", }) if latent_image is not None and 'samples' in latent_image: latent_vram_factor = EO("latent_vram_factor", 3) x_null = torch.zeros_like(latent_image['samples']).repeat_interleave(latent_vram_factor, dim=-1) elif ultracascade_latent_height * ultracascade_latent_width > 0: x_null = comfy.sample.fix_empty_latent_channels(model, torch.zeros((1,16,ultracascade_latent_height,ultracascade_latent_width))) else: print("Fallback: spawning dummy 1,16,256,256 latent.") x_null = comfy.sample.fix_empty_latent_channels(model, torch.zeros((1,16,256,256))) _ = comfy.sample.sample_custom(work_model, x_null, cfg, sampler_null, torch.linspace(1, 0, 10).to(x_null.dtype).to(x_null.device), negative, negative, x_null, noise_mask=None, callback=None, disable_pbar=disable_pbar, seed=noise_seed) sigma_min = work_model.get_model_object('model_sampling').sigma_min sigma_max = work_model.get_model_object('model_sampling').sigma_max if sampler is None: raise ValueError("sampler is required") else: sampler = copy.deepcopy(sampler) # INIT SIGMAS if sigmas is not None: sigmas = sigmas.clone().to(dtype=default_dtype, device=default_device) # does this type carry into clown after 
passing through comfy? sigmas *= denoise # ... otherwise we have to interpolate and that might not be ideal for tiny custom schedules... else: sigmas = get_sigmas(work_model, scheduler, steps, abs(denoise)).to(dtype=default_dtype, device=default_device) sigmas *= denoise_alt # USE NULL FLOATS AS "FLAGS" TO PREVENT COMFY NOISE ADDITION if sampler_mode.startswith("unsample"): null = torch.tensor([0.0], device=sigmas.device, dtype=sigmas.dtype) sigmas = torch.flip(sigmas, dims=[0]) sigmas = torch.cat([sigmas, null]) elif sampler_mode.startswith("resample"): null = torch.tensor([0.0], device=sigmas.device, dtype=sigmas.dtype) sigmas = torch.cat([null, sigmas]) sigmas = torch.cat([sigmas, null]) latent_x = {} # INIT STATE INFO FOR CONTINUING GENERATION ACROSS MULTIPLE SAMPLER NODES if latent_image is not None: samples = latent_image['samples'] latent_x['samples'] = samples._copy() if isinstance(samples, comfy.nested_tensor.NestedTensor) else samples.clone() if 'noise_mask' in latent_image: noise_mask = latent_image['noise_mask'] latent_x['noise_mask'] = noise_mask._copy() if isinstance(noise_mask, comfy.nested_tensor.NestedTensor) else noise_mask.clone() state_info = copy.deepcopy(latent_image['state_info']) if 'state_info' in latent_image else {} else: state_info = {} state_info_out = {} # SETUP CONDITIONING EMBEDS pos_cond = copy_cond(positive) neg_cond = copy_cond(negative) # SETUP FOR ULTRACASCADE IF DETECTED if work_model.model.model_config.unet_config.get('stable_cascade_stage') == 'up': ultracascade_guide_weight = EO("ultracascade_guide_weight", 0.0) ultracascade_guide_type = EO("ultracascade_guide_type", "residual") x_lr = None if ultracascade_latent_height * ultracascade_latent_width > 0: x_lr = latent_image['samples'].clone() if latent_image is not None else None x_lr_bs = 1 if x_lr is None else x_lr.shape[-4] x_lr_dtype = default_dtype if x_lr is None else x_lr.dtype x_lr_device = 'cuda' if x_lr is None else x_lr.device 
ultracascade_stage_up_upscale_align_corners = EO("ultracascade_stage_up_upscale_align_corners", False) ultracascade_stage_up_upscale_mode = EO("ultracascade_stage_up_upscale_mode", "bicubic") latent_x['samples'] = torch.zeros([x_lr_bs, 16, ultracascade_latent_height, ultracascade_latent_width], dtype=x_lr_dtype, device=x_lr_device) data_prev_ = state_info.get('data_prev_') if EO("ultracascade_stage_up_preserve_data_prev") and data_prev_ is not None: data_prev_ = data_prev_.squeeze(1) if data_prev_.dim() == 4: data_prev_ = F.interpolate( data_prev_, size=latent_x['samples'].shape[-2:], mode=ultracascade_stage_up_upscale_mode, align_corners=ultracascade_stage_up_upscale_align_corners ) else: print("data_prev_ upscale failed.") state_info['data_prev_'] = data_prev_.unsqueeze(1) else: state_info['data_prev_'] = data_prev_ #None # = None was leading to errors even with sampler_mode=standard due to below with = state_info['data_prev_'][batch_num] if x_lr is not None: if x_lr.shape[-2:] != latent_image['samples'].shape[-2:]: x_height, x_width = latent_image['samples'].shape[-2:] ultracascade_stage_up_upscale_align_corners = EO("ultracascade_stage_up_upscale_align_corners", False) ultracascade_stage_up_upscale_mode = EO("ultracascade_stage_up_upscale_mode", "bicubic") x_lr = F.interpolate(x_lr, size=(x_height, x_width), mode=ultracascade_stage_up_upscale_mode, align_corners=ultracascade_stage_up_upscale_align_corners) ultracascade_guide_weights = initialize_or_scale(ultracascade_guide_weights, ultracascade_guide_weight, MAX_STEPS) patch = work_model.model_options.get("transformer_options", {}).get("patches_replace", {}).get("ultracascade", {}).get("main") if patch is not None: patch.update(x_lr=x_lr, guide_weights=ultracascade_guide_weights, guide_type=ultracascade_guide_type) else: work_model.model.diffusion_model.set_sigmas_schedule(sigmas_schedule = sigmas) work_model.model.diffusion_model.set_sigmas_prev (sigmas_prev = sigmas[:1]) 
work_model.model.diffusion_model.set_guide_weights (guide_weights = ultracascade_guide_weights) work_model.model.diffusion_model.set_guide_type (guide_type = ultracascade_guide_type) work_model.model.diffusion_model.set_x_lr (x_lr = x_lr) elif work_model.model.model_config.unet_config.get('stable_cascade_stage') == 'b': #if sampler_mode != "resample": # state_info['data_prev_'] = None #commented out as it was throwing an error below with = state_info['data_prev_'][batch_num] c_pos, c_neg = [], [] for t in pos_cond: d_pos = t[1].copy() d_neg = t[1].copy() x_lr = None if ultracascade_latent_height * ultracascade_latent_width > 0: x_lr = latent_image['samples'].clone() latent_x['samples'] = torch.zeros([x_lr.shape[-4], 4, ultracascade_latent_height // 4, ultracascade_latent_width // 4], dtype=x_lr.dtype, device=x_lr.device) d_pos['stable_cascade_prior'] = x_lr pooled_output = d_neg.get("pooled_output", None) if pooled_output is not None: d_neg["pooled_output"] = torch.zeros_like(pooled_output) c_pos.append( [t[0], d_pos]) c_neg.append([torch.zeros_like(t[0]), d_neg]) pos_cond = c_pos neg_cond = c_neg elif ultracascade_latent_height * ultracascade_latent_width > 0: latent_x['samples'] = torch.zeros([1, 16, ultracascade_latent_height, ultracascade_latent_width], dtype=default_dtype, device=sigmas.device) # NOISE, ORTHOGONALIZE, OR ZERO EMBEDS if pos_cond is None or neg_cond is None: from ..conditioning import EmptyConditioningGenerator EmptyCondGen = EmptyConditioningGenerator(work_model) pos_cond, neg_cond = EmptyCondGen.zero_none_conditionings_([pos_cond, neg_cond]) if EO(("cond_noise", "uncond_noise")): if noise_seed == -1: cond_seed = torch.initial_seed() + 1 else: cond_seed = noise_seed t5_seed = EO("t5_seed" , cond_seed) clip_seed = EO("clip_seed" , cond_seed+1) t5_noise_type = EO("t5_noise_type" , "gaussian") clip_noise_type = EO("clip_noise_type" , "gaussian") t5_noise_sigma_max = EO("t5_noise_sigma_max" , "gaussian") t5_noise_sigma_min = EO("t5_noise_sigma_min" 
, "gaussian") clip_noise_sigma_max = EO("clip_noise_sigma_max", "gaussian") clip_noise_sigma_min = EO("clip_noise_sigma_min", "gaussian") noise_sampler_t5 = NOISE_GENERATOR_CLASSES_SIMPLE.get( t5_noise_type)(x=pos_cond[0][0], seed= t5_seed, sigma_max= t5_noise_sigma_max, sigma_min= t5_noise_sigma_min, ) noise_sampler_clip = NOISE_GENERATOR_CLASSES_SIMPLE.get(clip_noise_type)(x=pos_cond[0][1]['pooled_output'], seed=clip_seed, sigma_max=clip_noise_sigma_max, sigma_min=clip_noise_sigma_min, ) t5_noise_scale = EO("t5_noise_scale", 1.0) clip_noise_scale = EO("clip_noise_scale", 1.0) if EO("cond_noise"): t5_noise = noise_sampler_t5 (sigma= t5_noise_sigma_max, sigma_next= t5_noise_sigma_min) clip_noise = noise_sampler_clip(sigma=clip_noise_sigma_max, sigma_next=clip_noise_sigma_min) pos_cond[0][0] = pos_cond[0][0] + t5_noise_scale * (t5_noise - pos_cond[0][0]) pos_cond[0][1]['pooled_output'] = pos_cond[0][1]['pooled_output'] + clip_noise_scale * (clip_noise - pos_cond[0][1]['pooled_output']) if EO("uncond_noise"): t5_noise = noise_sampler_t5 (sigma= t5_noise_sigma_max, sigma_next= t5_noise_sigma_min) clip_noise = noise_sampler_clip(sigma=clip_noise_sigma_max, sigma_next=clip_noise_sigma_min) neg_cond[0][0] = neg_cond[0][0] + t5_noise_scale * (t5_noise - neg_cond[0][0]) neg_cond[0][1]['pooled_output'] = neg_cond[0][1]['pooled_output'] + clip_noise_scale * (clip_noise - neg_cond[0][1]['pooled_output']) if EO("uncond_ortho"): neg_cond[0][0] = get_orthogonal(neg_cond[0][0], pos_cond[0][0]) neg_cond[0][1]['pooled_output'] = get_orthogonal(neg_cond[0][1]['pooled_output'], pos_cond[0][1]['pooled_output']) if "noise_seed" in sampler.extra_options: if sampler.extra_options['noise_seed'] == -1 and noise_seed != -1: sampler.extra_options['noise_seed'] = noise_seed + 1 RESplain("Shark: setting clown noise seed to: ", sampler.extra_options['noise_seed'], debug=True) if "sampler_mode" in sampler.extra_options: sampler.extra_options['sampler_mode'] = sampler_mode if "extra_options" in 
sampler.extra_options: extra_options += "\n" extra_options += sampler.extra_options['extra_options'] sampler.extra_options['extra_options'] = extra_options samples = latent_x['samples'] latent_image_batch = {"samples": samples._copy() if isinstance(samples, comfy.nested_tensor.NestedTensor) else samples.clone()} if 'noise_mask' in latent_x and latent_x['noise_mask'] is not None: noise_mask = latent_x['noise_mask'] latent_image_batch['noise_mask'] = noise_mask._copy() if isinstance(noise_mask, comfy.nested_tensor.NestedTensor) else noise_mask.clone() if EO("no_batch_loop"): x = latent_image_batch['samples'].to(default_dtype) if isinstance(x, comfy.nested_tensor.NestedTensor): noise = comfy.nested_tensor.NestedTensor([ generate_init_noise( x=t.clone(), seed=noise_seed + idx, noise_type_init=noise_type_init, noise_stdev=noise_stdev, noise_mean=noise_mean, noise_normalize=noise_normalize, sigma_max=sigma_max, sigma_min=sigma_min, alpha_init=alpha_init, k_init=k_init, EO=EO ) for idx, t in enumerate(x.unbind()) ]) else: noise = generate_init_noise( x=x.clone(), seed=noise_seed, noise_type_init=noise_type_init, noise_stdev=noise_stdev, noise_mean=noise_mean, noise_normalize=noise_normalize, sigma_max=sigma_max, sigma_min=sigma_min, alpha_init=alpha_init, k_init=k_init, EO=EO ) if guider is None: guider = SharkGuider(work_model) flow_cond = options_mgr.get('flow_cond', {}) if flow_cond and 'yt_positive' in flow_cond: if 'yt_inv_positive' not in flow_cond: guider.set_conds(yt_positive=flow_cond.get('yt_positive'), yt_negative=flow_cond.get('yt_negative')) guider.set_cfgs(yt=flow_cond.get('yt_cfg'), xt=cfg) else: guider.set_conds(yt_positive=flow_cond.get('yt_positive'), yt_negative=flow_cond.get('yt_negative'), yt_inv_positive=flow_cond.get('yt_inv_positive'), yt_inv_negative=flow_cond.get('yt_inv_negative')) guider.set_cfgs(yt=flow_cond.get('yt_cfg'), yt_inv=flow_cond.get('yt_inv_cfg'), xt=cfg) else: guider.set_cfgs(xt=cfg) guider.set_conds(xt_positive=pos_cond, 
xt_negative=neg_cond) elif type(guider) == SharkGuider: guider.set_cfgs(xt=cfg) guider.set_conds(xt_positive=pos_cond, xt_negative=neg_cond) else: try: guider.set_cfg(cfg) guider.set_conds(pos_cond, neg_cond) except: pass if latent_image is not None and 'state_info' in latent_image and 'sigmas' in latent_image['state_info']: steps_len = max(sigmas.shape[-1] - 1, latent_image['state_info']['sigmas'].shape[-1] - 1) else: steps_len = sigmas.shape[-1] - 1 x0_output = {} try: callback = latent_preview.prepare_callback(work_model, steps_len, x0_output, shape=x.shape if hasattr(x, 'is_nested') and x.is_nested else None) except TypeError: callback = latent_preview.prepare_callback(work_model, steps_len, x0_output) noise_mask = latent_image_batch.get("noise_mask", None) if noise_mask is not None: stored_image = state_info.get('image_initial') x_initial = stored_image if stored_image is not None else x stored_noise = state_info.get('noise_initial') noise_initial = stored_noise if stored_noise is not None else noise else: x_initial = x noise_initial = noise state_info_out = {} if 'BONGMATH' in sampler.extra_options: sampler.extra_options['state_info'] = state_info sampler.extra_options['state_info_out'] = state_info_out sampler.extra_options['image_initial'] = x_initial sampler.extra_options['noise_initial'] = noise_initial if rebounds > 0: cfgs_cached = guider.cfgs steps_to_run_cached = sampler.extra_options['steps_to_run'] eta_cached = sampler.extra_options['eta'] eta_substep_cached = sampler.extra_options['eta_substep'] etas_cached = sampler.extra_options['etas'].clone() etas_substep_cached = sampler.extra_options['etas_substep'].clone() unsample_etas = torch.full_like(etas_cached, unsample_eta) rk_type_cached = sampler.extra_options['rk_type'] if sampler.extra_options['sampler_mode'] == "unsample": guider.cfgs = { 'xt': unsample_cfg, 'yt': unsample_cfg, } if unsample_eta != -1.0: sampler.extra_options['eta_substep'] = unsample_eta sampler.extra_options['eta'] = 
unsample_eta sampler.extra_options['etas_substep'] = unsample_etas sampler.extra_options['etas'] = unsample_etas if unsampler_name != "none": sampler.extra_options['rk_type'] = unsampler_name if unsample_steps_to_run > -1: sampler.extra_options['steps_to_run'] = unsample_steps_to_run else: guider.cfgs = cfgs_cached guider.cfgs = cfgs_cached sampler.extra_options['steps_to_run'] = steps_to_run_cached eta_decay = eta_cached eta_substep_decay = eta_substep_cached unsample_eta_decay = unsample_eta etas_decay = etas_cached etas_substep_decay = etas_substep_cached unsample_etas_decay = unsample_etas if isinstance(x, comfy.nested_tensor.NestedTensor): samples = guider.sample(noise, x._copy(), sampler, sigmas, denoise_mask=noise_mask, callback=callback, disable_pbar=disable_pbar, seed=noise_seed) else: samples = guider.sample(noise, x.clone(), sampler, sigmas, denoise_mask=noise_mask, callback=callback, disable_pbar=disable_pbar, seed=noise_seed) if rebounds > 0: noise_seed_cached = sampler.extra_options['noise_seed'] cfgs_cached = guider.cfgs sampler_mode_cached = sampler.extra_options['sampler_mode'] for restarts_iter in range(rebounds): sampler.extra_options['state_info'] = sampler.extra_options['state_info_out'] sigmas = sampler.extra_options['state_info_out']['sigmas'] if sigmas is None else sigmas if sampler.extra_options['sampler_mode'] == "standard": sampler.extra_options['sampler_mode'] = "unsample" elif sampler.extra_options['sampler_mode'] == "unsample": sampler.extra_options['sampler_mode'] = "resample" elif sampler.extra_options['sampler_mode'] == "resample": sampler.extra_options['sampler_mode'] = "unsample" sampler.extra_options['noise_seed'] = -1 if sampler.extra_options['sampler_mode'] == "unsample": guider.cfgs = { 'xt': unsample_cfg, 'yt': unsample_cfg, } if unsample_eta != -1.0: sampler.extra_options['eta_substep'] = unsample_eta_decay sampler.extra_options['eta'] = unsample_eta_decay sampler.extra_options['etas_substep'] = unsample_etas 
sampler.extra_options['etas'] = unsample_etas else: sampler.extra_options['eta_substep'] = eta_substep_decay sampler.extra_options['eta'] = eta_decay sampler.extra_options['etas_substep'] = etas_substep_decay sampler.extra_options['etas'] = etas_decay if unsampler_name != "none": sampler.extra_options['rk_type'] = unsampler_name if unsample_steps_to_run > -1: sampler.extra_options['steps_to_run'] = unsample_steps_to_run else: guider.cfgs = cfgs_cached sampler.extra_options['eta_substep'] = eta_substep_decay sampler.extra_options['eta'] = eta_decay sampler.extra_options['etas_substep'] = etas_substep_decay sampler.extra_options['etas'] = etas_decay sampler.extra_options['rk_type'] = rk_type_cached sampler.extra_options['steps_to_run'] = steps_to_run_cached samples = guider.sample(noise, samples.clone(), sampler, sigmas, denoise_mask=noise_mask, callback=callback, disable_pbar=disable_pbar, seed=-1) eta_substep_decay *= eta_decay_scale eta_decay *= eta_decay_scale unsample_eta_decay *= eta_decay_scale etas_substep_decay *= eta_decay_scale etas_decay *= eta_decay_scale unsample_etas_decay *= eta_decay_scale sampler.extra_options['noise_seed'] = noise_seed_cached guider.cfgs = cfgs_cached sampler.extra_options['sampler_mode'] = sampler_mode_cached sampler.extra_options['eta_substep'] = eta_substep_cached sampler.extra_options['eta'] = eta_cached sampler.extra_options['etas_substep'] = etas_substep_cached sampler.extra_options['etas'] = etas_cached if noise_mask is not None: if hasattr(samples, 'is_nested') and samples.is_nested: blended = [] x_initial_list = x_initial.unbind() if hasattr(x_initial, 'is_nested') and x_initial.is_nested else [x_initial] if hasattr(noise_mask, 'is_nested') and noise_mask.is_nested: mask_list = noise_mask.unbind() else: mask_list = [noise_mask] for idx, s in enumerate(samples.unbind()): xi = x_initial_list[idx] if idx < len(x_initial_list) else x_initial_list[0] m = mask_list[idx] if idx < len(mask_list) else mask_list[0] if s.ndim == 
m.ndim: reshaped_mask = comfy.utils.reshape_mask(m, s.shape).to(s.device) blended.append(s * reshaped_mask + xi.to(s.device) * (1.0 - reshaped_mask)) else: blended.append(s) samples = comfy.nested_tensor.NestedTensor(blended) else: if hasattr(noise_mask, 'is_nested') and noise_mask.is_nested: noise_mask = noise_mask.unbind()[0] reshaped_mask = comfy.utils.reshape_mask(noise_mask, samples.shape).to(samples.device) samples = samples * reshaped_mask + x_initial.to(samples.device) * (1.0 - reshaped_mask) samples = samples.to(comfy.model_management.intermediate_device()) out = latent_x.copy() out["samples"] = samples if "x0" in x0_output: x0_out = work_model.model.process_latent_out(x0_output["x0"].cpu()) if hasattr(samples, 'is_nested') and samples.is_nested: latent_shapes = [t.shape for t in samples.unbind()] x0_out = comfy.nested_tensor.NestedTensor( comfy.utils.unpack_latents(x0_out, latent_shapes) ) out_denoised = latent_x.copy() out_denoised["samples"] = x0_out else: out_denoised = out out['positive'] = positive out['negative'] = negative out['model'] = work_model out['sampler'] = sampler if noise_mask is not None: state_info_out['image_initial'] = x_initial state_info_out['noise_initial'] = noise_initial out['state_info'] = state_info_out return (out, out_denoised, None) out_samples = [] out_denoised_samples = [] out_state_info = [] for batch_num in range(latent_image_batch['samples'].shape[0]): latent_unbatch = copy.deepcopy(latent_x) if isinstance(latent_image_batch['samples'][batch_num], comfy.nested_tensor.NestedTensor): latent_unbatch['samples'] = latent_image_batch['samples'][batch_num]._copy() else: latent_unbatch['samples'] = latent_image_batch['samples'][batch_num].clone().unsqueeze(0) if 'BONGMATH' in sampler.extra_options: sampler.extra_options['batch_num'] = batch_num if noise_seed == -1 and sampler_mode in {"unsample", "resample"}: if latent_image.get('state_info', {}).get('last_rng', None) is not None: seed = torch.initial_seed() + batch_num else: 
seed = torch.initial_seed() + 1 + batch_num else: if EO("lock_batch_seed"): seed = noise_seed else: seed = noise_seed + batch_num torch .manual_seed(seed) torch.cuda.manual_seed(seed) if hasattr(latent_unbatch["samples"], 'is_nested') and latent_unbatch["samples"].is_nested: x = latent_unbatch["samples"]._copy().to(default_dtype) else: x = latent_unbatch["samples"].clone().to(default_dtype) # does this type carry into clown after passing through comfy? if sde_noise is None and sampler_mode.startswith("unsample"): sde_noise = [] else: sde_noise_steps = 1 for total_steps_iter in range (sde_noise_steps): if noise_type_init != "none" and noise_stdev != 0.0: RESplain("Initial latent noise seed: ", seed, debug=True) noise = generate_init_noise( x=x, seed=seed, noise_type_init=noise_type_init, noise_stdev=noise_stdev, noise_mean=noise_mean, noise_normalize=noise_normalize, sigma_max=sigma_max, sigma_min=sigma_min, alpha_init=alpha_init, k_init=k_init, EO=EO ) noise_mask = latent_unbatch["noise_mask"] if "noise_mask" in latent_unbatch else None x_input = x if noise_mask is not None and 'noise_initial' in state_info: stored_noise = state_info.get('noise_initial') if stored_noise is not None: if stored_noise.dim() > 3 and stored_noise.shape[0] > batch_num: stored_noise = stored_noise[batch_num] if stored_noise.shape == noise.shape: noise = stored_noise.to(noise.device, dtype=noise.dtype) RESplain("Using stored noise_initial from previous sampler", debug=True) stored_image = state_info.get('image_initial') if stored_image is not None: if stored_image.dim() > 3 and stored_image.shape[0] > batch_num: stored_image = stored_image[batch_num] if stored_image.shape == x.shape: x_input = stored_image.to(x.device, dtype=x.dtype) RESplain("Using stored image_initial from previous sampler", debug=True) if 'BONGMATH' in sampler.extra_options: sampler.extra_options['noise_initial'] = noise sampler.extra_options['image_initial'] = x_input x0_output = {} if latent_image is not None and 
'state_info' in latent_image and 'sigmas' in latent_image['state_info']: steps_len = max(sigmas.shape[-1] - 1, latent_image['state_info']['sigmas'].shape[-1]-1) else: steps_len = sigmas.shape[-1]-1 callback = latent_preview.prepare_callback(work_model, steps_len, x0_output) if 'BONGMATH' in sampler.extra_options: # verify the sampler is rk_sampler_beta() sampler.extra_options['state_info'] = copy.deepcopy(state_info) ############################## if state_info != {} and state_info != {'data_prev_': None}: #second condition is for ultracascade sampler.extra_options['state_info']['raw_x'] = state_info['raw_x'] [batch_num] sampler.extra_options['state_info']['data_prev_'] = state_info['data_prev_'] [batch_num] sampler.extra_options['state_info']['last_rng'] = state_info['last_rng'] [batch_num] sampler.extra_options['state_info']['last_rng_substep'] = state_info['last_rng_substep'][batch_num] if 'image_initial' in state_info and state_info['image_initial'].dim() > 3: sampler.extra_options['state_info']['image_initial'] = state_info['image_initial'][batch_num] if 'noise_initial' in state_info and state_info['noise_initial'].dim() > 3: sampler.extra_options['state_info']['noise_initial'] = state_info['noise_initial'][batch_num] #state_info = copy.deepcopy(latent_image['state_info']) if 'state_info' in latent_image else {} state_info_out = {} sampler.extra_options['state_info_out'] = state_info_out if type(pos_cond[0][0]) == list: pos_cond_tmp = pos_cond[batch_num] positive_tmp = positive[batch_num] else: pos_cond_tmp = pos_cond positive_tmp = positive for i in range(len(neg_cond)): # crude fix for copy.deepcopy converting superclass into real object if 'control' in neg_cond[i][1]: neg_cond[i][1]['control'] = negative[i][1]['control'] if hasattr(negative[i][1]['control'], 'base'): neg_cond[i][1]['control'].base = negative[i][1]['control'].base for i in range(len(pos_cond_tmp)): # crude fix for copy.deepcopy converting superclass into real object if 'control' in 
pos_cond_tmp[i][1]: pos_cond_tmp[i][1]['control'] = positive_tmp[i][1]['control'] if hasattr(positive[i][1]['control'], 'base'): pos_cond_tmp[i][1]['control'].base = positive_tmp[i][1]['control'].base # SETUP REGIONAL COND if pos_cond_tmp[0][1] is not None: if 'callback_regional' in pos_cond_tmp[0][1]: pos_cond_tmp = pos_cond_tmp[0][1]['callback_regional'](work_model) if 'AttnMask' in pos_cond_tmp[0][1]: sampler.extra_options['AttnMask'] = pos_cond_tmp[0][1]['AttnMask'] sampler.extra_options['RegContext'] = pos_cond_tmp[0][1]['RegContext'] sampler.extra_options['RegParam'] = pos_cond_tmp[0][1]['RegParam'] if isinstance(model.model.model_config, (comfy.supported_models.SDXL, comfy.supported_models.SD15)): latent_up_dummy = F.interpolate(latent_image['samples'].to(torch.float16), size=(latent_image['samples'].shape[-2] * 2, latent_image['samples'].shape[-1] * 2), mode="nearest") sampler.extra_options['AttnMask'].set_latent(latent_up_dummy) sampler.extra_options['AttnMask'].generate() sampler.extra_options['AttnMask'].mask_up = sampler.extra_options['AttnMask'].attn_mask.mask latent_down_dummy = F.interpolate(latent_image['samples'].to(torch.float16), size=(latent_image['samples'].shape[-2] // 2, latent_image['samples'].shape[-1] // 2), mode="nearest") sampler.extra_options['AttnMask'].set_latent(latent_down_dummy) sampler.extra_options['AttnMask'].generate() sampler.extra_options['AttnMask'].mask_down = sampler.extra_options['AttnMask'].attn_mask.mask if isinstance(model.model.model_config, comfy.supported_models.SD15): latent_down_dummy = F.interpolate(latent_image['samples'].to(torch.float16), size=(latent_image['samples'].shape[-2] // 4, latent_image['samples'].shape[-1] // 4), mode="nearest") sampler.extra_options['AttnMask'].set_latent(latent_down_dummy) sampler.extra_options['AttnMask'].generate() sampler.extra_options['AttnMask'].mask_down2 = sampler.extra_options['AttnMask'].attn_mask.mask if isinstance(model.model.model_config, 
(comfy.supported_models.Stable_Cascade_C)): latent_up_dummy = F.interpolate(latent_image['samples'].to(torch.float16), size=(latent_image['samples'].shape[-2] * 2, latent_image['samples'].shape[-1] * 2), mode="nearest") sampler.extra_options['AttnMask'].set_latent(latent_up_dummy) # cascade concats 4 + 4 tokens (clip_text_pooled, clip_img) sampler.extra_options['AttnMask'].context_lens = [context_len + 8 for context_len in sampler.extra_options['AttnMask'].context_lens] sampler.extra_options['AttnMask'].text_len = sum(sampler.extra_options['AttnMask'].context_lens) else: sampler.extra_options['AttnMask'].set_latent(latent_image['samples']) sampler.extra_options['AttnMask'].generate() if neg_cond[0][1] is not None: if 'callback_regional' in neg_cond[0][1]: neg_cond = neg_cond[0][1]['callback_regional'](work_model) if 'AttnMask' in neg_cond[0][1]: sampler.extra_options['AttnMask_neg'] = neg_cond[0][1]['AttnMask'] sampler.extra_options['RegContext_neg'] = neg_cond[0][1]['RegContext'] sampler.extra_options['RegParam_neg'] = neg_cond[0][1]['RegParam'] if isinstance(model.model.model_config, (comfy.supported_models.SDXL, comfy.supported_models.SD15)): latent_up_dummy = F.interpolate(latent_image['samples'].to(torch.float16), size=(latent_image['samples'].shape[-2] * 2, latent_image['samples'].shape[-1] * 2), mode="nearest") sampler.extra_options['AttnMask_neg'].set_latent(latent_up_dummy) sampler.extra_options['AttnMask_neg'].generate() sampler.extra_options['AttnMask_neg'].mask_up = sampler.extra_options['AttnMask_neg'].attn_mask.mask latent_down_dummy = F.interpolate(latent_image['samples'].to(torch.float16), size=(latent_image['samples'].shape[-2] // 2, latent_image['samples'].shape[-1] // 2), mode="nearest") sampler.extra_options['AttnMask_neg'].set_latent(latent_down_dummy) sampler.extra_options['AttnMask_neg'].generate() sampler.extra_options['AttnMask_neg'].mask_down = sampler.extra_options['AttnMask_neg'].attn_mask.mask if isinstance(model.model.model_config, 
comfy.supported_models.SD15): latent_down_dummy = F.interpolate(latent_image['samples'].to(torch.float16), size=(latent_image['samples'].shape[-2] // 4, latent_image['samples'].shape[-1] // 4), mode="nearest") sampler.extra_options['AttnMask_neg'].set_latent(latent_down_dummy) sampler.extra_options['AttnMask_neg'].generate() sampler.extra_options['AttnMask_neg'].mask_down2 = sampler.extra_options['AttnMask_neg'].attn_mask.mask if isinstance(model.model.model_config, (comfy.supported_models.Stable_Cascade_C)): latent_up_dummy = F.interpolate(latent_image['samples'].to(torch.float16), size=(latent_image['samples'].shape[-2] * 2, latent_image['samples'].shape[-1] * 2), mode="nearest") sampler.extra_options['AttnMask'].set_latent(latent_up_dummy) # cascade concats 4 + 4 tokens (clip_text_pooled, clip_img) sampler.extra_options['AttnMask'].context_lens = [context_len + 8 for context_len in sampler.extra_options['AttnMask'].context_lens] sampler.extra_options['AttnMask'].text_len = sum(sampler.extra_options['AttnMask'].context_lens) else: sampler.extra_options['AttnMask_neg'].set_latent(latent_image['samples']) sampler.extra_options['AttnMask_neg'].generate() if guider is None: guider = SharkGuider(work_model) flow_cond = options_mgr.get('flow_cond', {}) if flow_cond != {} and 'yt_positive' in flow_cond and not 'yt_inv_positive' in flow_cond: #and not 'yt_inv;_positive' in flow_cond: # typo??? 
guider.set_conds(yt_positive=flow_cond.get('yt_positive'), yt_negative=flow_cond.get('yt_negative'),) guider.set_cfgs(yt=flow_cond.get('yt_cfg'), xt=cfg) elif flow_cond != {} and 'yt_positive' in flow_cond and 'yt_inv_positive' in flow_cond: guider.set_conds(yt_positive=flow_cond.get('yt_positive'), yt_negative=flow_cond.get('yt_negative'), yt_inv_positive=flow_cond.get('yt_inv_positive'), yt_inv_negative=flow_cond.get('yt_inv_negative'),) guider.set_cfgs(yt=flow_cond.get('yt_cfg'), yt_inv=flow_cond.get('yt_inv_cfg'), xt=cfg) else: guider.set_cfgs(xt=cfg) guider.set_conds(xt_positive=pos_cond_tmp, xt_negative=neg_cond) elif type(guider) == SharkGuider: guider.set_cfgs(xt=cfg) guider.set_conds(xt_positive=pos_cond_tmp, xt_negative=neg_cond) else: try: guider.set_cfg(cfg) except: RESplain("SharkWarning: guider.set_cfg failed but assuming cfg already set correctly.") try: guider.set_conds(pos_cond_tmp, neg_cond) except: RESplain("SharkWarning: guider.set_conds failed but assuming conds already set correctly.") if rebounds > 0: cfgs_cached = guider.cfgs steps_to_run_cached = sampler.extra_options['steps_to_run'] eta_cached = sampler.extra_options['eta'] eta_substep_cached = sampler.extra_options['eta_substep'] etas_cached = sampler.extra_options['etas'].clone() etas_substep_cached = sampler.extra_options['etas_substep'].clone() unsample_etas = torch.full_like(etas_cached, unsample_eta) rk_type_cached = sampler.extra_options['rk_type'] if sampler.extra_options['sampler_mode'] == "unsample": guider.cfgs = { 'xt': unsample_cfg, 'yt': unsample_cfg, } if unsample_eta != -1.0: sampler.extra_options['eta_substep'] = unsample_eta sampler.extra_options['eta'] = unsample_eta sampler.extra_options['etas_substep'] = unsample_etas sampler.extra_options['etas'] = unsample_etas if unsampler_name != "none": sampler.extra_options['rk_type'] = unsampler_name if unsample_steps_to_run > -1: sampler.extra_options['steps_to_run'] = unsample_steps_to_run else: guider.cfgs = cfgs_cached 
guider.cfgs = cfgs_cached sampler.extra_options['steps_to_run'] = steps_to_run_cached eta_decay = eta_cached eta_substep_decay = eta_substep_cached unsample_eta_decay = unsample_eta etas_decay = etas_cached etas_substep_decay = etas_substep_cached unsample_etas_decay = unsample_etas if isinstance(x_input, comfy.nested_tensor.NestedTensor): samples = guider.sample(noise, x_input._copy(), sampler, sigmas, denoise_mask=noise_mask, callback=callback, disable_pbar=disable_pbar, seed=noise_seed) else: samples = guider.sample(noise, x_input.clone(), sampler, sigmas, denoise_mask=noise_mask, callback=callback, disable_pbar=disable_pbar, seed=noise_seed) if rebounds > 0: noise_seed_cached = sampler.extra_options['noise_seed'] cfgs_cached = guider.cfgs sampler_mode_cached = sampler.extra_options['sampler_mode'] for restarts_iter in range(rebounds): sampler.extra_options['state_info'] = sampler.extra_options['state_info_out'] #steps = sampler.extra_options['state_info_out']['sigmas'].shape[-1] - 3 sigmas = sampler.extra_options['state_info_out']['sigmas'] if sigmas is None else sigmas #if len(sigmas) > 2 and sigmas[1] < sigmas[2] and sampler.extra_options['state_info_out']['sampler_mode'] == "unsample": # and sampler_mode == "resample": # sigmas = torch.flip(sigmas, dims=[0]) if sampler.extra_options['sampler_mode'] == "standard": sampler.extra_options['sampler_mode'] = "unsample" elif sampler.extra_options['sampler_mode'] == "unsample": sampler.extra_options['sampler_mode'] = "resample" elif sampler.extra_options['sampler_mode'] == "resample": sampler.extra_options['sampler_mode'] = "unsample" sampler.extra_options['noise_seed'] = -1 if sampler.extra_options['sampler_mode'] == "unsample": guider.cfgs = { 'xt': unsample_cfg, 'yt': unsample_cfg, } if unsample_eta != -1.0: sampler.extra_options['eta_substep'] = unsample_eta_decay sampler.extra_options['eta'] = unsample_eta_decay sampler.extra_options['etas_substep'] = unsample_etas sampler.extra_options['etas'] = unsample_etas 
else: sampler.extra_options['eta_substep'] = eta_substep_decay sampler.extra_options['eta'] = eta_decay sampler.extra_options['etas_substep'] = etas_substep_decay sampler.extra_options['etas'] = etas_decay if unsampler_name != "none": sampler.extra_options['rk_type'] = unsampler_name if unsample_steps_to_run > -1: sampler.extra_options['steps_to_run'] = unsample_steps_to_run else: guider.cfgs = cfgs_cached sampler.extra_options['eta_substep'] = eta_substep_decay sampler.extra_options['eta'] = eta_decay sampler.extra_options['etas_substep'] = etas_substep_decay sampler.extra_options['etas'] = etas_decay sampler.extra_options['rk_type'] = rk_type_cached sampler.extra_options['steps_to_run'] = steps_to_run_cached samples = guider.sample(noise, samples.clone(), sampler, sigmas, denoise_mask=noise_mask, callback=callback, disable_pbar=disable_pbar, seed=-1) eta_substep_decay *= eta_decay_scale eta_decay *= eta_decay_scale unsample_eta_decay *= eta_decay_scale etas_substep_decay *= eta_decay_scale etas_decay *= eta_decay_scale unsample_etas_decay *= eta_decay_scale sampler.extra_options['noise_seed'] = noise_seed_cached guider.cfgs = cfgs_cached sampler.extra_options['sampler_mode'] = sampler_mode_cached sampler.extra_options['eta_substep'] = eta_substep_cached sampler.extra_options['eta'] = eta_cached sampler.extra_options['etas_substep'] = etas_substep_cached sampler.extra_options['etas'] = etas_cached sampler.extra_options['rk_type'] = rk_type_cached sampler.extra_options['steps_to_run'] = steps_to_run_cached # TODO: verify this is carried on if noise_mask is not None: if 'BONGMATH' in sampler.extra_options: batch_state_info = sampler.extra_options.get('state_info', {}) latent_for_mask = batch_state_info.get('image_initial', x) else: stored_image = state_info.get('image_initial') if stored_image is not None and stored_image.dim() > 3: latent_for_mask = stored_image[batch_num] elif stored_image is not None: latent_for_mask = stored_image else: latent_for_mask = x 
reshaped_mask = comfy.utils.reshape_mask(noise_mask, samples.shape).to(samples.device) samples = samples * reshaped_mask + latent_for_mask.to(samples.device) * (1.0 - reshaped_mask) out = latent_unbatch.copy() out["samples"] = samples if "x0" in x0_output: out_denoised = latent_unbatch.copy() out_denoised["samples"] = work_model.model.process_latent_out(x0_output["x0"].cpu()) else: out_denoised = out out_samples .append(out ["samples"]) out_denoised_samples.append(out_denoised["samples"]) # ACCUMULATE UNSAMPLED SDE NOISE if total_steps_iter > 1: if 'raw_x' in state_info_out: sde_noise_out = state_info_out['raw_x'] else: sde_noise_out = out["samples"] sde_noise.append(normalize_zscore(sde_noise_out, channelwise=True, inplace=True)) out_state_info.append(state_info_out) # INCREMENT BATCH LOOP if not EO("lock_batch_seed"): seed += 1 if latent_image is not None: #needed for ultracascade, where latent_image input is not really used for stage C/first stage if latent_image.get('state_info', {}).get('last_rng', None) is None: torch.manual_seed(seed) gc.collect() # STACK SDE NOISES, SAVE STATE INFO state_info_out = out_state_info[0] if 'raw_x' in out_state_info[0]: state_info_out['raw_x'] = torch.stack([out_state_info[_]['raw_x'] for _ in range(len(out_state_info))]) state_info_out['data_prev_'] = torch.stack([out_state_info[_]['data_prev_'] for _ in range(len(out_state_info))]) state_info_out['last_rng'] = torch.stack([out_state_info[_]['last_rng'] for _ in range(len(out_state_info))]) state_info_out['last_rng_substep'] = torch.stack([out_state_info[_]['last_rng_substep'] for _ in range(len(out_state_info))]) if 'image_initial' in out_state_info[0]: state_info_out['image_initial'] = torch.stack([out_state_info[_]['image_initial'] for _ in range(len(out_state_info))]) if 'noise_initial' in out_state_info[0]: state_info_out['noise_initial'] = torch.stack([out_state_info[_]['noise_initial'] for _ in range(len(out_state_info))]) elif 'raw_x' in state_info: state_info_out = 
class SharkSampler_Beta:
    """Sampling node that forwards its inputs to ``SharkSampler().main``.

    Conditioning, sampler and model that are not passed in directly are
    picked up from the incoming latent dict when it carries them.
    """

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "scheduler":    (get_res4lyf_scheduler_list(), {"default": "beta57"},),
                "steps":        ("INT",   {"default": 30,  "min": 1,        "max": 10000.0}),
                "steps_to_run": ("INT",   {"default": -1,  "min": -1,       "max": MAX_STEPS}),
                "denoise":      ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step":0.01}),
                "cfg":          ("FLOAT", {"default": 5.5, "min": -10000.0, "max": 10000.0, "step":0.01, "round": False, "tooltip": "Negative values use channelwise CFG." }),
                "seed":         ("INT",   {"default": 0,   "min": -1,       "max": 0xffffffffffffffff}),
                "sampler_mode": (['unsample', 'standard', 'resample'], {"default": "standard"}),
            },
            "optional": {
                "model":        ("MODEL",),
                "positive":     ("CONDITIONING", ),
                "negative":     ("CONDITIONING", ),
                "sampler":      ("SAMPLER", ),
                "sigmas":       ("SIGMAS", ),
                "latent_image": ("LATENT", ),
                "options":      ("OPTIONS", ),
            }
        }

    RETURN_TYPES = ("LATENT", "LATENT", "OPTIONS",)
    RETURN_NAMES = ("output", "denoised", "options",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/samplers"

    def main(self,
            model                                  = None,
            cfg             : float                = 5.5,
            scheduler       : str                  = "beta57",
            steps           : int                  = 30,
            steps_to_run    : int                  = -1,
            sampler_mode    : str                  = "standard",
            denoise         : float                = 1.0,
            denoise_alt     : float                = 1.0,
            noise_type_init : str                  = "gaussian",
            latent_image    : Optional[dict[Tensor]] = None,
            positive                               = None,
            negative                               = None,
            sampler                                = None,
            sigmas          : Optional[Tensor]     = None,
            noise_stdev     : float                = 1.0,
            noise_mean      : float                = 0.0,
            noise_normalize : bool                 = True,
            d_noise         : float                = 1.0,
            alpha_init      : float                = -1.0,
            k_init          : float                = 1.0,
            cfgpp           : float                = 0.0,
            seed            : int                  = -1,
            options                                = None,
            sde_noise                              = None,
            sde_noise_steps : int                  = 1,
            extra_options   : str                  = "",
            **kwargs,
            ):
        """Run ``SharkSampler().main`` with this node's inputs.

        A negative ``denoise`` is treated as a flag: its magnitude is passed
        on as ``denoise_alt`` and ``denoise`` itself is reset to 1.0.

        ``positive``, ``negative``, ``sampler`` and ``model`` left as ``None``
        are filled from ``latent_image`` when it holds entries of the same
        name.

        Returns:
            Tuple ``(output, denoised, options)`` where ``options`` is the
            dict form of an ``OptionsManager`` built from ``options`` and
            ``**kwargs``.
        """
        options_mgr = OptionsManager(options, **kwargs)

        if denoise < 0:
            denoise_alt = -denoise
            denoise     = 1.0

        # latent_image is optional; testing membership on None would raise
        # TypeError, so only inherit from it when it was actually supplied.
        if latent_image is not None:
            if 'positive' in latent_image and positive is None:
                positive = latent_image['positive']
            if 'negative' in latent_image and negative is None:
                negative = latent_image['negative']
            if 'sampler' in latent_image and sampler is None:
                sampler = latent_image['sampler']
            if 'model' in latent_image and model is None:
                model = latent_image['model']

        output, denoised, sde_noise = SharkSampler().main(
            model           = model,
            cfg             = cfg,
            scheduler       = scheduler,
            steps           = steps,
            steps_to_run    = steps_to_run,
            denoise         = denoise,
            latent_image    = latent_image,
            positive        = positive,
            negative        = negative,
            sampler         = sampler,
            cfgpp           = cfgpp,
            noise_seed      = seed,
            options         = options,
            sde_noise       = sde_noise,
            sde_noise_steps = sde_noise_steps,
            noise_type_init = noise_type_init,
            noise_stdev     = noise_stdev,
            sampler_mode    = sampler_mode,
            denoise_alt     = denoise_alt,
            sigmas          = sigmas,
            extra_options   = extra_options)

        return (output, denoised, options_mgr.as_dict())
class SharkChainsampler_Beta(SharkSampler_Beta):
    """Chained variant of ``SharkSampler_Beta``.

    The step count and, unless overridden, the sigma schedule are read from
    the ``state_info`` entry of the incoming latent.
    """

    @classmethod
    def INPUT_TYPES(cls):
        required = {
            "steps_to_run": ("INT", {"default": -1, "min": -1, "max": MAX_STEPS}),
            "cfg": ("FLOAT", {"default": 5.5, "min": -10000.0, "max": 10000.0, "step":0.01, "round": False, "tooltip": "Negative values use channelwise CFG." }),
            "sampler_mode": (['unsample', 'resample'], {"default": "resample"}),
        }
        optional = {
            "model": ("MODEL",),
            "positive": ("CONDITIONING", ),
            "negative": ("CONDITIONING", ),
            "sampler": ("SAMPLER", ),
            "sigmas": ("SIGMAS", ),
            "latent_image": ("LATENT", ),
            "options": ("OPTIONS", ),
        }
        return {"required": required, "optional": optional}

    def main(self,
            model        = None,
            steps_to_run = -1,
            cfg          = 5.5,
            latent_image = None,
            sigmas       = None,
            sampler_mode = "",
            seed : int   = -1,
            **kwargs):
        """Continue sampling from the state stored in ``latent_image``.

        ``steps`` is the length of the stored sigma tensor's last axis minus
        three. The stored sigmas are used when ``sigmas`` is ``None``.
        """
        previous = latent_image['state_info']

        steps = previous['sigmas'].shape[-1] - 3
        if sigmas is None:
            sigmas = previous['sigmas']

        # Reverse the schedule when resampling after an unsample run whose
        # sigmas increase from index 1 to index 2.
        if (
            len(sigmas) > 2
            and sigmas[1] < sigmas[2]
            and previous['sampler_mode'] == "unsample"
            and sampler_mode == "resample"
        ):
            sigmas = torch.flip(sigmas, dims=[0])

        return super().main(
            model=model,
            sampler_mode=sampler_mode,
            steps_to_run=steps_to_run,
            sigmas=sigmas,
            steps=steps,
            cfg=cfg,
            seed=seed,
            latent_image=latent_image,
            **kwargs,
        )
class ClownSamplerAdvanced_Beta:
    """Node that packs sampler settings into an ``rk_beta`` ksampler object.

    For the keys looked up in ``main``, a value present in the ``options``
    input replaces the corresponding widget/argument value.
    """

    @classmethod
    def INPUT_TYPES(cls):
        return {"required":
                    {
                    "noise_type_sde":         (NOISE_GENERATOR_NAMES_SIMPLE, {"default": "gaussian"}),
                    "noise_type_sde_substep": (NOISE_GENERATOR_NAMES_SIMPLE, {"default": "gaussian"}),
                    "noise_mode_sde":         (NOISE_MODE_NAMES,             {"default": 'hard', "tooltip": "How noise scales with the sigma schedule. Hard is the most aggressive, the others start strong and drop rapidly."}),
                    "noise_mode_sde_substep": (NOISE_MODE_NAMES,             {"default": 'hard', "tooltip": "How noise scales with the sigma schedule. Hard is the most aggressive, the others start strong and drop rapidly."}),
                    "overshoot_mode":         (NOISE_MODE_NAMES,             {"default": 'hard', "tooltip": "How step size overshoot scales with the sigma schedule. Hard is the most aggressive, the others start strong and drop rapidly."}),
                    "overshoot_mode_substep": (NOISE_MODE_NAMES,             {"default": 'hard', "tooltip": "How substep size overshoot scales with the sigma schedule. Hard is the most aggressive, the others start strong and drop rapidly."}),
                    "eta":                    ("FLOAT",                      {"default": 0.5, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Calculated noise amount to be added, then removed, after each step."}),
                    "eta_substep":            ("FLOAT",                      {"default": 0.5, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Calculated noise amount to be added, then removed, after each step."}),
                    "overshoot":              ("FLOAT",                      {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Boost the size of each denoising step, then rescale to match the original. Has a softening effect."}),
                    "overshoot_substep":      ("FLOAT",                      {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Boost the size of each denoising substep, then rescale to match the original. Has a softening effect."}),
                    "noise_scaling_weight":   ("FLOAT",                      {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set to positive values to create a sharper, grittier, more detailed image. Set to negative values to soften and deepen the colors."}),
                    "noise_boost_step":       ("FLOAT",                      {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set to positive values to create a sharper, grittier, more detailed image. Set to negative values to soften and deepen the colors."}),
                    "noise_boost_substep":    ("FLOAT",                      {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set to positive values to create a sharper, grittier, more detailed image. Set to negative values to soften and deepen the colors."}),
                    # Tooltip typo fixed: "cerate" -> "create".
                    "noise_anchor":           ("FLOAT",                      {"default": 1.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Typically set to between 1.0 and 0.0. Lower values create a grittier, more detailed image."}),
                    "s_noise":                ("FLOAT",                      {"default": 1.0, "min": -10000, "max": 10000, "step":0.01, "tooltip": "Adds extra SDE noise. Values around 1.03-1.07 can lead to a moderate boost in detail and paint textures."}),
                    "s_noise_substep":        ("FLOAT",                      {"default": 1.0, "min": -10000, "max": 10000, "step":0.01, "tooltip": "Adds extra SDE noise. Values around 1.03-1.07 can lead to a moderate boost in detail and paint textures."}),
                    "d_noise":                ("FLOAT",                      {"default": 1.0, "min": -10000, "max": 10000, "step":0.01, "tooltip": "Downscales the sigma schedule. Values around 0.98-0.95 can lead to a large boost in detail and paint textures."}),
                    "momentum":               ("FLOAT",                      {"default": 1.0, "min": -10000, "max": 10000, "step":0.01, "tooltip": "Accelerate convergence with positive values when sampling, negative values when unsampling."}),
                    "noise_seed_sde":         ("INT",                        {"default": -1, "min": -1, "max": 0xffffffffffffffff}),
                    "sampler_name":           (get_sampler_name_list(),      {"default": get_default_sampler_name()}),
                    "implicit_type":          (IMPLICIT_TYPE_NAMES,          {"default": "predictor-corrector"}),
                    "implicit_type_substeps": (IMPLICIT_TYPE_NAMES,          {"default": "predictor-corrector"}),
                    "implicit_steps":         ("INT",                        {"default": 0, "min": 0, "max": 10000}),
                    "implicit_substeps":      ("INT",                        {"default": 0, "min": 0, "max": 10000}),
                    "bongmath":               ("BOOLEAN",                    {"default": True}),
                    },
                "optional":
                    {
                    "guides":                 ("GUIDES", ),
                    "automation":             ("AUTOMATION", ),
                    "extra_options":          ("STRING",                     {"default": "", "multiline": True}),
                    "options":                ("OPTIONS", ),
                    }
                }

    RETURN_TYPES = ("SAMPLER",)
    RETURN_NAMES = ("sampler", )
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/samplers"
    EXPERIMENTAL = True

    def main(self,
            noise_type_sde                : str              = "gaussian",
            noise_type_sde_substep        : str              = "gaussian",
            noise_mode_sde                : str              = "hard",
            overshoot_mode                : str              = "hard",
            overshoot_mode_substep        : str              = "hard",

            eta                           : float            = 0.5,
            eta_substep                   : float            = 0.5,
            momentum                      : float            = 0.0,

            noise_scaling_weight          : float            = 0.0,
            noise_scaling_type            : str              = "sampler",
            noise_scaling_mode            : str              = "linear",
            noise_scaling_eta             : float            = 0.0,
            noise_scaling_cycles          : int              = 1,

            noise_scaling_weights         : Optional[Tensor] = None,
            noise_scaling_etas            : Optional[Tensor] = None,

            noise_boost_step              : float            = 0.0,
            noise_boost_substep           : float            = 0.0,
            noise_boost_normalize         : bool             = True,
            noise_anchor                  : float            = 1.0,

            s_noise                       : float            = 1.0,
            s_noise_substep               : float            = 1.0,
            d_noise                       : float            = 1.0,
            d_noise_start_step            : int              = 0,
            d_noise_inv                   : float            = 1.0,
            d_noise_inv_start_step        : int              = 0,

            alpha_sde                     : float            = -1.0,
            k_sde                         : float            = 1.0,
            cfgpp                         : float            = 0.0,
            c1                            : float            = 0.0,
            c2                            : float            = 0.5,
            c3                            : float            = 1.0,
            noise_seed_sde                : int              = -1,
            sampler_name                  : str              = "res_2m",
            implicit_sampler_name         : str              = "gauss-legendre_2s",

            implicit_substeps             : int              = 0,
            implicit_steps                : int              = 0,

            rescale_floor                 : bool             = True,
            sigmas_override               : Optional[Tensor] = None,

            guides                                           = None,
            options                                          = None,
            sde_noise                                        = None,
            sde_noise_steps               : int              = 1,

            extra_options                 : str              = "",
            automation                                       = None,
            etas                          : Optional[Tensor] = None,
            etas_substep                  : Optional[Tensor] = None,
            s_noises                      : Optional[Tensor] = None,
            s_noises_substep              : Optional[Tensor] = None,
            epsilon_scales                : Optional[Tensor] = None,
            regional_conditioning_weights : Optional[Tensor] = None,
            frame_weights_mgr                                = None,
            noise_mode_sde_substep        : str              = "hard",

            overshoot                     : float            = 0.0,
            overshoot_substep             : float            = 0.0,

            bongmath                      : bool             = True,

            implicit_type                 : str              = "predictor-corrector",
            implicit_type_substeps        : str              = "predictor-corrector",

            rk_swap_step                  : int              = MAX_STEPS,
            rk_swap_print                 : bool             = False,
            rk_swap_threshold             : float            = 0.0,
            rk_swap_type                  : str              = "",

            steps_to_run                  : int              = -1,

            sde_mask                      : Optional[Tensor] = None,

            **kwargs,
            ):
        """Build the ``rk_beta`` sampler from widget values and options.

        Processing order, as implemented below:

        1. ``extra_options`` is extended with the ``extra_options`` entry
           held by the options manager.
        2. ``sampler_name`` is split by ``process_sampler_name`` into the
           sampler name and the implicit sampler name; the
           ``implicit_sampler_name`` argument is overwritten by that result.
        3. ``noise_mode_sde == "none"`` is rewritten as ``eta = 0.0`` with
           mode ``"hard"``.
        4. Scalar settings are replaced by same-named entries in ``options``.
        5. Per-step tensors (``etas``, ``etas_substep``, ``s_noises``,
           ``s_noises_substep``) come from ``automation`` and/or ``options``,
           are passed through ``initialize_or_scale`` and then right-padded
           by ``MAX_STEPS`` entries.

        Returns:
            One-element tuple holding the object returned by
            ``comfy.samplers.ksampler("rk_beta", ...)``.
        """
        options_mgr = OptionsManager(options, **kwargs)
        extra_options += "\n" + options_mgr.get('extra_options', "")
        EO = ExtraOptions(extra_options)
        default_dtype = EO("default_dtype", torch.float64)

        sampler_name, implicit_sampler_name = process_sampler_name(sampler_name)

        implicit_steps_diag = implicit_substeps
        implicit_steps_full = implicit_steps

        if noise_mode_sde == "none":
            eta = 0.0
            noise_mode_sde = "hard"

        noise_type_sde         = options_mgr.get('noise_type_sde'        , noise_type_sde)
        noise_mode_sde         = options_mgr.get('noise_mode_sde'        , noise_mode_sde)
        eta                    = options_mgr.get('eta'                   , eta)
        eta_substep            = options_mgr.get('eta_substep'           , eta_substep)

        noise_scaling_weight   = options_mgr.get('noise_scaling_weight'  , noise_scaling_weight)
        noise_scaling_type     = options_mgr.get('noise_scaling_type'    , noise_scaling_type)
        noise_scaling_mode     = options_mgr.get('noise_scaling_mode'    , noise_scaling_mode)
        noise_scaling_eta      = options_mgr.get('noise_scaling_eta'     , noise_scaling_eta)
        noise_scaling_cycles   = options_mgr.get('noise_scaling_cycles'  , noise_scaling_cycles)

        noise_scaling_weights  = options_mgr.get('noise_scaling_weights' , noise_scaling_weights)
        noise_scaling_etas     = options_mgr.get('noise_scaling_etas'    , noise_scaling_etas)

        noise_boost_step       = options_mgr.get('noise_boost_step'      , noise_boost_step)
        noise_boost_substep    = options_mgr.get('noise_boost_substep'   , noise_boost_substep)
        noise_boost_normalize  = options_mgr.get('noise_boost_normalize' , noise_boost_normalize)
        noise_anchor           = options_mgr.get('noise_anchor'          , noise_anchor)

        s_noise                = options_mgr.get('s_noise'               , s_noise)
        s_noise_substep        = options_mgr.get('s_noise_substep'       , s_noise_substep)
        d_noise                = options_mgr.get('d_noise'               , d_noise)
        d_noise_start_step     = options_mgr.get('d_noise_start_step'    , d_noise_start_step)
        d_noise_inv            = options_mgr.get('d_noise_inv'           , d_noise_inv)
        d_noise_inv_start_step = options_mgr.get('d_noise_inv_start_step', d_noise_inv_start_step)

        alpha_sde              = options_mgr.get('alpha_sde'             , alpha_sde)
        k_sde                  = options_mgr.get('k_sde'                 , k_sde)
        c1                     = options_mgr.get('c1'                    , c1)
        c2                     = options_mgr.get('c2'                    , c2)
        c3                     = options_mgr.get('c3'                    , c3)

        frame_weights_mgr      = options_mgr.get('frame_weights_mgr'     , frame_weights_mgr)
        sde_noise              = options_mgr.get('sde_noise'             , sde_noise)
        sde_noise_steps        = options_mgr.get('sde_noise_steps'       , sde_noise_steps)

        rk_swap_step           = options_mgr.get('rk_swap_step'          , rk_swap_step)
        rk_swap_print          = options_mgr.get('rk_swap_print'         , rk_swap_print)
        rk_swap_threshold      = options_mgr.get('rk_swap_threshold'     , rk_swap_threshold)
        rk_swap_type           = options_mgr.get('rk_swap_type'          , rk_swap_type)

        steps_to_run           = options_mgr.get('steps_to_run'          , steps_to_run)

        noise_seed_sde         = options_mgr.get('noise_seed_sde'        , noise_seed_sde)
        momentum               = options_mgr.get('momentum'              , momentum)

        sde_mask               = options_mgr.get('sde_mask'              , sde_mask)

        # The rescale_floor argument is replaced by the extra-options flag.
        rescale_floor = EO("rescale_floor")

        if automation is not None:
            # NOTE(review): a key missing from `automation` resets the value
            # to None, discarding whatever was passed as an argument (and,
            # for frame_weights_mgr, whatever was read from options above).
            # Confirm this precedence is intended.
            etas              = automation['etas']              if 'etas'              in automation else None
            etas_substep      = automation['etas_substep']      if 'etas_substep'      in automation else None
            s_noises          = automation['s_noises']          if 's_noises'          in automation else None
            s_noises_substep  = automation['s_noises_substep']  if 's_noises_substep'  in automation else None
            epsilon_scales    = automation['epsilon_scales']    if 'epsilon_scales'    in automation else None
            frame_weights_mgr = automation['frame_weights_mgr'] if 'frame_weights_mgr' in automation else None

        # Options entries win over automation for the per-step tensors.
        etas             = options_mgr.get('etas',             etas)
        etas_substep     = options_mgr.get('etas_substep',     etas_substep)
        s_noises         = options_mgr.get('s_noises',         s_noises)
        s_noises_substep = options_mgr.get('s_noises_substep', s_noises_substep)

        etas             = initialize_or_scale(etas,             eta,             MAX_STEPS).to(default_dtype)
        etas_substep     = initialize_or_scale(etas_substep,     eta_substep,     MAX_STEPS).to(default_dtype)
        s_noises         = initialize_or_scale(s_noises,         s_noise,         MAX_STEPS).to(default_dtype)
        s_noises_substep = initialize_or_scale(s_noises_substep, s_noise_substep, MAX_STEPS).to(default_dtype)

        # Right-pad along the last dimension: 0.0 for etas, 1.0 for s_noises.
        etas             = F.pad(etas,             (0, MAX_STEPS), value=0.0)
        etas_substep     = F.pad(etas_substep,     (0, MAX_STEPS), value=0.0)
        s_noises         = F.pad(s_noises,         (0, MAX_STEPS), value=1.0)
        s_noises_substep = F.pad(s_noises_substep, (0, MAX_STEPS), value=1.0)

        if sde_noise is None:
            sde_noise = []
        else:
            # Work on a copy so the in-place normalisation does not touch
            # the caller's object.
            sde_noise = copy.deepcopy(sde_noise)
            sde_noise = normalize_zscore(sde_noise, channelwise=True, inplace=True)

        # "eta_substep" used to appear twice in this literal; the redundant
        # second entry was removed (resulting dict is unchanged).
        sampler = comfy.samplers.ksampler("rk_beta",
            {
                "eta"                           : eta,
                "eta_substep"                   : eta_substep,

                "alpha"                         : alpha_sde,
                "k"                             : k_sde,
                "c1"                            : c1,
                "c2"                            : c2,
                "c3"                            : c3,
                "cfgpp"                         : cfgpp,

                "noise_sampler_type"            : noise_type_sde,
                "noise_sampler_type_substep"    : noise_type_sde_substep,
                "noise_mode_sde"                : noise_mode_sde,
                "noise_seed"                    : noise_seed_sde,
                "rk_type"                       : sampler_name,
                "implicit_sampler_name"         : implicit_sampler_name,

                "implicit_steps_diag"           : implicit_steps_diag,
                "implicit_steps_full"           : implicit_steps_full,

                "LGW_MASK_RESCALE_MIN"          : rescale_floor,
                "sigmas_override"               : sigmas_override,
                "sde_noise"                     : sde_noise,

                "extra_options"                 : extra_options,
                "sampler_mode"                  : "standard",

                "etas"                          : etas,
                "etas_substep"                  : etas_substep,
                "s_noises"                      : s_noises,
                "s_noises_substep"              : s_noises_substep,
                "epsilon_scales"                : epsilon_scales,
                "regional_conditioning_weights" : regional_conditioning_weights,

                "guides"                        : guides,
                "frame_weights_mgr"             : frame_weights_mgr,
                "noise_mode_sde_substep"        : noise_mode_sde_substep,

                "noise_scaling_weight"          : noise_scaling_weight,
                "noise_scaling_type"            : noise_scaling_type,
                "noise_scaling_mode"            : noise_scaling_mode,
                "noise_scaling_eta"             : noise_scaling_eta,
                "noise_scaling_cycles"          : noise_scaling_cycles,

                "noise_scaling_weights"         : noise_scaling_weights,
                "noise_scaling_etas"            : noise_scaling_etas,

                "noise_boost_step"              : noise_boost_step,
                "noise_boost_substep"           : noise_boost_substep,
                "noise_boost_normalize"         : noise_boost_normalize,
                "noise_anchor"                  : noise_anchor,
                "s_noise"                       : s_noise,
                "s_noise_substep"               : s_noise_substep,
                "d_noise"                       : d_noise,
                "d_noise_start_step"            : d_noise_start_step,
                "d_noise_inv"                   : d_noise_inv,
                "d_noise_inv_start_step"        : d_noise_inv_start_step,

                "overshoot_mode"                : overshoot_mode,
                "overshoot_mode_substep"        : overshoot_mode_substep,
                "overshoot"                     : overshoot,
                "overshoot_substep"             : overshoot_substep,
                "BONGMATH"                      : bongmath,

                "implicit_type"                 : implicit_type,
                "implicit_type_substeps"        : implicit_type_substeps,

                "rk_swap_step"                  : rk_swap_step,
                "rk_swap_print"                 : rk_swap_print,
                "rk_swap_threshold"             : rk_swap_threshold,
                "rk_swap_type"                  : rk_swap_type,

                "steps_to_run"                  : steps_to_run,

                "sde_mask"                      : sde_mask,

                "momentum"                      : momentum,
            })

        return (sampler, )
class ClownsharKSampler_Beta:
    """All-in-one node: builds a sampler with ``ClownSamplerAdvanced_Beta``
    and immediately runs it through ``SharkSampler``."""

    @classmethod
    def INPUT_TYPES(cls):
        required = {
            "eta": ("FLOAT", {"default": 0.5, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Calculated noise amount to be added, then removed, after each step."}),
            "sampler_name": (get_sampler_name_list(), {"default": get_default_sampler_name()}),
            "scheduler": (get_res4lyf_scheduler_list(), {"default": "beta57"},),
            "steps": ("INT", {"default": 30, "min": 1, "max": MAX_STEPS}),
            "steps_to_run": ("INT", {"default": -1, "min": -1, "max": MAX_STEPS}),
            "denoise": ("FLOAT", {"default": 1.0, "min": -10000, "max": MAX_STEPS, "step":0.01}),
            "cfg": ("FLOAT", {"default": 5.5, "min": -100.0, "max": 100.0, "step":0.01, "round": False, }),
            "seed": ("INT", {"default": 0, "min": -1, "max": 0xffffffffffffffff}),
            "sampler_mode": (['unsample', 'standard', 'resample'], {"default": "standard"}),
            "bongmath": ("BOOLEAN", {"default": True}),
        }
        optional = {
            "model": ("MODEL",),
            "positive": ("CONDITIONING",),
            "negative": ("CONDITIONING",),
            "latent_image": ("LATENT",),
            "sigmas": ("SIGMAS",),
            "guides": ("GUIDES",),
            "options": ("OPTIONS", {}),
        }
        return {"required": required, "optional": optional}

    RETURN_TYPES = ("LATENT", "LATENT", "OPTIONS", )
    RETURN_NAMES = ("output", "denoised", "options", )
    FUNCTION = "main"
    CATEGORY = "RES4LYF/samplers"

    def main(self,
            model = None,
            denoise : float = 1.0,
            scheduler : str = "beta57",
            cfg : float = 1.0,
            seed : int = -1,
            positive = None,
            negative = None,
            latent_image : Optional[dict[Tensor]] = None,
            steps : int = 30,
            steps_to_run : int = -1,
            bongmath : bool = True,
            sampler_mode : str = "standard",
            noise_type_sde : str = "gaussian",
            noise_type_sde_substep : str = "gaussian",
            noise_mode_sde : str = "hard",
            noise_mode_sde_substep : str = "hard",
            overshoot_mode : str = "hard",
            overshoot_mode_substep : str = "hard",
            overshoot : float = 0.0,
            overshoot_substep : float = 0.0,
            eta : float = 0.5,
            eta_substep : float = 0.5,
            momentum : float = 0.0,
            noise_scaling_weight : float = 0.0,
            noise_scaling_type : str = "sampler",
            noise_scaling_mode : str = "linear",
            noise_scaling_eta : float = 0.0,
            noise_scaling_cycles : int = 1,
            noise_scaling_weights : Optional[Tensor] = None,
            noise_scaling_etas : Optional[Tensor] = None,
            noise_boost_step : float = 0.0,
            noise_boost_substep : float = 0.0,
            noise_boost_normalize : bool = True,
            noise_anchor : float = 1.0,
            s_noise : float = 1.0,
            s_noise_substep : float = 1.0,
            d_noise : float = 1.0,
            d_noise_start_step : int = 0,
            d_noise_inv : float = 1.0,
            d_noise_inv_start_step : int = 0,
            alpha_sde : float = -1.0,
            k_sde : float = 1.0,
            cfgpp : float = 0.0,
            c1 : float = 0.0,
            c2 : float = 0.5,
            c3 : float = 1.0,
            noise_seed_sde : int = -1,
            sampler_name : str = "res_2m",
            implicit_sampler_name : str = "use_explicit",
            implicit_type : str = "bongmath",
            implicit_type_substeps : str = "bongmath",
            implicit_steps : int = 0,
            implicit_substeps : int = 0,
            sigmas : Optional[Tensor] = None,
            sigmas_override : Optional[Tensor] = None,
            guides = None,
            options = None,
            sde_noise = None,
            sde_noise_steps : int = 1,
            extra_options : str = "",
            automation = None,
            epsilon_scales : Optional[Tensor] = None,
            regional_conditioning_weights : Optional[Tensor] = None,
            frame_weights_mgr = None,
            rescale_floor : bool = True,
            rk_swap_step : int = MAX_STEPS,
            rk_swap_print : bool = False,
            rk_swap_threshold : float = 0.0,
            rk_swap_type : str = "",
            sde_mask : Optional[Tensor] = None,
            **kwargs
            ):
        """Create the sampler, run it, and return the results.

        Settings are resolved by asking the options manager for each name
        with the argument as fallback. The resolved values are handed to
        ``ClownSamplerAdvanced_Beta().main`` and then, together with the
        resulting sampler, to ``SharkSampler().main``.

        Returns:
            Tuple ``(output, denoised, options)`` with ``options`` being the
            dict form of the options manager.
        """
        opts = OptionsManager(options, **kwargs)
        extra_options += "\n" + opts.get('extra_options', "")

        # ClownSampler default: the substep eta follows the step eta.
        eta_substep = eta

        # SharkSampler defaults.
        noise_type_init = "gaussian"
        noise_stdev = 1.0
        denoise_alt = 1.0
        channelwise_cfg = False

        # A negative denoise selects the alternate denoise path.
        if denoise < 0:
            denoise_alt, denoise = -denoise, 1.0

        # Fill missing inputs from a latent produced by an earlier sampler.
        inherited = False
        if latent_image is not None:
            if positive is None and 'positive' in latent_image:
                positive = latent_image['positive']
                inherited = True
            if negative is None and 'negative' in latent_image:
                negative = latent_image['negative']
                inherited = True
            if model is None and 'model' in latent_image:
                model = latent_image['model']
                inherited = True

        guider = opts.get('guider', None)
        if guider is not None and not inherited:
            model = guider.model_patcher

        if model.model.model_config.unet_config.get('stable_cascade_stage') == 'b':
            noise_type_sde = "pyramid-cascade_B"
            noise_type_sde_substep = "pyramid-cascade_B"

        noise_seed_sde = opts.get('noise_seed_sde', noise_seed_sde)

        noise_type_sde = opts.get('noise_type_sde', noise_type_sde)
        noise_type_sde_substep = opts.get('noise_type_sde_substep', noise_type_sde_substep)
        opts.update('noise_type_sde', noise_type_sde)
        opts.update('noise_type_sde_substep', noise_type_sde_substep)

        noise_mode_sde = opts.get('noise_mode_sde', noise_mode_sde)
        noise_mode_sde_substep = opts.get('noise_mode_sde_substep', noise_mode_sde_substep)

        overshoot_mode = opts.get('overshoot_mode', overshoot_mode)
        overshoot_mode_substep = opts.get('overshoot_mode_substep', overshoot_mode_substep)

        eta = opts.get('eta', eta)
        eta_substep = opts.get('eta_substep', eta_substep)
        opts.update('eta', eta)
        opts.update('eta_substep', eta_substep)

        overshoot = opts.get('overshoot', overshoot)
        overshoot_substep = opts.get('overshoot_substep', overshoot_substep)

        noise_scaling_weight = opts.get('noise_scaling_weight', noise_scaling_weight)
        noise_scaling_type = opts.get('noise_scaling_type', noise_scaling_type)
        noise_scaling_mode = opts.get('noise_scaling_mode', noise_scaling_mode)
        noise_scaling_eta = opts.get('noise_scaling_eta', noise_scaling_eta)
        noise_scaling_cycles = opts.get('noise_scaling_cycles', noise_scaling_cycles)

        noise_scaling_weights = opts.get('noise_scaling_weights', noise_scaling_weights)
        noise_scaling_etas = opts.get('noise_scaling_etas', noise_scaling_etas)

        noise_boost_step = opts.get('noise_boost_step', noise_boost_step)
        noise_boost_substep = opts.get('noise_boost_substep', noise_boost_substep)
        noise_boost_normalize = opts.get('noise_boost_normalize', noise_boost_normalize)

        noise_anchor = opts.get('noise_anchor', noise_anchor)

        s_noise = opts.get('s_noise', s_noise)
        s_noise_substep = opts.get('s_noise_substep', s_noise_substep)

        d_noise = opts.get('d_noise', d_noise)
        d_noise_start_step = opts.get('d_noise_start_step', d_noise_start_step)
        d_noise_inv = opts.get('d_noise_inv', d_noise_inv)
        d_noise_inv_start_step = opts.get('d_noise_inv_start_step', d_noise_inv_start_step)

        momentum = opts.get('momentum', momentum)

        implicit_type = opts.get('implicit_type', implicit_type)
        implicit_type_substeps = opts.get('implicit_type_substeps', implicit_type_substeps)
        implicit_steps = opts.get('implicit_steps', implicit_steps)
        implicit_substeps = opts.get('implicit_substeps', implicit_substeps)

        alpha_sde = opts.get('alpha_sde', alpha_sde)
        k_sde = opts.get('k_sde', k_sde)
        c1 = opts.get('c1', c1)
        c2 = opts.get('c2', c2)
        c3 = opts.get('c3', c3)

        frame_weights_mgr = opts.get('frame_weights_mgr', frame_weights_mgr)

        sde_noise = opts.get('sde_noise', sde_noise)
        sde_noise_steps = opts.get('sde_noise_steps', sde_noise_steps)

        extra_options = opts.get('extra_options', extra_options)

        automation = opts.get('automation', automation)

        # Settings consumed by SharkSampler.
        noise_type_init = opts.get('noise_type_init', noise_type_init)
        noise_stdev = opts.get('noise_stdev', noise_stdev)
        sampler_mode = opts.get('sampler_mode', sampler_mode)
        denoise_alt = opts.get('denoise_alt', denoise_alt)

        channelwise_cfg = opts.get('channelwise_cfg', channelwise_cfg)

        opts.update('noise_type_init', noise_type_init)
        opts.update('noise_stdev', noise_stdev)
        opts.update('denoise_alt', denoise_alt)

        sigmas = opts.get('sigmas', sigmas)

        rk_swap_type = opts.get('rk_swap_type', rk_swap_type)
        rk_swap_step = opts.get('rk_swap_step', rk_swap_step)
        rk_swap_threshold = opts.get('rk_swap_threshold', rk_swap_threshold)
        rk_swap_print = opts.get('rk_swap_print', rk_swap_print)

        sde_mask = opts.get('sde_mask', sde_mask)

        # A negative cfg is how channelwise CFG is signalled downstream.
        if channelwise_cfg:
            cfg = -abs(cfg)

        sampler, = ClownSamplerAdvanced_Beta().main(
            noise_type_sde=noise_type_sde,
            noise_type_sde_substep=noise_type_sde_substep,
            noise_mode_sde=noise_mode_sde,
            noise_mode_sde_substep=noise_mode_sde_substep,
            eta=eta,
            eta_substep=eta_substep,
            overshoot=overshoot,
            overshoot_substep=overshoot_substep,
            overshoot_mode=overshoot_mode,
            overshoot_mode_substep=overshoot_mode_substep,
            momentum=momentum,
            alpha_sde=alpha_sde,
            k_sde=k_sde,
            cfgpp=cfgpp,
            c1=c1,
            c2=c2,
            c3=c3,
            sampler_name=sampler_name,
            implicit_sampler_name=implicit_sampler_name,
            implicit_type=implicit_type,
            implicit_type_substeps=implicit_type_substeps,
            implicit_steps=implicit_steps,
            implicit_substeps=implicit_substeps,
            rescale_floor=rescale_floor,
            sigmas_override=sigmas_override,
            noise_seed_sde=noise_seed_sde,
            guides=guides,
            options=opts.as_dict(),
            extra_options=extra_options,
            automation=automation,
            noise_scaling_weight=noise_scaling_weight,
            noise_scaling_type=noise_scaling_type,
            noise_scaling_mode=noise_scaling_mode,
            noise_scaling_eta=noise_scaling_eta,
            noise_scaling_cycles=noise_scaling_cycles,
            noise_scaling_weights=noise_scaling_weights,
            noise_scaling_etas=noise_scaling_etas,
            noise_boost_step=noise_boost_step,
            noise_boost_substep=noise_boost_substep,
            noise_boost_normalize=noise_boost_normalize,
            noise_anchor=noise_anchor,
            s_noise=s_noise,
            s_noise_substep=s_noise_substep,
            d_noise=d_noise,
            d_noise_start_step=d_noise_start_step,
            d_noise_inv=d_noise_inv,
            d_noise_inv_start_step=d_noise_inv_start_step,
            epsilon_scales=epsilon_scales,
            regional_conditioning_weights=regional_conditioning_weights,
            frame_weights_mgr=frame_weights_mgr,
            sde_noise=sde_noise,
            sde_noise_steps=sde_noise_steps,
            rk_swap_step=rk_swap_step,
            rk_swap_print=rk_swap_print,
            rk_swap_threshold=rk_swap_threshold,
            rk_swap_type=rk_swap_type,
            steps_to_run=steps_to_run,
            sde_mask=sde_mask,
            bongmath=bongmath,
        )

        output, denoised, sde_noise = SharkSampler().main(
            model=model,
            cfg=cfg,
            scheduler=scheduler,
            steps=steps,
            steps_to_run=steps_to_run,
            denoise=denoise,
            latent_image=latent_image,
            positive=positive,
            negative=negative,
            sampler=sampler,
            cfgpp=cfgpp,
            noise_seed=seed,
            options=opts.as_dict(),
            sde_noise=sde_noise,
            sde_noise_steps=sde_noise_steps,
            noise_type_init=noise_type_init,
            noise_stdev=noise_stdev,
            sampler_mode=sampler_mode,
            denoise_alt=denoise_alt,
            sigmas=sigmas,
            extra_options=extra_options,
        )

        return (output, denoised, opts.as_dict(),)
class ClownsharkChainsampler_Beta(ClownsharKSampler_Beta):
    """Chained variant of ``ClownsharKSampler_Beta``.

    The step count and, unless overridden, the sigma schedule are read from
    the ``state_info`` entry of the incoming latent.
    """

    @classmethod
    def INPUT_TYPES(cls):
        required = {
            "eta": ("FLOAT", {"default": 0.5, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Calculated noise amount to be added, then removed, after each step."}),
            "sampler_name": (get_sampler_name_list(), {"default": get_default_sampler_name()}),
            "steps_to_run": ("INT", {"default": -1, "min": -1, "max": MAX_STEPS}),
            "cfg": ("FLOAT", {"default": 5.5, "min": -10000.0, "max": 10000.0, "step":0.01, "round": False, "tooltip": "Negative values use channelwise CFG." }),
            "sampler_mode": (['unsample', 'resample'],{"default": "resample"}),
            "bongmath": ("BOOLEAN", {"default": True}),
        }
        optional = {
            "model": ("MODEL",),
            "positive": ("CONDITIONING", ),
            "negative": ("CONDITIONING", ),
            "sigmas": ("SIGMAS", ),
            "latent_image": ("LATENT", ),
            "guides": ("GUIDES", ),
            "options": ("OPTIONS", ),
        }
        return {"required": required, "optional": optional}

    def main(self,
            eta          = 0.5,
            sampler_name = "res_2m",
            steps_to_run = -1,
            cfg          = 5.5,
            bongmath     = True,
            seed : int   = -1,
            latent_image = None,
            sigmas       = None,
            sampler_mode = "",
            **kwargs):
        """Continue sampling from the state stored in ``latent_image``.

        ``steps`` is the length of the stored sigma tensor's last axis minus
        three. The stored sigmas are used when ``sigmas`` is ``None``.
        """
        previous = latent_image['state_info']

        steps = previous['sigmas'].shape[-1] - 3
        if sigmas is None:
            sigmas = previous['sigmas']

        # Reverse the schedule when resampling after an unsample run whose
        # sigmas increase from index 1 to index 2.
        if (
            len(sigmas) > 2
            and sigmas[1] < sigmas[2]
            and previous['sampler_mode'] == "unsample"
            and sampler_mode == "resample"
        ):
            sigmas = torch.flip(sigmas, dims=[0])

        return super().main(
            eta=eta,
            sampler_name=sampler_name,
            sampler_mode=sampler_mode,
            sigmas=sigmas,
            steps_to_run=steps_to_run,
            steps=steps,
            cfg=cfg,
            bongmath=bongmath,
            seed=seed,
            latent_image=latent_image,
            **kwargs,
        )
latent_image : Optional[dict[Tensor]] = None, steps : int = 30, steps_to_run : int = -1, bongmath : bool = True, sampler_mode : str = "standard", noise_type_sde : str = "gaussian", noise_type_sde_substep : str = "gaussian", noise_mode_sde : str = "hard", noise_mode_sde_substep : str = "hard", overshoot_mode : str = "hard", overshoot_mode_substep : str = "hard", overshoot : float = 0.0, overshoot_substep : float = 0.0, eta : float = 0.5, eta_substep : float = 0.5, noise_scaling_weight : float = 0.0, noise_boost_step : float = 0.0, noise_boost_substep : float = 0.0, noise_anchor : float = 1.0, s_noise : float = 1.0, s_noise_substep : float = 1.0, d_noise : float = 1.0, d_noise_start_step : int = 0, d_noise_inv : float = 1.0, d_noise_inv_start_step : int = 0, alpha_sde : float = -1.0, k_sde : float = 1.0, cfgpp : float = 0.0, c1 : float = 0.0, c2 : float = 0.5, c3 : float = 1.0, noise_seed_sde : int = -1, sampler_name : str = "res_2m", implicit_sampler_name : str = "use_explicit", implicit_type : str = "bongmath", implicit_type_substeps : str = "bongmath", implicit_steps : int = 0, implicit_substeps : int = 0, sigmas : Optional[Tensor] = None, sigmas_override : Optional[Tensor] = None, guides = None, options = None, sde_noise = None, sde_noise_steps : int = 1, extra_options : str = "", automation = None, epsilon_scales : Optional[Tensor] = None, regional_conditioning_weights : Optional[Tensor] = None, frame_weights_mgr = None, rescale_floor : bool = True, rk_swap_step : int = MAX_STEPS, rk_swap_print : bool = False, rk_swap_threshold : float = 0.0, rk_swap_type : str = "", sde_mask : Optional[Tensor] = None, #start_at_step : int = 0, #stop_at_step : int = MAX_STEPS, **kwargs ): options_mgr = OptionsManager(options, **kwargs) extra_options += "\n" + options_mgr.get('extra_options', "") # defaults for ClownSampler eta_substep = eta # defaults for SharkSampler noise_type_init = "gaussian" noise_stdev = 1.0 denoise_alt = 1.0 channelwise_cfg = False #1.0 #if options is not 
None: #options_mgr = OptionsManager(options_inputs) noise_type_sde = options_mgr.get('noise_type_sde' , noise_type_sde) noise_type_sde_substep = options_mgr.get('noise_type_sde_substep', noise_type_sde_substep) noise_mode_sde = options_mgr.get('noise_mode_sde' , noise_mode_sde) noise_mode_sde_substep = options_mgr.get('noise_mode_sde_substep', noise_mode_sde_substep) overshoot_mode = options_mgr.get('overshoot_mode' , overshoot_mode) overshoot_mode_substep = options_mgr.get('overshoot_mode_substep', overshoot_mode_substep) eta = options_mgr.get('eta' , eta) eta_substep = options_mgr.get('eta_substep' , eta_substep) overshoot = options_mgr.get('overshoot' , overshoot) overshoot_substep = options_mgr.get('overshoot_substep' , overshoot_substep) noise_scaling_weight = options_mgr.get('noise_scaling_weight' , noise_scaling_weight) noise_boost_step = options_mgr.get('noise_boost_step' , noise_boost_step) noise_boost_substep = options_mgr.get('noise_boost_substep' , noise_boost_substep) noise_anchor = options_mgr.get('noise_anchor' , noise_anchor) s_noise = options_mgr.get('s_noise' , s_noise) s_noise_substep = options_mgr.get('s_noise_substep' , s_noise_substep) d_noise = options_mgr.get('d_noise' , d_noise) d_noise_start_step = options_mgr.get('d_noise_start_step' , d_noise_start_step) d_noise_inv = options_mgr.get('d_noise_inv' , d_noise_inv) d_noise_inv_start_step = options_mgr.get('d_noise_inv_start_step', d_noise_inv_start_step) implicit_type = options_mgr.get('implicit_type' , implicit_type) implicit_type_substeps = options_mgr.get('implicit_type_substeps', implicit_type_substeps) implicit_steps = options_mgr.get('implicit_steps' , implicit_steps) implicit_substeps = options_mgr.get('implicit_substeps' , implicit_substeps) alpha_sde = options_mgr.get('alpha_sde' , alpha_sde) k_sde = options_mgr.get('k_sde' , k_sde) c1 = options_mgr.get('c1' , c1) c2 = options_mgr.get('c2' , c2) c3 = options_mgr.get('c3' , c3) frame_weights_mgr = options_mgr.get('frame_weights_mgr' 
, frame_weights_mgr) sde_noise = options_mgr.get('sde_noise' , sde_noise) sde_noise_steps = options_mgr.get('sde_noise_steps' , sde_noise_steps) extra_options = options_mgr.get('extra_options' , extra_options) automation = options_mgr.get('automation' , automation) # SharkSampler Options noise_type_init = options_mgr.get('noise_type_init' , noise_type_init) noise_stdev = options_mgr.get('noise_stdev' , noise_stdev) sampler_mode = options_mgr.get('sampler_mode' , sampler_mode) denoise_alt = options_mgr.get('denoise_alt' , denoise_alt) channelwise_cfg = options_mgr.get('channelwise_cfg' , channelwise_cfg) sigmas = options_mgr.get('sigmas' , sigmas) rk_swap_type = options_mgr.get('rk_swap_type' , rk_swap_type) rk_swap_step = options_mgr.get('rk_swap_step' , rk_swap_step) rk_swap_threshold = options_mgr.get('rk_swap_threshold' , rk_swap_threshold) rk_swap_print = options_mgr.get('rk_swap_print' , rk_swap_print) sde_mask = options_mgr.get('sde_mask' , sde_mask) #start_at_step = options_mgr.get('start_at_step' , start_at_step) #stop_at_ste = options_mgr.get('stop_at_step' , stop_at_step) if channelwise_cfg: # != 1.0: cfg = -abs(cfg) # set cfg negative for shark, to flag as cfg_cw noise_seed_sde = seed sampler, = ClownSamplerAdvanced_Beta().main( noise_type_sde = noise_type_sde, noise_type_sde_substep = noise_type_sde_substep, noise_mode_sde = noise_mode_sde, noise_mode_sde_substep = noise_mode_sde_substep, eta = eta, eta_substep = eta_substep, s_noise = s_noise, s_noise_substep = s_noise_substep, overshoot = overshoot, overshoot_substep = overshoot_substep, overshoot_mode = overshoot_mode, overshoot_mode_substep = overshoot_mode_substep, d_noise = d_noise, d_noise_start_step = d_noise_start_step, d_noise_inv = d_noise_inv, d_noise_inv_start_step = d_noise_inv_start_step, alpha_sde = alpha_sde, k_sde = k_sde, cfgpp = cfgpp, c1 = c1, c2 = c2, c3 = c3, sampler_name = sampler_name, implicit_sampler_name = implicit_sampler_name, implicit_type = implicit_type, 
class BongSampler:
    """Simplified all-in-one sampler node.

    Builds a sampler object with ClownSamplerAdvanced_Beta, runs it through
    SharkSampler, and returns only the resulting output latent.
    """

    @classmethod
    def INPUT_TYPES(cls):
        inputs = {"required":
                    {
                    "model":        ("MODEL",),
                    "seed":         ("INT",   {"default": 0, "min": -1, "max": 0xffffffffffffffff}),
                    "steps":        ("INT",   {"default": 30, "min": 1, "max": MAX_STEPS}),
                    "cfg":          ("FLOAT", {"default": 5.5, "min": -100.0, "max": 100.0, "step":0.01, "round": False, }),
                    "sampler_name": (["res_2m", "res_3m", "res_2s", "res_3s","res_2m_sde", "res_3m_sde", "res_2s_sde", "res_3s_sde"], {"default": "res_2s_sde"}),
                    "scheduler":    (get_res4lyf_scheduler_list(), {"default": "beta57"},),
                    "denoise":      ("FLOAT", {"default": 1.0, "min": -10000, "max": MAX_STEPS, "step":0.01}),
                    },
                "optional":
                    {
                    "positive":     ("CONDITIONING",),
                    "negative":     ("CONDITIONING",),
                    "latent_image": ("LATENT",),
                    }
                }

        return inputs

    RETURN_TYPES = ("LATENT", )
    RETURN_NAMES = ("output", )
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/samplers"

    def main(self,
            model                                                = None,
            denoise                       : float                = 1.0,
            scheduler                     : str                  = "beta57",
            cfg                           : float                = 1.0,
            seed                          : int                  = 42,
            positive                                             = None,
            negative                                             = None,
            latent_image                  : Optional[dict[Tensor]] = None,
            steps                         : int                  = 30,
            steps_to_run                  : int                  = -1,
            bongmath                      : bool                 = True,
            sampler_mode                  : str                  = "standard",

            noise_type_sde                : str                  = "brownian",
            noise_type_sde_substep        : str                  = "brownian",
            noise_mode_sde                : str                  = "hard",
            noise_mode_sde_substep        : str                  = "hard",

            overshoot_mode                : str                  = "hard",
            overshoot_mode_substep        : str                  = "hard",
            overshoot                     : float                = 0.0,
            overshoot_substep             : float                = 0.0,

            eta                           : float                = 0.5,
            eta_substep                   : float                = 0.5,
            d_noise                       : float                = 1.0,
            s_noise                       : float                = 1.0,
            s_noise_substep               : float                = 1.0,

            alpha_sde                     : float                = -1.0,
            k_sde                         : float                = 1.0,
            cfgpp                         : float                = 0.0,
            c1                            : float                = 0.0,
            c2                            : float                = 0.5,
            c3                            : float                = 1.0,
            noise_seed_sde                : int                  = -1,
            sampler_name                  : str                  = "res_2m",
            implicit_sampler_name         : str                  = "use_explicit",

            implicit_type                 : str                  = "bongmath",
            implicit_type_substeps        : str                  = "bongmath",
            implicit_steps                : int                  = 0,
            implicit_substeps             : int                  = 0,

            sigmas                        : Optional[Tensor]     = None,
            sigmas_override               : Optional[Tensor]     = None,
            guides                                               = None,
            options                                              = None,
            sde_noise                                            = None,
            sde_noise_steps               : int                  = 1,
            extra_options                 : str                  = "",
            automation                                           = None,

            epsilon_scales                : Optional[Tensor]     = None,
            regional_conditioning_weights : Optional[Tensor]     = None,
            frame_weights_mgr                                    = None,
            noise_scaling_weight          : float                = 0.0,
            noise_boost_step              : float                = 0.0,
            noise_boost_substep           : float                = 0.0,
            noise_anchor                  : float                = 1.0,

            rescale_floor                 : bool                 = True,

            rk_swap_step                  : int                  = MAX_STEPS,
            rk_swap_print                 : bool                 = False,
            rk_swap_threshold             : float                = 0.0,
            rk_swap_type                  : str                  = "",

            #start_at_step                : int                  = 0,
            #stop_at_step                 : int                  = MAX_STEPS,

            **kwargs
            ):
        """Resolve settings, build the sampler, run it, and return `(output,)`.

        Only the inputs declared in INPUT_TYPES are exposed as node inputs;
        the remaining parameters keep their defaults unless supplied by a
        direct caller or replaced by the `options_mgr.get(...)` lookups below.

        NOTE(review): the block nesting below was reconstructed from a
        whitespace-collapsed dump; confirm against the original file.
        """
        options_mgr = OptionsManager(options, **kwargs)
        extra_options += "\n" + options_mgr.get('extra_options', "")

        # `model` is dereferenced unconditionally, so it must not be None
        # despite the default in the signature.
        if model.model.model_config.unet_config.get('stable_cascade_stage') == 'b':
            noise_type_sde         = "pyramid-cascade_B"
            noise_type_sde_substep = "pyramid-cascade_B"

        # A "_sde" suffix on the sampler name selects eta 0.5, otherwise 0.0.
        # Either way the `eta` argument passed by the caller is overwritten.
        if sampler_name.endswith("_sde"):
            sampler_name = sampler_name[:-4]
            eta = 0.5
        else:
            eta = 0.0

        # defaults for ClownSampler
        # (the `eta_substep` argument is overwritten as well)
        eta_substep = eta

        # defaults for SharkSampler
        noise_type_init = "gaussian"
        noise_stdev     = 1.0
        denoise_alt     = 1.0
        channelwise_cfg = False #1.0

        #if options is not None:
        #options_mgr = OptionsManager(options_inputs)
        # Each lookup passes the current value as the fallback argument.
        # presumably OptionsManager.get returns the stored option when present - verify in helper.py
        noise_type_sde         = options_mgr.get('noise_type_sde'        , noise_type_sde)
        noise_type_sde_substep = options_mgr.get('noise_type_sde_substep', noise_type_sde_substep)

        noise_mode_sde         = options_mgr.get('noise_mode_sde'        , noise_mode_sde)
        noise_mode_sde_substep = options_mgr.get('noise_mode_sde_substep', noise_mode_sde_substep)

        overshoot_mode         = options_mgr.get('overshoot_mode'        , overshoot_mode)
        overshoot_mode_substep = options_mgr.get('overshoot_mode_substep', overshoot_mode_substep)

        eta                    = options_mgr.get('eta'                   , eta)
        eta_substep            = options_mgr.get('eta_substep'           , eta_substep)

        overshoot              = options_mgr.get('overshoot'             , overshoot)
        overshoot_substep      = options_mgr.get('overshoot_substep'     , overshoot_substep)

        noise_scaling_weight   = options_mgr.get('noise_scaling_weight'  , noise_scaling_weight)
        noise_boost_step       = options_mgr.get('noise_boost_step'      , noise_boost_step)
        noise_boost_substep    = options_mgr.get('noise_boost_substep'   , noise_boost_substep)

        # NOTE(review): noise_anchor is resolved here but is not passed to
        # either call below.
        noise_anchor           = options_mgr.get('noise_anchor'          , noise_anchor)

        s_noise                = options_mgr.get('s_noise'               , s_noise)
        s_noise_substep        = options_mgr.get('s_noise_substep'       , s_noise_substep)

        d_noise                = options_mgr.get('d_noise'               , d_noise)

        implicit_type          = options_mgr.get('implicit_type'         , implicit_type)
        implicit_type_substeps = options_mgr.get('implicit_type_substeps', implicit_type_substeps)
        implicit_steps         = options_mgr.get('implicit_steps'        , implicit_steps)
        implicit_substeps      = options_mgr.get('implicit_substeps'     , implicit_substeps)

        alpha_sde              = options_mgr.get('alpha_sde'             , alpha_sde)
        k_sde                  = options_mgr.get('k_sde'                 , k_sde)
        c1                     = options_mgr.get('c1'                    , c1)
        c2                     = options_mgr.get('c2'                    , c2)
        c3                     = options_mgr.get('c3'                    , c3)

        frame_weights_mgr      = options_mgr.get('frame_weights_mgr'     , frame_weights_mgr)

        sde_noise              = options_mgr.get('sde_noise'             , sde_noise)
        sde_noise_steps        = options_mgr.get('sde_noise_steps'       , sde_noise_steps)

        # This replaces the concatenated string built at the top of the
        # function with whatever the lookup returns.
        extra_options          = options_mgr.get('extra_options'         , extra_options)

        automation             = options_mgr.get('automation'            , automation)

        # SharkSampler Options
        noise_type_init        = options_mgr.get('noise_type_init'       , noise_type_init)
        noise_stdev            = options_mgr.get('noise_stdev'           , noise_stdev)
        sampler_mode           = options_mgr.get('sampler_mode'          , sampler_mode)
        denoise_alt            = options_mgr.get('denoise_alt'           , denoise_alt)

        channelwise_cfg        = options_mgr.get('channelwise_cfg'       , channelwise_cfg)

        sigmas                 = options_mgr.get('sigmas'                , sigmas)

        rk_swap_type           = options_mgr.get('rk_swap_type'          , rk_swap_type)
        rk_swap_step           = options_mgr.get('rk_swap_step'          , rk_swap_step)
        rk_swap_threshold      = options_mgr.get('rk_swap_threshold'     , rk_swap_threshold)
        rk_swap_print          = options_mgr.get('rk_swap_print'         , rk_swap_print)

        #start_at_step         = options_mgr.get('start_at_step'         , start_at_step)
        #stop_at_step          = options_mgr.get('stop_at_step'          , stop_at_step)

        if channelwise_cfg: # != 1.0:
            cfg = -abs(cfg)  # set cfg negative for shark, to flag as cfg_cw

        # Stage 1: build the sampler object from the resolved settings.
        sampler, = ClownSamplerAdvanced_Beta().main(
            noise_type_sde                = noise_type_sde,
            noise_type_sde_substep        = noise_type_sde_substep,
            noise_mode_sde                = noise_mode_sde,
            noise_mode_sde_substep        = noise_mode_sde_substep,

            eta                           = eta,
            eta_substep                   = eta_substep,

            s_noise                       = s_noise,
            s_noise_substep               = s_noise_substep,

            overshoot                     = overshoot,
            overshoot_substep             = overshoot_substep,

            overshoot_mode                = overshoot_mode,
            overshoot_mode_substep        = overshoot_mode_substep,

            d_noise                       = d_noise,
            #d_noise_start_step           = d_noise_start_step,
            #d_noise_inv                  = d_noise_inv,
            #d_noise_inv_start_step       = d_noise_inv_start_step,

            alpha_sde                     = alpha_sde,
            k_sde                         = k_sde,
            cfgpp                         = cfgpp,
            c1                            = c1,
            c2                            = c2,
            c3                            = c3,
            sampler_name                  = sampler_name,
            implicit_sampler_name         = implicit_sampler_name,

            implicit_type                 = implicit_type,
            implicit_type_substeps        = implicit_type_substeps,
            implicit_steps                = implicit_steps,
            implicit_substeps             = implicit_substeps,

            rescale_floor                 = rescale_floor,
            sigmas_override               = sigmas_override,

            noise_seed_sde                = noise_seed_sde,

            guides                        = guides,
            options                       = options_mgr.as_dict(),

            extra_options                 = extra_options,
            automation                    = automation,

            noise_scaling_weight          = noise_scaling_weight,
            noise_boost_step              = noise_boost_step,
            noise_boost_substep           = noise_boost_substep,

            epsilon_scales                = epsilon_scales,
            regional_conditioning_weights = regional_conditioning_weights,
            frame_weights_mgr             = frame_weights_mgr,

            sde_noise                     = sde_noise,
            sde_noise_steps               = sde_noise_steps,

            rk_swap_step                  = rk_swap_step,
            rk_swap_print                 = rk_swap_print,
            rk_swap_threshold             = rk_swap_threshold,
            rk_swap_type                  = rk_swap_type,

            steps_to_run                  = steps_to_run,

            bongmath                      = bongmath,
            )

        # Stage 2: run the sampler. `denoised` and the returned `sde_noise`
        # are received here but not returned by this node.
        output, denoised, sde_noise = SharkSampler().main(
            model           = model,
            cfg             = cfg,
            scheduler       = scheduler,
            steps           = steps,
            steps_to_run    = steps_to_run,
            denoise         = denoise,
            latent_image    = latent_image,
            positive        = positive,
            negative        = negative,
            sampler         = sampler,
            cfgpp           = cfgpp,
            noise_seed      = seed,
            options         = options_mgr.as_dict(),
            sde_noise       = sde_noise,
            sde_noise_steps = sde_noise_steps,
            noise_type_init = noise_type_init,
            noise_stdev     = noise_stdev,
            sampler_mode    = sampler_mode,
            denoise_alt     = denoise_alt,
            sigmas          = sigmas,

            extra_options   = extra_options)

        return (output, )
class ClownOptions_SDE_Beta:
    """Options node that records SDE noise settings in an OPTIONS dict."""

    @classmethod
    def INPUT_TYPES(cls):
        return {"required":
                    {
                    "noise_type_sde":         (NOISE_GENERATOR_NAMES_SIMPLE, {"default": "gaussian"}),
                    "noise_type_sde_substep": (NOISE_GENERATOR_NAMES_SIMPLE, {"default": "gaussian"}),
                    "noise_mode_sde":         (NOISE_MODE_NAMES,             {"default": 'hard', "tooltip": "How noise scales with the sigma schedule. Hard is the most aggressive, the others start strong and drop rapidly."}),
                    "noise_mode_sde_substep": (NOISE_MODE_NAMES,             {"default": 'hard', "tooltip": "How noise scales with the sigma schedule. Hard is the most aggressive, the others start strong and drop rapidly."}),
                    "eta":                    ("FLOAT",                      {"default": 0.5, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Calculated noise amount to be added, then removed, after each step."}),
                    "eta_substep":            ("FLOAT",                      {"default": 0.5, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Calculated noise amount to be added, then removed, after each step."}),
                    "seed":                   ("INT",                        {"default": -1, "min": -1, "max": 0xffffffffffffffff}),
                    },
                "optional":
                    {
                    "etas":                   ("SIGMAS", ),
                    "etas_substep":           ("SIGMAS", ),
                    "options":                ("OPTIONS", ),
                    }
                }

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_options"

    def main(self,
            noise_type_sde         = "gaussian",
            noise_type_sde_substep = "gaussian",
            noise_mode_sde         = "hard",
            noise_mode_sde_substep = "hard",
            eta                    = 0.5,
            eta_substep            = 0.5,
            seed : int             = -1,
            etas         : Optional[Tensor] = None,
            etas_substep : Optional[Tensor] = None,
            options                = None,
            ):
        """Return a one-tuple holding the options dict extended with SDE settings.

        A "none" noise mode or noise type is stored as the usable default
        ("hard" / "gaussian") with the matching eta forced to 0.0.

        The incoming `options` dict is not modified: settings are written to
        a shallow copy so the upstream node's output stays untouched.
        """
        # Copy instead of mutating the caller's dict in place.
        options = dict(options) if options is not None else {}

        if noise_mode_sde == "none":
            noise_mode_sde = "hard"
            eta = 0.0

        if noise_mode_sde_substep == "none":
            noise_mode_sde_substep = "hard"
            eta_substep = 0.0

        if noise_type_sde == "none":
            noise_type_sde = "gaussian"
            eta = 0.0

        if noise_type_sde_substep == "none":
            noise_type_sde_substep = "gaussian"
            eta_substep = 0.0

        options['noise_type_sde']         = noise_type_sde
        options['noise_type_sde_substep'] = noise_type_sde_substep
        options['noise_mode_sde']         = noise_mode_sde
        options['noise_mode_sde_substep'] = noise_mode_sde_substep
        options['eta']                    = eta
        options['eta_substep']            = eta_substep
        options['noise_seed_sde']         = seed

        options['etas']                   = etas
        options['etas_substep']           = etas_substep

        return (options,)
class ClownOptions_DetailBoost_Beta:
    """Options node that builds per-step detail-boost weight and eta schedules."""

    @classmethod
    def INPUT_TYPES(cls):
        return {"required":
                    {
                    "weight":     ("FLOAT",              {"default": 1.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set to positive values to create a sharper, grittier, more detailed image. Set to negative values to soften and deepen the colors."}),
                    "method":     (DETAIL_BOOST_METHODS, {"default": "model", "tooltip": "Determines whether the sampler or the model underestimates the noise level."}),
                    "mode":       (NOISE_MODE_NAMES,     {"default": 'hard', "tooltip": "Changes the steps where the effect is greatest. Most affect early steps, sinusoidal affects middle steps."}),
                    "eta":        ("FLOAT",              {"default": 0.5, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "The strength of the effect of the noise_scaling_mode. Linear ignores this parameter."}),
                    "start_step": ("INT",                {"default": 3, "min": 0, "max": MAX_STEPS}),
                    "end_step":   ("INT",                {"default": 10, "min": -1, "max": MAX_STEPS}),
                    },
                "optional":
                    {
                    "weights":    ("SIGMAS", ),
                    "etas":       ("SIGMAS", ),
                    "options":    ("OPTIONS", ),
                    }
                }

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_options"

    @staticmethod
    def _build_schedule(values, fill_value, start_step, end_step, dtype, device):
        """Return a 1-D schedule that is zero before start_step and from end_step on."""
        if values is None:
            values = initialize_or_scale(None, fill_value, MAX_STEPS)
        # Normalise caller-supplied tensors as well, so the concatenation
        # below never mixes devices.
        values = values.to(dtype).to(device)

        leading_zeros = torch.zeros((start_step,), dtype=dtype, device=device)
        schedule = torch.cat((leading_zeros, values), dim=0)
        schedule = schedule[:end_step]
        # Zero tail so indexing past the active window yields no boost.
        return F.pad(schedule, (0, MAX_STEPS), value=0.0)

    def main(self,
            weight                    : float = 0.0,
            method                    : str   = "sampler",
            mode                      : str   = "linear",
            eta                       : float = 0.5,
            start_step                : int   = 0,
            end_step                  : int   = -1,

            noise_scaling_cycles      : int   = 1,

            noise_boost_step          : float = 0.0,
            noise_boost_substep       : float = 0.0,
            sampler_scaling_normalize : bool  = False,

            weights                   : Optional[Tensor] = None,
            etas                      : Optional[Tensor] = None,

            options                           = None
            ):
        """Return a one-tuple holding the options dict extended with detail-boost settings.

        `weights` / `etas` may be given as explicit per-step tensors; when
        omitted they are generated from the scalar `weight` / `eta`. Both
        schedules are shifted right by `start_step`, cut at `end_step`
        (-1 means MAX_STEPS) and zero-padded. A method name ending in
        "_normal" is stored without the suffix and turns on
        `noise_boost_normalize`.

        Schedules are float64 on CUDA when available, otherwise on CPU.
        The incoming `options` dict is left unmodified.
        """
        # Copy instead of mutating the caller's dict in place.
        options = dict(options) if options is not None else {}

        default_dtype  = torch.float64
        # Fall back to CPU so the node also works without a CUDA device.
        default_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        noise_scaling_type = method
        if noise_scaling_type.endswith("_normal"):
            sampler_scaling_normalize = True
            noise_scaling_type = noise_scaling_type[:-7]

        noise_scaling_end_step = MAX_STEPS if end_step == -1 else end_step

        noise_scaling_weights = self._build_schedule(
            weights, weight, start_step, noise_scaling_end_step, default_dtype, default_device)
        noise_scaling_etas = self._build_schedule(
            etas, eta, start_step, noise_scaling_end_step, default_dtype, default_device)

        options['noise_scaling_weight']  = weight
        options['noise_scaling_type']    = noise_scaling_type
        options['noise_scaling_mode']    = mode
        options['noise_scaling_eta']     = eta
        options['noise_scaling_cycles']  = noise_scaling_cycles

        options['noise_scaling_weights'] = noise_scaling_weights
        options['noise_scaling_etas']    = noise_scaling_etas

        options['noise_boost_step']      = noise_boost_step
        options['noise_boost_substep']   = noise_boost_substep
        options['noise_boost_normalize'] = sampler_scaling_normalize

        return (options,)
class ClownOptions_FlowGuide:
    """Options node that stores the `flow_sync_eps` setting."""

    @classmethod
    def INPUT_TYPES(cls):
        return {"required":
                    {
                    "sync_eps": ("FLOAT", {"default": 0.75, "min": -10000.0, "max": 10000.0, "step":0.01, "round": False, "tooltip": "Accelerate convergence with positive values when sampling, negative values when unsampling."}),
                    },
                "optional":
                    {
                    "options":  ("OPTIONS", ),
                    }
                }

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_options"

    def main(self,
            sync_eps = 0.75,
            options  = None
            ):
        """Return a one-tuple holding the options dict with `flow_sync_eps` set.

        The incoming `options` dict is not modified; the value is written to
        a shallow copy so the upstream node's output stays untouched.
        """
        # Copy instead of mutating the caller's dict in place.
        options = dict(options) if options is not None else {}

        options['flow_sync_eps'] = sync_eps

        return (options,)
class ClownOptions_Cycles_Beta:
    """Options node that stores unsample/resample cycle settings."""

    @classmethod
    def INPUT_TYPES(cls):
        return {"required":
                    {
                    "cycles"                : ("FLOAT",   {"default": 0.0, "min": 0.0, "max": 10000, "step":0.5, "round": 0.5}),
                    "eta_decay_scale"       : ("FLOAT",   {"default": 1.0, "min": -10000, "max": 10000, "step":0.01, "tooltip": "Multiplies etas by this number after every cycle. May help drive convergence." }),
                    "unsample_eta"          : ("FLOAT",   {"default": 0.5, "min": -10000, "max": 10000, "step":0.01}),
                    "unsampler_override"    : (get_sampler_name_list(), {"default": "none"}),
                    "unsample_steps_to_run" : ("INT",     {"default": -1, "min": -1, "max": 10000, "step":1, "round": 1}),
                    "unsample_cfg"          : ("FLOAT",   {"default": 1.0, "min": -10000, "max": 10000, "step":0.01}),
                    "unsample_bongmath"     : ("BOOLEAN", {"default": False}),
                    },
                "optional":
                    {
                    "options": ("OPTIONS", ),
                    }
                }

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_options"

    def main(self,
            cycles                = 0,
            unsample_eta          = 0.5,
            eta_decay_scale       = 1.0,
            unsample_cfg          = 1.0,
            unsampler_override    = "none",
            unsample_steps_to_run = -1,
            unsample_bongmath     = False,
            options               = None
            ):
        """Return a one-tuple holding the options dict extended with cycle settings.

        `cycles` is stored as `rebounds = int(cycles * 2)`, so each half
        cycle counts as one rebound. The incoming `options` dict is not
        modified; values are written to a shallow copy.
        """
        # Copy instead of mutating the caller's dict in place.
        options = dict(options) if options is not None else {}

        options.update({
            'rebounds':              int(cycles * 2),
            'unsample_eta':          unsample_eta,
            'unsampler_name':        unsampler_override,
            'eta_decay_scale':       eta_decay_scale,
            'unsample_steps_to_run': unsample_steps_to_run,
            'unsample_cfg':          unsample_cfg,
            'unsample_bongmath':     unsample_bongmath,
        })

        return (options,)
class ClownOptions_Tile_Beta:
    """Options node that appends one ``(tile_height, tile_width)`` pair to ``options['tile_sizes']``."""

    @classmethod
    def INPUT_TYPES(cls):
        """Declare the node inputs: tile width/height INTs and an optional upstream OPTIONS dict."""
        return {
            "required": {
                "tile_width" : ("INT", {"default": 1024, "min": -1, "max": 10000, "step":1,}),
                "tile_height": ("INT", {"default": 1024, "min": -1, "max": 10000, "step":1,}),
            },
            "optional": {
                "options": ("OPTIONS", ),
            },
        }

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_options"

    def main(self, tile_height=1024, tile_width=1024, options=None):
        """Return a one-tuple holding an options dict with one more tile size appended.

        Args:
            tile_height: Height stored first in the appended tuple.
            tile_width:  Width stored second in the appended tuple.
            options:     Optional upstream options dict. Neither it nor its
                         ``'tile_sizes'`` list is modified.

        Returns:
            ``(options,)`` where ``options['tile_sizes']`` is a new list ending
            with ``(tile_height, tile_width)``.
        """
        # Work on copies. Appending to the upstream list in place meant that
        # running this node again on the same upstream dict kept adding
        # duplicate entries, and any other consumer of that dict saw them too.
        options = dict(options) if options is not None else {}

        tile_sizes = list(options.get('tile_sizes', []))
        tile_sizes.append((tile_height, tile_width))
        options['tile_sizes'] = tile_sizes

        return (options,)
class ClownOptions_ExtraOptions_Beta:
    """Options node that adds free-form text to ``options['extra_options']``."""

    @classmethod
    def INPUT_TYPES(cls):
        """Declare the node inputs: a multiline STRING and an optional upstream OPTIONS dict."""
        return {
            "required": {
                "extra_options": ("STRING", {"default": "", "multiline": True}),
            },
            "optional": {
                "options": ("OPTIONS", ),
            },
        }

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_options"

    def main(self, extra_options="", options=None):
        """Return a one-tuple holding an options dict with ``extra_options`` added.

        If the incoming dict already has an ``'extra_options'`` entry, the new
        text is appended to it after a newline; otherwise the entry is created.
        The incoming ``options`` dict is copied, not modified.
        """
        # Copy before writing. With in-place `+=` on the shared upstream dict,
        # every repeated run against the same dict appended the text again.
        options = dict(options) if options is not None else {}

        if 'extra_options' in options:
            options['extra_options'] = options['extra_options'] + '\n' + extra_options
        else:
            options['extra_options'] = extra_options

        return (options, )
class SharkOptions_GuideCond_Beta:
    """Options node that stores positive/negative conditioning and a cfg value under ``options['flow_cond']``."""

    @classmethod
    def INPUT_TYPES(cls):
        """Declare the node inputs; every input is optional."""
        return {
            "required": {},
            "optional": {
                "positive" : ("CONDITIONING", ),
                "negative" : ("CONDITIONING", ),
                "cfg"      : ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step":0.01}),
                "options"  : ("OPTIONS",),
            },
        }

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_options"

    def main(self, positive=None, negative=None, cfg=1.0, options=None):
        """Return a one-tuple holding an options dict with ``'flow_cond'`` set.

        ``'flow_cond'`` is a new dict with keys ``'yt_positive'``,
        ``'yt_negative'`` and ``'yt_cfg'`` taken from the arguments. Any
        existing ``'flow_cond'`` entry is replaced. The incoming ``options``
        dict is copied, not modified.
        """
        # Copy before writing so the caller's dict (possibly shared with other
        # consumers of the same upstream output) is left untouched.
        options = dict(options) if options is not None else {}

        options["flow_cond"] = {
            "yt_positive" : positive,
            "yt_negative" : negative,
            "yt_cfg"      : cfg,
        }

        return (options, )
class SharkOptions_Beta:
    """Options node that stores initial-noise and denoise settings in an OPTIONS dict."""

    @classmethod
    def INPUT_TYPES(cls):
        """Declare the node inputs; noise type choices come from ``NOISE_GENERATOR_NAMES_SIMPLE``."""
        return {
            "required": {
                "noise_type_init": (NOISE_GENERATOR_NAMES_SIMPLE, {"default": "gaussian"}),
                "s_noise_init":    ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step":0.01, "round": False, }),
                "denoise_alt":     ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step":0.01}),
                "channelwise_cfg": ("BOOLEAN", {"default": False}),
            },
            "optional": {
                "options": ("OPTIONS", ),
            },
        }

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_options"

    def main(self,
            noise_type_init = "gaussian",
            s_noise_init    = 1.0,
            denoise_alt     = 1.0,
            channelwise_cfg = False,
            options         = None,
            ):
        """Return a one-tuple holding an options dict with the four settings set.

        ``s_noise_init`` is stored under the key ``'noise_init_stdev'``; the
        other arguments are stored under their own names. The incoming
        ``options`` dict is copied, not modified.
        """
        # Copy before writing so the caller's dict (possibly shared with other
        # consumers of the same upstream output) is left untouched.
        options = dict(options) if options is not None else {}

        options['noise_type_init']  = noise_type_init
        options['noise_init_stdev'] = s_noise_init
        options['denoise_alt']      = denoise_alt
        options['channelwise_cfg']  = channelwise_cfg

        return (options,)
class ClownOptions_SwapSampler_Beta:
    """Options node that stores sampler-swap settings (``rk_swap_*`` keys) in an OPTIONS dict."""

    @classmethod
    def INPUT_TYPES(cls):
        """Declare the node inputs; sampler choices and default come from the file-level sampler helpers."""
        return {
            "required": {
                "sampler_name":       (get_sampler_name_list(), {"default": get_default_sampler_name()}),
                "swap_below_err":     ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Swap samplers if the error per step falls below this threshold."}),
                "swap_at_step":       ("INT", {"default": 30, "min": 1, "max": 10000}),
                "log_err_to_console": ("BOOLEAN", {"default": False}),
            },
            "optional": {
                "options": ("OPTIONS", ),
            },
        }

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_options"

    def main(self,
            sampler_name       = "res_3m",
            swap_below_err     = 0.0,
            swap_at_step       = 30,
            log_err_to_console = False,
            options            = None,
            ):
        """Return a one-tuple holding an options dict with the swap settings set.

        ``sampler_name`` is passed through ``process_sampler_name``, which
        returns a pair; the first element is kept when the second equals
        ``"use_explicit"``, otherwise the second element is used. The result is
        stored under ``'rk_swap_type'``. The remaining arguments are stored as
        ``'rk_swap_threshold'``, ``'rk_swap_step'`` and ``'rk_swap_print'``.
        The incoming ``options`` dict is copied, not modified.
        """
        sampler_name, implicit_sampler_name = process_sampler_name(sampler_name)
        if implicit_sampler_name != "use_explicit":
            sampler_name = implicit_sampler_name

        # Copy before writing so the caller's dict (possibly shared with other
        # consumers of the same upstream output) is left untouched.
        options = dict(options) if options is not None else {}

        options['rk_swap_type']      = sampler_name
        options['rk_swap_threshold'] = swap_below_err
        options['rk_swap_step']      = swap_at_step
        options['rk_swap_print']     = log_err_to_console

        return (options,)
class ClownGuide_Mean_Beta:
    """Guide node that records the ``*_mean`` guide settings in a GUIDES dict."""

    @classmethod
    def INPUT_TYPES(cls):
        """Declare the node inputs; scheduler choices are ``"constant"`` plus ``get_res4lyf_scheduler_list()``."""
        required = {
            "weight":           ("FLOAT", {"default": 0.75, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide."}),
            "cutoff":           ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step":0.01, "round": False, "tooltip": "Disables the guide for the next step when the denoised image is similar to the guide. Higher values will strengthen the effect."}),
            "weight_scheduler": (["constant"] + get_res4lyf_scheduler_list(), {"default": "beta57"},),
            "start_step":       ("INT", {"default": 0, "min": 0, "max": 10000}),
            "end_step":         ("INT", {"default": 15, "min": -1, "max": 10000}),
            "invert_mask":      ("BOOLEAN", {"default": False}),
        }
        optional = {
            "guide":   ("LATENT", ),
            "mask":    ("MASK", ),
            "weights": ("SIGMAS", ),
            "guides":  ("GUIDES", ),
        }
        return {"required": required, "optional": optional}

    RETURN_TYPES = ("GUIDES",)
    RETURN_NAMES = ("guides",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_extensions"

    def main(self,
            weight_scheduler = "constant",
            start_step       = 0,
            end_step         = 30,
            cutoff           = 1.0,
            guide            = None,
            weight           = 0.0,
            channelwise_mode = False,
            projection_mode  = False,
            weights          = None,
            mask             = None,
            invert_mask      = False,
            guides           = None,
            ):
        """Return a one-tuple holding a GUIDES dict with the ``*_mean`` entries set.

        The incoming ``guides`` dict is deep-copied before being written to.
        ``channelwise_mode``, ``projection_mode`` and ``invert_mask`` are
        accepted but not read by this method.
        """
        default_dtype = torch.float64

        # NOTE(review): a supplied mask is always inverted here and
        # `invert_mask` is never consulted - confirm this is intended.
        if mask is not None:
            mask = 1-mask

        # -1 is the "no end" sentinel.
        if end_step == -1:
            end_step = MAX_STEPS

        if guide is not None:
            # Use the latent's state_info['raw_x'] when present, else its 'samples'.
            raw_x  = guide.get('state_info', {}).get('raw_x', None)
            latent = raw_x if raw_x is not None else guide['samples']
            guide  = {'samples': latent.clone()}

        if weight_scheduler == "constant":
            # A constant schedule replaces any supplied `weights`, then is
            # padded with MAX_STEPS trailing zeros.
            constant = initialize_or_scale(None, weight, end_step).to(default_dtype)
            weights  = F.pad(constant, (0, MAX_STEPS), value=0.0)

        result = copy.deepcopy(guides) if guides is not None else {}
        result.update({
            'weight_mean'           : weight,
            'weights_mean'          : weights,
            'guide_mean'            : guide,
            'mask_mean'             : mask,
            'weight_scheduler_mean' : weight_scheduler,
            'start_step_mean'       : start_step,
            'end_step_mean'         : end_step,
            'cutoff_mean'           : cutoff,
        })

        return (result, )
class ClownGuide_Style_Beta:
    """Guide node that records style-guide settings in a GUIDES dict.

    Settings are written under keys suffixed ``_style_pos``, ``_style_neg``
    and/or ``_style_denoised`` depending on ``apply_to``.
    """

    @classmethod
    def INPUT_TYPES(cls):
        """Declare the node inputs; scheduler choices are ``"constant"`` plus ``get_res4lyf_scheduler_list()``."""
        return {
            "required": {
                "apply_to":         (["positive", "negative", "denoised"], {"default": "positive", "tooltip": "When using CFG, decides whether to apply the guide to the positive or negative conditioning."}),
                "method":           (["AdaIN", "WCT", "WCT2", "scattersort","none"], {"default": "WCT"}),
                "weight":           ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide by multiplying all other weights by this value."}),
                "synweight":        ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the relative strength of the guide on the opposite conditioning to what was selected: i.e., negative if positive in apply_to. Recommended to avoid CFG burn."}),
                "weight_scheduler": (["constant"] + get_res4lyf_scheduler_list(), {"default": "constant", "tooltip": "Selecting any scheduler except constant will cause the strength to gradually decay to zero. Try beta57 vs. linear quadratic."},),
                "start_step":       ("INT", {"default": 0, "min": 0, "max": 10000}),
                "end_step":         ("INT", {"default": -1, "min": -1, "max": 10000}),
                "invert_mask":      ("BOOLEAN", {"default": False}),
            },
            "optional": {
                "guide":   ("LATENT", ),
                "mask":    ("MASK", ),
                "weights": ("SIGMAS", ),
                "guides":  ("GUIDES", ),
            },
        }

    RETURN_TYPES = ("GUIDES",)
    RETURN_NAMES = ("guides",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_extensions"
    # The three sentences are joined with explicit leading spaces; without them
    # the text ran together as "scheme.If using" and "conditioning.Currently".
    DESCRIPTION  = (
        "Transfer some visual aspects of style from a guide (reference) image. If nothing about style is specified in the prompt, it may just transfer the lighting and color scheme."
        " If using CFG results in burn, or a very dark/bright image in the preview followed by a bad output, try duplicating and chaining this node, so that the guide may be applied to both positive and negative conditioning."
        " Currently supported models: SD1.5, SDXL, Stable Cascade, SD3.5, AuraFlow, Flux, HiDream, WAN, and LTXV."
    )

    def main(self,
            apply_to         = "all",
            method           = "WCT",
            weight           = 1.0,
            synweight        = 1.0,
            weight_scheduler = "constant",
            start_step       = 0,
            end_step         = 15,
            invert_mask      = False,
            guide            = None,
            mask             = None,
            weights          = None,
            guides           = None,
            ):
        """Return a one-tuple holding a GUIDES dict with the style entries set.

        Args:
            apply_to: ``"positive"``, ``"negative"``, ``"denoised"`` or
                ``"all"``; selects which suffixed key groups are written.
            method: Stored under ``'style_method'``.
            weight, synweight, weight_scheduler, start_step, end_step: Stored
                per selected target. ``end_step == -1`` is replaced by
                ``MAX_STEPS``.
            invert_mask: Accepted but not read by this method.
            guide: Optional latent dict; its ``state_info['raw_x']`` is used
                when present, otherwise its ``'samples'``. The tensor is cloned.
            mask: Optional mask; stored as ``1 - mask``.
            weights: Stored as given unless ``weight_scheduler`` is
                ``"constant"``, in which case it is replaced.
            guides: Optional upstream GUIDES dict; deep-copied before writing.
        """
        default_dtype = torch.float64

        # NOTE(review): a supplied mask is always inverted here and
        # `invert_mask` is never consulted - confirm this is intended.
        mask = 1-mask if mask is not None else None

        if end_step == -1:
            end_step = MAX_STEPS

        if guide is not None:
            raw_x  = guide.get('state_info', {}).get('raw_x', None)
            latent = raw_x if raw_x is not None else guide['samples']
            guide  = {'samples': latent.clone()}

        if weight_scheduler == "constant": # and weights == None:
            # Constant schedule: `start_step` zeros, then the constant weights,
            # then MAX_STEPS trailing zeros.
            weights = initialize_or_scale(None, weight, end_step).to(default_dtype)
            prepend = torch.zeros(start_step).to(weights)
            weights = torch.cat([prepend, weights])
            weights = F.pad(weights, (0, MAX_STEPS), value=0.0)

        guides = copy.deepcopy(guides) if guides is not None else {}

        guides['style_method'] = method

        # Each target gets the same eight entries under its own key suffix.
        for target, suffix in (("positive", "pos"), ("negative", "neg"), ("denoised", "denoised")):
            if apply_to not in (target, "all"):
                continue
            guides[f'weight_style_{suffix}']           = weight
            guides[f'weights_style_{suffix}']          = weights
            guides[f'synweight_style_{suffix}']        = synweight
            guides[f'guide_style_{suffix}']            = guide
            guides[f'mask_style_{suffix}']             = mask
            guides[f'weight_scheduler_style_{suffix}'] = weight_scheduler
            guides[f'start_step_style_{suffix}']       = start_step
            guides[f'end_step_style_{suffix}']         = end_step

        return (guides, )
class ClownGuide_Style_TileSize:
    """Guide node that records style tile dimensions in a GUIDES dict."""

    @classmethod
    def INPUT_TYPES(cls):
        """Declare the node inputs: three INT sizes and an optional upstream GUIDES dict."""
        required = {
            "height":   ("INT", {"default": 128, "min": 16, "max": 10000, "step": 16}),
            "width" :   ("INT", {"default": 128, "min": 16, "max": 10000, "step": 16}),
            "padding" : ("INT", {"default": 64, "min": 0, "max": 10000, "step": 16}),
        }
        optional = {
            "guides": ("GUIDES", ),
        }
        return {"required": required, "optional": optional}

    RETURN_TYPES = ("GUIDES",)
    RETURN_NAMES = ("guides",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_extensions"
    DESCRIPTION  = "Set a tile size for some style guide types such as scattersort. Can improve adherence to the input image."

    def main(self, height=128, width=128, padding=64, guides=None):
        """Return a one-tuple holding a GUIDES dict with the tile settings set.

        Each of ``height``, ``width`` and ``padding`` is floor-divided by 16 and
        stored as ``'style_tile_height'``, ``'style_tile_width'`` and
        ``'style_tile_padding'``. The incoming ``guides`` dict is deep-copied
        before being written to.
        """
        result = {} if guides is None else copy.deepcopy(guides)

        settings = (
            ('style_tile_height',  height),
            ('style_tile_width',   width),
            ('style_tile_padding', padding),
        )
        for key, size in settings:
            result[key] = size // 16

        return (result, )
10000}), "weight_end_step_masked": ("INT", {"default": 15, "min": -1, "max": 10000}), "weight_end_step_unmasked": ("INT", {"default": 15, "min": -1, "max": 10000}), "sync_masked": ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide."}), "sync_unmasked": ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide_bkg."}), "sync_scheduler_masked": (["constant"] + get_res4lyf_scheduler_list(), {"default": "beta57"},), "sync_scheduler_unmasked": (["constant"] + get_res4lyf_scheduler_list(), {"default": "constant"},), "sync_start_step_masked": ("INT", {"default": 0, "min": 0, "max": 10000}), "sync_start_step_unmasked": ("INT", {"default": 0, "min": 0, "max": 10000}), "sync_end_step_masked": ("INT", {"default": 15, "min": -1, "max": 10000}), "sync_end_step_unmasked": ("INT", {"default": 15, "min": -1, "max": 10000}), "invert_mask": ("BOOLEAN", {"default": False}), }, "optional": { "guide_masked": ("LATENT", ), "guide_unmasked": ("LATENT", ), "mask": ("MASK", ), "weights_masked": ("SIGMAS", ), "weights_unmasked": ("SIGMAS", ), "syncs_masked": ("SIGMAS", ), "syncs_unmasked": ("SIGMAS", ), } } RETURN_TYPES = ("GUIDES",) RETURN_NAMES = ("guides",) FUNCTION = "main" CATEGORY = "RES4LYF/sampler_extensions" EXPERIMENTAL = True def main(self, weight_masked = 0.0, weight_unmasked = 0.0, weight_scheduler_masked = "constant", weight_scheduler_unmasked = "constant", weight_start_step_masked = 0, weight_start_step_unmasked = 0, weight_end_step_masked = 30, weight_end_step_unmasked = 30, sync_masked = 0.0, sync_unmasked = 0.0, sync_scheduler_masked = "constant", sync_scheduler_unmasked = "constant", sync_start_step_masked = 0, sync_start_step_unmasked = 0, sync_end_step_masked = 30, sync_end_step_unmasked = 30, guide_masked = None, guide_unmasked = None, weights_masked = None, weights_unmasked = None, syncs_masked = None, syncs_unmasked = None, 
mask = None, unmask = None, invert_mask = False, guide_mode = "sync", channelwise_mode = False, projection_mode = False, cutoff_masked = 1.0, cutoff_unmasked = 1.0, ): default_dtype = torch.float64 if weight_end_step_masked == -1: weight_end_step_masked = MAX_STEPS if weight_end_step_unmasked == -1: weight_end_step_unmasked = MAX_STEPS if sync_end_step_masked == -1: sync_end_step_masked = MAX_STEPS if sync_end_step_unmasked == -1: sync_end_step_unmasked = MAX_STEPS if guide_masked is None: weight_scheduler_masked = "constant" weight_start_step_masked = 0 weight_end_step_masked = 30 weight_masked = 0.0 weights_masked = None sync_scheduler_masked = "constant" sync_start_step_masked = 0 sync_end_step_masked = 30 sync_masked = 0.0 syncs_masked = None if guide_unmasked is None: weight_scheduler_unmasked = "constant" weight_start_step_unmasked = 0 weight_end_step_unmasked = 30 weight_unmasked = 0.0 weights_unmasked = None sync_scheduler_unmasked = "constant" sync_start_step_unmasked = 0 sync_end_step_unmasked = 30 sync_unmasked = 0.0 syncs_unmasked = None if guide_masked is not None: raw_x = guide_masked.get('state_info', {}).get('raw_x', None) if False: #raw_x is not None: guide_masked = {'samples': guide_masked['state_info']['raw_x'].clone()} else: guide_masked = {'samples': guide_masked['samples'].clone()} if guide_unmasked is not None: raw_x = guide_unmasked.get('state_info', {}).get('raw_x', None) if False: #raw_x is not None: guide_unmasked = {'samples': guide_unmasked['state_info']['raw_x'].clone()} else: guide_unmasked = {'samples': guide_unmasked['samples'].clone()} if invert_mask and mask is not None: mask = 1-mask if projection_mode: guide_mode = guide_mode + "_projection" if channelwise_mode: guide_mode = guide_mode + "_cw" if guide_mode == "unsample_cw": guide_mode = "unsample" if guide_mode == "resample_cw": guide_mode = "resample" if weight_scheduler_masked == "constant" and weights_masked == None: weights_masked = initialize_or_scale(None, weight_masked, 
weight_end_step_masked).to(default_dtype) prepend = torch.zeros(weight_start_step_masked, dtype=default_dtype, device=weights_masked.device) weights_masked = torch.cat((prepend, weights_masked), dim=0) weights_masked = F.pad(weights_masked, (0, MAX_STEPS), value=0.0) if weight_scheduler_unmasked == "constant" and weights_unmasked == None: weights_unmasked = initialize_or_scale(None, weight_unmasked, weight_end_step_unmasked).to(default_dtype) prepend = torch.zeros(weight_start_step_unmasked, dtype=default_dtype, device=weights_unmasked.device) weights_unmasked = torch.cat((prepend, weights_unmasked), dim=0) weights_unmasked = F.pad(weights_unmasked, (0, MAX_STEPS), value=0.0) # Values for the sync scheduler will be inverted in rk_guide_func_beta.py as it's easier to understand: # makes it so that a sync weight of 1.0 = full guide strength (which previously was 0.0) if sync_scheduler_masked == "constant" and syncs_masked == None: syncs_masked = initialize_or_scale(None, sync_masked, sync_end_step_masked).to(default_dtype) prepend = torch.zeros(sync_start_step_masked, dtype=default_dtype, device=syncs_masked.device) syncs_masked = torch.cat((prepend, syncs_masked), dim=0) syncs_masked = F.pad(syncs_masked, (0, MAX_STEPS), value=0.0) if sync_scheduler_unmasked == "constant" and syncs_unmasked == None: syncs_unmasked = initialize_or_scale(None, sync_unmasked, sync_end_step_unmasked).to(default_dtype) prepend = torch.zeros(sync_start_step_unmasked, dtype=default_dtype, device=syncs_unmasked.device) syncs_unmasked = torch.cat((prepend, syncs_unmasked), dim=0) syncs_unmasked = F.pad(syncs_unmasked, (0, MAX_STEPS), value=0.0) guides = { "guide_mode" : guide_mode, "guide_masked" : guide_masked, "guide_unmasked" : guide_unmasked, "mask" : mask, "unmask" : unmask, "weight_masked" : weight_masked, "weight_unmasked" : weight_unmasked, "weight_scheduler_masked" : weight_scheduler_masked, "weight_scheduler_unmasked" : weight_scheduler_unmasked, "start_step_masked" : 
weight_start_step_masked, "start_step_unmasked" : weight_start_step_unmasked, "end_step_masked" : weight_end_step_masked, "end_step_unmasked" : weight_end_step_unmasked, "weights_masked" : weights_masked, "weights_unmasked" : weights_unmasked, "weight_masked_sync" : sync_masked, "weight_unmasked_sync" : sync_unmasked, "weight_scheduler_masked_sync" : sync_scheduler_masked, "weight_scheduler_unmasked_sync" : sync_scheduler_unmasked, "start_step_masked_sync" : sync_start_step_masked, "start_step_unmasked_sync" : sync_start_step_unmasked, "end_step_masked_sync" : sync_end_step_masked, "end_step_unmasked_sync" : sync_end_step_unmasked, "weights_masked_sync" : syncs_masked, "weights_unmasked_sync" : syncs_unmasked, "cutoff_masked" : cutoff_masked, "cutoff_unmasked" : cutoff_unmasked } return (guides, ) class ClownGuides_Sync_Advanced: @classmethod def INPUT_TYPES(cls): return {"required": { "weight_masked": ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide."}), "weight_unmasked": ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide_bkg."}), "weight_scheduler_masked": (["constant"] + get_res4lyf_scheduler_list(), {"default": "constant"},), "weight_scheduler_unmasked": (["constant"] + get_res4lyf_scheduler_list(), {"default": "constant"},), "weight_start_step_masked": ("INT", {"default": 0, "min": 0, "max": 10000}), "weight_start_step_unmasked": ("INT", {"default": 0, "min": 0, "max": 10000}), "weight_end_step_masked": ("INT", {"default": 30, "min": -1, "max": 10000}), "weight_end_step_unmasked": ("INT", {"default": -1, "min": -1, "max": 10000}), "sync_masked": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide."}), "sync_unmasked": ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of 
the guide_bkg."}), "sync_scheduler_masked": (["constant"] + get_res4lyf_scheduler_list(), {"default": "constant"},), "sync_scheduler_unmasked": (["constant"] + get_res4lyf_scheduler_list(), {"default": "constant"},), "sync_start_step_masked": ("INT", {"default": 0, "min": 0, "max": 10000}), "sync_start_step_unmasked": ("INT", {"default": 0, "min": 0, "max": 10000}), "sync_end_step_masked": ("INT", {"default": -1, "min": -1, "max": 10000}), "sync_end_step_unmasked": ("INT", {"default": -1, "min": -1, "max": 10000}), "drift_x_data": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide."}), "drift_x_sync": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide."}), "drift_x_masked": ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide."}), "drift_x_unmasked": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide_bkg."}), "drift_x_scheduler_masked": (["constant"] + get_res4lyf_scheduler_list(), {"default": "constant"},), "drift_x_scheduler_unmasked": (["constant"] + get_res4lyf_scheduler_list(), {"default": "constant"},), "drift_x_start_step_masked": ("INT", {"default": 0, "min": 0, "max": 10000}), "drift_x_start_step_unmasked": ("INT", {"default": 0, "min": 0, "max": 10000}), "drift_x_end_step_masked": ("INT", {"default": -1, "min": -1, "max": 10000}), "drift_x_end_step_unmasked": ("INT", {"default": -1, "min": -1, "max": 10000}), "drift_y_data": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide."}), "drift_y_sync": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide."}), "drift_y_guide": ("FLOAT", {"default": 0.0, "min": -100.0, 
"max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide."}), "drift_y_masked": ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide."}), "drift_y_unmasked": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide_bkg."}), "drift_y_scheduler_masked": (["constant"] + get_res4lyf_scheduler_list(), {"default": "constant"},), "drift_y_scheduler_unmasked": (["constant"] + get_res4lyf_scheduler_list(), {"default": "constant"},), "drift_y_start_step_masked": ("INT", {"default": 0, "min": 0, "max": 10000}), "drift_y_start_step_unmasked": ("INT", {"default": 0, "min": 0, "max": 10000}), "drift_y_end_step_masked": ("INT", {"default": -1, "min": -1, "max": 10000}), "drift_y_end_step_unmasked": ("INT", {"default": -1, "min": -1, "max": 10000}), "lure_x_masked": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide."}), "lure_x_unmasked": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide_bkg."}), "lure_x_scheduler_masked": (["constant"] + get_res4lyf_scheduler_list(), {"default": "constant"},), "lure_x_scheduler_unmasked": (["constant"] + get_res4lyf_scheduler_list(), {"default": "constant"},), "lure_x_start_step_masked": ("INT", {"default": 0, "min": 0, "max": 10000}), "lure_x_start_step_unmasked": ("INT", {"default": 0, "min": 0, "max": 10000}), "lure_x_end_step_masked": ("INT", {"default": -1, "min": -1, "max": 10000}), "lure_x_end_step_unmasked": ("INT", {"default": -1, "min": -1, "max": 10000}), "lure_y_masked": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide."}), "lure_y_unmasked": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": 
"Set the strength of the guide_bkg."}), "lure_y_scheduler_masked": (["constant"] + get_res4lyf_scheduler_list(), {"default": "constant"},), "lure_y_scheduler_unmasked": (["constant"] + get_res4lyf_scheduler_list(), {"default": "constant"},), "lure_y_start_step_masked": ("INT", {"default": 0, "min": 0, "max": 10000}), "lure_y_start_step_unmasked": ("INT", {"default": 0, "min": 0, "max": 10000}), "lure_y_end_step_masked": ("INT", {"default": -1, "min": -1, "max": 10000}), "lure_y_end_step_unmasked": ("INT", {"default": -1, "min": -1, "max": 10000}), "lure_iter": ("INT", {"default": 0, "min": 0, "max": 10000}), "lure_sequence": (["x -> y", "y -> x", "xy -> xy"], {"default": "y -> x"}), "invert_mask": ("BOOLEAN", {"default": False}), "invert_mask_sync": ("BOOLEAN", {"default": False}), "invert_mask_drift_x": ("BOOLEAN", {"default": False}), "invert_mask_drift_y": ("BOOLEAN", {"default": False}), "invert_mask_lure_x": ("BOOLEAN", {"default": False}), "invert_mask_lure_y": ("BOOLEAN", {"default": False}), }, "optional": { "guide_masked": ("LATENT", ), "guide_unmasked": ("LATENT", ), "mask": ("MASK", ), "mask_sync": ("MASK", ), "mask_drift_x": ("MASK", ), "mask_drift_y": ("MASK", ), "mask_lure_x": ("MASK", ), "mask_lure_y": ("MASK", ), "weights_masked": ("SIGMAS", ), "weights_unmasked": ("SIGMAS", ), "syncs_masked": ("SIGMAS", ), "syncs_unmasked": ("SIGMAS", ), "drift_xs_masked": ("SIGMAS", ), "drift_xs_unmasked": ("SIGMAS", ), "drift_ys_masked": ("SIGMAS", ), "drift_ys_unmasked": ("SIGMAS", ), "lure_xs_masked": ("SIGMAS", ), "lure_xs_unmasked": ("SIGMAS", ), "lure_ys_masked": ("SIGMAS", ), "lure_ys_unmasked": ("SIGMAS", ), } } RETURN_TYPES = ("GUIDES",) RETURN_NAMES = ("guides",) FUNCTION = "main" CATEGORY = "RES4LYF/sampler_extensions" EXPERIMENTAL = True def main(self, weight_masked = 0.0, weight_unmasked = 0.0, weight_scheduler_masked = "constant", weight_scheduler_unmasked = "constant", weight_start_step_masked = 0, weight_start_step_unmasked = 0, 
weight_end_step_masked = 30, weight_end_step_unmasked = 30, sync_masked = 0.0, sync_unmasked = 0.0, sync_scheduler_masked = "constant", sync_scheduler_unmasked = "constant", sync_start_step_masked = 0, sync_start_step_unmasked = 0, sync_end_step_masked = 30, sync_end_step_unmasked = 30, drift_x_data = 0.0, drift_x_sync = 0.0, drift_y_data = 0.0, drift_y_sync = 0.0, drift_y_guide = 0.0, drift_x_masked = 0.0, drift_x_unmasked = 0.0, drift_x_scheduler_masked = "constant", drift_x_scheduler_unmasked = "constant", drift_x_start_step_masked = 0, drift_x_start_step_unmasked = 0, drift_x_end_step_masked = 30, drift_x_end_step_unmasked = 30, drift_y_masked = 0.0, drift_y_unmasked = 0.0, drift_y_scheduler_masked = "constant", drift_y_scheduler_unmasked = "constant", drift_y_start_step_masked = 0, drift_y_start_step_unmasked = 0, drift_y_end_step_masked = 30, drift_y_end_step_unmasked = 30, lure_x_masked = 0.0, lure_x_unmasked = 0.0, lure_x_scheduler_masked = "constant", lure_x_scheduler_unmasked = "constant", lure_x_start_step_masked = 0, lure_x_start_step_unmasked = 0, lure_x_end_step_masked = 30, lure_x_end_step_unmasked = 30, lure_y_masked = 0.0, lure_y_unmasked = 0.0, lure_y_scheduler_masked = "constant", lure_y_scheduler_unmasked = "constant", lure_y_start_step_masked = 0, lure_y_start_step_unmasked = 0, lure_y_end_step_masked = 30, lure_y_end_step_unmasked = 30, guide_masked = None, guide_unmasked = None, weights_masked = None, weights_unmasked = None, syncs_masked = None, syncs_unmasked = None, drift_xs_masked = None, drift_xs_unmasked = None, drift_ys_masked = None, drift_ys_unmasked = None, lure_xs_masked = None, lure_xs_unmasked = None, lure_ys_masked = None, lure_ys_unmasked = None, lure_iter = 0, lure_sequence = "x -> y", mask = None, unmask = None, mask_sync = None, mask_drift_x = None, mask_drift_y = None, mask_lure_x = None, mask_lure_y = None, invert_mask = False, invert_mask_sync = False, invert_mask_drift_x = False, invert_mask_drift_y = False, 
invert_mask_lure_x = False, invert_mask_lure_y = False, guide_mode = "sync", channelwise_mode = False, projection_mode = False, cutoff_masked = 1.0, cutoff_unmasked = 1.0, ): default_dtype = torch.float64 if weight_end_step_masked == -1: weight_end_step_masked = MAX_STEPS if weight_end_step_unmasked == -1: weight_end_step_unmasked = MAX_STEPS if sync_end_step_masked == -1: sync_end_step_masked = MAX_STEPS if sync_end_step_unmasked == -1: sync_end_step_unmasked = MAX_STEPS if drift_x_end_step_masked == -1: drift_x_end_step_masked = MAX_STEPS if drift_x_end_step_unmasked == -1: drift_x_end_step_unmasked = MAX_STEPS if drift_y_end_step_masked == -1: drift_y_end_step_masked = MAX_STEPS if drift_y_end_step_unmasked == -1: drift_y_end_step_unmasked = MAX_STEPS if lure_x_end_step_masked == -1: lure_x_end_step_masked = MAX_STEPS if lure_x_end_step_unmasked == -1: lure_x_end_step_unmasked = MAX_STEPS if lure_y_end_step_masked == -1: lure_y_end_step_masked = MAX_STEPS if lure_y_end_step_unmasked == -1: lure_y_end_step_unmasked = MAX_STEPS if guide_masked is None: weight_scheduler_masked = "constant" weight_start_step_masked = 0 weight_end_step_masked = 30 weight_masked = 0.0 weights_masked = None sync_scheduler_masked = "constant" sync_start_step_masked = 0 sync_end_step_masked = 30 sync_masked = 0.0 syncs_masked = None drift_x_scheduler_masked = "constant" drift_x_start_step_masked = 0 drift_x_end_step_masked = 30 drift_x_masked = 0.0 drift_xs_masked = None drift_y_scheduler_masked = "constant" drift_y_start_step_masked = 0 drift_y_end_step_masked = 30 drift_y_masked = 0.0 drift_ys_masked = None lure_x_scheduler_masked = "constant" lure_x_start_step_masked = 0 lure_x_end_step_masked = 30 lure_x_masked = 0.0 lure_xs_masked = None lure_y_scheduler_masked = "constant" lure_y_start_step_masked = 0 lure_y_end_step_masked = 30 lure_y_masked = 0.0 lure_ys_masked = None if guide_unmasked is None: weight_scheduler_unmasked = "constant" weight_start_step_unmasked = 0 
weight_end_step_unmasked = 30 weight_unmasked = 0.0 weights_unmasked = None sync_scheduler_unmasked = "constant" sync_start_step_unmasked = 0 sync_end_step_unmasked = 30 sync_unmasked = 0.0 syncs_unmasked = None drift_x_scheduler_unmasked = "constant" drift_x_start_step_unmasked = 0 drift_x_end_step_unmasked = 30 drift_x_unmasked = 0.0 drift_xs_unmasked = None drift_y_scheduler_unmasked = "constant" drift_y_start_step_unmasked = 0 drift_y_end_step_unmasked = 30 drift_y_unmasked = 0.0 drift_ys_unmasked = None lure_x_scheduler_unmasked = "constant" lure_x_start_step_unmasked = 0 lure_x_end_step_unmasked = 30 lure_x_unmasked = 0.0 lure_xs_unmasked = None lure_y_scheduler_unmasked = "constant" lure_y_start_step_unmasked = 0 lure_y_end_step_unmasked = 30 lure_y_unmasked = 0.0 lure_ys_unmasked = None if guide_masked is not None: raw_x = guide_masked.get('state_info', {}).get('raw_x', None) if False: #raw_x is not None: guide_masked = {'samples': guide_masked['state_info']['raw_x'].clone()} else: guide_masked = {'samples': guide_masked['samples'].clone()} if guide_unmasked is not None: raw_x = guide_unmasked.get('state_info', {}).get('raw_x', None) if False: #raw_x is not None: guide_unmasked = {'samples': guide_unmasked['state_info']['raw_x'].clone()} else: guide_unmasked = {'samples': guide_unmasked['samples'].clone()} if invert_mask and mask is not None: mask = 1-mask if invert_mask_sync and mask_sync is not None: mask_sync = 1-mask_sync if invert_mask_drift_x and mask_drift_x is not None: mask_drift_x = 1-mask_drift_x if invert_mask_drift_y and mask_drift_y is not None: mask_drift_y = 1-mask_drift_y if invert_mask_lure_x and mask_lure_x is not None: mask_lure_x = 1-mask_lure_x if invert_mask_lure_y and mask_lure_y is not None: mask_lure_y = 1-mask_lure_y if projection_mode: guide_mode = guide_mode + "_projection" if channelwise_mode: guide_mode = guide_mode + "_cw" if guide_mode == "unsample_cw": guide_mode = "unsample" if guide_mode == "resample_cw": guide_mode = 
"resample" if weight_scheduler_masked == "constant" and weights_masked == None: weights_masked = initialize_or_scale(None, weight_masked, weight_end_step_masked).to(default_dtype) prepend = torch.zeros(weight_start_step_masked, dtype=default_dtype, device=weights_masked.device) weights_masked = torch.cat((prepend, weights_masked), dim=0) weights_masked = F.pad(weights_masked, (0, MAX_STEPS), value=0.0) if weight_scheduler_unmasked == "constant" and weights_unmasked == None: weights_unmasked = initialize_or_scale(None, weight_unmasked, weight_end_step_unmasked).to(default_dtype) prepend = torch.zeros(weight_start_step_unmasked, dtype=default_dtype, device=weights_unmasked.device) weights_unmasked = torch.cat((prepend, weights_unmasked), dim=0) weights_unmasked = F.pad(weights_unmasked, (0, MAX_STEPS), value=0.0) # Values for the sync scheduler will be inverted in rk_guide_func_beta.py as it's easier to understand: # makes it so that a sync weight of 1.0 = full guide strength (which previously was 0.0) if sync_scheduler_masked == "constant" and syncs_masked == None: syncs_masked = initialize_or_scale(None, sync_masked, sync_end_step_masked).to(default_dtype) prepend = torch.zeros(sync_start_step_masked, dtype=default_dtype, device=syncs_masked.device) syncs_masked = torch.cat((prepend, syncs_masked), dim=0) syncs_masked = F.pad(syncs_masked, (0, MAX_STEPS), value=0.0) if sync_scheduler_unmasked == "constant" and syncs_unmasked == None: syncs_unmasked = initialize_or_scale(None, sync_unmasked, sync_end_step_unmasked).to(default_dtype) prepend = torch.zeros(sync_start_step_unmasked, dtype=default_dtype, device=syncs_unmasked.device) syncs_unmasked = torch.cat((prepend, syncs_unmasked), dim=0) syncs_unmasked = F.pad(syncs_unmasked, (0, MAX_STEPS), value=0.0) if drift_x_scheduler_masked == "constant" and drift_xs_masked == None: drift_xs_masked = initialize_or_scale(None, drift_x_masked, drift_x_end_step_masked).to(default_dtype) prepend = 
torch.zeros(drift_x_start_step_masked, dtype=default_dtype, device=drift_xs_masked.device) drift_xs_masked = torch.cat((prepend, drift_xs_masked), dim=0) drift_xs_masked = F.pad(drift_xs_masked, (0, MAX_STEPS), value=0.0) if drift_x_scheduler_unmasked == "constant" and drift_xs_unmasked == None: drift_xs_unmasked = initialize_or_scale(None, drift_x_unmasked, drift_x_end_step_unmasked).to(default_dtype) prepend = torch.zeros(drift_x_start_step_unmasked, dtype=default_dtype, device=drift_xs_unmasked.device) drift_xs_unmasked = torch.cat((prepend, drift_xs_unmasked), dim=0) drift_xs_unmasked = F.pad(drift_xs_unmasked, (0, MAX_STEPS), value=0.0) if drift_y_scheduler_masked == "constant" and drift_ys_masked == None: drift_ys_masked = initialize_or_scale(None, drift_y_masked, drift_y_end_step_masked).to(default_dtype) prepend = torch.zeros(drift_y_start_step_masked, dtype=default_dtype, device=drift_ys_masked.device) drift_ys_masked = torch.cat((prepend, drift_ys_masked), dim=0) drift_ys_masked = F.pad(drift_ys_masked, (0, MAX_STEPS), value=0.0) if drift_y_scheduler_unmasked == "constant" and drift_ys_unmasked == None: drift_ys_unmasked = initialize_or_scale(None, drift_y_unmasked, drift_y_end_step_unmasked).to(default_dtype) prepend = torch.zeros(drift_y_start_step_unmasked, dtype=default_dtype, device=drift_ys_unmasked.device) drift_ys_unmasked = torch.cat((prepend, drift_ys_unmasked), dim=0) drift_ys_unmasked = F.pad(drift_ys_unmasked, (0, MAX_STEPS), value=0.0) if lure_x_scheduler_masked == "constant" and lure_xs_masked == None: lure_xs_masked = initialize_or_scale(None, lure_x_masked, lure_x_end_step_masked).to(default_dtype) prepend = torch.zeros(lure_x_start_step_masked, dtype=default_dtype, device=lure_xs_masked.device) lure_xs_masked = torch.cat((prepend, lure_xs_masked), dim=0) lure_xs_masked = F.pad(lure_xs_masked, (0, MAX_STEPS), value=0.0) if lure_x_scheduler_unmasked == "constant" and lure_xs_unmasked == None: lure_xs_unmasked = initialize_or_scale(None, 
lure_x_unmasked, lure_x_end_step_unmasked).to(default_dtype) prepend = torch.zeros(lure_x_start_step_unmasked, dtype=default_dtype, device=lure_xs_unmasked.device) lure_xs_unmasked = torch.cat((prepend, lure_xs_unmasked), dim=0) lure_xs_unmasked = F.pad(lure_xs_unmasked, (0, MAX_STEPS), value=0.0) if lure_y_scheduler_masked == "constant" and lure_ys_masked == None: lure_ys_masked = initialize_or_scale(None, lure_y_masked, lure_y_end_step_masked).to(default_dtype) prepend = torch.zeros(lure_y_start_step_masked, dtype=default_dtype, device=lure_ys_masked.device) lure_ys_masked = torch.cat((prepend, lure_ys_masked), dim=0) lure_ys_masked = F.pad(lure_ys_masked, (0, MAX_STEPS), value=0.0) if lure_y_scheduler_unmasked == "constant" and lure_ys_unmasked == None: lure_ys_unmasked = initialize_or_scale(None, lure_y_unmasked, lure_y_end_step_unmasked).to(default_dtype) prepend = torch.zeros(lure_y_start_step_unmasked, dtype=default_dtype, device=lure_ys_unmasked.device) lure_ys_unmasked = torch.cat((prepend, lure_ys_unmasked), dim=0) lure_ys_unmasked = F.pad(lure_ys_unmasked, (0, MAX_STEPS), value=0.0) guides = { "guide_mode" : guide_mode, "guide_masked" : guide_masked, "guide_unmasked" : guide_unmasked, "mask" : mask, "unmask" : unmask, "mask_sync" : mask_sync, "mask_lure_x" : mask_lure_x, "mask_lure_y" : mask_lure_y, "weight_masked" : weight_masked, "weight_unmasked" : weight_unmasked, "weight_scheduler_masked" : weight_scheduler_masked, "weight_scheduler_unmasked" : weight_scheduler_unmasked, "start_step_masked" : weight_start_step_masked, "start_step_unmasked" : weight_start_step_unmasked, "end_step_masked" : weight_end_step_masked, "end_step_unmasked" : weight_end_step_unmasked, "weights_masked" : weights_masked, "weights_unmasked" : weights_unmasked, "weight_masked_sync" : sync_masked, "weight_unmasked_sync" : sync_unmasked, "weight_scheduler_masked_sync" : sync_scheduler_masked, "weight_scheduler_unmasked_sync" : sync_scheduler_unmasked, "start_step_masked_sync" : 
sync_start_step_masked, "start_step_unmasked_sync" : sync_start_step_unmasked, "end_step_masked_sync" : sync_end_step_masked, "end_step_unmasked_sync" : sync_end_step_unmasked, "weights_masked_sync" : syncs_masked, "weights_unmasked_sync" : syncs_unmasked, "drift_x_data" : drift_x_data, "drift_x_sync" : drift_x_sync, "drift_y_data" : drift_y_data, "drift_y_sync" : drift_y_sync, "drift_y_guide" : drift_y_guide, "weight_masked_drift_x" : drift_x_masked, "weight_unmasked_drift_x" : drift_x_unmasked, "weight_scheduler_masked_drift_x" : drift_x_scheduler_masked, "weight_scheduler_unmasked_drift_x" : drift_x_scheduler_unmasked, "start_step_masked_drift_x" : drift_x_start_step_masked, "start_step_unmasked_drift_x" : drift_x_start_step_unmasked, "end_step_masked_drift_x" : drift_x_end_step_masked, "end_step_unmasked_drift_x" : drift_x_end_step_unmasked, "weights_masked_drift_x" : drift_xs_masked, "weights_unmasked_drift_x" : drift_xs_unmasked, "weight_masked_drift_y" : drift_y_masked, "weight_unmasked_drift_y" : drift_y_unmasked, "weight_scheduler_masked_drift_y" : drift_y_scheduler_masked, "weight_scheduler_unmasked_drift_y" : drift_y_scheduler_unmasked, "start_step_masked_drift_y" : drift_y_start_step_masked, "start_step_unmasked_drift_y" : drift_y_start_step_unmasked, "end_step_masked_drift_y" : drift_y_end_step_masked, "end_step_unmasked_drift_y" : drift_y_end_step_unmasked, "weights_masked_drift_y" : drift_ys_masked, "weights_unmasked_drift_y" : drift_ys_unmasked, "weight_masked_lure_x" : lure_x_masked, "weight_unmasked_lure_x" : lure_x_unmasked, "weight_scheduler_masked_lure_x" : lure_x_scheduler_masked, "weight_scheduler_unmasked_lure_x" : lure_x_scheduler_unmasked, "start_step_masked_lure_x" : lure_x_start_step_masked, "start_step_unmasked_lure_x" : lure_x_start_step_unmasked, "end_step_masked_lure_x" : lure_x_end_step_masked, "end_step_unmasked_lure_x" : lure_x_end_step_unmasked, "weights_masked_lure_x" : lure_xs_masked, "weights_unmasked_lure_x" : 
class ClownGuide_Beta:
    """Single-guide node that forwards its settings to ``ClownGuides_Beta``.

    The node exposes one guide latent, one mask and one weight schedule.
    ``main`` maps those onto the ``*_masked`` arguments of
    ``ClownGuides_Beta.main`` and returns that node's ``GUIDES`` dict.
    """

    @classmethod
    def INPUT_TYPES(cls):
        """Return the widget/socket description for this node."""
        return {"required":
                    {
                    "guide_mode":       (GUIDE_MODE_NAMES_BETA_SIMPLE, {"default": 'epsilon', "tooltip": "Recommended: epsilon or mean/mean_std with sampler_mode = standard, and unsample/resample with sampler_mode = unsample/resample. Epsilon_dynamic_mean, etc. are only used with two latent inputs and a mask. Blend/hard_light/mean/mean_std etc. require low strengths, start with 0.01-0.02."}),
                    "channelwise_mode": ("BOOLEAN", {"default": True}),
                    "projection_mode":  ("BOOLEAN", {"default": True}),
                    "weight":           ("FLOAT", {"default": 0.75, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide."}),
                    "cutoff":           ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step":0.01, "round": False, "tooltip": "Disables the guide for the next step when the denoised image is similar to the guide. Higher values will strengthen the effect."}),
                    "weight_scheduler": (["constant"] + get_res4lyf_scheduler_list(), {"default": "beta57"},),
                    "start_step":       ("INT", {"default": 0, "min": 0, "max": 10000}),
                    "end_step":         ("INT", {"default": 15, "min": -1, "max": 10000}),
                    "invert_mask":      ("BOOLEAN", {"default": False}),
                    },
                "optional":
                    {
                    "guide":   ("LATENT", ),
                    "mask":    ("MASK", ),
                    "weights": ("SIGMAS", ),
                    }
                }

    RETURN_TYPES = ("GUIDES",)
    RETURN_NAMES = ("guides",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_extensions"

    def main(self,
            weight_scheduler          = "constant",
            weight_scheduler_unmasked = "constant",
            start_step                = 0,
            start_step_unmasked       = 0,
            end_step                  = 30,
            end_step_unmasked         = 30,
            cutoff                    = 1.0,
            cutoff_unmasked           = 1.0,
            guide                     = None,
            guide_unmasked            = None,
            weight                    = 0.0,
            weight_unmasked           = 0.0,

            guide_mode                = "epsilon",
            channelwise_mode          = False,
            projection_mode           = False,
            weights                   = None,
            weights_unmasked          = None,
            mask                      = None,
            unmask                    = None,
            invert_mask               = False,
            ):
        """Build a ``GUIDES`` dict for a single guide latent.

        Args:
            weight_scheduler, start_step, end_step, cutoff, guide, weight,
            weights: settings for the guide; forwarded as the ``*_masked``
                arguments of ``ClownGuides_Beta.main``. ``end_step == -1``
                is replaced by ``MAX_STEPS``.
            *_unmasked, unmask: forwarded unchanged to the matching
                ``ClownGuides_Beta.main`` arguments. They are not listed in
                ``INPUT_TYPES``.
            guide_mode, channelwise_mode, projection_mode: forwarded as-is.
            mask: optional mask; it is replaced by ``1 - mask`` before being
                forwarded.
            invert_mask: forwarded; ``ClownGuides_Beta.main`` applies a
                further ``1 - mask`` when it is true.

        Returns:
            A 1-tuple holding the dict produced by ``ClownGuides_Beta.main``.
        """
        # The mask is flipped unconditionally at this level; the optional
        # user-requested inversion happens inside ClownGuides_Beta.main.
        mask = 1-mask if mask is not None else None

        if end_step == -1:
            end_step = MAX_STEPS

        # Keep only a private copy of the samples tensor. (A disabled branch
        # that preferred state_info['raw_x'] used to live here; it was
        # unreachable and its lookup could fail on state_info=None.)
        if guide is not None:
            guide = {'samples': guide['samples'].clone()}

        if guide_unmasked is not None:
            guide_unmasked = {'samples': guide_unmasked['samples'].clone()}

        guides, = ClownGuides_Beta().main(
            weight_scheduler_masked   = weight_scheduler,
            weight_scheduler_unmasked = weight_scheduler_unmasked,
            start_step_masked         = start_step,
            start_step_unmasked       = start_step_unmasked,
            end_step_masked           = end_step,
            end_step_unmasked         = end_step_unmasked,
            cutoff_masked             = cutoff,
            cutoff_unmasked           = cutoff_unmasked,
            guide_masked              = guide,
            guide_unmasked            = guide_unmasked,
            weight_masked             = weight,
            weight_unmasked           = weight_unmasked,

            guide_mode                = guide_mode,
            channelwise_mode          = channelwise_mode,
            projection_mode           = projection_mode,
            weights_masked            = weights,
            weights_unmasked          = weights_unmasked,
            mask                      = mask,
            unmask                    = unmask,
            invert_mask               = invert_mask
        )

        return (guides, )
class ClownGuides_Beta:
    """Node that packs a masked and an unmasked guide into one ``GUIDES`` dict.

    ``main`` normalises the step range, copies the guide latents, derives the
    final ``guide_mode`` string and, for the ``"constant"`` scheduler, builds
    the per-step weight tensors.
    """

    @classmethod
    def INPUT_TYPES(cls):
        """Return the widget/socket description for this node."""
        return {"required":
                    {
                    "guide_mode":                (GUIDE_MODE_NAMES_BETA_SIMPLE, {"default": 'epsilon', "tooltip": "Recommended: epsilon or mean/mean_std with sampler_mode = standard, and unsample/resample with sampler_mode = unsample/resample. Epsilon_dynamic_mean, etc. are only used with two latent inputs and a mask. Blend/hard_light/mean/mean_std etc. require low strengths, start with 0.01-0.02."}),
                    "channelwise_mode":          ("BOOLEAN", {"default": True}),
                    "projection_mode":           ("BOOLEAN", {"default": True}),
                    "weight_masked":             ("FLOAT", {"default": 0.75, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide."}),
                    "weight_unmasked":           ("FLOAT", {"default": 0.75, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide_bkg."}),
                    "cutoff_masked":             ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step":0.01, "round": False, "tooltip": "Disables the guide for the next step when the denoised image is similar to the guide. Higher values will strengthen the effect."}),
                    # NOTE(review): range differs from cutoff_masked (0..1) - looks like a copy/paste slip; confirm before narrowing.
                    "cutoff_unmasked":           ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Disables the guide for the next step when the denoised image is similar to the guide. Higher values will strengthen the effect."}),
                    "weight_scheduler_masked":   (["constant"] + get_res4lyf_scheduler_list(), {"default": "beta57"},),
                    "weight_scheduler_unmasked": (["constant"] + get_res4lyf_scheduler_list(), {"default": "constant"},),
                    "start_step_masked":         ("INT", {"default": 0, "min": 0, "max": 10000}),
                    "start_step_unmasked":       ("INT", {"default": 0, "min": 0, "max": 10000}),
                    "end_step_masked":           ("INT", {"default": 15, "min": -1, "max": 10000}),
                    "end_step_unmasked":         ("INT", {"default": 15, "min": -1, "max": 10000}),
                    "invert_mask":               ("BOOLEAN", {"default": False}),
                    },
                "optional":
                    {
                    "guide_masked":     ("LATENT", ),
                    "guide_unmasked":   ("LATENT", ),
                    "mask":             ("MASK", ),
                    "weights_masked":   ("SIGMAS", ),
                    "weights_unmasked": ("SIGMAS", ),
                    }
                }

    RETURN_TYPES = ("GUIDES",)
    RETURN_NAMES = ("guides",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_extensions"

    @staticmethod
    def _constant_weights(weight, start_step, end_step, dtype):
        """Build the per-step weight tensor used for the "constant" scheduler.

        The result is ``start_step`` zeros, followed by whatever
        ``initialize_or_scale(None, weight, end_step)`` returns (presumably a
        1-D tensor of ``end_step`` entries equal to ``weight`` - confirm
        against that helper), followed by ``MAX_STEPS`` trailing zeros.
        """
        weights = initialize_or_scale(None, weight, end_step).to(dtype)
        prepend = torch.zeros(start_step, dtype=dtype, device=weights.device)
        weights = torch.cat((prepend, weights), dim=0)
        return F.pad(weights, (0, MAX_STEPS), value=0.0)

    def main(self,
            weight_scheduler_masked   = "constant",
            weight_scheduler_unmasked = "constant",
            start_step_masked         = 0,
            start_step_unmasked       = 0,
            end_step_masked           = 30,
            end_step_unmasked         = 30,
            cutoff_masked             = 1.0,
            cutoff_unmasked           = 1.0,
            guide_masked              = None,
            guide_unmasked            = None,
            weight_masked             = 0.0,
            weight_unmasked           = 0.0,

            guide_mode                = "epsilon",
            channelwise_mode          = False,
            projection_mode           = False,
            weights_masked            = None,
            weights_unmasked          = None,
            mask                      = None,
            unmask                    = None,
            invert_mask               = False,
            ):
        """Assemble the ``GUIDES`` dict for a masked/unmasked guide pair.

        Args:
            weight_scheduler_masked / _unmasked: scheduler name. Only
                ``"constant"`` makes this method build a weight tensor.
            start_step_* / end_step_*: step range; ``end_step == -1`` is
                replaced by ``MAX_STEPS``.
            cutoff_*, weight_*: stored in the result unchanged.
            guide_masked / guide_unmasked: optional dicts with a ``'samples'``
                entry. Only a clone of ``'samples'`` is kept. When a guide is
                missing, that side's scheduler/steps/cutoff/weight/weights are
                reset to ``"constant"``, 0, 30, 1.0, 0.0 and ``None``.
            guide_mode: base mode name; ``"_projection"`` and ``"_cw"`` are
                appended when the corresponding flags are set, except that
                ``"unsample_cw"``/``"resample_cw"`` collapse back to
                ``"unsample"``/``"resample"``.
            weights_masked / weights_unmasked: optional explicit weights; when
                given they are passed through untouched.
            mask, unmask: stored in the result; ``mask`` becomes ``1 - mask``
                when ``invert_mask`` is true.

        Returns:
            A 1-tuple containing the guides dict.
        """
        default_dtype = torch.float64

        if end_step_masked == -1:
            end_step_masked = MAX_STEPS
        if end_step_unmasked == -1:
            end_step_unmasked = MAX_STEPS

        if guide_masked is None:
            # No latent connected: neutralise every setting of this side.
            weight_scheduler_masked = "constant"
            start_step_masked       = 0
            end_step_masked         = 30
            cutoff_masked           = 1.0
            weight_masked           = 0.0
            weights_masked          = None
        else:
            # Keep a private copy of the samples only. (A disabled branch that
            # preferred state_info['raw_x'] was unreachable and was removed.)
            guide_masked = {'samples': guide_masked['samples'].clone()}

        if guide_unmasked is None:
            weight_scheduler_unmasked = "constant"
            start_step_unmasked       = 0
            end_step_unmasked         = 30
            cutoff_unmasked           = 1.0
            weight_unmasked           = 0.0
            weights_unmasked          = None
        else:
            guide_unmasked = {'samples': guide_unmasked['samples'].clone()}

        if invert_mask and mask is not None:
            mask = 1-mask

        if projection_mode:
            guide_mode = guide_mode + "_projection"
        if channelwise_mode:
            guide_mode = guide_mode + "_cw"
        # unsample/resample have no channelwise variant: drop the "_cw" suffix.
        if guide_mode in ("unsample_cw", "resample_cw"):
            guide_mode = guide_mode[:-len("_cw")]

        # Identity test on purpose: weights_* may be a tensor.
        if weight_scheduler_masked == "constant" and weights_masked is None:
            weights_masked = self._constant_weights(weight_masked, start_step_masked, end_step_masked, default_dtype)

        if weight_scheduler_unmasked == "constant" and weights_unmasked is None:
            weights_unmasked = self._constant_weights(weight_unmasked, start_step_unmasked, end_step_unmasked, default_dtype)

        guides = {
            "guide_mode"                : guide_mode,
            "weight_masked"             : weight_masked,
            "weight_unmasked"           : weight_unmasked,
            "weights_masked"            : weights_masked,
            "weights_unmasked"          : weights_unmasked,
            "guide_masked"              : guide_masked,
            "guide_unmasked"            : guide_unmasked,
            "mask"                      : mask,
            "unmask"                    : unmask,

            "weight_scheduler_masked"   : weight_scheduler_masked,
            "weight_scheduler_unmasked" : weight_scheduler_unmasked,
            "start_step_masked"         : start_step_masked,
            "start_step_unmasked"       : start_step_unmasked,
            "end_step_masked"           : end_step_masked,
            "end_step_unmasked"         : end_step_unmasked,
            "cutoff_masked"             : cutoff_masked,
            "cutoff_unmasked"           : cutoff_unmasked
        }

        return (guides, )
class ClownGuidesAB_Beta:
    """Node that packs two guides (A and B), each with its own mask, into ``GUIDES``.

    In the returned dict, guide A is stored under the ``*_masked`` keys and
    guide B under the ``*_unmasked`` keys; ``mask_A``/``mask_B`` are stored as
    ``"mask"``/``"unmask"``.
    """

    @classmethod
    def INPUT_TYPES(cls):
        """Return the widget/socket description for this node."""
        return {"required":
                    {
                    "guide_mode":         (GUIDE_MODE_NAMES_BETA_SIMPLE, {"default": 'epsilon', "tooltip": "Recommended: epsilon or mean/mean_std with sampler_mode = standard, and unsample/resample with sampler_mode = unsample/resample. Epsilon_dynamic_mean, etc. are only used with two latent inputs and a mask. Blend/hard_light/mean/mean_std etc. require low strengths, start with 0.01-0.02."}),
                    "channelwise_mode":   ("BOOLEAN", {"default": False}),
                    "projection_mode":    ("BOOLEAN", {"default": False}),
                    "weight_A":           ("FLOAT", {"default": 0.75, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide."}),
                    "weight_B":           ("FLOAT", {"default": 0.75, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide_bkg."}),
                    "cutoff_A":           ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step":0.01, "round": False, "tooltip": "Disables the guide for the next step when the denoised image is similar to the guide. Higher values will strengthen the effect."}),
                    # NOTE(review): range differs from cutoff_A (0..1) - looks like a copy/paste slip; confirm before narrowing.
                    "cutoff_B":           ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Disables the guide for the next step when the denoised image is similar to the guide. Higher values will strengthen the effect."}),
                    "weight_scheduler_A": (["constant"] + get_res4lyf_scheduler_list(), {"default": "beta57"},),
                    "weight_scheduler_B": (["constant"] + get_res4lyf_scheduler_list(), {"default": "constant"},),
                    "start_step_A":       ("INT", {"default": 0, "min": 0, "max": 10000}),
                    "start_step_B":       ("INT", {"default": 0, "min": 0, "max": 10000}),
                    "end_step_A":         ("INT", {"default": 15, "min": -1, "max": 10000}),
                    "end_step_B":         ("INT", {"default": 15, "min": -1, "max": 10000}),
                    "invert_masks":       ("BOOLEAN", {"default": False}),
                    },
                "optional":
                    {
                    "guide_A":   ("LATENT", ),
                    "guide_B":   ("LATENT", ),
                    "mask_A":    ("MASK", ),
                    "mask_B":    ("MASK", ),
                    "weights_A": ("SIGMAS", ),
                    "weights_B": ("SIGMAS", ),
                    }
                }

    RETURN_TYPES = ("GUIDES",)
    RETURN_NAMES = ("guides",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_extensions"

    @staticmethod
    def _constant_weights(weight, start_step, end_step, dtype):
        """Build the per-step weight tensor used for the "constant" scheduler.

        The result is ``start_step`` zeros, followed by whatever
        ``initialize_or_scale(None, weight, end_step)`` returns (presumably a
        1-D tensor of ``end_step`` entries equal to ``weight`` - confirm
        against that helper), followed by ``MAX_STEPS`` trailing zeros.
        """
        weights = initialize_or_scale(None, weight, end_step).to(dtype)
        prepend = torch.zeros(start_step, dtype=dtype, device=weights.device)
        weights = torch.cat((prepend, weights), dim=0)
        return F.pad(weights, (0, MAX_STEPS), value=0.0)

    def main(self,
            weight_scheduler_A = "constant",
            weight_scheduler_B = "constant",
            start_step_A       = 0,
            start_step_B       = 0,
            end_step_A         = 30,
            end_step_B         = 30,
            cutoff_A           = 1.0,
            cutoff_B           = 1.0,
            guide_A            = None,
            guide_B            = None,
            weight_A           = 0.0,
            weight_B           = 0.0,

            guide_mode         = "epsilon",
            channelwise_mode   = False,
            projection_mode    = False,
            weights_A          = None,
            weights_B          = None,
            mask_A             = None,
            mask_B             = None,
            invert_masks : bool = False,
            ):
        """Assemble the ``GUIDES`` dict for an A/B guide pair.

        Args:
            weight_scheduler_A / _B: scheduler name. Only ``"constant"`` makes
                this method build a weight tensor.
            start_step_* / end_step_*: step range; ``end_step == -1`` is
                replaced by ``MAX_STEPS``.
            cutoff_*, weight_*: stored in the result (``weight_B`` is forced
                to 0.0 when no guide ends up in slot B).
            guide_A / guide_B: optional dicts with a ``'samples'`` entry; only
                a clone of ``'samples'`` is kept. If only ``guide_B`` is given
                it is moved into slot A together with ``mask_B``.
            guide_mode, channelwise_mode, projection_mode: ``"_projection"``
                and ``"_cw"`` are appended to ``guide_mode`` when set, except
                that ``"unsample_cw"``/``"resample_cw"`` collapse back to
                ``"unsample"``/``"resample"``.
            weights_A / weights_B: optional explicit weights, passed through
                untouched when given.
            mask_A / mask_B: optional masks. A missing ``mask_A`` is derived
                as ``1 - mask_B``.
            invert_masks: when true, both masks are replaced by ``1 - mask``
                (applied after the derivation above).

        Returns:
            A 1-tuple containing the guides dict.
        """
        default_dtype = torch.float64

        if end_step_A == -1:
            end_step_A = MAX_STEPS
        if end_step_B == -1:
            end_step_B = MAX_STEPS

        # Keep private copies of the samples only. (A disabled branch that
        # preferred state_info['raw_x'] was unreachable and was removed.)
        if guide_A is not None:
            guide_A = {'samples': guide_A['samples'].clone()}
        if guide_B is not None:
            guide_B = {'samples': guide_B['samples'].clone()}

        if guide_A is None:
            # Only B (or nothing) is connected: promote B into slot A.
            # NOTE(review): weight/scheduler/steps/cutoff of A are kept for the
            # promoted guide; B's settings are not swapped in - confirm intent.
            guide_A, guide_B = guide_B, None
            mask_A,  mask_B  = mask_B,  None

        if guide_B is None:
            weight_B = 0.0

        if mask_A is None and mask_B is not None:
            mask_A = 1-mask_B

        if projection_mode:
            guide_mode = guide_mode + "_projection"
        if channelwise_mode:
            guide_mode = guide_mode + "_cw"
        # unsample/resample have no channelwise variant: drop the "_cw" suffix.
        if guide_mode in ("unsample_cw", "resample_cw"):
            guide_mode = guide_mode[:-len("_cw")]

        # Identity test on purpose: weights_* may be a tensor.
        if weight_scheduler_A == "constant" and weights_A is None:
            weights_A = self._constant_weights(weight_A, start_step_A, end_step_A, default_dtype)

        if weight_scheduler_B == "constant" and weights_B is None:
            weights_B = self._constant_weights(weight_B, start_step_B, end_step_B, default_dtype)

        if invert_masks:
            mask_A = 1-mask_A if mask_A is not None else None
            mask_B = 1-mask_B if mask_B is not None else None

        guides = {
            "guide_mode"                : guide_mode,
            "weight_masked"             : weight_A,
            "weight_unmasked"           : weight_B,
            "weights_masked"            : weights_A,
            "weights_unmasked"          : weights_B,
            "guide_masked"              : guide_A,
            "guide_unmasked"            : guide_B,
            "mask"                      : mask_A,
            "unmask"                    : mask_B,

            "weight_scheduler_masked"   : weight_scheduler_A,
            "weight_scheduler_unmasked" : weight_scheduler_B,
            "start_step_masked"         : start_step_A,
            "start_step_unmasked"       : start_step_B,
            "end_step_masked"           : end_step_A,
            "end_step_unmasked"         : end_step_B,
            "cutoff_masked"             : cutoff_A,
            "cutoff_unmasked"           : cutoff_B
        }

        return (guides, )
class ClownOptions_Combine:
    """Node that funnels an ``OPTIONS`` input (plus any extra keyword inputs)
    through ``OptionsManager`` and returns the resulting dict."""

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_options"

    @classmethod
    def INPUT_TYPES(cls):
        """Declare the single required ``options`` socket."""
        required = {"options": ("OPTIONS",)}
        return {"required": required}

    def main(self, options, **kwargs):
        """Return ``(OptionsManager(options, **kwargs).as_dict(),)``."""
        manager = OptionsManager(options, **kwargs)
        merged = manager.as_dict()
        return (merged,)


class ClownOptions_Frameweights:
    """Node that registers one frame-weight configuration in the options dict.

    The configuration is added to the ``FrameWeightsManager`` stored under the
    ``"frame_weights_mgr"`` option (a new manager is created when none exists).
    """

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_options"

    @classmethod
    def INPUT_TYPES(cls):
        """Return the widget/socket description for this node."""
        required = {
            "config_name": (FRAME_WEIGHTS_CONFIG_NAMES,   {"default": "frame_weights", "tooltip": "Apply to specific type of per-frame weights."}),
            "dynamics":    (FRAME_WEIGHTS_DYNAMICS_NAMES, {"default": "ease_out", "tooltip": "The function type used for the dynamic period. constant: no change, linear: steady change, ease_out: starts fast, ease_in: starts slow"}),
            "schedule":    (FRAME_WEIGHTS_SCHEDULE_NAMES, {"default": "moderate_early", "tooltip": "fast_early: fast change starts immediately, slow_late: slow change starts later"}),
            "scale":       ("FLOAT", {"default": 0.5, "min": 0.0, "max": 1.0, "step": 0.01, "tooltip": "The amount of change over the course of the frame weights. 1.0 means that the guides have no influence by the end."}),
            "reverse":     ("BOOLEAN", {"default": False, "tooltip": "Reverse the frame weights"}),
        }
        optional = {
            "frame_weights": ("SIGMAS", {"tooltip": "Overrides all other settings EXCEPT reverse."}),
            "custom_string": ("STRING", {"tooltip": "Overrides all other settings EXCEPT reverse.", "multiline": True}),
            "options":       ("OPTIONS",),
        }
        return {"required": required, "optional": optional}

    def main(self,
            config_name,
            dynamics,
            schedule,
            scale,
            reverse,
            frame_weights = None,
            custom_string = None,
            options       = None,
            ):
        """Add a weight configuration and return the updated options.

        Args:
            config_name, dynamics, schedule, scale: passed to
                ``add_weight_config`` unchanged.
            reverse: passed to ``add_weight_config`` as ``is_reversed``.
            frame_weights: optional explicit weights, passed through.
            custom_string: optional text; a blank/whitespace-only string is
                treated as ``None``.
            options: optional existing options; ``None`` starts from ``{}``.

        Returns:
            A 1-tuple with the options dict from ``OptionsManager.as_dict()``.
        """
        manager = OptionsManager({} if options is None else options)

        fw_manager = manager.get("frame_weights_mgr")
        if fw_manager is None:
            fw_manager = FrameWeightsManager()

        # An empty text box counts as "not supplied".
        if custom_string is not None and not custom_string.strip():
            custom_string = None

        fw_manager.add_weight_config(
            config_name,
            dynamics      = dynamics,
            schedule      = schedule,
            scale         = scale,
            is_reversed   = reverse,
            frame_weights = frame_weights,
            custom_string = custom_string,
        )

        manager.update("frame_weights_mgr", fw_manager)
        return (manager.as_dict(),)
("OPTIONS", ), } } RETURN_TYPES = ("OPTIONS",) RETURN_NAMES = ("options",) FUNCTION = "main" CATEGORY = "RES4LYF/sampler_options" def main(self, guider, options=None): options_mgr = OptionsManager(options if options is not None else {}) if isinstance(guider, dict): guider = guider.get('samples', None) if isinstance(guider, torch.Tensor): guider = guider.detach().cpu() if options_mgr is None: options_mgr = OptionsManager() options_mgr.update("guider", guider) return (options_mgr.as_dict(), ) class ClownGuide_AdaIN_MMDiT_Beta: @classmethod def INPUT_TYPES(cls): return {"required": { "weight": ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide by multiplying all other weights by this value."}), "weight_scheduler": (["constant"] + get_res4lyf_scheduler_list(), {"default": "constant"},), "double_blocks" : ("STRING", {"default": "", "multiline": True}), "double_weights" : ("STRING", {"default": "", "multiline": True}), "single_blocks" : ("STRING", {"default": "20", "multiline": True}), "single_weights" : ("STRING", {"default": "0.5", "multiline": True}), "start_step": ("INT", {"default": 0, "min": 0, "max": 10000}), "end_step": ("INT", {"default": 15, "min": -1, "max": 10000}), "invert_mask": ("BOOLEAN", {"default": False}), }, "optional": { "guide": ("LATENT", ), "mask": ("MASK", ), "weights": ("SIGMAS", ), "guides": ("GUIDES", ), } } RETURN_TYPES = ("GUIDES",) RETURN_NAMES = ("guides",) FUNCTION = "main" CATEGORY = "RES4LYF/sampler_extensions" def main(self, weight = 1.0, weight_scheduler = "constant", double_weights = "0.1", single_weights = "0.0", double_blocks = "all", single_blocks = "all", start_step = 0, end_step = 15, invert_mask = False, guide = None, mask = None, weights = None, guides = None, ): default_dtype = torch.float64 mask = 1-mask if mask is not None else None double_weights = parse_range_string(double_weights) single_weights = parse_range_string(single_weights) if 
len(double_weights) == 0: double_weights.append(0.0) if len(single_weights) == 0: single_weights.append(0.0) if len(double_weights) == 1: double_weights = double_weights * 100 if len(single_weights) == 1: single_weights = single_weights * 100 if type(double_weights[0]) == int: double_weights = [float(val) for val in double_weights] if type(single_weights[0]) == int: single_weights = [float(val) for val in single_weights] if double_blocks == "all": double_blocks = [val for val in range(100)] if len(double_weights) == 1: double_weights = [double_weights[0]] * 100 else: double_blocks = parse_range_string(double_blocks) weights_expanded = [0.0] * 100 for b, w in zip(double_blocks, double_weights): weights_expanded[b] = w double_weights = weights_expanded if single_blocks == "all": single_blocks = [val for val in range(100)] if len(single_weights) == 1: single_weights = [single_weights[0]] * 100 else: single_blocks = parse_range_string(single_blocks) weights_expanded = [0.0] * 100 for b, w in zip(single_blocks, single_weights): weights_expanded[b] = w single_weights = weights_expanded if end_step == -1: end_step = MAX_STEPS if guide is not None: raw_x = guide.get('state_info', {}).get('raw_x', None) if raw_x is not None: guide = {'samples': guide['state_info']['raw_x'].clone()} else: guide = {'samples': guide['samples'].clone()} if weight_scheduler == "constant": # and weights == None: weights = initialize_or_scale(None, weight, end_step).to(default_dtype) prepend = torch.zeros(start_step).to(weights) weights = torch.cat([prepend, weights]) weights = F.pad(weights, (0, MAX_STEPS), value=0.0) guides = copy.deepcopy(guides) if guides is not None else {} guides['weight_adain'] = weight guides['weights_adain'] = weights guides['blocks_adain_mmdit'] = { "double_weights": double_weights, "single_weights": single_weights, "double_blocks" : double_blocks, "single_blocks" : single_blocks, } guides['guide_adain'] = guide guides['mask_adain'] = mask 
guides['weight_scheduler_adain'] = weight_scheduler guides['start_step_adain'] = start_step guides['end_step_adain'] = end_step return (guides, ) class ClownGuide_AttnInj_MMDiT_Beta: @classmethod def INPUT_TYPES(cls): return {"required": { "weight": ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide by multiplying all other weights by this value."}), "weight_scheduler": (["constant"] + get_res4lyf_scheduler_list(), {"default": "constant"},), "double_blocks" : ("STRING", {"default": "0,1,3", "multiline": True}), "double_weights" : ("STRING", {"default": "1.0", "multiline": True}), "single_blocks" : ("STRING", {"default": "20", "multiline": True}), "single_weights" : ("STRING", {"default": "0.5", "multiline": True}), "img_q": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set relative injection strength."}), "img_k": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set relative injection strength."}), "img_v": ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set relative injection strength."}), "txt_q": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set relative injection strength."}), "txt_k": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set relative injection strength."}), "txt_v": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set relative injection strength."}), "img_q_norm": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set relative injection strength."}), "img_k_norm": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set relative injection strength."}), "img_v_norm": ("FLOAT", {"default": 0.0, 
"min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set relative injection strength."}), "txt_q_norm": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set relative injection strength."}), "txt_k_norm": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set relative injection strength."}), "txt_v_norm": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set relative injection strength."}), "start_step": ("INT", {"default": 0, "min": 0, "max": 10000}), "end_step": ("INT", {"default": 15, "min": -1, "max": 10000}), "invert_mask": ("BOOLEAN", {"default": False}), }, "optional": { "guide": ("LATENT", ), "mask": ("MASK", ), "weights": ("SIGMAS", ), "guides": ("GUIDES", ), } } RETURN_TYPES = ("GUIDES",) RETURN_NAMES = ("guides",) FUNCTION = "main" CATEGORY = "RES4LYF/sampler_extensions" def main(self, weight = 1.0, weight_scheduler = "constant", double_weights = "0.1", single_weights = "0.0", double_blocks = "all", single_blocks = "all", img_q = 0.0, img_k = 0.0, img_v = 0.0, txt_q = 0.0, txt_k = 0.0, txt_v = 0.0, img_q_norm = 0.0, img_k_norm = 0.0, img_v_norm = 0.0, txt_q_norm = 0.0, txt_k_norm = 0.0, txt_v_norm = 0.0, start_step = 0, end_step = 15, invert_mask = False, guide = None, mask = None, weights = None, guides = None, ): default_dtype = torch.float64 mask = 1-mask if mask is not None else None double_weights = parse_range_string(double_weights) single_weights = parse_range_string(single_weights) if len(double_weights) == 0: double_weights.append(0.0) if len(single_weights) == 0: single_weights.append(0.0) if len(double_weights) == 1: double_weights = double_weights * 100 if len(single_weights) == 1: single_weights = single_weights * 100 if type(double_weights[0]) == int: double_weights = [float(val) for val in double_weights] if type(single_weights[0]) == int: single_weights = [float(val) for val in 
single_weights] if double_blocks == "all": double_blocks = [val for val in range(100)] if len(double_weights) == 1: double_weights = [double_weights[0]] * 100 else: double_blocks = parse_range_string(double_blocks) weights_expanded = [0.0] * 100 for b, w in zip(double_blocks, double_weights): weights_expanded[b] = w double_weights = weights_expanded if single_blocks == "all": single_blocks = [val for val in range(100)] if len(single_weights) == 1: single_weights = [single_weights[0]] * 100 else: single_blocks = parse_range_string(single_blocks) weights_expanded = [0.0] * 100 for b, w in zip(single_blocks, single_weights): weights_expanded[b] = w single_weights = weights_expanded if end_step == -1: end_step = MAX_STEPS if guide is not None: raw_x = guide.get('state_info', {}).get('raw_x', None) if raw_x is not None: guide = {'samples': guide['state_info']['raw_x'].clone()} else: guide = {'samples': guide['samples'].clone()} if weight_scheduler == "constant": # and weights == None: weights = initialize_or_scale(None, weight, end_step).to(default_dtype) prepend = torch.zeros(start_step).to(weights) weights = torch.cat([prepend, weights]) weights = F.pad(weights, (0, MAX_STEPS), value=0.0) guides = copy.deepcopy(guides) if guides is not None else {} guides['weight_attninj'] = weight guides['weights_attninj'] = weights guides['blocks_attninj_mmdit'] = { "double_weights": double_weights, "single_weights": single_weights, "double_blocks" : double_blocks, "single_blocks" : single_blocks, } guides['blocks_attninj_qkv'] = { "img_q": img_q, "img_k": img_k, "img_v": img_v, "txt_q": txt_q, "txt_k": txt_k, "txt_v": txt_v, "img_q_norm": img_q_norm, "img_k_norm": img_k_norm, "img_v_norm": img_v_norm, "txt_q_norm": txt_q_norm, "txt_k_norm": txt_k_norm, "txt_v_norm": txt_v_norm, } guides['guide_attninj'] = guide guides['mask_attninj'] = mask guides['weight_scheduler_attninj'] = weight_scheduler guides['start_step_attninj'] = start_step guides['end_step_attninj'] = end_step return 
(guides, ) class ClownGuide_StyleNorm_Advanced_HiDream: @classmethod def INPUT_TYPES(cls): return {"required": { "weight": ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide by multiplying all other weights by this value."}), "weight_scheduler": (["constant"] + get_res4lyf_scheduler_list(), {"default": "constant"},), "double_blocks" : ("STRING", {"default": "all", "multiline": True}), "double_weights" : ("STRING", {"default": "1.0", "multiline": True}), "single_blocks" : ("STRING", {"default": "all", "multiline": True}), "single_weights" : ("STRING", {"default": "1.0", "multiline": True}), "mode": (["scattersort", "AdaIN"], {"default": "scattersort"},), "noise_mode": (["direct", "update", "smart", "recon", "bonanza"], {"default": "smart"},), #"shared_experts": ("BOOLEAN", {"default": False}), "ff_1" : ("BOOLEAN", {"default": False}), "ff_1_silu" : ("BOOLEAN", {"default": False}), "ff_3" : ("BOOLEAN", {"default": False}), "ff_13" : ("BOOLEAN", {"default": False}), "ff_2" : ("BOOLEAN", {"default": False}), "moe_gate" : ("BOOLEAN", {"default": False}), "topk_weight" : ("BOOLEAN", {"default": False}), "moe_ff_1" : ("BOOLEAN", {"default": False}), "moe_ff_1_silu" : ("BOOLEAN", {"default": False}), "moe_ff_3" : ("BOOLEAN", {"default": False}), "moe_ff_13" : ("BOOLEAN", {"default": False}), "moe_ff_2" : ("BOOLEAN", {"default": False}), "moe_sum" : ("BOOLEAN", {"default": False}), "moe_out" : ("BOOLEAN", {"default": False}), "double_img_io": ("BOOLEAN", {"default": False}), "double_img_norm0": ("BOOLEAN", {"default": False}), "double_img_attn": ("BOOLEAN", {"default": False}), "double_img_attn_gated": ("BOOLEAN", {"default": False}), "double_img": ("BOOLEAN", {"default": False}), "double_img_norm1": ("BOOLEAN", {"default": False}), "double_img_ff_i": ("BOOLEAN", {"default": False}), "double_txt_io": ("BOOLEAN", {"default": False}), "double_txt_norm0": ("BOOLEAN", {"default": False}), 
"double_txt_attn": ("BOOLEAN", {"default": False}), "double_txt_attn_gated": ("BOOLEAN", {"default": False}), "double_txt": ("BOOLEAN", {"default": False}), "double_txt_norm1": ("BOOLEAN", {"default": False}), "double_txt_ff_t": ("BOOLEAN", {"default": False}), "single_img_io": ("BOOLEAN", {"default": False}), "single_img_norm0": ("BOOLEAN", {"default": False}), "single_img_attn": ("BOOLEAN", {"default": False}), "single_img_attn_gated": ("BOOLEAN", {"default": False}), "single_img": ("BOOLEAN", {"default": False}), "single_img_norm1": ("BOOLEAN", {"default": False}), "single_img_ff_i": ("BOOLEAN", {"default": False}), "attn_img_q_norm" : ("BOOLEAN", {"default": False}), "attn_img_k_norm" : ("BOOLEAN", {"default": False}), "attn_img_v_norm" : ("BOOLEAN", {"default": False}), "attn_txt_q_norm" : ("BOOLEAN", {"default": False}), "attn_txt_k_norm" : ("BOOLEAN", {"default": False}), "attn_txt_v_norm" : ("BOOLEAN", {"default": False}), "attn_img_double" : ("BOOLEAN", {"default": False}), "attn_txt_double" : ("BOOLEAN", {"default": False}), "attn_img_single" : ("BOOLEAN", {"default": False}), "proj_out" : ("BOOLEAN", {"default": False}), "start_step": ("INT", {"default": 0, "min": 0, "max": 10000}), "end_step": ("INT", {"default": 15, "min": -1, "max": 10000}), "invert_mask": ("BOOLEAN", {"default": False}), }, "optional": { "guide": ("LATENT", ), "mask": ("MASK", ), "weights": ("SIGMAS", ), "guides": ("GUIDES", ), } } RETURN_TYPES = ("GUIDES",) RETURN_NAMES = ("guides",) FUNCTION = "main" CATEGORY = "RES4LYF/sampler_extensions" EXPERIMENTAL = True def main(self, weight = 1.0, weight_scheduler = "constant", mode = "scattersort", noise_mode = "smart", double_weights = "0.1", single_weights = "0.0", double_blocks = "all", single_blocks = "all", start_step = 0, end_step = 15, invert_mask = False, moe_gate = False, topk_weight = False, moe_out = False, moe_sum = False, ff_1 = False, ff_1_silu = False, ff_3 = False, ff_13 = False, ff_2 = False, shared_experts = False, 
moe_ff_1 = False, moe_ff_1_silu = False, moe_ff_3 = False, moe_ff_13 = False, moe_ff_2 = False, double_img_io = False, double_img_norm0 = False, double_img_attn = False, double_img_norm1 = False, double_img_attn_gated = False, double_img = False, double_img_ff_i = False, double_txt_io = False, double_txt_norm0 = False, double_txt_attn = False, double_txt_attn_gated = False, double_txt = False, double_txt_norm1 = False, double_txt_ff_t = False, single_img_io = False, single_img_norm0 = False, single_img_attn = False, single_img_attn_gated = False, single_img = False, single_img_norm1 = False, single_img_ff_i = False, attn_img_q_norm = False, attn_img_k_norm = False, attn_img_v_norm = False, attn_txt_q_norm = False, attn_txt_k_norm = False, attn_txt_v_norm = False, attn_img_single = False, attn_img_double = False, attn_txt_double = False, proj_out = False, guide = None, mask = None, weights = None, guides = None, ): default_dtype = torch.float64 mask = 1-mask if mask is not None else None double_weights = parse_range_string(double_weights) single_weights = parse_range_string(single_weights) if len(double_weights) == 0: double_weights.append(0.0) if len(single_weights) == 0: single_weights.append(0.0) if len(double_weights) == 1: double_weights = double_weights * 100 if len(single_weights) == 1: single_weights = single_weights * 100 if type(double_weights[0]) == int: double_weights = [float(val) for val in double_weights] if type(single_weights[0]) == int: single_weights = [float(val) for val in single_weights] if double_blocks == "all": double_blocks = [val for val in range(100)] if len(double_weights) == 1: double_weights = [double_weights[0]] * 100 else: double_blocks = parse_range_string(double_blocks) weights_expanded = [0.0] * 100 for b, w in zip(double_blocks, double_weights): weights_expanded[b] = w double_weights = weights_expanded if single_blocks == "all": single_blocks = [val for val in range(100)] if len(single_weights) == 1: single_weights = 
[single_weights[0]] * 100 else: single_blocks = parse_range_string(single_blocks) weights_expanded = [0.0] * 100 for b, w in zip(single_blocks, single_weights): weights_expanded[b] = w single_weights = weights_expanded if end_step == -1: end_step = MAX_STEPS if guide is not None: raw_x = guide.get('state_info', {}).get('raw_x', None) if raw_x is not None: guide = {'samples': guide['state_info']['raw_x'].clone()} else: guide = {'samples': guide['samples'].clone()} if weight_scheduler == "constant": # and weights == None: weights = initialize_or_scale(None, weight, end_step).to(default_dtype) prepend = torch.zeros(start_step).to(weights) weights = torch.cat([prepend, weights]) weights = F.pad(weights, (0, MAX_STEPS), value=0.0) guides = copy.deepcopy(guides) if guides is not None else {} guides['weight_adain'] = weight guides['weights_adain'] = weights guides['blocks_adain_mmdit'] = { "double_weights": double_weights, "single_weights": single_weights, "double_blocks" : double_blocks, "single_blocks" : single_blocks, } guides['sort_and_scatter'] = { "mode" : mode, "noise_mode" : noise_mode, "moe_gate" : moe_gate, "topk_weight" : topk_weight, "moe_sum" : moe_sum, "moe_out" : moe_out, "ff_1" : ff_1, "ff_1_silu" : ff_1_silu, "ff_3" : ff_3, "ff_13" : ff_13, "ff_2" : ff_2, "moe_ff_1" : moe_ff_1, "moe_ff_1_silu" : moe_ff_1_silu, "moe_ff_3" : moe_ff_3, "moe_ff_13" : moe_ff_13, "moe_ff_2" : moe_ff_2, "shared_experts" : shared_experts, "double_img_io" : double_img_io, "double_img_norm0" : double_img_norm0, "double_img_attn" : double_img_attn, "double_img_norm1" : double_img_norm1, "double_img_attn_gated" : double_img_attn_gated, "double_img" : double_img, "double_img_ff_i" : double_img_ff_i, "double_txt_io" : double_txt_io, "double_txt_norm0" : double_txt_norm0, "double_txt_attn" : double_txt_attn, "double_txt_attn_gated" : double_txt_attn_gated, "double_txt" : double_txt, "double_txt_norm1" : double_txt_norm1, "double_txt_ff_t" : double_txt_ff_t, "single_img_io" : 
single_img_io, "single_img_norm0" : single_img_norm0, "single_img_attn" : single_img_attn, "single_img_attn_gated" : single_img_attn_gated, "single_img" : single_img, "single_img_norm1" : single_img_norm1, "single_img_ff_i" : single_img_ff_i, "attn_img_q_norm" : attn_img_q_norm, "attn_img_k_norm" : attn_img_k_norm, "attn_img_v_norm" : attn_img_v_norm, "attn_txt_q_norm" : attn_txt_q_norm, "attn_txt_k_norm" : attn_txt_k_norm, "attn_txt_v_norm" : attn_txt_v_norm, "attn_img_single" : attn_img_single, "attn_img_double" : attn_img_double, "proj_out" : proj_out, } guides['guide_adain'] = guide guides['mask_adain'] = mask guides['weight_scheduler_adain'] = weight_scheduler guides['start_step_adain'] = start_step guides['end_step_adain'] = end_step return (guides, ) from ..style_transfer import StyleMMDiT_Model, StyleUNet_Model, DEFAULT_BLOCK_WEIGHTS_MMDIT, DEFAULT_ATTN_WEIGHTS_MMDIT, DEFAULT_BASE_WEIGHTS_MMDIT STYLE_MODES = [ "none", #"sinkhornsort", "scattersort_dir", "scattersort_dir2", "scattersort", "tiled_scattersort", "AdaIN", "tiled_AdaIN", "WCT", "WCT2", "injection", ] class ClownStyle_Boost: @classmethod def INPUT_TYPES(cls): return {"required": { "noise_mode": (["direct", "update", "smart", "recon", "bonanza"], {"default": "update"},), "recon_lure": (STYLE_MODES, {"default": "WCT", "tooltip": "Only used if noise_mode = recon. Can increase the strength of the style."},), "datashock": (STYLE_MODES, {"default": "scattersort", "tooltip": "Will drastically increase the strength at low denoise levels. 
Use with img2img workflows."},), "datashock_weight": ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide by multiplying all other weights by this value."}), "datashock_start_step": ("INT", {"default": 0, "min": 0, "max": 10000, "step": 1, "tooltip": "Start step for data shock."}), "datashock_end_step" : ("INT", {"default": 1, "min": 1, "max": 10000, "step": 1, "tooltip": "End step for data shock."}), "tile_h" : ("INT", {"default": 128, "min": 16, "max": 10000, "step": 16, "tooltip": "Tile size for tiled modes. Lower values will transfer composition more effectively. Dimensions of image must be divisible by this value."}), "tile_w" : ("INT", {"default": 128, "min": 16, "max": 10000, "step": 16, "tooltip": "Tile size for tiled modes. Lower values will transfer composition more effectively. Dimensions of image must be divisible by this value."}), }, "optional": { "guides": ("GUIDES", ), #"datashock_weights": ("SIGMAS",), } } RETURN_TYPES = ("GUIDES",) RETURN_NAMES = ("guides",) FUNCTION = "main" CATEGORY = "RES4LYF/sampler_extensions" def main(self, noise_mode = "update", recon_lure = "default", datashock = None, datashock_weight = 1.0, datashock_start_step = None, datashock_end_step = None, tile_h = 0, tile_w = 0, guides = None, ): guides = copy.deepcopy(guides) if guides is not None else {} StyleMMDiT = guides.get('StyleMMDiT') if StyleMMDiT is None: StyleMMDiT = StyleMMDiT_Model() weights = { "h_tile" : tile_h // 16, "w_tile" : tile_w // 16, } StyleMMDiT.set_weights(**weights) StyleMMDiT.noise_mode = noise_mode StyleMMDiT.recon_lure = recon_lure StyleMMDiT.data_shock = datashock StyleMMDiT.data_shock_weight = datashock_weight StyleMMDiT.data_shock_start_step = datashock_start_step StyleMMDiT.data_shock_end_step = datashock_end_step guides['StyleMMDiT'] = StyleMMDiT return (guides,) #guides['StyleMMDiT'].noise_mode = noise_mode #guides['StyleMMDiT'].recon_lure = recon_lure 
# ---------------------------------------------------------------------------
# Private helpers shared by the ClownStyle_* MMDiT nodes below.
# ---------------------------------------------------------------------------

_STYLE_STRENGTH_TOOLTIP = "Strength of effect on layer; skips extra calculation if set to 0.0. Skips interpolation if set to 1.0."
_STYLE_TILE_TOOLTIP = "Tile size for tiled modes. Lower values will transfer composition more effectively. Dimensions of image must be divisible by this value."

# Mode used for the txt stream when a tiled mode is requested.
_STYLE_UNTILED_MODES = {
    "tiled_scattersort": "scattersort",
    "tiled_AdaIN": "AdaIN",
}


def _style_strength_input():
    """Build the FLOAT widget spec used for every per-layer strength."""
    return ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step": 0.01, "round": False, "tooltip": _STYLE_STRENGTH_TOOLTIP})


def _style_tile_input():
    """Build the INT widget spec used for tile_h / tile_w."""
    return ("INT", {"default": 128, "min": 16, "max": 10000, "step": 16, "tooltip": _STYLE_TILE_TOOLTIP})


def _style_block_inputs(apply_to_choices, strength_names):
    """Assemble the INPUT_TYPES dict shared by the per-block style nodes."""
    required = {
        "mode": (STYLE_MODES, {"default": "scattersort"},),
        "apply_to": (apply_to_choices, {"default": "img+txt"},),
        "block_type": (["double", "double,single", "single"], {"default": "single"},),
        "block_list": ("STRING", {"default": "all", "multiline": True}),
        "block_weights": ("STRING", {"default": "1.0", "multiline": True}),
    }
    for name in strength_names:
        required[name] = _style_strength_input()
    required["tile_h"] = _style_tile_input()
    required["tile_w"] = _style_tile_input()
    required["invert_mask"] = ("BOOLEAN", {"default": False})
    return {
        "required": required,
        "optional": {
            "mask": ("MASK", ),
            "blocks": ("BLOCKS", ),
        },
    }


class _StyleBlockNodeBase:
    """Shared parsing / application logic for the per-block style nodes.

    Subclasses choose whether settings go to ``block.<stream>`` or to
    ``block.<stream>.ATTN`` and which block attribute receives the mask.
    """

    _TARGETS_ATTN = False   # True: configure block.img.ATTN / block.txt.ATTN
    _MASK_ATTR = "mask"     # block attribute that receives ``[mask]``
    _BLOCK_SLOTS = 100      # length of the per-block weight list

    @classmethod
    def _target(cls, stream):
        """Return the object on ``stream`` that this node configures."""
        return stream.ATTN if cls._TARGETS_ATTN else stream

    @classmethod
    def _resolve_blocks(cls, block_list, block_weights):
        """Parse the block selection and weight strings into ``(block_ids, weights)``.

        ``block_weights`` is parsed with ``parse_range_string``; an empty
        result becomes ``[0.0]`` and a single value is repeated for every
        slot. ``block_list`` containing "all", "even" or "odd" selects the
        matching indices below ``_BLOCK_SLOTS`` and leaves the weights as
        parsed. Any other value is parsed with ``parse_range_string_int`` and
        the weights are scattered, pairwise, into a zero-filled per-slot list.
        """
        slots = cls._BLOCK_SLOTS
        weights = parse_range_string(block_weights)
        if len(weights) == 0:
            weights.append(0.0)
        if len(weights) == 1:
            weights = weights * slots
        if isinstance(weights[0], int):
            weights = [float(value) for value in weights]

        if "all" in block_list:
            return list(range(slots)), weights
        if "even" in block_list:
            return list(range(0, slots, 2)), weights
        if "odd" in block_list:
            return list(range(1, slots, 2)), weights

        block_ids = parse_range_string_int(block_list)
        per_slot = [0.0] * slots
        for block_id, block_weight in zip(block_ids, weights):
            per_slot[block_id] = block_weight
        return block_ids, per_slot

    @classmethod
    def _configure_blocks(cls, style_model, block_type, block_ids, block_weights, weights, mode, apply_to, mask):
        """Apply ``mode`` and the scaled ``weights`` to the selected blocks.

        ``block_type`` is a comma-separated list of "double" / "single".
        Float entries of ``weights`` are multiplied by ``block_weights[bid]``;
        other entries (the tile sizes) are passed through unchanged. The img
        stream is configured when "img" is in ``apply_to`` or the block is a
        single block; the txt stream only for double blocks when "txt" is in
        ``apply_to``, always with the untiled variant of ``mode``.

        Raises:
            ValueError: if ``block_type`` contains an unknown entry.
        """
        # Derive the txt mode once; the img mode must stay untouched so that
        # every block (not just the first) keeps a requested tiled mode.
        txt_mode = _STYLE_UNTILED_MODES.get(mode, mode)

        for kind in block_type.split(","):
            if kind == "double":
                style_blocks = style_model.double_blocks
            elif kind == "single":
                style_blocks = style_model.single_blocks
            else:
                raise ValueError(f"Unknown block type: {kind!r}")

            for bid in block_ids:
                block = style_blocks[bid]
                scaled_weights = {
                    key: (value * block_weights[bid]) if isinstance(value, float) else value
                    for key, value in weights.items()
                }

                img_target = cls._target(block.img)
                if "img" in apply_to or kind == "single":
                    img_target.set_mode(mode)
                    img_target.set_weights(**scaled_weights)

                if "txt" in apply_to and kind == "double":
                    txt_target = cls._target(block.txt)
                    txt_target.set_mode(txt_mode)
                    txt_target.set_weights(**scaled_weights)

                # apply_to is recorded on both streams regardless of which
                # ones were configured above.
                img_target.apply_to = [apply_to]
                if hasattr(block, "txt"):
                    cls._target(block.txt).apply_to = [apply_to]

                setattr(block, cls._MASK_ATTR, [mask])

    def _style_blocks(self, blocks, block_list, block_weights, weights, block_type, mode, apply_to, mask):
        """Shared ``main`` body: copy ``blocks``, configure its style model, return it."""
        blocks = copy.deepcopy(blocks) if blocks is not None else {}
        block_ids, per_block_weights = self._resolve_blocks(block_list, block_weights)

        StyleMMDiT = blocks.get('StyleMMDiT')
        if StyleMMDiT is None:
            StyleMMDiT = StyleMMDiT_Model()

        self._configure_blocks(StyleMMDiT, block_type, block_ids, per_block_weights, weights, mode, apply_to, mask)

        blocks['StyleMMDiT'] = StyleMMDiT
        return (blocks, )


class ClownStyle_MMDiT:
    """Node that configures the top-level style model and merges it into GUIDES."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "mode": (STYLE_MODES, {"default": "scattersort"},),
                "proj_in": _style_strength_input(),
                "proj_out": _style_strength_input(),
                "tile_h": _style_tile_input(),
                "tile_w": _style_tile_input(),
                #"start_step": ("INT", {"default": 0, "min": 16, "max": 10000, "step": 1, "tooltip": "Start step for data shock."}),
                #"end_step" : ("INT", {"default": 1, "min": 16, "max": 10000, "step": 1, "tooltip": "End step for data shock."}),
                "invert_mask": ("BOOLEAN", {"default": False}),
            },
            "optional": {
                "positive": ("CONDITIONING", ),
                "negative": ("CONDITIONING", ),
                "guide": ("LATENT", ),
                "mask": ("MASK", ),
                "blocks": ("BLOCKS", ),
                "guides": ("GUIDES", ),
            },
        }

    RETURN_TYPES = ("GUIDES",)
    RETURN_NAMES = ("guides",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/sampler_extensions"

    def main(self,
             mode="scattersort",
             proj_in=0.0,
             proj_out=0.0,
             tile_h=128,
             tile_w=128,
             invert_mask=False,
             positive=None,
             negative=None,
             guide=None,
             mask=None,
             blocks=None,
             guides=None,
             ):
        """Return a deep copy of ``guides`` whose ``'StyleMMDiT'`` entry reflects these settings.

        The style model is taken from (a deep copy of) ``blocks`` or created
        fresh. If ``guides`` already holds a style model, the new one is
        merged into it via ``merge_weights``; otherwise the new one is stored.
        ``invert_mask`` is accepted but unused: the mask is stored as given.
        Neither input dict is mutated.
        """
        if guide is not None:
            # Prefer the raw latent recorded in state_info when it exists.
            raw_x = guide.get('state_info', {}).get('raw_x', None)
            source = raw_x if raw_x is not None else guide['samples']
            guide = {'samples': source.clone()}

        guides = copy.deepcopy(guides) if guides is not None else {}
        blocks = copy.deepcopy(blocks) if blocks is not None else {}

        StyleMMDiT = blocks.get('StyleMMDiT')
        if StyleMMDiT is None:
            StyleMMDiT = StyleMMDiT_Model()

        weights = {
            "proj_in": proj_in,
            "proj_out": proj_out,
            "h_tile": tile_h // 16,
            "w_tile": tile_w // 16,
        }

        StyleMMDiT.set_mode(mode)
        StyleMMDiT.set_weights(**weights)
        StyleMMDiT.set_conditioning(positive, negative)
        StyleMMDiT.mask = [mask]
        StyleMMDiT.guides = [guide]

        existing = guides.get('StyleMMDiT')
        if existing is not None:
            existing.merge_weights(StyleMMDiT)
        else:
            existing = StyleMMDiT

        guides['StyleMMDiT'] = existing
        return (guides, )


class ClownStyle_Block_MMDiT(_StyleBlockNodeBase):
    """Node that sets per-block style strengths on ``block.img`` / ``block.txt``."""

    _TARGETS_ATTN = False
    _MASK_ATTR = "mask"

    @classmethod
    def INPUT_TYPES(cls):
        return _style_block_inputs(
            ["img", "img+txt", "img,txt", "txt"],
            ("attn_norm", "attn_norm_mod", "attn", "attn_gated", "attn_res",
             "ff_norm", "ff_norm_mod", "ff", "ff_gated", "ff_res"),
        )

    RETURN_TYPES = ("BLOCKS",)
    RETURN_NAMES = ("blocks",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/sampler_extensions"

    def main(self,
             mode="scattersort",
             noise_mode="update",
             apply_to="joint",
             block_type="double",
             block_list="all",
             block_weights="1.0",
             attn_norm=0.0,
             attn_norm_mod=0.0,
             attn=0.0,
             attn_gated=0.0,
             attn_res=0.0,
             ff_norm=0.0,
             ff_norm_mod=0.0,
             ff=0.0,
             ff_gated=0.0,
             ff_res=0.0,
             tile_h=128,
             tile_w=128,
             invert_mask=False,
             Attn=None,
             MoE=None,
             FF=None,
             mask=None,
             blocks=None,
             ):
        """Return a deep copy of ``blocks`` with the selected style blocks configured.

        ``noise_mode``, ``invert_mask``, ``Attn``, ``MoE`` and ``FF`` are
        accepted for interface compatibility but are not used.
        """
        weights = {
            "attn_norm": attn_norm,
            "attn_norm_mod": attn_norm_mod,
            "attn": attn,
            "attn_gated": attn_gated,
            "attn_res": attn_res,
            "ff_norm": ff_norm,
            "ff_norm_mod": ff_norm_mod,
            "ff": ff,
            "ff_gated": ff_gated,
            "ff_res": ff_res,
            "h_tile": tile_h // 16,
            "w_tile": tile_w // 16,
        }
        return self._style_blocks(blocks, block_list, block_weights, weights, block_type, mode, apply_to, mask)


class ClownStyle_Attn_MMDiT(_StyleBlockNodeBase):
    """Node that sets per-block attention style strengths on ``block.<stream>.ATTN``."""

    _TARGETS_ATTN = True
    _MASK_ATTR = "attn_mask"

    @classmethod
    def INPUT_TYPES(cls):
        return _style_block_inputs(
            ["img", "img+txt", "img,txt", "txt"],
            ("q_proj", "k_proj", "v_proj", "q_norm", "k_norm", "out"),
        )

    RETURN_TYPES = ("BLOCKS",)
    RETURN_NAMES = ("blocks",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/sampler_extensions"

    def main(self,
             mode="scattersort",
             noise_mode="update",
             apply_to="joint",
             block_type="double",
             block_list="all",
             block_weights="1.0",
             q_proj=0.0,
             k_proj=0.0,
             v_proj=0.0,
             q_norm=0.0,
             k_norm=0.0,
             out=0.0,
             tile_h=128,
             tile_w=128,
             invert_mask=False,
             mask=None,
             blocks=None,
             ):
        """Return a deep copy of ``blocks`` with the selected attention blocks configured.

        ``noise_mode`` and ``invert_mask`` are accepted for interface
        compatibility but are not used.
        """
        weights = {
            "q_proj": q_proj,
            "k_proj": k_proj,
            "v_proj": v_proj,
            "q_norm": q_norm,
            "k_norm": k_norm,
            "out": out,
            "h_tile": tile_h // 16,
            "w_tile": tile_w // 16,
        }
        return self._style_blocks(blocks, block_list, block_weights, weights, block_type, mode, apply_to, mask)
class ClownStyle_Block_UNet:
    """Node that writes block-level style settings into a ``StyleUNet_Model``.

    The configured model is stored under ``blocks['StyleMMDiT']`` and returned
    as the single ``BLOCKS`` output.
    """

    @classmethod
    def INPUT_TYPES(cls):
        """Return the input specification; key order is kept as originally declared."""
        strength_tooltip = "Strength of effect on layer; skips extra calculation if set to 0.0. Skips interpolation if set to 1.0."
        tile_tooltip = "Tile size for tiled modes. Lower values will transfer composition more effectively. Dimensions of image must be divisible by this value."

        def strength():
            return ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step": 0.01, "round": False, "tooltip": strength_tooltip})

        def tile():
            return ("INT", {"default": 128, "min": 16, "max": 10000, "step": 16, "tooltip": tile_tooltip})

        required = {
            "mode": (STYLE_MODES, {"default": "scattersort"}),
            "block_type": (UNET_BLOCK_TYPES, {"default": "input"}),
            "block_list": ("STRING", {"default": "all", "multiline": True}),
            "block_weights": ("STRING", {"default": "1.0", "multiline": True}),
        }
        for name in ("resample", "res", "spatial"):
            required[name] = strength()
        required["tile_h"] = tile()
        required["tile_w"] = tile()
        required["invert_mask"] = ("BOOLEAN", {"default": False})

        return {
            "required": required,
            "optional": {
                "mask": ("MASK",),
                "blocks": ("BLOCKS",),
            },
        }

    RETURN_TYPES = ("BLOCKS",)
    RETURN_NAMES = ("blocks",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/sampler_extensions"

    def main(self,
            mode          = "scattersort",
            noise_mode    = "update",
            apply_to      = "",
            block_type    = "input",
            block_list    = "all",
            block_weights = "1.0",
            resample      = 0.0,
            res           = 0.0,
            spatial       = 0.0,
            tile_h        = 128,
            tile_w        = 128,
            invert_mask   = False,
            mask          = None,
            blocks        = None,
            ):
        """Set mode, scaled weights, ``apply_to`` and ``mask`` on the selected UNet blocks.

        ``block_type`` is a comma-separated list of "input", "middle", "output".
        ``block_list`` is "all", "even", "odd" (substring match) or a range string.
        ``noise_mode`` and ``invert_mask`` are accepted but not read here.
        Returns a one-element tuple holding the updated ``blocks`` dict.
        """
        blocks = {} if blocks is None else copy.deepcopy(blocks)

        block_weights = parse_range_string(block_weights)
        if len(block_weights) == 0:
            block_weights.append(0.0)
        if len(block_weights) == 1:
            block_weights = block_weights * 100
        if type(block_weights[0]) == int:
            block_weights = [float(val) for val in block_weights]

        preset = None
        if "all" in block_list:
            preset = range(100)
        elif "even" in block_list:
            preset = range(0, 100, 2)
        elif "odd" in block_list:
            preset = range(1, 100, 2)

        if preset is not None:
            block_list = list(preset)
            if len(block_weights) == 1:
                block_weights = [block_weights[0]] * 100
        else:
            block_list = parse_range_string_int(block_list)
            # Scatter the listed weights onto their block indices; unlisted blocks get 0.0.
            expanded = [0.0] * 100
            for index, weight in zip(block_list, block_weights):
                expanded[index] = weight
            block_weights = expanded

        style_model = blocks.get('StyleMMDiT')
        if style_model is None:
            style_model = StyleUNet_Model()

        # NOTE(review): sibling UNet nodes in this file divide tile sizes by 8;
        # confirm that 16 is intended here.
        weights = {
            "resample": resample,
            "res": res,
            "spatial": spatial,
            "h_tile": tile_h // 16,
            "w_tile": tile_w // 16,
        }

        for kind in block_type.split(","):
            if kind == "input":
                style_blocks = style_model.input_blocks
            elif kind == "middle":
                style_blocks = style_model.middle_blocks
            elif kind == "output":
                style_blocks = style_model.output_blocks

            for bid in block_list:
                target = style_blocks[bid]

                # Only float-valued entries are strengths; the int tile sizes pass through.
                scaled = {
                    name: (value * block_weights[bid]) if isinstance(value, float) else value
                    for name, value in weights.items()
                }

                target.set_mode(mode)
                target.set_weights(**scaled)
                target.apply_to = [apply_to]
                target.mask = [mask]

        blocks['StyleMMDiT'] = style_model

        return (blocks, )
Dimensions of image must be divisible by this value."}), "invert_mask": ("BOOLEAN", {"default": False}), }, "optional": { "mask": ("MASK", ), "blocks": ("BLOCKS", ), } } RETURN_TYPES = ("BLOCKS",) RETURN_NAMES = ("blocks",) FUNCTION = "main" CATEGORY = "RES4LYF/sampler_extensions" def main(self, mode = "scattersort", noise_mode = "update", apply_to = "self", block_type = "input", block_list = "all", block_weights = "1.0", q_proj = 0.0, k_proj = 0.0, v_proj = 0.0, out = 0.0, tile_h = 128, tile_w = 128, invert_mask = False, mask = None, blocks = None, ): #mask = 1-mask if mask is not None else None blocks = copy.deepcopy(blocks) if blocks is not None else {} block_weights = parse_range_string(block_weights) if len(block_weights) == 0: block_weights.append(0.0) if len(block_weights) == 1: block_weights = block_weights * 100 if type(block_weights[0]) == int: block_weights = [float(val) for val in block_weights] if "all" in block_list: block_list = [val for val in range(100)] if len(block_weights) == 1: block_weights = [block_weights[0]] * 100 elif "even" in block_list: block_list = [val for val in range(0, 100, 2)] if len(block_weights) == 1: block_weights = [block_weights[0]] * 100 elif "odd" in block_list: block_list = [val for val in range(1, 100, 2)] if len(block_weights) == 1: block_weights = [block_weights[0]] * 100 else: block_list = parse_range_string_int(block_list) weights_expanded = [0.0] * 100 for b, w in zip(block_list, block_weights): weights_expanded[b] = w block_weights = weights_expanded StyleMMDiT = blocks.get('StyleMMDiT') if StyleMMDiT is None: StyleMMDiT = StyleUNet_Model() weights = { "q_proj": q_proj, "k_proj": k_proj, "v_proj": v_proj, "out" : out, "h_tile": tile_h // 8, "w_tile": tile_w // 8, } block_types = block_type.split(",") for block_type in block_types: if block_type == "input": style_blocks = StyleMMDiT.input_blocks elif block_type == "middle": style_blocks = StyleMMDiT.middle_blocks elif block_type == "output": style_blocks = 
class ClownStyle_ResBlock_UNet:
    """Node that writes res-block style settings into a ``StyleUNet_Model``.

    Settings are applied to ``block.res_block`` of every selected block; the
    model is stored under ``blocks['StyleMMDiT']`` and returned as ``BLOCKS``.
    """

    @classmethod
    def INPUT_TYPES(cls):
        """Return the input specification; key order is kept as originally declared."""
        strength_tooltip = "Strength of effect on layer; skips extra calculation if set to 0.0. Skips interpolation if set to 1.0."
        tile_tooltip = "Tile size for tiled modes. Lower values will transfer composition more effectively. Dimensions of image must be divisible by this value."

        def strength():
            return ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step": 0.01, "round": False, "tooltip": strength_tooltip})

        def tile():
            return ("INT", {"default": 128, "min": 16, "max": 10000, "step": 16, "tooltip": tile_tooltip})

        required = {
            "mode": (STYLE_MODES, {"default": "scattersort"}),
            "block_type": (UNET_BLOCK_TYPES, {"default": "input"}),
            "block_list": ("STRING", {"default": "all", "multiline": True}),
            "block_weights": ("STRING", {"default": "1.0", "multiline": True}),
        }
        strength_names = (
            "in_norm", "in_silu", "in_conv",
            "emb_silu", "emb_linear", "emb_res",
            "out_norm", "out_silu", "out_conv",
            "residual",
        )
        for name in strength_names:
            required[name] = strength()
        required["tile_h"] = tile()
        required["tile_w"] = tile()
        required["invert_mask"] = ("BOOLEAN", {"default": False})

        return {
            "required": required,
            "optional": {
                "mask": ("MASK",),
                "blocks": ("BLOCKS",),
            },
        }

    RETURN_TYPES = ("BLOCKS",)
    RETURN_NAMES = ("blocks",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/sampler_extensions"

    def main(self,
            mode          = "scattersort",
            noise_mode    = "update",
            apply_to      = "",
            block_type    = "input",
            block_list    = "all",
            block_weights = "1.0",
            in_norm       = 0.0,
            in_silu       = 0.0,
            in_conv       = 0.0,
            emb_silu      = 0.0,
            emb_linear    = 0.0,
            emb_res       = 0.0,
            out_norm      = 0.0,
            out_silu      = 0.0,
            out_conv      = 0.0,
            residual      = 0.0,
            tile_h        = 128,
            tile_w        = 128,
            invert_mask   = False,
            mask          = None,
            blocks        = None,
            ):
        """Set mode, scaled weights, ``apply_to`` and ``mask`` on each selected ``res_block``.

        ``block_type`` is a comma-separated list of "input", "middle", "output".
        ``block_list`` is "all", "even", "odd" (substring match) or a range string.
        ``noise_mode`` and ``invert_mask`` are accepted but not read here.
        Returns a one-element tuple holding the updated ``blocks`` dict.
        """
        blocks = {} if blocks is None else copy.deepcopy(blocks)

        block_weights = parse_range_string(block_weights)
        if len(block_weights) == 0:
            block_weights.append(0.0)
        if len(block_weights) == 1:
            block_weights = block_weights * 100
        if type(block_weights[0]) == int:
            block_weights = [float(val) for val in block_weights]

        preset = None
        if "all" in block_list:
            preset = range(100)
        elif "even" in block_list:
            preset = range(0, 100, 2)
        elif "odd" in block_list:
            preset = range(1, 100, 2)

        if preset is not None:
            block_list = list(preset)
            if len(block_weights) == 1:
                block_weights = [block_weights[0]] * 100
        else:
            block_list = parse_range_string_int(block_list)
            # Scatter the listed weights onto their block indices; unlisted blocks get 0.0.
            expanded = [0.0] * 100
            for index, weight in zip(block_list, block_weights):
                expanded[index] = weight
            block_weights = expanded

        style_model = blocks.get('StyleMMDiT')
        if style_model is None:
            style_model = StyleUNet_Model()

        weights = {
            "in_norm": in_norm,
            "in_silu": in_silu,
            "in_conv": in_conv,
            "emb_silu": emb_silu,
            "emb_linear": emb_linear,
            "emb_res": emb_res,
            "out_norm": out_norm,
            "out_silu": out_silu,
            "out_conv": out_conv,
            "residual": residual,
            "h_tile": tile_h // 8,
            "w_tile": tile_w // 8,
        }

        for kind in block_type.split(","):
            if kind == "input":
                style_blocks = style_model.input_blocks
            elif kind == "middle":
                style_blocks = style_model.middle_blocks
            elif kind == "output":
                style_blocks = style_model.output_blocks

            for bid in block_list:
                block = style_blocks[bid]

                # Only float-valued entries are strengths; the int tile sizes pass through.
                scaled = {
                    name: (value * block_weights[bid]) if isinstance(value, float) else value
                    for name, value in weights.items()
                }

                block.res_block.set_mode(mode)
                block.res_block.set_weights(**scaled)
                block.res_block.apply_to = [apply_to]
                block.res_block.mask = [mask]

        blocks['StyleMMDiT'] = style_model

        return (blocks, )
Skips interpolation if set to 1.0."}), "proj_out": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Strength of effect on layer; skips extra calculation if set to 0.0. Skips interpolation if set to 1.0."}), "res": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Strength of effect on layer; skips extra calculation if set to 0.0. Skips interpolation if set to 1.0."}), "tile_h": ("INT", {"default": 128, "min": 16, "max": 10000, "step": 16, "tooltip": "Tile size for tiled modes. Lower values will transfer composition more effectively. Dimensions of image must be divisible by this value."}), "tile_w": ("INT", {"default": 128, "min": 16, "max": 10000, "step": 16, "tooltip": "Tile size for tiled modes. Lower values will transfer composition more effectively. Dimensions of image must be divisible by this value."}), "invert_mask": ("BOOLEAN",{"default": False}), }, "optional": { "mask": ("MASK", ), "blocks": ("BLOCKS", ), } } RETURN_TYPES = ("BLOCKS",) RETURN_NAMES = ("blocks",) FUNCTION = "main" CATEGORY = "RES4LYF/sampler_extensions" def main(self, mode = "scattersort", noise_mode = "update", apply_to = "", block_type = "input", block_list = "all", block_weights = "1.0", norm_in = 0.0, proj_in = 0.0, transformer_block = 0.0, transformer = 0.0, proj_out = 0.0, res = 0.0, tile_h = 128, tile_w = 128, invert_mask = False, mask = None, blocks = None, ): spatial_norm_in = norm_in spatial_proj_in = proj_in spatial_transformer_block = transformer_block spatial_transformer = transformer spatial_proj_out = proj_out spatial_res = res #mask = 1-mask if mask is not None else None blocks = copy.deepcopy(blocks) if blocks is not None else {} block_weights = parse_range_string(block_weights) if len(block_weights) == 0: block_weights.append(0.0) if len(block_weights) == 1: block_weights = block_weights * 100 if type(block_weights[0]) == int: block_weights = [float(val) for val in 
class ClownStyle_TransformerBlock_UNet:
    """Node that writes transformer-block style settings into a ``StyleUNet_Model``.

    Settings are applied to ``block.spatial_block.TFMR`` of every selected
    block; the model is stored under ``blocks['StyleMMDiT']`` and returned as
    ``BLOCKS``.
    """

    @classmethod
    def INPUT_TYPES(cls):
        """Return the input specification; key order is kept as originally declared."""
        strength_tooltip = "Strength of effect on layer; skips extra calculation if set to 0.0. Skips interpolation if set to 1.0."
        tile_tooltip = "Tile size for tiled modes. Lower values will transfer composition more effectively. Dimensions of image must be divisible by this value."

        def strength():
            return ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step": 0.01, "round": False, "tooltip": strength_tooltip})

        def tile():
            return ("INT", {"default": 128, "min": 16, "max": 10000, "step": 16, "tooltip": tile_tooltip})

        required = {
            "mode": (STYLE_MODES, {"default": "scattersort"}),
            "block_type": (UNET_BLOCK_TYPES, {"default": "input"}),
            "block_list": ("STRING", {"default": "all", "multiline": True}),
            "block_weights": ("STRING", {"default": "1.0", "multiline": True}),
        }
        strength_names = (
            "norm1", "norm2", "norm3",
            "self_attn", "cross_attn", "ff",
            "self_attn_res", "cross_attn_res", "ff_res",
        )
        for name in strength_names:
            required[name] = strength()
        required["tile_h"] = tile()
        required["tile_w"] = tile()
        required["invert_mask"] = ("BOOLEAN", {"default": False})

        return {
            "required": required,
            "optional": {
                "mask": ("MASK",),
                "blocks": ("BLOCKS",),
            },
        }

    RETURN_TYPES = ("BLOCKS",)
    RETURN_NAMES = ("blocks",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/sampler_extensions"

    def main(self,
            mode           = "scattersort",
            noise_mode     = "update",
            apply_to       = "",
            block_type     = "input",
            block_list     = "all",
            block_weights  = "1.0",
            norm1          = 0.0,
            norm2          = 0.0,
            norm3          = 0.0,
            self_attn      = 0.0,
            cross_attn     = 0.0,
            ff             = 0.0,
            self_attn_res  = 0.0,
            cross_attn_res = 0.0,
            ff_res         = 0.0,
            tile_h         = 128,
            tile_w         = 128,
            invert_mask    = False,
            mask           = None,
            blocks         = None,
            ):
        """Set mode, scaled weights, ``apply_to`` and ``mask`` on each selected ``spatial_block.TFMR``.

        ``block_type`` is a comma-separated list of "input", "middle", "output".
        ``block_list`` is "all", "even", "odd" (substring match) or a range string.
        ``noise_mode`` and ``invert_mask`` are accepted but not read here.
        Returns a one-element tuple holding the updated ``blocks`` dict.
        """
        blocks = {} if blocks is None else copy.deepcopy(blocks)

        block_weights = parse_range_string(block_weights)
        if len(block_weights) == 0:
            block_weights.append(0.0)
        if len(block_weights) == 1:
            block_weights = block_weights * 100
        if type(block_weights[0]) == int:
            block_weights = [float(val) for val in block_weights]

        preset = None
        if "all" in block_list:
            preset = range(100)
        elif "even" in block_list:
            preset = range(0, 100, 2)
        elif "odd" in block_list:
            preset = range(1, 100, 2)

        if preset is not None:
            block_list = list(preset)
            if len(block_weights) == 1:
                block_weights = [block_weights[0]] * 100
        else:
            block_list = parse_range_string_int(block_list)
            # Scatter the listed weights onto their block indices; unlisted blocks get 0.0.
            expanded = [0.0] * 100
            for index, weight in zip(block_list, block_weights):
                expanded[index] = weight
            block_weights = expanded

        style_model = blocks.get('StyleMMDiT')
        if style_model is None:
            style_model = StyleUNet_Model()

        weights = {
            "norm1"     : norm1,
            "norm2"     : norm2,
            "norm3"     : norm3,
            "self_attn" : self_attn,
            "cross_attn": cross_attn,
            "ff"        : ff,
            "self_attn_res" : self_attn_res,
            "cross_attn_res": cross_attn_res,
            "ff_res"        : ff_res,
            "h_tile": tile_h // 8,
            "w_tile": tile_w // 8,
        }

        for kind in block_type.split(","):
            if kind == "input":
                style_blocks = style_model.input_blocks
            elif kind == "middle":
                style_blocks = style_model.middle_blocks
            elif kind == "output":
                style_blocks = style_model.output_blocks

            for bid in block_list:
                block = style_blocks[bid]

                # Only float-valued entries are strengths; the int tile sizes pass through.
                scaled = {
                    name: (value * block_weights[bid]) if isinstance(value, float) else value
                    for name, value in weights.items()
                }

                block.spatial_block.TFMR.set_mode(mode)
                block.spatial_block.TFMR.set_weights(**scaled)
                block.spatial_block.TFMR.apply_to = [apply_to]
                block.spatial_block.TFMR.mask = [mask]

        blocks['StyleMMDiT'] = style_model

        return (blocks, )
class ReChromaDoubleStreamBlock(nn.Module):
    """Two-stream block: img and txt each have their own norms, attention
    projections and MLP, and share one attention call over the concatenated
    (txt, img) tokens.

    ``flipped_img_txt`` is stored on the instance but is not read by ``forward``.
    """

    def __init__(self, hidden_size: int, num_heads: int, mlp_ratio: float, qkv_bias: bool = False, flipped_img_txt=False, dtype=None, device=None, operations=None):
        super().__init__()
        self.num_heads = num_heads
        self.hidden_size = hidden_size

        mlp_width = int(hidden_size * mlp_ratio)
        factory = {"dtype": dtype, "device": device}

        def make_norm():
            return operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, **factory)

        def make_attn():
            return SelfAttention(dim=hidden_size, num_heads=num_heads, qkv_bias=qkv_bias, operations=operations, **factory)

        def make_mlp():
            return nn.Sequential(
                operations.Linear(hidden_size, mlp_width, bias=True, **factory),
                nn.GELU(approximate="tanh"),
                operations.Linear(mlp_width, hidden_size, bias=True, **factory),
            )

        # Submodules are created and registered in the same order as before,
        # so parameter / state_dict ordering is unchanged.
        self.img_norm1 = make_norm()
        self.img_attn = make_attn()
        self.img_norm2 = make_norm()
        self.img_mlp = make_mlp()

        self.txt_norm1 = make_norm()
        self.txt_attn = make_attn()
        self.txt_norm2 = make_norm()
        self.txt_mlp = make_mlp()

        self.flipped_img_txt = flipped_img_txt

    def forward(self, img: Tensor, txt: Tensor, pe: Tensor, vec: Tensor, attn_mask=None):
        """Run one double-stream step and return ``(img, txt)``.

        ``vec`` unpacks as ``((img_mod1, img_mod2), (txt_mod1, txt_mod2))``; each
        item supplies ``scale``, ``shift`` and ``gate``. ``txt`` is updated with
        in-place additions, ``img`` with out-of-place additions.
        """
        (img_mod1, img_mod2), (txt_mod1, txt_mod2) = vec
        heads = self.num_heads

        def project(norm_layer, attn_layer, mod, tokens):
            # normalise -> modulate -> fused qkv -> split into per-head q, k, v
            modulated = norm_layer(tokens)
            modulated = (1 + mod.scale) * modulated + mod.shift
            qkv = attn_layer.qkv(modulated)
            q, k, v = qkv.view(qkv.shape[0], qkv.shape[1], 3, heads, -1).permute(2, 0, 3, 1, 4)
            q, k = attn_layer.norm(q, k, v)
            return q, k, v

        # prepare image, then text, for attention
        img_q, img_k, img_v = project(self.img_norm1, self.img_attn, img_mod1, img)
        txt_q, txt_k, txt_v = project(self.txt_norm1, self.txt_attn, txt_mod1, txt)

        # joint attention, txt tokens first
        joint_q = torch.cat((txt_q, img_q), dim=2)
        joint_k = torch.cat((txt_k, img_k), dim=2)
        joint_v = torch.cat((txt_v, img_v), dim=2)
        attn = attention(joint_q, joint_k, joint_v, pe=pe, mask=attn_mask)

        txt_len = txt.shape[1]
        txt_attn = attn[:, :txt_len]
        img_attn = attn[:, txt_len:]

        # image stream: gated attention output, then gated modulated MLP
        img = img + img_mod1.gate * self.img_attn.proj(img_attn)
        img = img + img_mod2.gate * self.img_mlp((1 + img_mod2.scale) * self.img_norm2(img) + img_mod2.shift)

        # text stream: same two steps, accumulated in place
        txt += txt_mod1.gate * self.txt_attn.proj(txt_attn)
        txt += txt_mod2.gate * self.txt_mlp((1 + txt_mod2.scale) * self.txt_norm2(txt) + txt_mod2.shift)

        # in fp16, replace NaN with 0 and clamp +/-inf to +/-65504
        if txt.dtype == torch.float16:
            txt = torch.nan_to_num(txt, nan=0.0, posinf=65504, neginf=-65504)

        return img, txt
""" def __init__( self, hidden_size: int, num_heads: int, mlp_ratio: float = 4.0, qk_scale: float = None, dtype=None, device=None, operations=None ): super().__init__() self.hidden_dim = hidden_size self.num_heads = num_heads head_dim = hidden_size // num_heads self.scale = qk_scale or head_dim**-0.5 self.mlp_hidden_dim = int(hidden_size * mlp_ratio) # qkv and mlp_in self.linear1 = operations.Linear(hidden_size, hidden_size * 3 + self.mlp_hidden_dim, dtype=dtype, device=device) # proj and mlp_out self.linear2 = operations.Linear(hidden_size + self.mlp_hidden_dim, hidden_size, dtype=dtype, device=device) self.norm = QKNorm(head_dim, dtype=dtype, device=device, operations=operations) self.hidden_size = hidden_size self.pre_norm = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device) self.mlp_act = nn.GELU(approximate="tanh") def forward(self, x: Tensor, pe: Tensor, vec: Tensor, attn_mask=None) -> Tensor: mod = vec x_mod = (1 + mod.scale) * self.pre_norm(x) + mod.shift qkv, mlp = torch.split(self.linear1(x_mod), [3 * self.hidden_size, self.mlp_hidden_dim], dim=-1) q, k, v = qkv.view(qkv.shape[0], qkv.shape[1], 3, self.num_heads, -1).permute(2, 0, 3, 1, 4) q, k = self.norm(q, k, v) # compute attention attn = attention(q, k, v, pe=pe, mask=attn_mask) # compute activation in mlp stream, cat again and run second linear layer output = self.linear2(torch.cat((attn, self.mlp_act(mlp)), 2)) x += mod.gate * output if x.dtype == torch.float16: x = torch.nan_to_num(x, nan=0.0, posinf=65504, neginf=-65504) return x class LastLayer(nn.Module): def __init__(self, hidden_size: int, patch_size: int, out_channels: int, dtype=None, device=None, operations=None): super().__init__() self.norm_final = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device) self.linear = operations.Linear(hidden_size, out_channels, bias=True, dtype=dtype, device=device) def forward(self, x: Tensor, vec: Tensor) -> Tensor: 
def attention(q: Tensor, k: Tensor, v: Tensor, pe: Tensor, mask=None) -> Tensor:
    """Rotate q/k with ``pe`` and run ``attention_pytorch`` on the result.

    ``attention_pytorch`` is used whether or not a mask is supplied; the
    head count is read from dim 1 of the rotated query.
    """
    q, k = apply_rope(q, k, pe)
    return attention_pytorch(q, k, v, q.shape[1], skip_reshape=True, mask=mask)


def rope(pos: Tensor, dim: int, theta: int) -> Tensor:
    """Build 2x2 rotation matrices for each position and frequency.

    The frequency table is computed on the CPU when the position tensor
    lives on MPS, or when Intel XPU or DirectML is active; the result is
    always returned as float32 on ``pos.device``.
    """
    assert dim % 2 == 0
    mm = comfy.model_management
    compute_on_cpu = (
        mm.is_device_mps(pos.device)
        or mm.is_intel_xpu()
        or mm.is_directml_enabled()
    )
    device = torch.device("cpu") if compute_on_cpu else pos.device

    exponents = torch.linspace(0, (dim - 2) / dim, steps=dim//2, dtype=torch.float64, device=device)
    omega = 1.0 / (theta**exponents)
    angles = torch.einsum("...n,d->...nd", pos.to(dtype=torch.float32, device=device), omega)
    cos, sin = torch.cos(angles), torch.sin(angles)
    # The last axis holds the flattened rotation matrix [[cos, -sin], [sin, cos]].
    flat = torch.stack([cos, -sin, sin, cos], dim=-1)
    matrices = rearrange(flat, "b n d (i j) -> b n d i j", i=2, j=2)
    return matrices.to(dtype=torch.float32, device=pos.device)


def apply_rope(xq: Tensor, xk: Tensor, freqs_cis: Tensor):
    """Apply the rotation matrices in ``freqs_cis`` to queries and keys.

    The last dimension of each input is treated as consecutive pairs; the
    math runs in float32 and each result is cast back to its input dtype.
    """
    def rotate(x: Tensor) -> Tensor:
        pairs = x.float().reshape(*x.shape[:-1], -1, 1, 2)
        rotated = freqs_cis[..., 0] * pairs[..., 0] + freqs_cis[..., 1] * pairs[..., 1]
        return rotated.reshape(*x.shape).type_as(x)

    return rotate(xq), rotate(xk)
@dataclass
class ChromaParams:
    """Hyper-parameters consumed by ``ReChroma.__init__`` (built from its ``**kwargs``)."""

    # Input width of the image projection ``img_in``.
    in_channels        : int
    # Output width of the final layer's linear projection.
    out_channels       : int
    # Input width of the text projection ``txt_in``.
    context_in_dim     : int
    # Transformer width; must be divisible by ``num_heads``.
    hidden_size        : int
    # Passed as ``mlp_ratio`` to the double and single stream blocks.
    mlp_ratio          : float
    # Number of attention heads.
    num_heads          : int
    # Number of double stream blocks.
    depth              : int
    # Number of single stream blocks.
    depth_single_blocks: int
    # Per-axis positional dims; their sum must equal hidden_size // num_heads.
    axes_dim           : list
    # Forwarded to the positional embedder (``EmbedND``) as ``theta``.
    theta              : int
    # Stored on the model as ``patch_size``.
    patch_size         : int
    # Forwarded to the double stream blocks as ``qkv_bias``.
    qkv_bias           : bool
    # The four fields below configure the ``Approximator`` that produces the
    # modulation vectors (its in_dim / out_dim / hidden_dim / n_layers).
    in_dim             : int
    out_dim            : int
    hidden_dim         : int
    n_layers           : int
""" def __init__(self, image_model=None, final_layer=True, dtype=None, device=None, operations=None, **kwargs): super().__init__() self.dtype = dtype params = ChromaParams(**kwargs) self.params = params self.patch_size = params.patch_size self.in_channels = params.in_channels self.out_channels = params.out_channels if params.hidden_size % params.num_heads != 0: raise ValueError( f"Hidden size {params.hidden_size} must be divisible by num_heads {params.num_heads}" ) pe_dim = params.hidden_size // params.num_heads if sum(params.axes_dim) != pe_dim: raise ValueError(f"Got {params.axes_dim} but expected positional dim {pe_dim}") self.hidden_size = params.hidden_size self.num_heads = params.num_heads self.in_dim = params.in_dim self.out_dim = params.out_dim self.hidden_dim = params.hidden_dim self.n_layers = params.n_layers self.pe_embedder = EmbedND(dim=pe_dim, theta=params.theta, axes_dim=params.axes_dim) self.img_in = operations.Linear(self.in_channels, self.hidden_size, bias=True, dtype=dtype, device=device) self.txt_in = operations.Linear(params.context_in_dim, self.hidden_size, dtype=dtype, device=device) # set as nn identity for now, will overwrite it later. 
self.distilled_guidance_layer = Approximator( in_dim=self.in_dim, hidden_dim=self.hidden_dim, out_dim=self.out_dim, n_layers=self.n_layers, dtype=dtype, device=device, operations=operations ) self.double_blocks = nn.ModuleList( [ ReChromaDoubleStreamBlock( self.hidden_size, self.num_heads, mlp_ratio=params.mlp_ratio, qkv_bias=params.qkv_bias, dtype=dtype, device=device, operations=operations ) for _ in range(params.depth) ] ) self.single_blocks = nn.ModuleList( [ ReChromaSingleStreamBlock(self.hidden_size, self.num_heads, mlp_ratio=params.mlp_ratio, dtype=dtype, device=device, operations=operations) for _ in range(params.depth_single_blocks) ] ) if final_layer: self.final_layer = LastLayer(self.hidden_size, 1, self.out_channels, dtype=dtype, device=device, operations=operations) self.skip_mmdit = [] self.skip_dit = [] self.lite = False def get_modulations(self, tensor: torch.Tensor, block_type: str, *, idx: int = 0): # This function slices up the modulations tensor which has the following layout: # single : num_single_blocks * 3 elements # double_img : num_double_blocks * 6 elements # double_txt : num_double_blocks * 6 elements # final : 2 elements if block_type == "final": return (tensor[:, -2:-1, :], tensor[:, -1:, :]) single_block_count = self.params.depth_single_blocks double_block_count = self.params.depth offset = 3 * idx if block_type == "single": return ChromaModulationOut.from_offset(tensor, offset) # Double block modulations are 6 elements so we double 3 * idx. offset *= 2 if block_type in {"double_img", "double_txt"}: # Advance past the single block modulations. offset += 3 * single_block_count if block_type == "double_txt": # Advance past the double block img modulations. 
offset += 6 * double_block_count return ( ChromaModulationOut.from_offset(tensor, offset), ChromaModulationOut.from_offset(tensor, offset + 3), ) raise ValueError("Bad block_type") def forward_blocks( self, img : Tensor, img_ids : Tensor, txt : Tensor, txt_ids : Tensor, timesteps : Tensor, guidance : Tensor = None, control = None, update_cross_attn = None, transformer_options ={}, attn_mask : Tensor = None, UNCOND : bool = False, ) -> Tensor: patches_replace = transformer_options.get("patches_replace", {}) if img.ndim != 3 or txt.ndim != 3: raise ValueError("Input img and txt tensors must have 3 dimensions.") # running on sequences img img = self.img_in(img) # distilled vector guidance mod_index_length = 344 distill_timestep = timestep_embedding(timesteps.detach().clone(), 16).to(img.device, img.dtype) # guidance = guidance * distil_guidance = timestep_embedding(guidance.detach().clone(), 16).to(img.device, img.dtype) # get all modulation index modulation_index = timestep_embedding(torch.arange(mod_index_length), 32).to(img.device, img.dtype) # we need to broadcast the modulation index here so each batch has all of the index modulation_index = modulation_index.unsqueeze(0).repeat(img.shape[0], 1, 1).to(img.device, img.dtype) # and we need to broadcast timestep and guidance along too timestep_guidance = torch.cat([distill_timestep, distil_guidance], dim=1).unsqueeze(1).repeat(1, mod_index_length, 1).to(img.dtype).to(img.device, img.dtype) # then and only then we could concatenate it together input_vec = torch.cat([timestep_guidance, modulation_index], dim=-1).to(img.device, img.dtype) mod_vectors = self.distilled_guidance_layer(input_vec) txt = self.txt_in(txt) ids = torch.cat((txt_ids, img_ids), dim=1) pe = self.pe_embedder(ids) weight = -1 * transformer_options.get("regional_conditioning_weight", 0.0) floor = -1 * transformer_options.get("regional_conditioning_floor", 0.0) mask_zero = None mask = None text_len = txt.shape[1] # mask_obj[0].text_len if not UNCOND 
and 'AttnMask' in transformer_options: # and weight != 0: AttnMask = transformer_options['AttnMask'] mask = transformer_options['AttnMask'].attn_mask.mask.to('cuda') if mask_zero is None: mask_zero = torch.ones_like(mask) img_len = transformer_options['AttnMask'].img_len #mask_zero[:text_len, :text_len] = mask[:text_len, :text_len] mask_zero[:text_len, :] = mask[:text_len, :] mask_zero[:, :text_len] = mask[:, :text_len] if weight == 0: mask = None if UNCOND and 'AttnMask_neg' in transformer_options: # and weight != 0: AttnMask = transformer_options['AttnMask_neg'] if mask_zero is None: mask_zero = torch.ones_like(mask) img_len = transformer_options['AttnMask_neg'].img_len #mask_zero[:text_len, :text_len] = mask[:text_len, :text_len] mask_zero[:text_len, :] = mask[:text_len, :] mask_zero[:, :text_len] = mask[:, :text_len] if weight == 0: mask = None elif UNCOND and 'AttnMask' in transformer_options: AttnMask = transformer_options['AttnMask'] mask = transformer_options['AttnMask'].attn_mask.mask.to('cuda') if mask_zero is None: mask_zero = torch.ones_like(mask) img_len = transformer_options['AttnMask'].img_len #mask_zero[:text_len, :text_len] = mask[:text_len, :text_len] mask_zero[:text_len, :] = mask[:text_len, :] mask_zero[:, :text_len] = mask[:, :text_len] if weight == 0: mask = None if mask is not None and not type(mask[0][0].item()) == bool: mask = mask.to(img.dtype) if mask_zero is not None and not type(mask_zero[0][0].item()) == bool: mask_zero = mask_zero.to(img.dtype) total_layers = len(self.double_blocks) + len(self.single_blocks) attn_mask = mask if attn_mask is None else attn_mask blocks_replace = patches_replace.get("dit", {}) for i, block in enumerate(self.double_blocks): if i not in self.skip_mmdit: double_mod = ( self.get_modulations(mod_vectors, "double_img", idx=i), self.get_modulations(mod_vectors, "double_txt", idx=i), ) if ("double_block", i) in blocks_replace: def block_wrap(args): out = {} out["img"], out["txt"] = block( img = args["img"], txt 
= args["txt"], vec = args["vec"], pe = args["pe"], attn_mask = args.get("attn_mask")) return out out = blocks_replace[("double_block", i)]({ "img" : img, "txt" : txt, "vec" : double_mod, "pe" : pe, "attn_mask" : attn_mask}, {"original_block" : block_wrap}) txt = out["txt"] img = out["img"] else: if weight > 0 and mask is not None and weight <= i/total_layers: img, txt = block(img=img, txt=txt, vec=double_mod, pe=pe, attn_mask=mask_zero) elif (weight < 0 and mask is not None and abs(weight) <= (1 - i/total_layers)): img_tmpZ, txt_tmpZ = img.clone(), txt.clone() img_tmpZ, txt = block(img=img_tmpZ, txt=txt_tmpZ, vec=double_mod, pe=pe, attn_mask=mask) img, txt_tmpZ = block(img=img , txt=txt , vec=double_mod, pe=pe, attn_mask=mask_zero) elif floor > 0 and mask is not None and floor >= i/total_layers: mask_tmp = mask.clone() mask_tmp[text_len:, text_len:] = 1.0 img, txt = block(img=img, txt=txt, vec=double_mod, pe=pe, attn_mask=mask_tmp) elif floor < 0 and mask is not None and abs(floor) >= (1 - i/total_layers): mask_tmp = mask.clone() mask_tmp[text_len:, text_len:] = 1.0 img, txt = block(img=img, txt=txt, vec=double_mod, pe=pe, attn_mask=mask_tmp) elif update_cross_attn is not None and update_cross_attn['skip_cross_attn']: print("update_cross_attn not yet implemented for Chroma.", flush=True) #img, txt_init = block(img, img_masks, txt, clip, rope, mask, update_cross_attn=update_cross_attn) else: img, txt = block(img=img, txt=txt, vec=double_mod, pe=pe, attn_mask=attn_mask) #img, txt = block(img=img, txt=txt, vec=double_mod, pe=pe, attn_mask=attn_mask) if control is not None: # Controlnet control_i = control.get("input") if i < len(control_i): add = control_i[i] if add is not None: img += add img = torch.cat((txt, img), 1) for i, block in enumerate(self.single_blocks): if i not in self.skip_dit: single_mod = self.get_modulations(mod_vectors, "single", idx=i) if ("single_block", i) in blocks_replace: def block_wrap(args): out = {} out["img"] = block( args["img"], 
vec=args["vec"], pe=args["pe"], attn_mask=args.get("attn_mask")) return out out = blocks_replace[("single_block", i)]({ "img" : img, "vec" : single_mod, "pe" : pe, "attn_mask" : attn_mask}, {"original_block" : block_wrap}) img = out["img"] else: if weight > 0 and mask is not None and weight <= (i+len(self.double_blocks))/total_layers: img = block(img, vec=single_mod, pe=pe, attn_mask=mask_zero) elif weight < 0 and mask is not None and abs(weight) <= (1 - (i+len(self.double_blocks))/total_layers): img = block(img, vec=single_mod, pe=pe, attn_mask=mask_zero) elif floor > 0 and mask is not None and floor >= (i+len(self.double_blocks))/total_layers: mask_tmp = mask.clone() mask_tmp[text_len:, text_len:] = 1.0 img = block(img, vec=single_mod, pe=pe, attn_mask=mask_tmp) elif floor < 0 and mask is not None and abs(floor) >= (1 - (i+len(self.double_blocks))/total_layers): mask_tmp = mask.clone() mask_tmp[text_len:, text_len:] = 1.0 img = block(img, vec=single_mod, pe=pe, attn_mask=mask_tmp) else: img = block(img, vec=single_mod, pe=pe, attn_mask=attn_mask) if control is not None: # Controlnet control_o = control.get("output") if i < len(control_o): add = control_o[i] if add is not None: img[:, txt.shape[1] :, ...] += add img = img[:, txt.shape[1] :, ...] 
final_mod = self.get_modulations(mod_vectors, "final") img = self.final_layer(img, vec=final_mod) # (N, T, patch_size ** 2 * out_channels) return img def forward_chroma_depr(self, x, timestep, context, guidance, control=None, transformer_options={}, **kwargs): bs, c, h, w = x.shape patch_size = 2 x = comfy.ldm.common_dit.pad_to_patch_size(x, (patch_size, patch_size)) img = rearrange(x, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=patch_size, pw=patch_size) h_len = ((h + (patch_size // 2)) // patch_size) w_len = ((w + (patch_size // 2)) // patch_size) img_ids = torch.zeros((h_len, w_len, 3), device=x.device, dtype=x.dtype) img_ids[:, :, 1] = img_ids[:, :, 1] + torch.linspace(0, h_len - 1, steps=h_len, device=x.device, dtype=x.dtype).unsqueeze(1) img_ids[:, :, 2] = img_ids[:, :, 2] + torch.linspace(0, w_len - 1, steps=w_len, device=x.device, dtype=x.dtype).unsqueeze(0) img_ids = repeat(img_ids, "h w c -> b (h w) c", b=bs) txt_ids = torch.zeros((bs, context.shape[1], 3), device=x.device, dtype=x.dtype) out = self.forward_orig(img, img_ids, context, txt_ids, timestep, guidance, control, transformer_options, attn_mask=kwargs.get("attention_mask", None)) return rearrange(out, "b (h w) (c ph pw) -> b c (h ph) (w pw)", h=h_len, w=w_len, ph=2, pw=2)[:,:,:h,:w] def _get_img_ids(self, x, bs, h_len, w_len, h_start, h_end, w_start, w_end): img_ids = torch.zeros( (h_len, w_len, 3), device=x.device, dtype=x.dtype) img_ids[..., 1] += torch.linspace(h_start, h_end - 1, steps=h_len, device=x.device, dtype=x.dtype)[:, None] img_ids[..., 2] += torch.linspace(w_start, w_end - 1, steps=w_len, device=x.device, dtype=x.dtype)[None, :] img_ids = repeat(img_ids, "h w c -> b (h w) c", b=bs) return img_ids def forward(self, x, timestep, context, #y, guidance, control = None, transformer_options = {}, **kwargs ): x_orig = x.clone() SIGMA = timestep[0].unsqueeze(0) update_cross_attn = transformer_options.get("update_cross_attn") EO = transformer_options.get("ExtraOptions", ExtraOptions("")) if 
EO is not None: EO.mute = True y0_style_pos = transformer_options.get("y0_style_pos") y0_style_neg = transformer_options.get("y0_style_neg") y0_style_pos_weight = transformer_options.get("y0_style_pos_weight", 0.0) y0_style_pos_synweight = transformer_options.get("y0_style_pos_synweight", 0.0) y0_style_pos_synweight *= y0_style_pos_weight y0_style_neg_weight = transformer_options.get("y0_style_neg_weight", 0.0) y0_style_neg_synweight = transformer_options.get("y0_style_neg_synweight", 0.0) y0_style_neg_synweight *= y0_style_neg_weight weight = -1 * transformer_options.get("regional_conditioning_weight", 0.0) floor = -1 * transformer_options.get("regional_conditioning_floor", 0.0) freqsep_lowpass_method = transformer_options.get("freqsep_lowpass_method") freqsep_sigma = transformer_options.get("freqsep_sigma") freqsep_kernel_size = transformer_options.get("freqsep_kernel_size") freqsep_inner_kernel_size = transformer_options.get("freqsep_inner_kernel_size") freqsep_stride = transformer_options.get("freqsep_stride") freqsep_lowpass_weight = transformer_options.get("freqsep_lowpass_weight") freqsep_highpass_weight= transformer_options.get("freqsep_highpass_weight") freqsep_mask = transformer_options.get("freqsep_mask") out_list = [] for i in range(len(transformer_options['cond_or_uncond'])): UNCOND = transformer_options['cond_or_uncond'][i] == 1 if update_cross_attn is not None: update_cross_attn['UNCOND'] = UNCOND img = x bs, c, h, w = x.shape patch_size = 2 img = comfy.ldm.common_dit.pad_to_patch_size(img, (patch_size, patch_size)) # 1,16,192,192 transformer_options['original_shape'] = img.shape transformer_options['patch_size'] = patch_size h_len = ((h + (patch_size // 2)) // patch_size) # h_len 96 w_len = ((w + (patch_size // 2)) // patch_size) # w_len 96 img = rearrange(img, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=patch_size, pw=patch_size) # img 1,9216,64 1,16,128,128 -> 1,4096,64 context_tmp = None if not UNCOND and 'AttnMask' in transformer_options: # and 
weight != 0: AttnMask = transformer_options['AttnMask'] mask = transformer_options['AttnMask'].attn_mask.mask.to('cuda') if weight == 0: context_tmp = transformer_options['RegContext'].context.to(context.dtype).to(context.device) mask = None else: context_tmp = transformer_options['RegContext'].context.to(context.dtype).to(context.device) if UNCOND and 'AttnMask_neg' in transformer_options: # and weight != 0: AttnMask = transformer_options['AttnMask_neg'] mask = transformer_options['AttnMask_neg'].attn_mask.mask.to('cuda') if weight == 0: context_tmp = transformer_options['RegContext_neg'].context.to(context.dtype).to(context.device) mask = None else: context_tmp = transformer_options['RegContext_neg'].context.to(context.dtype).to(context.device) elif UNCOND and 'AttnMask' in transformer_options: AttnMask = transformer_options['AttnMask'] mask = transformer_options['AttnMask'].attn_mask.mask.to('cuda') A = context B = transformer_options['RegContext'].context context_tmp = A.repeat(1, (B.shape[1] // A.shape[1]) + 1, 1)[:, :B.shape[1], :] if context_tmp is None: context_tmp = context[i][None,...].clone() txt_ids = torch.zeros((bs, context_tmp.shape[1], 3), device=img.device, dtype=img.dtype) # txt_ids 1, 256,3 img_ids_orig = self._get_img_ids(img, bs, h_len, w_len, 0, h_len, 0, w_len) # img_ids_orig = 1,9216,3 out_tmp = self.forward_blocks(img [i][None,...].clone(), img_ids_orig[i][None,...].clone(), context_tmp, txt_ids [i][None,...].clone(), timestep [i][None,...].clone(), #y [i][None,...].clone(), guidance [i][None,...].clone(), control, update_cross_attn=update_cross_attn, transformer_options=transformer_options, UNCOND = UNCOND, ) # context 1,256,4096 y 1,768 out_list.append(out_tmp) out = torch.stack(out_list, dim=0).squeeze(dim=1) eps = rearrange(out, "b (h w) (c ph pw) -> b c (h ph) (w pw)", h=h_len, w=w_len, ph=2, pw=2)[:,:,:h,:w] dtype = eps.dtype if self.style_dtype is None else self.style_dtype if y0_style_pos is not None: y0_style_pos_weight = 
transformer_options.get("y0_style_pos_weight") y0_style_pos_synweight = transformer_options.get("y0_style_pos_synweight") y0_style_pos_synweight *= y0_style_pos_weight y0_style_pos_mask = transformer_options.get("y0_style_pos_mask") y0_style_pos_mask_edge = transformer_options.get("y0_style_pos_mask_edge") y0_style_pos = y0_style_pos.to(dtype) x = x_orig.clone().to(dtype) eps = eps.to(dtype) eps_orig = eps.clone() sigma = SIGMA #t_orig[0].to(torch.float32) / 1000 denoised = x - sigma * eps denoised_embed = self.Retrojector.embed(denoised) y0_adain_embed = self.Retrojector.embed(y0_style_pos) if transformer_options['y0_style_method'] == "scattersort": tile_h, tile_w = transformer_options.get('y0_style_tile_height'), transformer_options.get('y0_style_tile_width') pad = transformer_options.get('y0_style_tile_padding') if pad is not None and tile_h is not None and tile_w is not None: denoised_spatial = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) y0_adain_spatial = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) if EO("scattersort_median_LP"): denoised_spatial_LP = median_blur_2d(denoised_spatial, kernel_size=EO("scattersort_median_LP",7)) y0_adain_spatial_LP = median_blur_2d(y0_adain_spatial, kernel_size=EO("scattersort_median_LP",7)) denoised_spatial_HP = denoised_spatial - denoised_spatial_LP y0_adain_spatial_HP = y0_adain_spatial - y0_adain_spatial_LP denoised_spatial_LP = apply_scattersort_tiled(denoised_spatial_LP, y0_adain_spatial_LP, tile_h, tile_w, pad) denoised_spatial = denoised_spatial_LP + denoised_spatial_HP denoised_embed = rearrange(denoised_spatial, "b c h w -> b (h w) c") else: denoised_spatial = apply_scattersort_tiled(denoised_spatial, y0_adain_spatial, tile_h, tile_w, pad) denoised_embed = rearrange(denoised_spatial, "b c h w -> b (h w) c") else: denoised_embed = apply_scattersort_masked(denoised_embed, y0_adain_embed, y0_style_pos_mask, y0_style_pos_mask_edge, h_len, w_len) elif 
transformer_options['y0_style_method'] == "AdaIN": if freqsep_mask is not None: freqsep_mask = freqsep_mask.view(1, 1, *freqsep_mask.shape[-2:]).float() freqsep_mask = F.interpolate(freqsep_mask.float(), size=(h_len, w_len), mode='nearest-exact') if hasattr(self, "adain_tile"): tile_h, tile_w = self.adain_tile denoised_pretile = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) y0_adain_pretile = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) if self.adain_flag: h_off = tile_h // 2 w_off = tile_w // 2 denoised_pretile = denoised_pretile[:,:,h_off:-h_off, w_off:-w_off] self.adain_flag = False else: h_off = 0 w_off = 0 self.adain_flag = True tiles, orig_shape, grid, strides = tile_latent(denoised_pretile, tile_size=(tile_h,tile_w)) y0_tiles, orig_shape, grid, strides = tile_latent(y0_adain_pretile, tile_size=(tile_h,tile_w)) tiles_out = [] for i in range(tiles.shape[0]): tile = tiles[i].unsqueeze(0) y0_tile = y0_tiles[i].unsqueeze(0) tile = rearrange(tile, "b c h w -> b (h w) c", h=tile_h, w=tile_w) y0_tile = rearrange(y0_tile, "b c h w -> b (h w) c", h=tile_h, w=tile_w) tile = adain_seq_inplace(tile, y0_tile) tiles_out.append(rearrange(tile, "b (h w) c -> b c h w", h=tile_h, w=tile_w)) tiles_out_tensor = torch.cat(tiles_out, dim=0) tiles_out_tensor = untile_latent(tiles_out_tensor, orig_shape, grid, strides) if h_off == 0: denoised_pretile = tiles_out_tensor else: denoised_pretile[:,:,h_off:-h_off, w_off:-w_off] = tiles_out_tensor denoised_embed = rearrange(denoised_pretile, "b c h w -> b (h w) c", h=h_len, w=w_len) elif freqsep_lowpass_method is not None and freqsep_lowpass_method.endswith("pw"): #EO("adain_pw"): denoised_spatial = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) y0_adain_spatial = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) if freqsep_lowpass_method == "median_pw": denoised_spatial_new = adain_patchwise_row_batch_med(denoised_spatial.clone(), 
y0_adain_spatial.clone().repeat(denoised_spatial.shape[0],1,1,1), sigma=freqsep_sigma, kernel_size=freqsep_kernel_size, use_median_blur=True, lowpass_weight=freqsep_lowpass_weight, highpass_weight=freqsep_highpass_weight) elif freqsep_lowpass_method == "gaussian_pw": denoised_spatial_new = adain_patchwise_row_batch(denoised_spatial.clone(), y0_adain_spatial.clone().repeat(denoised_spatial.shape[0],1,1,1), sigma=freqsep_sigma, kernel_size=freqsep_kernel_size) denoised_embed = rearrange(denoised_spatial_new, "b c h w -> b (h w) c", h=h_len, w=w_len) elif freqsep_lowpass_method is not None: denoised_spatial = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) y0_adain_spatial = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) if freqsep_lowpass_method == "median": denoised_spatial_LP = median_blur_2d(denoised_spatial, kernel_size=freqsep_kernel_size) y0_adain_spatial_LP = median_blur_2d(y0_adain_spatial, kernel_size=freqsep_kernel_size) elif freqsep_lowpass_method == "gaussian": denoised_spatial_LP = gaussian_blur_2d(denoised_spatial, sigma=freqsep_sigma, kernel_size=freqsep_kernel_size) y0_adain_spatial_LP = gaussian_blur_2d(y0_adain_spatial, sigma=freqsep_sigma, kernel_size=freqsep_kernel_size) denoised_spatial_HP = denoised_spatial - denoised_spatial_LP if EO("adain_fs_uhp"): y0_adain_spatial_HP = y0_adain_spatial - y0_adain_spatial_LP denoised_spatial_ULP = gaussian_blur_2d(denoised_spatial, sigma=EO("adain_fs_uhp_sigma", 1.0), kernel_size=EO("adain_fs_uhp_kernel_size", 3)) y0_adain_spatial_ULP = gaussian_blur_2d(y0_adain_spatial, sigma=EO("adain_fs_uhp_sigma", 1.0), kernel_size=EO("adain_fs_uhp_kernel_size", 3)) denoised_spatial_UHP = denoised_spatial_HP - denoised_spatial_ULP y0_adain_spatial_UHP = y0_adain_spatial_HP - y0_adain_spatial_ULP #denoised_spatial_HP = y0_adain_spatial_ULP + denoised_spatial_UHP denoised_spatial_HP = denoised_spatial_ULP + y0_adain_spatial_UHP denoised_spatial_new = freqsep_lowpass_weight * 
y0_adain_spatial_LP + freqsep_highpass_weight * denoised_spatial_HP denoised_embed = rearrange(denoised_spatial_new, "b c h w -> b (h w) c", h=h_len, w=w_len) else: denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed) for adain_iter in range(EO("style_iter", 0)): denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed) denoised_embed = self.Retrojector.embed(self.Retrojector.unembed(denoised_embed)) denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed) elif transformer_options['y0_style_method'] == "WCT": self.StyleWCT.set(y0_adain_embed) denoised_embed = self.StyleWCT.get(denoised_embed) if transformer_options.get('y0_standard_guide') is not None: y0_standard_guide = transformer_options.get('y0_standard_guide') y0_standard_guide_embed = self.Retrojector.embed(y0_standard_guide) f_cs = self.StyleWCT.get(y0_standard_guide_embed) self.y0_standard_guide = self.Retrojector.unembed(f_cs) if transformer_options.get('y0_inv_standard_guide') is not None: y0_inv_standard_guide = transformer_options.get('y0_inv_standard_guide') y0_inv_standard_guide_embed = self.Retrojector.embed(y0_inv_standard_guide) f_cs = self.StyleWCT.get(y0_inv_standard_guide_embed) self.y0_inv_standard_guide = self.Retrojector.unembed(f_cs) denoised_approx = self.Retrojector.unembed(denoised_embed) eps = (x - denoised_approx) / sigma if not UNCOND: if eps.shape[0] == 2: eps[1] = eps_orig[1] + y0_style_pos_weight * (eps[1] - eps_orig[1]) eps[0] = eps_orig[0] + y0_style_pos_synweight * (eps[0] - eps_orig[0]) else: eps[0] = eps_orig[0] + y0_style_pos_weight * (eps[0] - eps_orig[0]) elif eps.shape[0] == 1 and UNCOND: eps[0] = eps_orig[0] + y0_style_pos_synweight * (eps[0] - eps_orig[0]) eps = eps.float() if y0_style_neg is not None: y0_style_neg_weight = transformer_options.get("y0_style_neg_weight") y0_style_neg_synweight = transformer_options.get("y0_style_neg_synweight") y0_style_neg_synweight *= y0_style_neg_weight y0_style_neg_mask = 
transformer_options.get("y0_style_neg_mask") y0_style_neg_mask_edge = transformer_options.get("y0_style_neg_mask_edge") y0_style_neg = y0_style_neg.to(dtype) x = x_orig.clone().to(dtype) eps = eps.to(dtype) eps_orig = eps.clone() sigma = SIGMA #t_orig[0].to(torch.float32) / 1000 denoised = x - sigma * eps denoised_embed = self.Retrojector.embed(denoised) y0_adain_embed = self.Retrojector.embed(y0_style_neg) if transformer_options['y0_style_method'] == "scattersort": tile_h, tile_w = transformer_options.get('y0_style_tile_height'), transformer_options.get('y0_style_tile_width') pad = transformer_options.get('y0_style_tile_padding') if pad is not None and tile_h is not None and tile_w is not None: denoised_spatial = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) y0_adain_spatial = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) denoised_spatial = apply_scattersort_tiled(denoised_spatial, y0_adain_spatial, tile_h, tile_w, pad) denoised_embed = rearrange(denoised_spatial, "b c h w -> b (h w) c") else: denoised_embed = apply_scattersort_masked(denoised_embed, y0_adain_embed, y0_style_neg_mask, y0_style_neg_mask_edge, h_len, w_len) elif transformer_options['y0_style_method'] == "AdaIN": denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed) for adain_iter in range(EO("style_iter", 0)): denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed) denoised_embed = self.Retrojector.embed(self.Retrojector.unembed(denoised_embed)) denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed) elif transformer_options['y0_style_method'] == "WCT": self.StyleWCT.set(y0_adain_embed) denoised_embed = self.StyleWCT.get(denoised_embed) denoised_approx = self.Retrojector.unembed(denoised_embed) if UNCOND: eps = (x - denoised_approx) / sigma eps[0] = eps_orig[0] + y0_style_neg_weight * (eps[0] - eps_orig[0]) if eps.shape[0] == 2: eps[1] = eps_orig[1] + y0_style_neg_synweight * (eps[1] - eps_orig[1]) elif eps.shape[0] == 1 
and not UNCOND: eps[0] = eps_orig[0] + y0_style_neg_synweight * (eps[0] - eps_orig[0]) eps = eps.float() return eps dtype = eps.dtype if self.style_dtype is None else self.style_dtype pinv_dtype = torch.float32 if dtype != torch.float64 else dtype W_inv = None #if eps.shape[0] == 2 or (eps.shape[0] == 1): #: and not UNCOND): if y0_style_pos is not None and y0_style_pos_weight != 0.0: y0_style_pos = y0_style_pos.to(dtype) x = x.to(dtype) eps = eps.to(dtype) eps_orig = eps.clone() sigma = SIGMA #t_orig[0].to(torch.float32) / 1000 denoised = x - sigma * eps img = comfy.ldm.common_dit.pad_to_patch_size(denoised, (self.patch_size, self.patch_size)) img = rearrange(img, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=patch_size, pw=patch_size) # img 1,9216,64 1,16,128,128 -> 1,4096,64 img_y0_adain = comfy.ldm.common_dit.pad_to_patch_size(y0_style_pos, (self.patch_size, self.patch_size)) img_y0_adain = rearrange(img_y0_adain, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=patch_size, pw=patch_size) # img 1,9216,64 1,16,128,128 -> 1,4096,64 W = self.img_in.weight.data.to(dtype) # shape [2560, 64] b = self.img_in.bias.data.to(dtype) # shape [2560] denoised_embed = F.linear(img .to(W), W, b).to(img) y0_adain_embed = F.linear(img_y0_adain.to(W), W, b).to(img_y0_adain) if transformer_options['y0_style_method'] == "AdaIN": if freqsep_mask is not None: freqsep_mask = freqsep_mask.view(1, 1, *freqsep_mask.shape[-2:]).float() freqsep_mask = F.interpolate(freqsep_mask.float(), size=(h_len, w_len), mode='nearest-exact') if freqsep_lowpass_method is not None and freqsep_lowpass_method.endswith("pw"): #EO("adain_pw"): #if self.y0_adain_embed is None or self.y0_adain_embed.shape != y0_adain_embed.shape or torch.norm(self.y0_adain_embed - y0_adain_embed) > 0: # self.y0_adain_embed = y0_adain_embed # self.adain_pw_cache = None denoised_spatial = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) y0_adain_spatial = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, 
w=w_len) if freqsep_lowpass_method == "median_alt": denoised_spatial_new = adain_patchwise_row_batch_medblur(denoised_spatial.clone(), y0_adain_spatial.clone().repeat(denoised_spatial.shape[0],1,1,1), sigma=freqsep_sigma, kernel_size=freqsep_kernel_size, use_median_blur=True) elif freqsep_lowpass_method == "median_pw": denoised_spatial_new = adain_patchwise_row_batch_realmedblur(denoised_spatial.clone(), y0_adain_spatial.clone().repeat(denoised_spatial.shape[0],1,1,1), sigma=freqsep_sigma, kernel_size=freqsep_kernel_size, use_median_blur=True, lowpass_weight=freqsep_lowpass_weight, highpass_weight=freqsep_highpass_weight) elif freqsep_lowpass_method == "gaussian_pw": denoised_spatial_new = adain_patchwise_row_batch(denoised_spatial.clone(), y0_adain_spatial.clone().repeat(denoised_spatial.shape[0],1,1,1), sigma=freqsep_sigma, kernel_size=freqsep_kernel_size) denoised_embed = rearrange(denoised_spatial_new, "b c h w -> b (h w) c", h=h_len, w=w_len) elif freqsep_lowpass_method is not None and freqsep_lowpass_method == "distribution": denoised_spatial = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) y0_adain_spatial = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) denoised_spatial_new = adain_patchwise_strict_sortmatch9(denoised_spatial.clone(), y0_adain_spatial.clone().repeat(denoised_spatial.shape[0],1,1,1), kernel_size=freqsep_kernel_size, inner_kernel_size=freqsep_inner_kernel_size, mask=freqsep_mask, stride=freqsep_stride) denoised_embed = rearrange(denoised_spatial_new, "b c h w -> b (h w) c", h=h_len, w=w_len) elif freqsep_lowpass_method is not None: denoised_spatial = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) y0_adain_spatial = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) if freqsep_lowpass_method == "median": denoised_spatial_LP = median_blur_2d(denoised_spatial, kernel_size=freqsep_kernel_size) y0_adain_spatial_LP = median_blur_2d(y0_adain_spatial, 
kernel_size=freqsep_kernel_size) elif freqsep_lowpass_method == "gaussian": denoised_spatial_LP = gaussian_blur_2d(denoised_spatial, sigma=freqsep_sigma, kernel_size=freqsep_kernel_size) y0_adain_spatial_LP = gaussian_blur_2d(y0_adain_spatial, sigma=freqsep_sigma, kernel_size=freqsep_kernel_size) denoised_spatial_HP = denoised_spatial - denoised_spatial_LP if EO("adain_fs_uhp"): y0_adain_spatial_HP = y0_adain_spatial - y0_adain_spatial_LP denoised_spatial_ULP = gaussian_blur_2d(denoised_spatial, sigma=EO("adain_fs_uhp_sigma", 1.0), kernel_size=EO("adain_fs_uhp_kernel_size", 3)) y0_adain_spatial_ULP = gaussian_blur_2d(y0_adain_spatial, sigma=EO("adain_fs_uhp_sigma", 1.0), kernel_size=EO("adain_fs_uhp_kernel_size", 3)) denoised_spatial_UHP = denoised_spatial_HP - denoised_spatial_ULP y0_adain_spatial_UHP = y0_adain_spatial_HP - y0_adain_spatial_ULP #denoised_spatial_HP = y0_adain_spatial_ULP + denoised_spatial_UHP denoised_spatial_HP = denoised_spatial_ULP + y0_adain_spatial_UHP denoised_spatial_new = freqsep_lowpass_weight * y0_adain_spatial_LP + freqsep_highpass_weight * denoised_spatial_HP denoised_embed = rearrange(denoised_spatial_new, "b c h w -> b (h w) c", h=h_len, w=w_len) else: denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed) #denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed) #for adain_iter in range(EO("style_iter", 0)): # denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed) # denoised_embed = (denoised_embed - b) @ torch.linalg.pinv(W.to(pinv_dtype)).T.to(dtype) # denoised_embed = F.linear(denoised_embed .to(W), W, b).to(img) # denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed) elif transformer_options['y0_style_method'] == "WCT": if self.y0_adain_embed is None or self.y0_adain_embed.shape != y0_adain_embed.shape or torch.norm(self.y0_adain_embed - y0_adain_embed) > 0: self.y0_adain_embed = y0_adain_embed f_s = y0_adain_embed[0].clone() self.mu_s = f_s.mean(dim=0, keepdim=True) f_s_centered 
= f_s - self.mu_s cov = (f_s_centered.T.double() @ f_s_centered.double()) / (f_s_centered.size(0) - 1) S_eig, U_eig = torch.linalg.eigh(cov + 1e-5 * torch.eye(cov.size(0), dtype=cov.dtype, device=cov.device)) S_eig_sqrt = S_eig.clamp(min=0).sqrt() # eigenvalues -> singular values whiten = U_eig @ torch.diag(S_eig_sqrt) @ U_eig.T self.y0_color = whiten.to(f_s_centered) for wct_i in range(eps.shape[0]): f_c = denoised_embed[wct_i].clone() mu_c = f_c.mean(dim=0, keepdim=True) f_c_centered = f_c - mu_c cov = (f_c_centered.T.double() @ f_c_centered.double()) / (f_c_centered.size(0) - 1) S_eig, U_eig = torch.linalg.eigh(cov + 1e-5 * torch.eye(cov.size(0), dtype=cov.dtype, device=cov.device)) inv_sqrt_eig = S_eig.clamp(min=0).rsqrt() whiten = U_eig @ torch.diag(inv_sqrt_eig) @ U_eig.T whiten = whiten.to(f_c_centered) f_c_whitened = f_c_centered @ whiten.T f_cs = f_c_whitened @ self.y0_color.T + self.mu_s denoised_embed[wct_i] = f_cs denoised_approx = (denoised_embed - b.to(denoised_embed)) @ torch.linalg.pinv(W).T.to(denoised_embed) denoised_approx = denoised_approx.to(eps) denoised_approx = rearrange(denoised_approx, "b (h w) (c ph pw) -> b c (h ph) (w pw)", h=h_len, w=w_len, ph=2, pw=2)[:,:,:h,:w] eps = (x - denoised_approx) / sigma if not UNCOND: if eps.shape[0] == 2: eps[1] = eps_orig[1] + y0_style_pos_weight * (eps[1] - eps_orig[1]) eps[0] = eps_orig[0] + y0_style_pos_synweight * (eps[0] - eps_orig[0]) else: eps[0] = eps_orig[0] + y0_style_pos_weight * (eps[0] - eps_orig[0]) elif eps.shape[0] == 1 and UNCOND: eps[0] = eps_orig[0] + y0_style_pos_synweight * (eps[0] - eps_orig[0]) #if eps.shape[0] == 2: # eps[1] = eps_orig[1] + y0_style_neg_synweight * (eps[1] - eps_orig[1]) eps = eps.float() #if eps.shape[0] == 2 or (eps.shape[0] == 1): # and UNCOND): if y0_style_neg is not None and y0_style_neg_weight != 0.0: y0_style_neg = y0_style_neg.to(dtype) x = x.to(dtype) eps = eps.to(dtype) eps_orig = eps.clone() sigma = SIGMA #t_orig[0].to(torch.float32) / 1000 denoised = x 
- sigma * eps img = comfy.ldm.common_dit.pad_to_patch_size(denoised, (self.patch_size, self.patch_size)) h_len = ((h + (patch_size // 2)) // patch_size) # h_len 96 w_len = ((w + (patch_size // 2)) // patch_size) # w_len 96 img = rearrange(img, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=patch_size, pw=patch_size) # img 1,9216,64 1,16,128,128 -> 1,4096,64 img_y0_adain = comfy.ldm.common_dit.pad_to_patch_size(y0_style_neg, (self.patch_size, self.patch_size)) img_y0_adain = rearrange(img_y0_adain, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=patch_size, pw=patch_size) # img 1,9216,64 1,16,128,128 -> 1,4096,64 W = self.img_in.weight.data.to(dtype) # shape [2560, 64] b = self.img_in.bias.data.to(dtype) # shape [2560] denoised_embed = F.linear(img .to(W), W, b).to(img) y0_adain_embed = F.linear(img_y0_adain.to(W), W, b).to(img_y0_adain) if transformer_options['y0_style_method'] == "AdaIN": if freqsep_mask is not None: freqsep_mask = freqsep_mask.view(1, 1, *freqsep_mask.shape[-2:]).float() freqsep_mask = F.interpolate(freqsep_mask.float(), size=(h_len, w_len), mode='nearest-exact') if freqsep_lowpass_method is not None and freqsep_lowpass_method.endswith("pw"): #EO("adain_pw"): #if self.y0_adain_embed is None or self.y0_adain_embed.shape != y0_adain_embed.shape or torch.norm(self.y0_adain_embed - y0_adain_embed) > 0: # self.y0_adain_embed = y0_adain_embed # self.adain_pw_cache = None denoised_spatial = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) y0_adain_spatial = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) if freqsep_lowpass_method == "median_alt": denoised_spatial_new = adain_patchwise_row_batch_medblur(denoised_spatial.clone(), y0_adain_spatial.clone(), sigma=freqsep_sigma, kernel_size=freqsep_kernel_size, use_median_blur=True) elif freqsep_lowpass_method == "median_pw": denoised_spatial_new = adain_patchwise_row_batch_realmedblur(denoised_spatial.clone(), y0_adain_spatial.clone(), sigma=freqsep_sigma, 
kernel_size=freqsep_kernel_size, use_median_blur=True, lowpass_weight=freqsep_lowpass_weight, highpass_weight=freqsep_highpass_weight) elif freqsep_lowpass_method == "gaussian_pw": denoised_spatial_new = adain_patchwise_row_batch(denoised_spatial.clone(), y0_adain_spatial.clone(), sigma=freqsep_sigma, kernel_size=freqsep_kernel_size) denoised_embed = rearrange(denoised_spatial_new, "b c h w -> b (h w) c", h=h_len, w=w_len) elif freqsep_lowpass_method is not None and freqsep_lowpass_method == "distribution": denoised_spatial = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) y0_adain_spatial = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) denoised_spatial_new = adain_patchwise_strict_sortmatch9(denoised_spatial.clone(), y0_adain_spatial.clone(), kernel_size=freqsep_kernel_size, inner_kernel_size=freqsep_inner_kernel_size, mask=freqsep_mask, stride=freqsep_stride) denoised_embed = rearrange(denoised_spatial_new, "b c h w -> b (h w) c", h=h_len, w=w_len) elif freqsep_lowpass_method is not None: denoised_spatial = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) y0_adain_spatial = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) if freqsep_lowpass_method == "median": denoised_spatial_LP = median_blur_2d(denoised_spatial, kernel_size=freqsep_kernel_size) y0_adain_spatial_LP = median_blur_2d(y0_adain_spatial, kernel_size=freqsep_kernel_size) elif freqsep_lowpass_method == "gaussian": denoised_spatial_LP = gaussian_blur_2d(denoised_spatial, sigma=freqsep_sigma, kernel_size=freqsep_kernel_size) y0_adain_spatial_LP = gaussian_blur_2d(y0_adain_spatial, sigma=freqsep_sigma, kernel_size=freqsep_kernel_size) denoised_spatial_HP = denoised_spatial - denoised_spatial_LP if EO("adain_fs_uhp"): y0_adain_spatial_HP = y0_adain_spatial - y0_adain_spatial_LP denoised_spatial_ULP = gaussian_blur_2d(denoised_spatial, sigma=EO("adain_fs_uhp_sigma", 1.0), kernel_size=EO("adain_fs_uhp_kernel_size", 3)) 
y0_adain_spatial_ULP = gaussian_blur_2d(y0_adain_spatial, sigma=EO("adain_fs_uhp_sigma", 1.0), kernel_size=EO("adain_fs_uhp_kernel_size", 3)) denoised_spatial_UHP = denoised_spatial_HP - denoised_spatial_ULP y0_adain_spatial_UHP = y0_adain_spatial_HP - y0_adain_spatial_ULP #denoised_spatial_HP = y0_adain_spatial_ULP + denoised_spatial_UHP denoised_spatial_HP = denoised_spatial_ULP + y0_adain_spatial_UHP denoised_spatial_new = freqsep_lowpass_weight * y0_adain_spatial_LP + freqsep_highpass_weight * denoised_spatial_HP denoised_embed = rearrange(denoised_spatial_new, "b c h w -> b (h w) c", h=h_len, w=w_len) else: denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed) #denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed) #for adain_iter in range(EO("style_iter", 0)): # denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed) # denoised_embed = (denoised_embed - b) @ torch.linalg.pinv(W.to(pinv_dtype)).T.to(dtype) # denoised_embed = F.linear(denoised_embed .to(W), W, b).to(img) # denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed) elif transformer_options['y0_style_method'] == "WCT": if self.y0_adain_embed is None or self.y0_adain_embed.shape != y0_adain_embed.shape or torch.norm(self.y0_adain_embed - y0_adain_embed) > 0: self.y0_adain_embed = y0_adain_embed f_s = y0_adain_embed[0].clone() self.mu_s = f_s.mean(dim=0, keepdim=True) f_s_centered = f_s - self.mu_s cov = (f_s_centered.T.double() @ f_s_centered.double()) / (f_s_centered.size(0) - 1) S_eig, U_eig = torch.linalg.eigh(cov + 1e-5 * torch.eye(cov.size(0), dtype=cov.dtype, device=cov.device)) S_eig_sqrt = S_eig.clamp(min=0).sqrt() # eigenvalues -> singular values whiten = U_eig @ torch.diag(S_eig_sqrt) @ U_eig.T self.y0_color = whiten.to(f_s_centered) for wct_i in range(eps.shape[0]): f_c = denoised_embed[wct_i].clone() mu_c = f_c.mean(dim=0, keepdim=True) f_c_centered = f_c - mu_c cov = (f_c_centered.T.double() @ f_c_centered.double()) / (f_c_centered.size(0) - 
def adain_seq_inplace(content: torch.Tensor, style: torch.Tensor, eps: float = 1e-7) -> torch.Tensor:
    """Match the dim-1 mean/std of ``content`` to those of ``style``, in place.

    Statistics are reduced over dimension 1 with ``keepdim=True``; ``eps`` is
    added to both standard deviations before they are used.

    Args:
        content: Tensor that is overwritten with the re-normalised values.
        style:   Tensor providing the target statistics (left unmodified).
        eps:     Offset added to each standard deviation.

    Returns:
        The very same ``content`` tensor object, after in-place modification.
    """
    # Gather every statistic first: ``content`` is mutated below.
    style_mu = style.mean(dim=1, keepdim=True)
    style_sigma = style.std(dim=1, keepdim=True)
    style_sigma += eps
    content_mu = content.mean(dim=1, keepdim=True)
    content_sigma = content.std(dim=1, keepdim=True)
    content_sigma += eps

    # Whiten with the content statistics, then colour with the style statistics.
    content -= content_mu
    content /= content_sigma
    content *= style_sigma
    content += style_mu
    return content
def median_blur_2d(img: torch.Tensor, kernel_size: int = 3) -> torch.Tensor:
    """Apply a square median filter to every channel of a 4-D tensor.

    An even ``kernel_size`` is bumped to the next odd value. The input is
    reflect-padded by half the window so the output has the same spatial size
    as the input.

    Args:
        img:         Tensor unpacked as ``(B, C, H, W)``.
        kernel_size: Side length of the filter window.

    Returns:
        Tensor of shape ``(B, C, H, W)`` holding the per-window medians.
    """
    window = kernel_size if kernel_size % 2 else kernel_size + 1
    border = window // 2
    batch, channels, height, width = img.shape

    padded = F.pad(img, (border,) * 4, mode='reflect')

    # Slide the window over H, then over W: [B, C, H, W, window, window].
    windows = padded.unfold(2, window, 1)
    windows = windows.unfold(3, window, 1)

    # Flatten each window so the median can be taken over a single axis.
    flat_windows = windows.contiguous().view(batch, channels, height, width, -1)
    values, _ = flat_windows.median(dim=-1)
    return values
def adain_patchwise_row_batch_medblur(content: torch.Tensor, style: torch.Tensor, sigma: float = 1.0, kernel_size: int = None, eps: float = 1e-5, mask: torch.Tensor = None, use_median_blur: bool = False) -> torch.Tensor:
    """Per-pixel AdaIN using statistics of the local window around each pixel.

    For every output pixel the centre value of the ``content`` window is
    normalised with the window's own statistics and re-scaled with the
    statistics of the matching ``style`` window:

    * ``use_median_blur=True``: location is the window median, scale is the
      mean absolute deviation from that median (plus ``eps``).
    * ``use_median_blur=False``: location/scale are the Gaussian-weighted
      mean and standard deviation (plus ``eps``); the Gaussian sigma is
      ``sigma`` multiplied by the per-pixel factor derived from ``mask``.

    When ``mask`` is given, a box-blurred copy ``m`` of it yields the blend
    factor ``1 - clamp(4 * m * (1 - m), 0, 1)``; the result is
    ``content * (1 - factor) + stylized * factor``.

    Args:
        content:         ``[B, C, H, W]`` tensor to restyle.
        style:           Tensor of the same shape providing target statistics.
        sigma:           Base Gaussian sigma; also sets the default kernel size.
        kernel_size:     Window side; derived from ``sigma`` when ``None`` and
                         bumped to the next odd value when even.
        eps:             Added to every scale estimate.
        mask:            Optional ``[B, 1, H, W]`` or ``[1, 1, H, W]`` mask.
        use_median_blur: Selects median/MAD statistics instead of Gaussian ones.

    Returns:
        ``[B, C, H, W]`` tensor with the dtype and device of ``content``.
    """
    B, C, H, W = content.shape
    device, dtype = content.device, content.dtype

    if kernel_size is None:
        kernel_size = int(2 * math.ceil(3 * abs(sigma)) + 1)
    if kernel_size % 2 == 0:
        kernel_size += 1

    pad = kernel_size // 2
    center = pad

    content_padded = F.pad(content, (pad, pad, pad, pad), mode='reflect')
    style_padded = F.pad(style, (pad, pad, pad, pad), mode='reflect')
    result = torch.zeros_like(content)

    scaling = torch.ones((B, 1, H, W), device=device, dtype=dtype)
    sigma_scale = torch.ones((H, W), device=device, dtype=torch.float32)
    if mask is not None:
        with torch.no_grad():
            padded_mask = F.pad(mask.float(), (pad, pad, pad, pad), mode="reflect")
            blurred_mask = F.avg_pool2d(padded_mask, kernel_size=kernel_size, stride=1, padding=pad)
            # Explicit end indices: ``pad:-pad`` would yield an empty slice for pad == 0.
            blurred_mask = blurred_mask[..., pad:pad + H, pad:pad + W]
            edge_proximity = blurred_mask * (1.0 - blurred_mask)
            scaling = 1.0 - (edge_proximity / 0.25).clamp(0.0, 1.0)
            # The sigma map is taken from the first mask item/channel only.
            sigma_scale = scaling[0, 0]

    gaussian_table = {}
    if not use_median_blur:
        # One normalised 2-D kernel per distinct sigma factor, built once up front.
        coords = torch.arange(kernel_size, dtype=torch.float64, device=device) - pad
        for s in sigma_scale.unique():
            sig = float((sigma * s + eps).clamp(min=1e-3))
            gauss_local = torch.exp(-0.5 * (coords / sig) ** 2)
            gauss_local = (gauss_local / gauss_local.sum()).to(dtype)
            gaussian_table[s.item()] = gauss_local[:, None] * gauss_local[None, :]

    for i in range(H):
        for j in range(W):
            c_patch = content_padded[:, :, i:i + kernel_size, j:j + kernel_size]
            s_patch = style_padded[:, :, i:i + kernel_size, j:j + kernel_size]
            central = c_patch[:, :, center, center]  # [B, C]

            # Both branches produce [B, C] tensors so the blend below broadcasts
            # the same way for any batch size.
            if use_median_blur:
                c_flat = c_patch.reshape(B, C, -1)
                s_flat = s_patch.reshape(B, C, -1)
                c_loc = c_flat.median(dim=-1).values
                s_loc = s_flat.median(dim=-1).values
                c_scale = (c_flat - c_loc.unsqueeze(-1)).abs().mean(dim=-1) + eps
                s_scale = (s_flat - s_loc.unsqueeze(-1)).abs().mean(dim=-1) + eps
            else:
                w = gaussian_table[float(sigma_scale[i, j].item())]  # [k, k]
                c_loc = (c_patch * w).sum(dim=(-1, -2))
                s_loc = (s_patch * w).sum(dim=(-1, -2))
                c_scale = ((c_patch - c_loc[..., None, None]) ** 2 * w).sum(dim=(-1, -2)).sqrt() + eps
                s_scale = ((s_patch - s_loc[..., None, None]) ** 2 * w).sum(dim=(-1, -2)).sqrt() + eps

            stylized = (central - c_loc) / c_scale * s_scale + s_loc

            # [B, 1] or [1, 1]: broadcasts over channels and over a single-item mask.
            local_scaling = scaling[:, :, i, j]
            result[:, :, i, j] = central * (1 - local_scaling) + stylized * local_scaling

    return result
pad), mode="reflect") blurred_mask = F.avg_pool2d(padded_mask, kernel_size=kernel_size, stride=1, padding=pad) blurred_mask = blurred_mask[..., pad:-pad, pad:-pad] edge_proximity = blurred_mask * (1.0 - blurred_mask) scaling = 1.0 - (edge_proximity / 0.25).clamp(0.0, 1.0) sigma_scale = scaling[0, 0] # assuming single-channel mask broadcasted across B, C if not use_median_blur: coords = torch.arange(kernel_size, dtype=torch.float64, device=device) - pad base_gauss = torch.exp(-0.5 * (coords / sigma) ** 2) base_gauss = (base_gauss / base_gauss.sum()).to(dtype) gaussian_table = {} for s in sigma_scale.unique(): sig = float((sigma * s + eps).clamp(min=1e-3)) gauss_local = torch.exp(-0.5 * (coords / sig) ** 2) gauss_local = (gauss_local / gauss_local.sum()).to(dtype) kernel_2d = gauss_local[:, None] * gauss_local[None, :] gaussian_table[s.item()] = kernel_2d for i in range(H): row_result = torch.zeros(B, C, W, dtype=dtype, device=device) for j in range(W): c_patch = content_padded[:, :, i:i+kernel_size, j:j+kernel_size] s_patch = style_padded[:, :, i:i+kernel_size, j:j+kernel_size] if use_median_blur: # Median blur with residual restoration unfolded_c = c_patch.reshape(B, C, -1) unfolded_s = s_patch.reshape(B, C, -1) c_median = unfolded_c.median(dim=-1, keepdim=True).values s_median = unfolded_s.median(dim=-1, keepdim=True).values center = kernel_size // 2 central = c_patch[:, :, center, center].view(B, C, 1) residual = central - c_median stylized = lowpass_weight * s_median + residual * highpass_weight else: k = gaussian_table[float(sigma_scale[i, j].item())] local_weight = k.view(1, 1, kernel_size, kernel_size).expand(B, C, kernel_size, kernel_size) c_mean = (c_patch * local_weight).sum(dim=(-1, -2), keepdim=True) c_std = ((c_patch - c_mean) ** 2 * local_weight).sum(dim=(-1, -2), keepdim=True).sqrt() + eps s_mean = (s_patch * local_weight).sum(dim=(-1, -2), keepdim=True) s_std = ((s_patch - s_mean) ** 2 * local_weight).sum(dim=(-1, -2), keepdim=True).sqrt() + eps 
def adain_patchwise_strict_sortmatch9(
    content           : torch.Tensor,         # [B,C,H,W]
    style             : torch.Tensor,         # [B,C,H,W]
    kernel_size       : int,
    inner_kernel_size : int = 1,
    stride            : int = 1,
    mask              : torch.Tensor = None   # [B,1,H,W] or [1,1,H,W]
) -> torch.Tensor:
    """Transfer local value distributions from ``style`` to ``content`` by rank matching.

    For each window position (stepped by ``stride``) the ``kernel_size`` x
    ``kernel_size`` window of ``content`` and ``style`` is flattened per
    channel; the sorted style values are placed at the positions given by the
    sort order of the content values. Only the central ``inner_kernel_size``
    square of the matched window is written to the output, clipped to the
    image bounds. Pixels never written keep their ``content`` value.

    Args:
        content:           Tensor to restyle.
        style:             Tensor supplying the values; same shape and dtype.
        kernel_size:       Side of the window used for rank matching.
        inner_kernel_size: Side of the central square that is written back.
        stride:            Step between window positions along H and W.
        mask:              Optional mask; only pixels where channel 0 is
                           non-zero are overwritten. A batch size of 1 is
                           broadcast over the batch of ``content``.

    Returns:
        New ``[B, C, H, W]`` tensor; ``content`` itself is not modified.

    Raises:
        ValueError: If ``inner_kernel_size`` exceeds ``kernel_size``.
    """
    B, C, H, W = content.shape
    if inner_kernel_size > kernel_size:
        raise ValueError(
            f"inner_kernel_size ({inner_kernel_size}) must not exceed kernel_size ({kernel_size})"
        )

    pad = kernel_size // 2
    inner_off = (kernel_size - inner_kernel_size) // 2

    # Reflect-pad so that a full window exists around every pixel.
    cp = F.pad(content, (pad,) * 4, mode='reflect')
    sp = F.pad(style, (pad,) * 4, mode='reflect')
    out = content.clone()

    if mask is not None:
        # [B or 1, 1, H, W] boolean; the size-1 dims broadcast against [B, C, h, w].
        mask = mask[:, :1].bool()

    for i in range(0, H, stride):
        for j in range(0, W, stride):
            flat_c = cp[:, :, i:i + kernel_size, j:j + kernel_size].reshape(B, C, -1)
            flat_s = sp[:, :, i:i + kernel_size, j:j + kernel_size].reshape(B, C, -1)

            # Rank matching: the k-th smallest style value lands where the
            # k-th smallest content value sits.
            _, order = flat_c.sort(dim=-1)
            style_sorted, _ = flat_s.sort(dim=-1)
            matched = torch.zeros_like(flat_c).scatter_(-1, order, style_sorted)
            matched = matched.view(B, C, kernel_size, kernel_size)

            inner = matched[:, :, inner_off:inner_off + inner_kernel_size,
                                  inner_off:inner_off + inner_kernel_size]

            # Destination rectangle in image coordinates, then clipped to the image.
            dst_y0 = i + inner_off - pad
            dst_x0 = j + inner_off - pad
            oy0, ox0 = max(dst_y0, 0), max(dst_x0, 0)
            oy1 = min(dst_y0 + inner_kernel_size, H)
            ox1 = min(dst_x0 + inner_kernel_size, W)
            if oy1 <= oy0 or ox1 <= ox0:
                continue  # nothing of this window falls inside the image

            iy0, ix0 = oy0 - dst_y0, ox0 - dst_x0
            patch = inner[:, :, iy0:iy0 + (oy1 - oy0), ix0:ix0 + (ox1 - ox0)]

            if mask is None:
                out[:, :, oy0:oy1, ox0:ox1] = patch
            else:
                sel = mask[:, :, oy0:oy1, ox0:ox1]
                out[:, :, oy0:oy1, ox0:ox1] = torch.where(sel, patch, out[:, :, oy0:oy1, ox0:ox1])

    return out
def multiply_nested_tensors(structure, scalar):
    """Return a copy of ``structure`` with every tensor multiplied by ``scalar``.

    Lists and dicts are rebuilt recursively (as plain ``list`` / ``dict``);
    tensors are multiplied out of place. Any other value, including tuples,
    is returned unchanged as the same object.
    """
    if isinstance(structure, torch.Tensor):
        return structure * scalar

    if isinstance(structure, list):
        scaled_items = []
        for element in structure:
            scaled_items.append(multiply_nested_tensors(element, scalar))
        return scaled_items

    if isinstance(structure, dict):
        scaled_map = {}
        for name, entry in structure.items():
            scaled_map[name] = multiply_nested_tensors(entry, scalar)
        return scaled_map

    # Not a tensor or a supported container: pass through untouched.
    return structure
class CLIPTextEncodeFluxUnguided:
    """Node that encodes separate ``clip_l`` and ``t5xxl`` prompts.

    Besides the conditioning it reports, for each tokenizer, the index of the
    first end token found in the first token row (0 when none is found).
    """

    # Token ids searched for in the first row of each tokenizer's output.
    # NOTE(review): presumably the CLIP-L end-of-text id and the T5 EOS id.
    _CLIP_L_END_TOKEN = 49407
    _T5XXL_END_TOKEN = 1

    @classmethod
    def INPUT_TYPES(cls):
        return {"required": {
            "clip"  : ("CLIP", ),
            "clip_l": ("STRING", {"multiline": True, "dynamicPrompts": True}),
            "t5xxl" : ("STRING", {"multiline": True, "dynamicPrompts": True}),
            }}
    RETURN_TYPES = ("CONDITIONING","INT","INT",)
    RETURN_NAMES = ("conditioning", "clip_l_end", "t5xxl_end",)
    FUNCTION = "encode"
    CATEGORY = "RES4LYF/conditioning"
    EXPERIMENTAL = True

    @staticmethod
    def _first_token_index(token_row, token_id):
        """Return the index of the first entry whose id (element 0) equals ``token_id``, else 0."""
        return next((idx for idx, entry in enumerate(token_row) if entry[0] == token_id), 0)

    def encode(self, clip, clip_l, t5xxl):
        """Tokenize both prompts, encode them, and locate the end tokens.

        Args:
            clip:   Object providing ``tokenize(text)`` and
                    ``encode_from_tokens(tokens, return_pooled, return_dict)``.
            clip_l: Prompt whose tokenization supplies every key except ``t5xxl``.
            t5xxl:  Prompt whose ``"t5xxl"`` tokens replace those of ``clip_l``.

        Returns:
            ``(conditioning, clip_l_end, t5xxl_end)`` where ``conditioning`` is
            ``[[cond, extras]]`` and ``extras`` also carries both end indices.
        """
        tokens = clip.tokenize(clip_l)
        tokens["t5xxl"] = clip.tokenize(t5xxl)["t5xxl"]

        clip_l_end = self._first_token_index(tokens['l'][0], self._CLIP_L_END_TOKEN)
        # Scan the T5 row over its own length: bounding the scan by the CLIP-L
        # row length missed end tokens past that length and raised IndexError
        # when the T5 row was shorter.
        t5xxl_end = self._first_token_index(tokens['t5xxl'][0], self._T5XXL_END_TOKEN)

        output = clip.encode_from_tokens(tokens, return_pooled=True, return_dict=True)
        cond = output.pop("cond")
        conditioning = [[cond, output]]
        conditioning[0][1]['clip_l_end'] = clip_l_end
        conditioning[0][1]['t5xxl_end'] = t5xxl_end
        return (conditioning, clip_l_end, t5xxl_end,)
class ConditioningAdd:
    """Add a scaled conditioning onto another: ``cond_1 + multiplier * cond_2``.

    Only the first entry of each conditioning takes part in the sum; any
    further entries of ``conditioning_1`` are passed through unchanged.
    """

    @classmethod
    def INPUT_TYPES(cls):
        return {"required": {"conditioning_1": ("CONDITIONING", ),
                             "conditioning_2": ("CONDITIONING", ),
                             "multiplier": ("FLOAT", {"default": 1.0, "min": -1000000000.0, "max": 1000000000.0, "step": 0.01})
                             }}

    RETURN_TYPES = ("CONDITIONING",)
    RETURN_NAMES = ("conditioning",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/conditioning"

    def main(self, conditioning_1, conditioning_2, multiplier):
        """Return a new conditioning whose first entry is the weighted sum.

        Args:
            conditioning_1: base conditioning (list of ``[tensor, dict]`` entries).
            conditioning_2: conditioning whose first entry is added.
            multiplier: scale applied to ``conditioning_2`` before adding.

        Returns:
            One-element tuple holding the new conditioning list.
        """
        # Build fresh containers instead of using `+=` on the inputs: the
        # inputs belong to upstream nodes, and modifying them in place would
        # change what every other consumer of those outputs sees.
        out = [list(entry) for entry in conditioning_1]
        out[0][1] = dict(out[0][1])

        base = conditioning_1[0][0]
        # Keep the dtype of conditioning_1, as the in-place add used to.
        out[0][0] = (base + multiplier * conditioning_2[0][0]).to(base.dtype)

        pooled_1 = conditioning_1[0][1].get('pooled_output')
        pooled_2 = conditioning_2[0][1].get('pooled_output')
        # Models without a pooled embedding have nothing to add here.
        if pooled_1 is not None and pooled_2 is not None:
            out[0][1]['pooled_output'] = (pooled_1 + multiplier * pooled_2).to(pooled_1.dtype)

        return (out,)
class ConditioningSetTimestepRange:
    """Node that writes ``start_percent`` / ``end_percent`` values onto a conditioning."""

    RETURN_TYPES = ("CONDITIONING",)
    RETURN_NAMES = ("conditioning",)
    FUNCTION     = "set_range"
    CATEGORY     = "RES4LYF/conditioning"

    @classmethod
    def INPUT_TYPES(cls):
        def percent_widget(default):
            # Both bounds share the same 0..1 float widget; only the default differs.
            return ("FLOAT", {"default": default, "min": 0.0, "max": 1.0, "step": 0.001})

        required = {
            "conditioning": ("CONDITIONING", ),
            "start": percent_widget(0.0),
            "end": percent_widget(1.0),
        }
        return {"required": required}

    def set_range(self, conditioning, start, end):
        """Return the conditioning with ``start`` / ``end`` stored as percent values."""
        window = {"start_percent": start, "end_percent": end}
        return (node_helpers.conditioning_set_values(conditioning, window), )
class Conditioning_Recast64:
    """Recast the first entry of one or two conditionings to float64.

    The embedding tensor and, when present, the ``pooled_output`` tensor of
    entry 0 are converted. The conversion is written back into the given
    conditioning objects, which are also returned.
    """

    @classmethod
    def INPUT_TYPES(cls):
        return {"required": { "cond_0": ("CONDITIONING",), },
                "optional": { "cond_1": ("CONDITIONING",),}
                }

    RETURN_TYPES = ("CONDITIONING","CONDITIONING",)
    RETURN_NAMES = ("cond_0_recast","cond_1_recast",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/precision"
    EXPERIMENTAL = True

    @staticmethod
    def _recast_first_entry_(cond):
        """Convert entry 0 of cond (embedding and optional pooled_output) to float64 in place."""
        cond[0][0] = cond[0][0].to(torch.float64)
        if 'pooled_output' in cond[0][1]:
            cond[0][1]["pooled_output"] = cond[0][1]["pooled_output"].to(torch.float64)

    @precision_tool.cast_tensor
    def main(self, cond_0, cond_1 = None):
        """Recast cond_0 and, if given, cond_1.

        Returns:
            ``(cond_0, cond_1)``; ``cond_1`` is ``None`` when it was not supplied.
        """
        self._recast_first_entry_(cond_0)
        if cond_1 is not None:
            # Each conditioning is checked against its own dict. The previous
            # code tested cond_0's dict here, which raised KeyError when only
            # cond_0 had a pooled_output and skipped the cast when only cond_1 did.
            self._recast_first_entry_(cond_1)
        return (cond_0, cond_1,)
class ConditioningBatch4:
    """Collect up to four conditionings into a single list, skipping unconnected optional inputs."""

    RETURN_TYPES = ("CONDITIONING",)
    RETURN_NAMES = ("conditioning",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/conditioning"

    @classmethod
    def INPUT_TYPES(cls):
        optional_slots = {f"conditioning_{slot}": ("CONDITIONING",) for slot in range(1, 4)}
        return {
            "required": {"conditioning_0": ("CONDITIONING",)},
            "optional": optional_slots,
        }

    def main(self, conditioning_0, conditioning_1=None, conditioning_2=None, conditioning_3=None, ):
        """Return a one-tuple holding ``[conditioning_0, ...]`` with every non-None optional input appended in order."""
        optional_inputs = (conditioning_1, conditioning_2, conditioning_3)
        batch = [conditioning_0, *(item for item in optional_inputs if item is not None)]
        return (batch, )
class EmptyConditioningGenerator:
    """Build all-zero conditionings sized for a model or to match an existing conditioning.

    Sizes come either from a hard-coded table keyed on the model's
    ``model_config`` type (``model=...``) or from the shapes found in the first
    entry of a reference conditioning (``conditioning=...``). ``model`` takes
    precedence when both are given.
    """

    def __init__(self, model=None, conditioning=None, device=None, dtype=None):
        """ device, dtype currently unused """
        # Set on every construction path so get_empty_conditioning() can rely
        # on them. Previously the conditioning path never defined llama3_shape,
        # which made get_empty_conditioning() raise AttributeError.
        self.llama3_shape = None
        self.pooled_len   = 0

        if model is not None:
            self.device = device
            self.dtype  = dtype

            # Local import: only needed (and only available) inside ComfyUI.
            import comfy.supported_models
            self.model_config = model.model.model_config

            if isinstance(self.model_config, comfy.supported_models.SD3):
                self.text_len_base = 154
                self.text_channels = 4096
                self.pooled_len    = 2048
            elif isinstance(self.model_config, (comfy.supported_models.Flux, comfy.supported_models.FluxSchnell, comfy.supported_models.Chroma)):
                self.text_len_base = 256
                self.text_channels = 4096
                self.pooled_len    = 768
            elif isinstance(self.model_config, comfy.supported_models.AuraFlow):
                self.text_len_base = 256
                self.text_channels = 2048
            elif isinstance(self.model_config, comfy.supported_models.Stable_Cascade_C):
                self.text_len_base = 77
                self.text_channels = 1280
                self.pooled_len    = 1280
            elif isinstance(self.model_config, comfy.supported_models.WAN21_T2V) or isinstance(self.model_config, comfy.supported_models.WAN21_I2V):
                self.text_len_base = 512
                self.text_channels = 5120  # sometimes needs to be 4096, like when initializing in samplers_py in shark?
            elif isinstance(self.model_config, comfy.supported_models.HiDream):
                self.text_len_base = 128
                self.text_channels = 4096
                self.pooled_len    = 2048
                self.llama3_shape  = torch.Size([1,32,128,4096])
            elif isinstance(self.model_config, comfy.supported_models.LTXV):
                self.text_len_base = 128
                self.text_channels = 4096
            elif isinstance(self.model_config, comfy.supported_models.SD15):
                self.text_len_base = 77
                self.text_channels = 768
                self.pooled_len    = 768
            elif isinstance(self.model_config, comfy.supported_models.SDXL):
                self.text_len_base = 77
                self.text_channels = 2048
                self.pooled_len    = 1280
            elif isinstance(self.model_config, comfy.supported_models.HunyuanVideo) or \
                 isinstance(self.model_config, comfy.supported_models.HunyuanVideoI2V) or \
                 isinstance(self.model_config, comfy.supported_models.HunyuanVideoSkyreelsI2V):
                self.text_len_base = 128
                self.text_channels = 4096
            else:
                raise ValueError(f"Unknown model config: {type(self.model_config)}")

        elif conditioning is not None:
            reference = conditioning[0][0]
            extras    = conditioning[0][1]

            self.device        = reference.device
            self.dtype         = reference.dtype
            self.text_len_base = reference.shape[-2]
            self.text_channels = reference.shape[-1]

            pooled = extras.get('pooled_output')
            if pooled is not None:
                self.pooled_len = pooled.shape[-1]

            # Mirror the reference's llama3 tensor shape when it carries one.
            llama3 = extras.get('conditioning_llama3')
            if llama3 is not None:
                self.llama3_shape = llama3.shape

    def get_empty_conditioning(self):
        """Return a single-entry conditioning ``[[zeros, options]]``.

        ``options`` holds a zero ``pooled_output`` when a pooled length is
        known, plus a zero ``conditioning_llama3`` when a llama3 shape is known
        as well; otherwise it is an empty dict. Tensors use torch's default
        dtype and device.
        """
        options = {}
        if self.pooled_len > 0:
            options['pooled_output'] = torch.zeros((1, self.pooled_len))
            if self.llama3_shape is not None:
                options['conditioning_llama3'] = torch.zeros(self.llama3_shape)

        # Always emit the [tensor, dict] pair so that consumers indexing
        # cond[0][1] keep working for models without a pooled embedding.
        return [[
            torch.zeros((1, self.text_len_base, self.text_channels)),
            options,
        ]]

    def get_empty_conditionings(self, count):
        """Return ``count`` independently allocated empty conditionings."""
        return [self.get_empty_conditioning() for _ in range(count)]

    def zero_none_conditionings_(self, *conds):
        """Replace every ``None`` among the given conditionings with an empty conditioning.

        Accepts either several conditionings as separate arguments or a single
        list/tuple of them. A list is updated in place and returned; any other
        input is copied into a new list first, since a tuple cannot be assigned into.
        """
        if len(conds) == 1 and isinstance(conds[0], (list, tuple)):
            conds = conds[0]
        if not isinstance(conds, list):
            conds = list(conds)

        for i, cond in enumerate(conds):
            if cond is None:
                conds[i] = self.get_empty_conditioning()
        return conds
class TemporalSplitAttnMask_Midframe:
    """Build a stacked pair of per-frame step masks (cross-attention first, self-attention second).

    Frame counts are divided by 4 before use. Each mask is 1.0 before its
    midframe index and 0.0 from it onward, optionally inverted.
    """

    RETURN_TYPES = ("MASK",)
    RETURN_NAMES = ("temporal_mask",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/masks"
    EXPERIMENTAL = True

    @classmethod
    def INPUT_TYPES(cls):
        def frame_widget(default):
            return ("INT", {"default": default, "min": 1, "step": 4, "max": 0xffffffffffffffff})

        def toggle():
            return ("BOOLEAN", {"default": False})

        return {
            "required": {
                "self_attn_midframe":  frame_widget(33),
                "cross_attn_midframe": frame_widget(33),
                "self_attn_invert":    toggle(),
                "cross_attn_invert":   toggle(),
                "frames":              frame_widget(65),
            },
            "optional": {
            },
        }

    @staticmethod
    def _step_mask(length, midframe, invert):
        """Return a (length, 2, 2) mask: ones before ``midframe // 4``, zeros after; flipped if ``invert``."""
        mask = torch.ones((length, 2, 2))
        mask[midframe // 4:, ...] = 0.0
        return 1 - mask if invert else mask

    def main(self,
             self_attn_midframe  = 33,
             cross_attn_midframe = 33,
             self_attn_invert    = False,
             cross_attn_invert   = False,
             frames              = 65,
             ):
        """Return ``(masks,)`` where ``masks`` has shape ``(2, frames // 4 + 1, 2, 2)``."""
        length     = frames // 4 + 1
        self_mask  = self._step_mask(length, self_attn_midframe,  self_attn_invert)
        cross_mask = self._step_mask(length, cross_attn_midframe, cross_attn_invert)
        # Stack order matters to consumers: index 0 is cross-attention, index 1 is self-attention.
        return (torch.stack([cross_mask, self_mask]),)
class TemporalCrossAttnMask:
    """Build a stacked (cross, self) temporal mask pair in which only the cross-attention mask is populated."""

    RETURN_TYPES = ("MASK",)
    RETURN_NAMES = ("temporal_mask",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/masks"

    @classmethod
    def INPUT_TYPES(cls):
        def frame_widget(default):
            return ("INT", {"default": default, "min": 1, "step": 4, "max": 0xffffffffffffffff})

        return {
            "required": {
                "cross_attn_start": frame_widget(1),
                "cross_attn_stop":  frame_widget(33),
            },
            "optional": {
            },
        }

    def main(self,
             cross_attn_start = 0,
             cross_attn_stop  = 33,
             ):
        """Return ``(masks,)`` of shape ``(2, cross_attn_stop // 4 + 1, 1, 1)``.

        ``masks[0]`` is 1.0 from index ``cross_attn_start // 4`` up to the end
        and 0.0 before it; ``masks[1]`` is all zeros.
        """
        first  = cross_attn_start // 4
        length = cross_attn_stop  // 4 + 1

        cross_mask = torch.zeros((length, 1, 1))
        cross_mask[first:length, ...] = 1.0

        # The self-attention slot is only a placeholder so the result has the
        # same stacked layout as the split-attention mask nodes.
        self_placeholder = torch.zeros((length, 1, 1))

        return (torch.stack([cross_mask, self_placeholder]),)
"region_bleed": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.01}), "region_bleed_start_step": ("INT", {"default": 0, "min": 0, "max": 10000}), "weight_scheduler": (["constant"] + get_res4lyf_scheduler_list(), {"default": "constant"},), "start_step": ("INT", {"default": 0, "min": 0, "max": 10000}), "end_step": ("INT", {"default": -1, "min": -1, "max": 10000}), "mask_type": (REG_MASK_TYPE_AB, {"default": "boolean"}), "edge_width": ("INT", {"default": 0, "min": 0, "max": 10000}), "invert_mask": ("BOOLEAN", {"default": False}), }, "optional": { "conditioning_A": ("CONDITIONING", ), "conditioning_B": ("CONDITIONING", ), "mask_A": ("MASK", ), "mask_B": ("MASK", ), "weights": ("SIGMAS", ), "region_bleeds": ("SIGMAS", ), } } RETURN_TYPES = ("CONDITIONING",) RETURN_NAMES = ("conditioning",) FUNCTION = "main" CATEGORY = "RES4LYF/conditioning" def create_callback(self, **kwargs): def callback(model): kwargs["model"] = model pos_cond, = self.prepare_regional_cond(**kwargs) return pos_cond return callback def main(self, weight : float = 1.0, start_sigma : float = 0.0, end_sigma : float = 1.0, weight_scheduler = None, start_step : int = 0, end_step : int = -1, conditioning_A = None, conditioning_B = None, weights : Tensor = None, region_bleeds : Tensor = None, region_bleed : float = 0.0, region_bleed_start_step : int = 0, mask_type : str = "boolean", edge_width : int = 0, mask_A = None, mask_B = None, invert_mask : bool = False ) -> Tuple[Tensor]: mask = mask_A unmask = mask_B if end_step == -1: end_step = MAX_STEPS callback = self.create_callback(weight = weight, start_sigma = start_sigma, end_sigma = end_sigma, weight_scheduler = weight_scheduler, start_step = start_step, end_step = end_step, weights = weights, region_bleeds = region_bleeds, region_bleed = region_bleed, region_bleed_start_step = region_bleed_start_step, mask_type = mask_type, edge_width = edge_width, mask = mask, unmask = unmask, invert_mask = invert_mask, conditioning_A = 
conditioning_A, conditioning_B = conditioning_B, ) cond = zero_conditioning_from_list([conditioning_A, conditioning_B]) cond[0][1]['callback_regional'] = callback return (cond,) def prepare_regional_cond(self, model, weight : float = 1.0, start_sigma : float = 0.0, end_sigma : float = 1.0, weight_scheduler = None, start_step : int = 0, end_step : int = -1, conditioning_A = None, conditioning_B = None, weights : Tensor = None, region_bleeds : Tensor = None, region_bleed : float = 0.0, region_bleed_start_step : int = 0, mask_type : str = "gradient", edge_width : int = 0, mask = None, unmask = None, invert_mask : bool = False, ) -> Tuple[Tensor]: default_dtype = torch.float64 default_device = torch.device("cuda") if end_step == -1: end_step = MAX_STEPS if weights is None and weight_scheduler != "constant": total_steps = end_step - start_step weights = get_sigmas(model, weight_scheduler, total_steps, 1.0).to(dtype=default_dtype, device=default_device) #/ model.inner_model.inner_model.model_sampling.sigma_max #scaling doesn't matter as this is a flux-only node prepend = torch.zeros(start_step, dtype=default_dtype, device=default_device) weights = torch.cat((prepend, weights), dim=0) if invert_mask and mask is not None: mask = 1-mask unmask = 1-unmask floor, floors = region_bleed, region_bleeds weights = initialize_or_scale(weights, weight, end_step).to(default_dtype).to(default_device) weights = F.pad(weights, (0, MAX_STEPS), value=0.0) prepend = torch.full((region_bleed_start_step,), 0.0, dtype=default_dtype, device=default_device) floors = initialize_or_scale(floors, floor, end_step).to(default_dtype).to(default_device) floors = F.pad(floors, (0, MAX_STEPS), value=0.0) floors = torch.cat((prepend, floors), dim=0) if (conditioning_A is None) and (conditioning_B is None): cond = None elif mask is not None: EmptyCondGen = EmptyConditioningGenerator(model) conditioning_A, conditioning_B = EmptyCondGen.zero_none_conditionings_([conditioning_A, conditioning_B]) cond = 
copy.deepcopy(conditioning_A) if isinstance(model.model.model_config, (comfy.supported_models.WAN21_T2V, comfy.supported_models.WAN21_I2V)): if model.model.diffusion_model.blocks[0].self_attn.winderz_type != "false": AttnMask = CrossAttentionMask(mask_type, edge_width) else: AttnMask = SplitAttentionMask(mask_type, edge_width) elif isinstance(model.model.model_config, comfy.supported_models.HiDream): AttnMask = FullAttentionMaskHiDream(mask_type, edge_width) elif isinstance(model.model.model_config, (comfy.supported_models.SDXL, comfy.supported_models.SD15, comfy.supported_models.Stable_Cascade_C)): AttnMask = SplitAttentionMask(mask_type, edge_width) else: AttnMask = FullAttentionMask(mask_type, edge_width) RegContext = RegionalContext() if isinstance(model.model.model_config, comfy.supported_models.HiDream): AttnMask.add_region_sizes( [ conditioning_A[0][0].shape[-2], conditioning_A[0][1]['conditioning_llama3'][0,0,...].shape[-2], conditioning_A[0][1]['conditioning_llama3'][0,0,...].shape[-2], ], mask) AttnMask.add_region_sizes( [ conditioning_B[0][0].shape[-2], conditioning_B[0][1]['conditioning_llama3'][0,0,...].shape[-2], conditioning_B[0][1]['conditioning_llama3'][0,0,...].shape[-2], ], unmask) RegContext.add_region_llama3(conditioning_A[0][1]['conditioning_llama3']) RegContext.add_region_llama3(conditioning_B[0][1]['conditioning_llama3']) else: AttnMask.add_region(conditioning_A[0][0], mask) AttnMask.add_region(conditioning_B[0][0], unmask) RegContext.add_region(conditioning_A[0][0], conditioning_A[0][1].get('pooled_output')) RegContext.add_region(conditioning_B[0][0], conditioning_B[0][1].get('pooled_output')) if 'clip_vision_output' in conditioning_A[0][1]: # For WAN... 
dicey results RegContext.add_region_clip_fea(conditioning_A[0][1]['clip_vision_output'].penultimate_hidden_states) RegContext.add_region_clip_fea(conditioning_B[0][1]['clip_vision_output'].penultimate_hidden_states) if 'unclip_conditioning' in conditioning_A[0][1]: RegContext.add_region_clip_fea(conditioning_A[0][1]['unclip_conditioning'][0]['clip_vision_output'].image_embeds) #['penultimate_hidden_states']) if 'unclip_conditioning' in conditioning_B[0][1]: RegContext.add_region_clip_fea(conditioning_B[0][1]['unclip_conditioning'][0]['clip_vision_output'].image_embeds) #['penultimate_hidden_states']) cond[0][1]['AttnMask'] = AttnMask cond[0][1]['RegContext'] = RegContext cond = merge_with_base(base=cond, others=[conditioning_A, conditioning_B]) if 'pooled_output' in cond[0][1] and cond[0][1]['pooled_output'] is not None: cond[0][1]['pooled_output'] = (conditioning_A[0][1]['pooled_output'] + conditioning_B[0][1]['pooled_output']) / 2 #if 'conditioning_llama3' in cond[0][1] and cond[0][1]['conditioning_llama3'] is not None: # cond[0][1]['conditioning_llama3'] = (conditioning_A[0][1]['conditioning_llama3'] + conditioning_B[0][1]['conditioning_llama3']) / 2 #cond[0] = list(cond[0]) #cond[0][0] = (conditioning_A[0][0] + conditioning_B[0][0]) / 2 #cond[0] = tuple(cond[0]) else: cond = conditioning_A cond[0][1]['RegParam'] = RegionalParameters(weights, floors) return (cond,) class ClownRegionalConditioning_ABC: @classmethod def INPUT_TYPES(cls): return { "required": { "weight": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.01}), "region_bleed": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.01}), "region_bleed_start_step": ("INT", {"default": 0, "min": 0, "max": 10000}), "weight_scheduler": (["constant"] + get_res4lyf_scheduler_list(), {"default": "constant"},), "start_step": ("INT", {"default": 0, "min": 0, "max": 10000}), "end_step": ("INT", {"default": 100, "min": -1, "max": 10000}), "mask_type": (REG_MASK_TYPE_ABC, 
{"default": "boolean"}), "edge_width": ("INT", {"default": 0, "min": 0, "max": 10000}), "invert_mask": ("BOOLEAN", {"default": False}), }, "optional": { "conditioning_A": ("CONDITIONING", ), "conditioning_B": ("CONDITIONING", ), "conditioning_C": ("CONDITIONING", ), "mask_A": ("MASK", ), "mask_B": ("MASK", ), "mask_C": ("MASK", ), "weights": ("SIGMAS", ), "region_bleeds": ("SIGMAS", ), } } RETURN_TYPES = ("CONDITIONING",) RETURN_NAMES = ("conditioning",) FUNCTION = "main" CATEGORY = "RES4LYF/conditioning" def create_callback(self, **kwargs): def callback(model): kwargs["model"] = model pos_cond, = self.prepare_regional_cond(**kwargs) return pos_cond return callback def main(self, weight : float = 1.0, start_sigma : float = 0.0, end_sigma : float = 1.0, weight_scheduler = None, start_step : int = 0, end_step : int = -1, conditioning_A = None, conditioning_B = None, conditioning_C = None, weights : Tensor = None, region_bleeds : Tensor = None, region_bleed : float = 0.0, region_bleed_start_step : int = 0, mask_type : str = "boolean", edge_width : int = 0, mask_A = None, mask_B = None, mask_C = None, invert_mask : bool = False ) -> Tuple[Tensor]: if end_step == -1: end_step = MAX_STEPS callback = self.create_callback(weight = weight, start_sigma = start_sigma, end_sigma = end_sigma, weight_scheduler = weight_scheduler, start_step = start_step, end_step = end_step, weights = weights, region_bleeds = region_bleeds, region_bleed = region_bleed, region_bleed_start_step = region_bleed_start_step, mask_type = mask_type, edge_width = edge_width, mask_A = mask_A, mask_B = mask_B, mask_C = mask_C, invert_mask = invert_mask, conditioning_A = conditioning_A, conditioning_B = conditioning_B, conditioning_C = conditioning_C, ) cond = zero_conditioning_from_list([conditioning_A, conditioning_B, conditioning_C]) cond[0][1]['callback_regional'] = callback return (cond,) def prepare_regional_cond(self, model, weight : float = 1.0, start_sigma : float = 0.0, end_sigma : float = 1.0, 
weight_scheduler = None, start_step : int = 0, end_step : int = -1, conditioning_A = None, conditioning_B = None, conditioning_C = None, weights : Tensor = None, region_bleeds : Tensor = None, region_bleed : float = 0.0, region_bleed_start_step : int = 0, mask_type : str = "boolean", edge_width : int = 0, mask_A = None, mask_B = None, mask_C = None, invert_mask : bool = False, ) -> Tuple[Tensor]: default_dtype = torch.float64 default_device = torch.device("cuda") if end_step == -1: end_step = MAX_STEPS if weights is None and weight_scheduler != "constant": total_steps = end_step - start_step weights = get_sigmas(model, weight_scheduler, total_steps, 1.0).to(dtype=default_dtype, device=default_device) #/ model.inner_model.inner_model.model_sampling.sigma_max #scaling doesn't matter as this is a flux-only node prepend = torch.zeros(start_step, dtype=default_dtype, device=default_device) weights = torch.cat((prepend, weights), dim=0) if invert_mask and mask_A is not None: mask_A = 1-mask_A if invert_mask and mask_B is not None: mask_B = 1-mask_B mask_AB_inv = mask_C if invert_mask and mask_AB_inv is not None: mask_AB_inv = 1-mask_AB_inv floor, floors = region_bleed, region_bleeds weights = initialize_or_scale(weights, weight, end_step).to(default_dtype) weights = F.pad(weights, (0, MAX_STEPS), value=0.0) prepend = torch.full((region_bleed_start_step,), 0.0, dtype=default_dtype, device=default_device) floors = initialize_or_scale(floors, floor, end_step).to(default_dtype).to(default_device) floors = F.pad(floors, (0, MAX_STEPS), value=0.0) floors = torch.cat((prepend, floors), dim=0) if (conditioning_A is None) and (conditioning_B is None) and (conditioning_C is None): conditioning = None elif mask_A is not None: EmptyCondGen = EmptyConditioningGenerator(model) conditioning_A, conditioning_B, conditioning_C = EmptyCondGen.zero_none_conditionings_([conditioning_A, conditioning_B, conditioning_C]) conditioning = copy.deepcopy(conditioning_A) if 
isinstance(model.model.model_config, (comfy.supported_models.WAN21_T2V, comfy.supported_models.WAN21_I2V)): if model.model.diffusion_model.blocks[0].self_attn.winderz_type != "false": AttnMask = CrossAttentionMask(mask_type, edge_width) else: AttnMask = SplitAttentionMask(mask_type, edge_width) elif isinstance(model.model.model_config, comfy.supported_models.HiDream): AttnMask = FullAttentionMaskHiDream(mask_type, edge_width) elif isinstance(model.model.model_config, (comfy.supported_models.SDXL, comfy.supported_models.SD15, comfy.supported_models.Stable_Cascade_C)): AttnMask = SplitAttentionMask(mask_type, edge_width) else: AttnMask = FullAttentionMask(mask_type, edge_width) RegContext = RegionalContext() if isinstance(model.model.model_config, comfy.supported_models.HiDream): AttnMask.add_region_sizes( [ conditioning_A[0][0].shape[-2], conditioning_A[0][1]['conditioning_llama3'][0,0,...].shape[-2], conditioning_A[0][1]['conditioning_llama3'][0,0,...].shape[-2], ], mask_A) AttnMask.add_region_sizes( [ conditioning_B[0][0].shape[-2], conditioning_B[0][1]['conditioning_llama3'][0,0,...].shape[-2], conditioning_B[0][1]['conditioning_llama3'][0,0,...].shape[-2], ], mask_B) AttnMask.add_region_sizes( [ conditioning_C[0][0].shape[-2], conditioning_C[0][1]['conditioning_llama3'][0,0,...].shape[-2], conditioning_C[0][1]['conditioning_llama3'][0,0,...].shape[-2], ], mask_AB_inv) RegContext.add_region_llama3(conditioning_A[0][1]['conditioning_llama3']) RegContext.add_region_llama3(conditioning_B[0][1]['conditioning_llama3']) RegContext.add_region_llama3(conditioning_C[0][1]['conditioning_llama3']) else: AttnMask.add_region(conditioning_A[0][0], mask_A) AttnMask.add_region(conditioning_B[0][0], mask_B) AttnMask.add_region(conditioning_C[0][0], mask_AB_inv) RegContext.add_region(conditioning_A[0][0], conditioning_A[0][1].get('pooled_output')) RegContext.add_region(conditioning_B[0][0], conditioning_B[0][1].get('pooled_output')) RegContext.add_region(conditioning_C[0][0], 
class ClownRegionalConditioning2(ClownRegionalConditioning_AB):
    """Two-region node: one conditioning inside `mask`, another outside it.

    Thin adapter over ClownRegionalConditioning_AB: the masked/unmasked inputs
    are forwarded as regions A/B, with region B's mask being the complement
    of `mask`.
    """

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "weight":                  ("FLOAT",   {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.01}),
                "region_bleed":            ("FLOAT",   {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.01}),
                "region_bleed_start_step": ("INT",     {"default": 0, "min": 0, "max": 10000}),
                "weight_scheduler":        (["constant"] + get_res4lyf_scheduler_list(), {"default": "constant"},),
                "start_step":              ("INT",     {"default": 0, "min": 0, "max": 10000}),
                "end_step":                ("INT",     {"default": -1, "min": -1, "max": 10000}),
                "mask_type":               (REG_MASK_TYPE_2, {"default": "boolean"}),
                "edge_width":              ("INT",     {"default": 0, "min": -10000, "max": 10000}),
                "invert_mask":             ("BOOLEAN", {"default": False}),
            },
            "optional": {
                "conditioning_masked":   ("CONDITIONING", ),
                "conditioning_unmasked": ("CONDITIONING", ),
                "mask":                  ("MASK", ),
                "weights":               ("SIGMAS", ),
                "region_bleeds":         ("SIGMAS", ),
            }
        }

    def main(self,
            conditioning_masked   = None,
            conditioning_unmasked = None,
            mask                  = None,
            **kwargs
            ):
        """Forward to the A/B implementation with region B = complement of `mask`.

        All three inputs are declared "optional" in INPUT_TYPES, so they default
        to None here instead of raising a missing-argument TypeError when a
        socket is left unconnected.
        NOTE(review): assumes the parent `main` accepts None for these inputs,
        as its visible fallback branch suggests -- confirm against
        ClownRegionalConditioning_AB.
        """
        if mask is None:
            # Nothing to complement; let the parent handle the unmasked case.
            mask_B = None
        elif mask.dtype == torch.bool:
            # `1 - mask` is not defined for bool tensors; use logical negation.
            mask_B = ~mask
        else:
            mask_B = 1 - mask

        return super().main(
            conditioning_A = conditioning_masked,
            conditioning_B = conditioning_unmasked,
            mask_A         = mask,
            mask_B         = mask_B,
            **kwargs
        )
class ClownRegionalConditioning:
    """Chainable node that appends one (conditioning, mask) region to a region list.

    Each region is a dict with the keys 'use_self_attn_mask', 'edge_width',
    'conditioning' and 'mask'. The accumulated list is emitted as COND_REGIONS.
    """

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "spineless":  ("BOOLEAN", {"default": False}),
                "edge_width": ("INT",     {"default": 0, "min": -10000, "max": 10000}),
            },
            "optional": {
                "cond_regions": ("COND_REGIONS", ),
                "conditioning": ("CONDITIONING", ),
                "mask":         ("MASK", ),
            },
        }

    RETURN_TYPES = ("COND_REGIONS",)
    RETURN_NAMES = ("cond_regions",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/conditioning"

    def main(self,
            spineless    = False,
            edge_width   = 0,
            cond_regions = None,
            conditioning = None,
            mask         = None,
            ):
        """Return a copy of `cond_regions` with one more region appended.

        When `mask` is None the new region covers whatever the existing regions
        leave uncovered: ones minus every earlier mask (clamped at 0) for
        numeric masks, or the logical complement of their union for bool masks.

        Raises:
            ValueError: if `mask` is None and there is no earlier region to
                infer a mask from.
        """
        # Deep copy so the upstream node's list is never mutated.
        cond_reg = [] if cond_regions is None else copy.deepcopy(cond_regions)

        if mask is None:
            if not cond_reg:
                raise ValueError(
                    "ClownRegionalConditioning: a mask is required for the first region "
                    "(no earlier cond_regions to derive the remaining area from)."
                )
            ref_mask = cond_reg[0]['mask']
            mask     = torch.ones_like(ref_mask)
            for region in cond_reg:
                region_mask = region['mask'].to(ref_mask.dtype)
                if mask.dtype == torch.bool:
                    # Remove the covered area (previously this intersected instead).
                    mask &= ~region_mask
                else:
                    # Overlapping regions can push the remainder below zero.
                    mask = (mask - region_mask).clamp(min=0)

        cond_reg.append(
            {
                'use_self_attn_mask': not spineless,
                'edge_width'        : edge_width,
                'conditioning'      : conditioning,
                'mask'              : mask,
            }
        )

        return (cond_reg,)
weights = weights, region_bleeds = region_bleeds, region_bleed = region_bleed, region_bleed_start_step = region_bleed_start_step, mask_type = mask_type, invert_masks = invert_masks, cond_regions = cond_regions, ) cond_list = [region['conditioning'] for region in cond_regions] conditioning = zero_conditioning_from_list(cond_list) conditioning[0][1]['callback_regional'] = callback return (conditioning,) def prepare_regional_cond(self, model, weight : float = 1.0, start_sigma : float = 0.0, end_sigma : float = 1.0, weight_scheduler = None, start_step : int = 0, end_step : int = -1, weights : Tensor = None, region_bleeds : Tensor = None, region_bleed : float = 0.0, region_bleed_start_step : int = 0, mask_type : str = "gradient", cond_regions = None, invert_masks : bool = False, ) -> Tuple[Tensor]: default_dtype = torch.float64 default_device = torch.device("cuda") cond_list = [region['conditioning'] for region in cond_regions] mask_list = [region['mask'] for region in cond_regions] edge_width_list = [region['edge_width'] for region in cond_regions] use_self_attn_mask_list = [region['use_self_attn_mask'] for region in cond_regions] if end_step == -1: end_step = MAX_STEPS if weights is None and weight_scheduler != "constant": total_steps = end_step - start_step weights = get_sigmas(model, weight_scheduler, total_steps, 1.0).to(dtype=default_dtype, device=default_device) #/ model.inner_model.inner_model.model_sampling.sigma_max #scaling doesn't matter as this is a flux-only node prepend = torch.zeros(start_step, dtype=default_dtype, device=default_device) weights = torch.cat((prepend, weights), dim=0) if invert_masks: for i in range(len(mask_list)): if mask_list[i].dtype == torch.bool: mask_list[i] = ~mask_list[i] else: mask_list[i] = 1 - mask_list[i] floor, floors = region_bleed, region_bleeds weights = initialize_or_scale(weights, weight, end_step).to(default_dtype).to(default_device) weights = F.pad(weights, (0, MAX_STEPS), value=0.0) prepend = 
torch.full((region_bleed_start_step,), 0.0, dtype=default_dtype, device=default_device) floors = initialize_or_scale(floors, floor, end_step).to(default_dtype).to(default_device) floors = F.pad(floors, (0, MAX_STEPS), value=0.0) floors = torch.cat((prepend, floors), dim=0) EmptyCondGen = EmptyConditioningGenerator(model) cond_list = EmptyCondGen.zero_none_conditionings_(cond_list) conditioning = copy.deepcopy(cond_list[0]) if isinstance(model.model.model_config, comfy.supported_models.WAN21_T2V) or isinstance(model.model.model_config, comfy.supported_models.WAN21_I2V): if model.model.diffusion_model.blocks[0].self_attn.winderz_type != "false": AttnMask = CrossAttentionMask (mask_type, edge_width_list=edge_width_list, use_self_attn_mask_list=use_self_attn_mask_list) else: AttnMask = SplitAttentionMask (mask_type, edge_width_list=edge_width_list, use_self_attn_mask_list=use_self_attn_mask_list) elif isinstance(model.model.model_config, comfy.supported_models.HiDream): AttnMask = FullAttentionMaskHiDream(mask_type, edge_width_list=edge_width_list, use_self_attn_mask_list=use_self_attn_mask_list) elif isinstance(model.model.model_config, comfy.supported_models.SDXL) or isinstance(model.model.model_config, comfy.supported_models.SD15): AttnMask = SplitAttentionMask(mask_type, edge_width_list=edge_width_list, use_self_attn_mask_list=use_self_attn_mask_list) else: AttnMask = FullAttentionMask (mask_type, edge_width_list=edge_width_list, use_self_attn_mask_list=use_self_attn_mask_list) RegContext = RegionalContext() for cond, mask in zip(cond_list, mask_list): if isinstance(model.model.model_config, comfy.supported_models.HiDream): AttnMask.add_region_sizes( [ cond[0][0].shape[-2], cond[0][1]['conditioning_llama3'][0,0,...].shape[-2], cond[0][1]['conditioning_llama3'][0,0,...].shape[-2], ], mask) RegContext.add_region_llama3(cond[0][1]['conditioning_llama3']) else: AttnMask.add_region(cond[0][0], mask) RegContext.add_region(cond[0][0]) if 'clip_vision_output' in 
def merge_with_base(
        base   : List[Tuple[torch.Tensor, Dict[str, Any]]],
        others : List[List[Tuple[torch.Tensor, Dict[str, Any]]]],
        dim    : int = -2
        ) -> List[Tuple[torch.Tensor, Dict[str, Any]]]:
    """
    Merge `base` with an arbitrary list of other conditioning objects.

    `base` is used as a zeroed accumulator and is modified in place. For every
    level present in `base`:
      - the base tokens are replaced by zeros, padded together with the
        same-level tokens of every entry in `others` along `dim`, and summed;
      - every tensor-valued key of the base info dict is zeroed, then summed
        (after the same padding) with that key from every other info dict that
        holds a tensor under it;
      - non-tensor entries of the base info dict are left untouched.

    Levels that exist only in `others` are ignored. An empty `others` list is
    accepted and leaves `base` with zeroed tensors.

    Returns:
        The same `base` list, with each level rewritten as a (tokens, info) tuple.
    """
    for lvl in range(len(base)):
        base_tokens, base_info = base[lvl]

        # Zero every tensor in the base info dict so it acts as an accumulator.
        for key, val in base_info.items():
            if isinstance(val, torch.Tensor):
                base_info[key] = torch.zeros_like(val)

        # Entries of `others` that actually have this level.
        peers = [pos[lvl] for pos in others if lvl < len(pos)]

        # --- tokens merge ---
        toks = [torch.zeros_like(base_tokens)] + [peer[0] for peer in peers]
        toks = pad_tensor_list_to_max_len(toks, dim=dim)
        base[lvl] = (sum(toks), base_info)

        # --- info-dict tensor merge ---
        for key, val in list(base_info.items()):
            if not isinstance(val, torch.Tensor):
                continue
            pieces = [val]  # zeroed base tensor
            for peer in peers:
                info_i = peer[1]
                if key in info_i and isinstance(info_i[key], torch.Tensor):
                    pieces.append(info_i[key])
            pieces = pad_tensor_list_to_max_len(pieces, dim=dim)
            base_info[key] = sum(pieces)

    return base


def best_hw(n):
    """Return the factor pair (h, w) of `n`, h <= w, that is closest to a square.

    Falls back to (1, n) when no divisor is found (n == 0).
    """
    # Divisors get closer to sqrt(n) as they grow, so the first hit when
    # walking downwards from floor(sqrt(n)) is the most square-like pair.
    for h in range(math.isqrt(n), 0, -1):
        if n % h == 0:
            return (h, n // h)
    return (1, n)


def downsample_tokens(cond: torch.Tensor, target_tokens: int, mode="bicubic") -> torch.Tensor:
    """Resample a token sequence to `target_tokens` tokens via 2D interpolation.

    `cond` is unpacked as (B, T, D). The T tokens are zero-padded up to the next
    perfect square, laid out as a square grid, interpolated with
    `F.interpolate(mode=mode)` to the most square-like (H, W) factorisation of
    `target_tokens`, and flattened back to (B, target_tokens, D).
    """
    B, T, D = cond.shape

    # Smallest side length whose square holds all T tokens.
    side_len = math.isqrt(T)
    if side_len * side_len < T:
        side_len += 1

    pad_amount = side_len * side_len - T
    if pad_amount > 0:
        # Append zero tokens along the token axis (dim=1).
        cond = F.pad(cond, (0, 0, 0, pad_amount))

    grid = cond.view(B, side_len, side_len, D).permute(0, 3, 1, 2)  # [B, D, H, W]

    H_target, W_target = best_hw(target_tokens)
    resized = F.interpolate(grid, size=(H_target, W_target), mode=mode)

    tokens = resized.permute(0, 2, 3, 1).reshape(B, -1, D)
    return tokens[:, :target_tokens, :]
class CrossAttn_EraseReplace_Flux:
    """Encode an "erase" prompt and a "replace" prompt with a T5-XXL text encoder.

    The replace prompt is returned as the positive conditioning and the erase
    prompt as the negative conditioning.
    """

    @classmethod
    def INPUT_TYPES(s):
        return {"required": {
            "clip":                ("CLIP", ),
            "t5xxl_erase":         ("STRING", {"multiline": True, "dynamicPrompts": True}),
            "t5xxl_replace":       ("STRING", {"multiline": True, "dynamicPrompts": True}),
            "t5xxl_erase_token":   ("STRING", {"multiline": True, "dynamicPrompts": True}),
            "t5xxl_replace_token": ("STRING", {"multiline": True, "dynamicPrompts": True}),
            }}

    RETURN_TYPES = ("CONDITIONING","CONDITIONING",)
    RETURN_NAMES = ("positive", "negative",)
    FUNCTION     = "encode"
    CATEGORY     = "advanced/conditioning"
    EXPERIMENTAL = True

    def encode(self,
            clip,
            t5xxl_erase,
            llama_erase         = None,
            t5xxl_replace       = "",
            llama_replace       = None,
            t5xxl_erase_token   = "",
            llama_erase_token   = None,
            t5xxl_replace_token = "",
            llama_replace_token = None,
            ):
        """Return (positive, negative) = (encoded replace prompt, encoded erase prompt).

        The llama_* parameters are not declared in INPUT_TYPES; they keep their
        original positions for positional callers but are now optional, so the
        node no longer fails with a missing-argument TypeError. A llama prompt
        is only applied when it is given and the tokenizer output actually
        contains a "llama" entry. The *_token inputs are accepted but do not
        influence the output (they never did).
        """
        def build_tokens(t5xxl_text, llama_text):
            # Start from the empty-prompt tokens so every other encoder stays blank.
            tokens = clip.tokenize("")
            tokens["t5xxl"] = clip.tokenize(t5xxl_text)["t5xxl"]
            if llama_text is not None:
                llama_tokens = clip.tokenize(llama_text)
                if "llama" in llama_tokens:
                    tokens["llama"] = llama_tokens["llama"]
            return tokens

        encoded_erase   = clip.encode_from_tokens_scheduled(build_tokens(t5xxl_erase,   llama_erase))
        encoded_replace = clip.encode_from_tokens_scheduled(build_tokens(t5xxl_replace, llama_replace))

        return (encoded_replace, encoded_erase, )
================================================ FILE: example_workflows/chroma regional antiblur.json ================================================ {"last_node_id":726,"last_link_id":2104,"nodes":[{"id":13,"type":"Reroute","pos":[1280,-650],"size":[75,26],"flags":{},"order":12,"mode":0,"inputs":[{"name":"","type":"*","link":2098}],"outputs":[{"name":"","type":"MODEL","links":[1967],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":490,"type":"Reroute","pos":[1280,-610],"size":[75,26],"flags":{},"order":9,"mode":0,"inputs":[{"name":"","type":"*","link":2099}],"outputs":[{"name":"","type":"CLIP","links":[1939,2092,2101],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":14,"type":"Reroute","pos":[1280,-570],"size":[75,26],"flags":{},"order":10,"mode":0,"inputs":[{"name":"","type":"*","link":2100}],"outputs":[{"name":"","type":"VAE","links":[18,1328],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":398,"type":"SaveImage","pos":[1379.9996337890625,-267.2835998535156],"size":[341.7508850097656,561.0067749023438],"flags":{},"order":21,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":1329}],"outputs":[],"properties":{"Node name for S&R":"SaveImage","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["ComfyUI"]},{"id":701,"type":"Note","pos":[80,-520],"size":[342.05950927734375,88],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["I usually just lazily draw masks in Load Image nodes (with some random image loaded), but for the sake of reproducibility, here's another approach."],"color":"#432","bgcolor":"#653"},{"id":712,"type":"Note","pos":[-210,-520],"size":[245.76409912109375,91.6677017211914],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["So long as these masks are all the same size, the regional conditioning nodes will handle resizing to the image size 
for you."],"color":"#432","bgcolor":"#653"},{"id":676,"type":"InvertMask","pos":[20,-370],"size":[142.42074584960938,26],"flags":{},"order":7,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":2073}],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[2083],"slot_index":0}],"properties":{"Node name for S&R":"InvertMask"},"widgets_values":[]},{"id":7,"type":"VAEEncodeAdvanced","pos":[719.6110229492188,16.752899169921875],"size":[261.2217712402344,279.3136901855469],"flags":{},"order":16,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":null},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":18}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":[],"slot_index":1},{"name":"mask","localized_name":"mask","type":"MASK","links":[],"slot_index":2},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[1399],"slot_index":3},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"VAEEncodeAdvanced","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":["false",1024,1024,"red",false,"16_channels"]},{"id":710,"type":"MaskPreview","pos":[180,-190],"size":[210,246],"flags":{},"order":17,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":2054}],"outputs":[],"properties":{"Node name for 
S&R":"MaskPreview"},"widgets_values":[]},{"id":397,"type":"VAEDecode","pos":[1382.3662109375,-374.17059326171875],"size":[210,46],"flags":{},"order":20,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":2096},{"name":"vae","localized_name":"vae","type":"VAE","link":1328}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[1329],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[]},{"id":715,"type":"SolidMask","pos":[-220,-370],"size":[210,106],"flags":{},"order":2,"mode":0,"inputs":[],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[2073],"slot_index":0}],"properties":{"Node name for S&R":"SolidMask"},"widgets_values":[1,1024,1024]},{"id":716,"type":"SolidMask","pos":[-220,-220],"size":[210,106],"flags":{},"order":3,"mode":0,"inputs":[],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[2065],"slot_index":0}],"properties":{"Node name for S&R":"SolidMask"},"widgets_values":[1,384,864]},{"id":709,"type":"MaskComposite","pos":[190,-370],"size":[210,126],"flags":{},"order":11,"mode":0,"inputs":[{"name":"destination","localized_name":"destination","type":"MASK","link":2083},{"name":"source","localized_name":"source","type":"MASK","link":2065}],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[2054,2091],"slot_index":0}],"properties":{"Node name for S&R":"MaskComposite"},"widgets_values":[256,160,"add"]},{"id":704,"type":"Note","pos":[101.74818420410156,112.67951965332031],"size":[290.7107238769531,155.35317993164062],"flags":{},"order":4,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["ClownRegionalConditionings:\n\nTry raising or lowering weight, and changing the weight scheduler from beta57 to Karras (weakens more quickly), or to linear quadratic (stronger late).\n\nTry changing region_bleed_start_step (earlier will make the image blend together more), 
and end_step."],"color":"#432","bgcolor":"#653"},{"id":703,"type":"Note","pos":[423.10699462890625,-96.14085388183594],"size":[241.9689483642578,386.7543640136719],"flags":{},"order":5,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["edge_width also creates some overlap around the edges of the mask.\n\nboolean_masked means that the masked area can \"see\" the rest of the image, but the unmasked area cannot. \"boolean\" would mean neither area could see the rest of the image.\n\nTry setting to boolean_unmasked and see what happens!\n\nIf you still have blur, try reducing edge_width (and if you have seams, try increasing it, or setting end_step to something like 20). \n\nAlso verify that you can generate the background prompt alone without blur (if you can't, this won't work). And don't get stuck on one seed.\n\nVaguely human-shaped masks also tend to work better than the blocky one used here."],"color":"#432","bgcolor":"#653"},{"id":401,"type":"ClownsharKSampler_Beta","pos":[1010,-370],"size":[340.55120849609375,666.8208618164062],"flags":{},"order":19,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":1967},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":2104},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":2102},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1399},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2096],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharKSampler_Beta","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":[0.5,"multistep/res_2m","bong_tangent",30,-1,1,4,3,"fixed","standard",true]},{"id":723,"type":"CLIPTextEncode","pos":[460,-240],"size":[210,88],"flags":{"collapsed":false},"order":14,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":2092}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2093],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["a college campus"]},{"id":662,"type":"CLIPTextEncode","pos":[460,-370],"size":[210,88],"flags":{"collapsed":false},"order":13,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":1939}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2094],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["a woman wearing a red flannel shirt and a cute shark plush blue hat"]},{"id":724,"type":"ClownModelLoader","pos":[615.2467651367188,-699.0204467773438],"size":[361.6804504394531,266],"flags":{},"order":6,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2097],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[2099],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[2100],"slot_index":2}],"properties":{"Node name for 
S&R":"ClownModelLoader"},"widgets_values":["chroma-unlocked-v29.5.safetensors","fp8_e4m3fn_fast","t5xxl_fp8_e4m3fn_scaled.safetensors",".none",".none",".none","chroma","ae.sft"]},{"id":725,"type":"ReChromaPatcher","pos":[1030.2850341796875,-698.6190795898438],"size":[210,82],"flags":{},"order":8,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":2097}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2098],"slot_index":0}],"properties":{"Node name for S&R":"ReChromaPatcher"},"widgets_values":["float64",true]},{"id":726,"type":"CLIPTextEncode","pos":[772.4685668945312,350.9657897949219],"size":[210,88],"flags":{"collapsed":false},"order":15,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":2101}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2102],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["low quality, bad quality, mutated, low detail, blurry, out of focus, jpeg artifacts"]},{"id":722,"type":"ClownRegionalConditioning2","pos":[690,-370],"size":[287.75750732421875,330],"flags":{},"order":18,"mode":0,"inputs":[{"name":"conditioning_masked","localized_name":"conditioning_masked","type":"CONDITIONING","shape":7,"link":2094},{"name":"conditioning_unmasked","localized_name":"conditioning_unmasked","type":"CONDITIONING","shape":7,"link":2093},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":2091},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"region_bleeds","localized_name":"region_bleeds","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","links":[2104],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownRegionalConditioning2"},"widgets_values":[1,0,0,"constant",0,10,"boolean_masked",32,false]}],"links":[[18,14,0,7,4,"VAE"],[1328,14,0,397,1,"VAE"],[1329,397,0,398,0,"IMAGE"],[1399,7,3,401,3,"LATENT"],[1939,490,0,662,0,"CLIP"],[1967,13,0,401,0,"MODEL"],[2054,709,0,710,0,"MASK"],[2065,716,0,709,1,"MASK"],[2073,715,0,676,0,"MASK"],[2083,676,0,709,0,"MASK"],[2091,709,0,722,2,"MASK"],[2092,490,0,723,0,"CLIP"],[2093,723,0,722,1,"CONDITIONING"],[2094,662,0,722,0,"CONDITIONING"],[2096,401,0,397,0,"LATENT"],[2097,724,0,725,0,"MODEL"],[2098,725,0,13,0,"*"],[2099,724,1,490,0,"*"],[2100,724,2,14,0,"*"],[2101,490,0,726,0,"CLIP"],[2102,726,0,401,2,"CONDITIONING"],[2104,722,0,401,1,"CONDITIONING"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.5863092971715371,"offset":[2215.7489179851177,830.3089944212893]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0,"ue_links":[],"VHS_MetadataImage":true,"VHS_KeepIntermediate":true},"version":0.4} ================================================ FILE: example_workflows/chroma txt2img.json ================================================ {"last_node_id":727,"last_link_id":2113,"nodes":[{"id":398,"type":"SaveImage","pos":[1379.9996337890625,-267.2835998535156],"size":[341.7508850097656,561.0067749023438],"flags":{},"order":6,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":1329}],"outputs":[],"properties":{"Node name for S&R":"SaveImage","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["ComfyUI"]},{"id":397,"type":"VAEDecode","pos":[1382.3662109375,-374.17059326171875],"size":[210,46],"flags":{},"order":5,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":2096},{"name":"vae","localized_name":"vae","type":"VAE","link":2112}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[1329],"slot_index":0}],"properties":{"Node name for 
S&R":"VAEDecode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[]},{"id":401,"type":"ClownsharKSampler_Beta","pos":[1010,-370],"size":[340.55120849609375,666.8208618164062],"flags":{},"order":4,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":2108},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":2107},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":2102},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":2113},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2096],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":[0.5,"multistep/res_2m","bong_tangent",30,-1,1,4,3,"fixed","standard",true]},{"id":662,"type":"CLIPTextEncode","pos":[770.2921752929688,-373.6678771972656],"size":[210,88],"flags":{"collapsed":false},"order":2,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":2109}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2107],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["a woman wearing a red flannel shirt and a cute shark plush blue 
hat"]},{"id":726,"type":"CLIPTextEncode","pos":[772.46923828125,-238.8079376220703],"size":[210,88],"flags":{"collapsed":false},"order":3,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":2110}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2102],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["low quality, bad quality, mutated, low detail, blurry, out of focus, jpeg artifacts"]},{"id":727,"type":"EmptyLatentImage","pos":[771.9976196289062,-98.32988739013672],"size":[213.03683471679688,106],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"LATENT","localized_name":"LATENT","type":"LATENT","links":[2113],"slot_index":0}],"properties":{"Node name for S&R":"EmptyLatentImage"},"widgets_values":[1024,1024,1]},{"id":724,"type":"ClownModelLoader","pos":[380.5105285644531,-376.99224853515625],"size":[361.6804504394531,266],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2108],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[2109,2110],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[2112],"slot_index":2}],"properties":{"Node name for 
S&R":"ClownModelLoader"},"widgets_values":["chroma-unlocked-v37-detail-calibrated.safetensors","fp8_e4m3fn_fast","t5xxl_fp8_e4m3fn_scaled.safetensors",".none",".none",".none","chroma","ae.sft"]}],"links":[[1329,397,0,398,0,"IMAGE"],[2096,401,0,397,0,"LATENT"],[2102,726,0,401,2,"CONDITIONING"],[2107,662,0,401,1,"CONDITIONING"],[2108,724,0,401,0,"MODEL"],[2109,724,1,662,0,"CLIP"],[2110,724,1,726,0,"CLIP"],[2112,724,2,397,1,"VAE"],[2113,727,0,401,3,"LATENT"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.5863092971715371,"offset":[1675.8567061174099,917.6014919421251]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0,"ue_links":[],"VHS_MetadataImage":true,"VHS_KeepIntermediate":true},"version":0.4} ================================================ FILE: example_workflows/comparison ksampler vs csksampler chain workflows.json ================================================ {"last_node_id":1423,"last_link_id":3992,"nodes":[{"id":13,"type":"Reroute","pos":[17750,830],"size":[75,26],"flags":{},"order":9,"mode":0,"inputs":[{"name":"","type":"*","link":3988}],"outputs":[{"name":"","type":"MODEL","links":[1395],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":402,"type":"QuadrupleCLIPLoader","pos":[17300,870],"size":[407.7720031738281,130],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"CLIP","localized_name":"CLIP","type":"CLIP","links":[1552],"slot_index":0}],"properties":{"Node name for S&R":"QuadrupleCLIPLoader","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["clip_l_hidream.safetensors","clip_g_hidream.safetensors","t5xxl_fp8_e4m3fn_scaled.safetensors","llama_3.1_8b_instruct_fp8_scaled.safetensors"]},{"id":403,"type":"UNETLoader","pos":[17390,740],"size":[320.7802429199219,82],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[{"name":"MODEL","localized_name":"MODEL","type":"MODEL","links":[3988],"slot_index":0}],"properties":{"Node name for 
S&R":"UNETLoader","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["hidream_i1_full_fp8.safetensors","fp8_e4m3fn"]},{"id":404,"type":"VAELoader","pos":[17500,1060],"size":[210,58],"flags":{},"order":2,"mode":0,"inputs":[],"outputs":[{"name":"VAE","localized_name":"VAE","type":"VAE","links":[1344],"slot_index":0}],"properties":{"Node name for S&R":"VAELoader","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["ae.sft"]},{"id":1381,"type":"Reroute","pos":[18770,-310],"size":[75,26],"flags":{},"order":23,"mode":0,"inputs":[{"name":"","type":"*","link":3961}],"outputs":[{"name":"","type":"CONDITIONING","links":[3881]}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1383,"type":"Reroute","pos":[18770,-420],"size":[75,26],"flags":{},"order":27,"mode":0,"inputs":[{"name":"","type":"*","link":3877}],"outputs":[{"name":"","type":"MODEL","links":[3879],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1388,"type":"Reroute","pos":[18750,410],"size":[75,26],"flags":{},"order":28,"mode":0,"inputs":[{"name":"","type":"*","link":3886}],"outputs":[{"name":"","type":"MODEL","links":[3887,3891,3896,3901],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1393,"type":"SaveImage","pos":[20400,450],"size":[457.3382263183594,422.2065124511719],"flags":{},"order":51,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":3908}],"outputs":[],"properties":{"Node name for 
S&R":"SaveImage","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["ComfyUI"]},{"id":1399,"type":"Reroute","pos":[18790,1920],"size":[75,26],"flags":{},"order":22,"mode":0,"inputs":[{"name":"","type":"*","link":3967}],"outputs":[{"name":"","type":"CONDITIONING","links":[3925,3933]}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1401,"type":"Reroute","pos":[18780,1870],"size":[75,26],"flags":{},"order":30,"mode":0,"inputs":[{"name":"","type":"*","link":3916}],"outputs":[{"name":"","type":"MODEL","links":[3924,3931,3932],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1408,"type":"FlipSigmas","pos":[19150,2270],"size":[140,26],"flags":{},"order":42,"mode":0,"inputs":[{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","link":3941}],"outputs":[{"name":"SIGMAS","localized_name":"SIGMAS","type":"SIGMAS","links":[3929]}],"properties":{"Node name for S&R":"FlipSigmas"},"widgets_values":[]},{"id":1394,"type":"SamplerCustom","pos":[18940,1910],"size":[253.52972412109375,230],"flags":{},"order":46,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":3924},{"name":"positive","localized_name":"positive","type":"CONDITIONING","link":3925},{"name":"negative","localized_name":"negative","type":"CONDITIONING","link":3926},{"name":"sampler","localized_name":"sampler","type":"SAMPLER","link":3928},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","link":3929},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":3979}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[3938],"slot_index":0},{"name":"denoised_output","localized_name":"denoised_output","type":"LATENT","links":null}],"properties":{"Node name for 
S&R":"SamplerCustom"},"widgets_values":[false,0,"fixed",1]},{"id":1411,"type":"SplitSigmas","pos":[19030,2350],"size":[210,78],"flags":{},"order":38,"mode":0,"inputs":[{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","link":3940}],"outputs":[{"name":"high_sigmas","localized_name":"high_sigmas","type":"SIGMAS","links":null},{"name":"low_sigmas","localized_name":"low_sigmas","type":"SIGMAS","links":[3941,3942],"slot_index":1}],"properties":{"Node name for S&R":"SplitSigmas"},"widgets_values":[15]},{"id":1409,"type":"BetaSamplingScheduler","pos":[18780,2360],"size":[210,106],"flags":{},"order":34,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":3931}],"outputs":[{"name":"SIGMAS","localized_name":"SIGMAS","type":"SIGMAS","links":[3940],"slot_index":0}],"properties":{"Node name for S&R":"BetaSamplingScheduler"},"widgets_values":[30,0.5,0.7]},{"id":1407,"type":"KSamplerSelect","pos":[18720,2210],"size":[210,58],"flags":{},"order":3,"mode":0,"inputs":[],"outputs":[{"name":"SAMPLER","localized_name":"SAMPLER","type":"SAMPLER","links":[3928,3935]}],"properties":{"Node name for S&R":"KSamplerSelect"},"widgets_values":["euler"]},{"id":1395,"type":"Reroute","pos":[18750,1110],"size":[75,26],"flags":{},"order":21,"mode":0,"inputs":[{"name":"","type":"*","link":3965}],"outputs":[{"name":"","type":"CONDITIONING","links":[3949],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1405,"type":"VAEDecode","pos":[19650,1810],"size":[210,46],"flags":{},"order":52,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":3992},{"name":"vae","localized_name":"vae","type":"VAE","link":3922}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3923],"slot_index":0}],"properties":{"Node name for 
S&R":"VAEDecode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[]},{"id":1403,"type":"VAEDecode","pos":[19650,990],"size":[210,46],"flags":{},"order":41,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":3991},{"name":"vae","localized_name":"vae","type":"VAE","link":3919}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3920],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[]},{"id":1263,"type":"VAEDecode","pos":[20410,-500],"size":[210,46],"flags":{},"order":47,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":3989},{"name":"vae","localized_name":"vae","type":"VAE","link":3429}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3430],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[]},{"id":490,"type":"Reroute","pos":[17750,870],"size":[75,26],"flags":{},"order":8,"mode":0,"inputs":[{"name":"","type":"*","link":1552}],"outputs":[{"name":"","type":"CLIP","links":[3959,3960],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1385,"type":"Reroute","pos":[18750,520],"size":[75,26],"flags":{},"order":24,"mode":0,"inputs":[{"name":"","type":"*","link":3964}],"outputs":[{"name":"","type":"CONDITIONING","links":[3889,3893,3898,3903],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1415,"type":"CLIPTextEncode","pos":[17860,1070],"size":[261.8798522949219,111.21334838867188],"flags":{},"order":15,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":3960}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[3961,3964,3966,3968],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["blurry, out of focus, 
shallow depth of field, low quality, bad quality, low detail, mutated, jpeg artifacts, compression artifacts,"]},{"id":1414,"type":"CLIPTextEncode","pos":[17860,870],"size":[271.3465270996094,126.98572540283203],"flags":{},"order":14,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":3959}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[3962,3963,3965,3967],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["a photo of a doghead cannibal holding a sign that says \"the clown jumped the shark\" in a landfill at night"]},{"id":1397,"type":"Reroute","pos":[18750,1060],"size":[75,26],"flags":{},"order":29,"mode":0,"inputs":[{"name":"","type":"*","link":3912}],"outputs":[{"name":"","type":"MODEL","links":[3948],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1402,"type":"Reroute","pos":[18780,1980],"size":[75,26],"flags":{},"order":26,"mode":0,"inputs":[{"name":"","type":"*","link":3968}],"outputs":[{"name":"","type":"CONDITIONING","links":[3926,3934],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1417,"type":"LoadImage","pos":[18263.712890625,1364.093017578125],"size":[315,314],"flags":{},"order":4,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3973]},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["00107-496528661.png","image"]},{"id":1420,"type":"VAEEncode","pos":[18710,2080],"size":[140,46],"flags":{},"order":18,"mode":0,"inputs":[{"name":"pixels","localized_name":"pixels","type":"IMAGE","link":3977},{"name":"vae","localized_name":"vae","type":"VAE","link":3980}],"outputs":[{"name":"LATENT","localized_name":"LATENT","type":"LATENT","links":[3979],"slot_index":0}],"properties":{"Node name for 
S&R":"VAEEncode"},"widgets_values":[]},{"id":1419,"type":"ImageResize+","pos":[18460,2080],"size":[210,218],"flags":{},"order":11,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","link":3976}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3977],"slot_index":0},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"ImageResize+"},"widgets_values":[1024,1024,"bicubic","stretch","always",0]},{"id":14,"type":"Reroute","pos":[17750,910],"size":[75,26],"flags":{},"order":10,"mode":0,"inputs":[{"name":"","type":"*","link":1344}],"outputs":[{"name":"","type":"VAE","links":[3429,3907,3919,3922,3969,3980],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1418,"type":"LoadImage","pos":[18120,2080],"size":[315,314],"flags":{},"order":5,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3976],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for 
S&R":"LoadImage"},"widgets_values":["00107-496528661.png","image"]},{"id":1398,"type":"Reroute","pos":[18750,1160],"size":[75,26],"flags":{},"order":25,"mode":0,"inputs":[{"name":"","type":"*","link":3966}],"outputs":[{"name":"","type":"CONDITIONING","links":[3950],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1416,"type":"VAEEncodeAdvanced","pos":[18620,1370],"size":[253.78292846679688,278],"flags":{},"order":17,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":3973},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":3969}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[3975],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":null},{"name":"mask","localized_name":"mask","type":"MASK","links":null},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[],"slot_index":3},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for 
S&R":"VAEEncodeAdvanced"},"widgets_values":["false",1024,1024,"red",false,"16_channels"]},{"id":1423,"type":"FluxLoader","pos":[16942.298828125,795.814208984375],"size":[315,282],"flags":{},"order":6,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":null},{"name":"clip","localized_name":"clip","type":"CLIP","links":null},{"name":"vae","localized_name":"vae","type":"VAE","links":null},{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","links":null},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","links":null}],"properties":{"Node name for S&R":"FluxLoader"},"widgets_values":["colossusProjectFlux_v42AIO.safetensors","default",".use_ckpt_clip",".none",".use_ckpt_vae",".none",".none"]},{"id":431,"type":"ModelSamplingAdvancedResolution","pos":[17868.26953125,666.623046875],"size":[260.3999938964844,126],"flags":{},"order":16,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1395},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":3987}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[3877,3886,3912,3916],"slot_index":0}],"properties":{"Node name for S&R":"ModelSamplingAdvancedResolution","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":["exponential",1.35,0.85]},{"id":1422,"type":"EmptyLatentImage","pos":[17486.916015625,540.6340942382812],"size":[315,106],"flags":{},"order":7,"mode":0,"inputs":[],"outputs":[{"name":"LATENT","localized_name":"LATENT","type":"LATENT","links":[3985,3986,3987],"slot_index":0}],"properties":{"Node name for 
S&R":"EmptyLatentImage"},"widgets_values":[1024,1024,1]},{"id":1380,"type":"Reroute","pos":[18768.1875,-255.9905242919922],"size":[75,26],"flags":{},"order":12,"mode":0,"inputs":[{"name":"","type":"*","link":3985}],"outputs":[{"name":"","type":"LATENT","links":[3882],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1382,"type":"Reroute","pos":[18769.365234375,-367.63720703125],"size":[75,26],"flags":{},"order":19,"mode":0,"inputs":[{"name":"","type":"*","link":3962}],"outputs":[{"name":"","type":"CONDITIONING","links":[3880]}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1386,"type":"Reroute","pos":[18750.548828125,467.08831787109375],"size":[75,26],"flags":{},"order":20,"mode":0,"inputs":[{"name":"","type":"*","link":3963}],"outputs":[{"name":"","type":"CONDITIONING","links":[3888,3892,3897,3902]}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1387,"type":"Reroute","pos":[18747.00390625,569.2838745117188],"size":[75,26],"flags":{},"order":13,"mode":0,"inputs":[{"name":"","type":"*","link":3986}],"outputs":[{"name":"","type":"LATENT","links":[3890]}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1264,"type":"SaveImage","pos":[20410,-410],"size":[457.3382263183594,422.2065124511719],"flags":{},"order":50,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":3430}],"outputs":[],"properties":{"Node name for S&R":"SaveImage","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["ComfyUI"]},{"id":1392,"type":"VAEDecode","pos":[20400,360],"size":[210,46],"flags":{},"order":48,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":3990},{"name":"vae","localized_name":"vae","type":"VAE","link":3907}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3908],"slot_index":0}],"properties":{"Node name for 
S&R":"VAEDecode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[]},{"id":1410,"type":"SamplerCustom","pos":[19300,1900],"size":[272.0888977050781,230],"flags":{},"order":49,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":3932},{"name":"positive","localized_name":"positive","type":"CONDITIONING","link":3933},{"name":"negative","localized_name":"negative","type":"CONDITIONING","link":3934},{"name":"sampler","localized_name":"sampler","type":"SAMPLER","link":3935},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","link":3942},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":3938}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[3992],"slot_index":0},{"name":"denoised_output","localized_name":"denoised_output","type":"LATENT","links":null}],"properties":{"Node name for S&R":"SamplerCustom"},"widgets_values":[false,0,"fixed",4]},{"id":1261,"type":"ClownsharKSampler_Beta","pos":[18944.17578125,-390],"size":[283.8435974121094,418],"flags":{},"order":31,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":3879},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":3880},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":3881},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":3882},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[3427],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharKSampler_Beta","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":[0.5,"multistep/res_2m","beta57",30,5,1,4,0,"fixed","standard",true]},{"id":1262,"type":"ClownsharkChainsampler_Beta","pos":[19310.083984375,-402.36279296875],"size":[285.8560485839844,298],"flags":{},"order":35,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":3427},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[3435],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"multistep/res_2m",5,4,"resample",true]},{"id":1266,"type":"ClownsharkChainsampler_Beta","pos":[19679.115234375,-407.62518310546875],"size":[269.3165283203125,298],"flags":{},"order":39,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":3435},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[3436],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"multistep/res_2m",5,4,"resample",true]},{"id":1265,"type":"ClownsharkChainsampler_Beta","pos":[20054.2421875,-408.6135559082031],"size":[271.6801452636719,298],"flags":{},"order":43,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":3436},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[3989],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"multistep/res_2m",-1,4,"resample",true]},{"id":1384,"type":"KSamplerAdvanced","pos":[18936.240234375,444.8757019042969],"size":[278.3764343261719,334],"flags":{},"order":32,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":3887},{"name":"positive","localized_name":"positive","type":"CONDITIONING","link":3888},{"name":"negative","localized_name":"negative","type":"CONDITIONING","link":3889},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":3890}],"outputs":[{"name":"LATENT","localized_name":"LATENT","type":"LATENT","links":[3895],"slot_index":0}],"properties":{"Node name for 
S&R":"KSamplerAdvanced"},"widgets_values":["enable",0,"fixed",30,4,"euler","beta57",0,5,"enable"]},{"id":1391,"type":"KSamplerAdvanced","pos":[20044.978515625,449.22869873046875],"size":[278.3769226074219,334],"flags":{},"order":44,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":3901},{"name":"positive","localized_name":"positive","type":"CONDITIONING","link":3902},{"name":"negative","localized_name":"negative","type":"CONDITIONING","link":3903},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":3905}],"outputs":[{"name":"LATENT","localized_name":"LATENT","type":"LATENT","links":[3990],"slot_index":0}],"properties":{"Node name for S&R":"KSamplerAdvanced"},"widgets_values":["disable",15,"fixed",30,4,"euler","beta57",15,10000,"disable"]},{"id":1390,"type":"KSamplerAdvanced","pos":[19672.99609375,448.818603515625],"size":[273.651123046875,334],"flags":{},"order":40,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":3896},{"name":"positive","localized_name":"positive","type":"CONDITIONING","link":3897},{"name":"negative","localized_name":"negative","type":"CONDITIONING","link":3898},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":3900}],"outputs":[{"name":"LATENT","localized_name":"LATENT","type":"LATENT","links":[3905],"slot_index":0}],"properties":{"Node name for 
S&R":"KSamplerAdvanced"},"widgets_values":["disable",10,"fixed",30,4,"euler","beta57",10,15,"enable"]},{"id":1389,"type":"KSamplerAdvanced","pos":[19308.921875,451.14801025390625],"size":[273.652099609375,334],"flags":{},"order":36,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":3891},{"name":"positive","localized_name":"positive","type":"CONDITIONING","link":3892},{"name":"negative","localized_name":"negative","type":"CONDITIONING","link":3893},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":3895}],"outputs":[{"name":"LATENT","localized_name":"LATENT","type":"LATENT","links":[3900],"slot_index":0}],"properties":{"Node name for S&R":"KSamplerAdvanced"},"widgets_values":["disable",5,"fixed",30,4,"euler","beta57",5,10,"enable"]},{"id":1413,"type":"ClownsharkChainsampler_Beta","pos":[19294.095703125,1089.451171875],"size":[275.2236328125,298],"flags":{},"order":37,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":3947},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[3991],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"exponential/res_2s",-1,4,"resample",true]},{"id":1412,"type":"ClownsharKSampler_Beta","pos":[18922.447265625,1091.1812744140625],"size":[281.48095703125,418],"flags":{},"order":33,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":3948},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":3949},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":3950},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":3975},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[3947],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":[0.5,"exponential/res_2s","beta57",30,15,1,1,0,"fixed","unsample",true]},{"id":1406,"type":"SaveImage","pos":[19650,1900],"size":[457.3382263183594,422.2065124511719],"flags":{},"order":53,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":3923}],"outputs":[],"properties":{"Node name for S&R":"SaveImage","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["ComfyUI"]},{"id":1404,"type":"SaveImage","pos":[19650,1080],"size":[457.3382263183594,422.2065124511719],"flags":{},"order":45,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":3920}],"outputs":[],"properties":{"Node name for 
S&R":"SaveImage","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["ComfyUI"]}],"links":[[1344,404,0,14,0,"*"],[1395,13,0,431,0,"MODEL"],[1552,402,0,490,0,"*"],[3427,1261,0,1262,4,"LATENT"],[3429,14,0,1263,1,"VAE"],[3430,1263,0,1264,0,"IMAGE"],[3435,1262,0,1266,4,"LATENT"],[3436,1266,0,1265,4,"LATENT"],[3877,431,0,1383,0,"*"],[3879,1383,0,1261,0,"MODEL"],[3880,1382,0,1261,1,"CONDITIONING"],[3881,1381,0,1261,2,"CONDITIONING"],[3882,1380,0,1261,3,"LATENT"],[3886,431,0,1388,0,"*"],[3887,1388,0,1384,0,"MODEL"],[3888,1386,0,1384,1,"CONDITIONING"],[3889,1385,0,1384,2,"CONDITIONING"],[3890,1387,0,1384,3,"LATENT"],[3891,1388,0,1389,0,"MODEL"],[3892,1386,0,1389,1,"CONDITIONING"],[3893,1385,0,1389,2,"CONDITIONING"],[3895,1384,0,1389,3,"LATENT"],[3896,1388,0,1390,0,"MODEL"],[3897,1386,0,1390,1,"CONDITIONING"],[3898,1385,0,1390,2,"CONDITIONING"],[3900,1389,0,1390,3,"LATENT"],[3901,1388,0,1391,0,"MODEL"],[3902,1386,0,1391,1,"CONDITIONING"],[3903,1385,0,1391,2,"CONDITIONING"],[3905,1390,0,1391,3,"LATENT"],[3907,14,0,1392,1,"VAE"],[3908,1392,0,1393,0,"IMAGE"],[3912,431,0,1397,0,"*"],[3916,431,0,1401,0,"*"],[3919,14,0,1403,1,"VAE"],[3920,1403,0,1404,0,"IMAGE"],[3922,14,0,1405,1,"VAE"],[3923,1405,0,1406,0,"IMAGE"],[3924,1401,0,1394,0,"MODEL"],[3925,1399,0,1394,1,"CONDITIONING"],[3926,1402,0,1394,2,"CONDITIONING"],[3928,1407,0,1394,3,"SAMPLER"],[3929,1408,0,1394,4,"SIGMAS"],[3931,1401,0,1409,0,"MODEL"],[3932,1401,0,1410,0,"MODEL"],[3933,1399,0,1410,1,"CONDITIONING"],[3934,1402,0,1410,2,"CONDITIONING"],[3935,1407,0,1410,3,"SAMPLER"],[3938,1394,0,1410,5,"LATENT"],[3940,1409,0,1411,0,"SIGMAS"],[3941,1411,1,1408,0,"SIGMAS"],[3942,1411,1,1410,4,"SIGMAS"],[3947,1412,0,1413,4,"LATENT"],[3948,1397,0,1412,0,"MODEL"],[3949,1395,0,1412,1,"CONDITIONING"],[3950,1398,0,1412,2,"CONDITIONING"],[3959,490,0,1414,0,"CLIP"],[3960,490,0,1415,0,"CLIP"],[3961,1415,0,1381,0,"*"],[3962,1414,0,1382,0,"*"],[3963,1414,0,1386,0,"*"],[3964,1415,0,1385,0,"*"],[3965,1414,0,1395,0,"*"],[3966,1415,0,1398,0,"*"]
,[3967,1414,0,1399,0,"*"],[3968,1415,0,1402,0,"*"],[3969,14,0,1416,4,"VAE"],[3973,1417,0,1416,0,"IMAGE"],[3975,1416,0,1412,3,"LATENT"],[3976,1418,0,1419,0,"IMAGE"],[3977,1419,0,1420,0,"IMAGE"],[3979,1420,0,1394,5,"LATENT"],[3980,14,0,1420,1,"VAE"],[3985,1422,0,1380,0,"*"],[3986,1422,0,1387,0,"*"],[3987,1422,0,431,1,"LATENT"],[3988,403,0,13,0,"*"],[3989,1265,0,1263,0,"LATENT"],[3990,1391,0,1392,0,"LATENT"],[3991,1413,0,1403,0,"LATENT"],[3992,1410,0,1405,0,"LATENT"]],"groups":[],"config":{},"extra":{"ds":{"scale":0.9849732675807865,"offset":[-14560.618477888858,-446.28944651783576]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0,"ue_links":[],"VHS_MetadataImage":true,"VHS_KeepIntermediate":true},"version":0.4} ================================================ FILE: example_workflows/flux faceswap sync pulid.json ================================================ {"last_node_id":1741,"last_link_id":6622,"nodes":[{"id":490,"type":"Reroute","pos":[-1346.8087158203125,-823.3269653320312],"size":[75,26],"flags":{},"order":39,"mode":0,"inputs":[{"name":"","type":"*","link":6398}],"outputs":[{"name":"","type":"CLIP","links":[4157,6103],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1162,"type":"Reroute","pos":[1930.0975341796875,-817.45556640625],"size":[75,26],"flags":{},"order":78,"mode":0,"inputs":[{"name":"","type":"*","link":4185}],"outputs":[{"name":"","type":"IMAGE","links":[4186],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":744,"type":"SaveImage","pos":[1276.456787109375,-719.9273681640625],"size":[424.53594970703125,455.0760192871094],"flags":{},"order":72,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2241}],"outputs":[],"title":"Save Patch","properties":{"Node name for 
S&R":"SaveImage","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["ComfyUI"],"color":"#332922","bgcolor":"#593930"},{"id":1022,"type":"ImageBlend","pos":[2313.7607421875,-792.44091796875],"size":[210,102],"flags":{"collapsed":true},"order":73,"mode":0,"inputs":[{"name":"image1","localized_name":"image1","type":"IMAGE","link":3568},{"name":"image2","localized_name":"image2","type":"IMAGE","link":3570}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3569],"slot_index":0}],"properties":{"Node name for S&R":"ImageBlend"},"widgets_values":[0.5,"multiply"]},{"id":729,"type":"SetImageSize","pos":[-812.6932373046875,-86.24114227294922],"size":[210,102],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"width","localized_name":"width","type":"INT","links":[2104,2108,4998],"slot_index":0},{"name":"height","localized_name":"height","type":"INT","links":[2105,2109,4999],"slot_index":1}],"title":"Inpaint Tile Size","properties":{"Node name for S&R":"SetImageSize"},"widgets_values":[1024,1024]},{"id":1161,"type":"Image Save","pos":[2186.75634765625,-722.2388916015625],"size":[351.4677734375,796.8805541992188],"flags":{},"order":79,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":4186}],"outputs":[{"name":"images","localized_name":"images","type":"IMAGE","links":null},{"name":"files","localized_name":"files","type":"STRING","links":null}],"properties":{"Node name for S&R":"Image Save"},"widgets_values":["[time(%Y-%m-%d)]","ComfyUI","_",4,"false","jpeg",300,100,"true","false","false","false","true","true","true"],"color":"#232","bgcolor":"#353"},{"id":1024,"type":"PreviewImage","pos":[1286.05859375,-198.6599884033203],"size":[413.7582092285156,445.8081359863281],"flags":{},"order":76,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":3569}],"outputs":[],"properties":{"Node name for 
S&R":"PreviewImage"},"widgets_values":[],"color":"#332922","bgcolor":"#593930"},{"id":758,"type":"ImageResize+","pos":[1468.4384765625,-790.391845703125],"size":[210,218],"flags":{"collapsed":true},"order":71,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","link":2201},{"name":"width","type":"INT","pos":[10,76],"widget":{"name":"width"},"link":2204},{"name":"height","type":"INT","pos":[10,100],"widget":{"name":"height"},"link":2205}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2198],"slot_index":0},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"ImageResize+"},"widgets_values":[512,512,"lanczos","stretch","always",0]},{"id":1369,"type":"ImageResize+","pos":[2183.37109375,151.09762573242188],"size":[210,218],"flags":{"collapsed":true},"order":44,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","link":4996},{"name":"width","type":"INT","pos":[10,76],"widget":{"name":"width"},"link":4998},{"name":"height","type":"INT","pos":[10,100],"widget":{"name":"height"},"link":4999}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[5000],"slot_index":0},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for 
S&R":"ImageResize+"},"widgets_values":[512,512,"lanczos","stretch","always",0]},{"id":1407,"type":"Reroute","pos":[-914.50390625,-361.0196533203125],"size":[75,26],"flags":{},"order":37,"mode":0,"inputs":[{"name":"","type":"*","link":6620}],"outputs":[{"name":"","type":"MASK","links":[5021],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":725,"type":"Reroute","pos":[-914.8554077148438,-440.6482238769531],"size":[75,26],"flags":{},"order":36,"mode":0,"inputs":[{"name":"","type":"*","link":6619}],"outputs":[{"name":"","type":"IMAGE","links":[2210,2211,5054],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":14,"type":"Reroute","pos":[-1346.8087158203125,-783.3269653320312],"size":[75,26],"flags":{},"order":35,"mode":0,"inputs":[{"name":"","type":"*","link":5447}],"outputs":[{"name":"","type":"VAE","links":[2153,3508],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1667,"type":"GrowMask","pos":[-302.060302734375,-164.22067260742188],"size":[210,82],"flags":{},"order":53,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":6360}],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[6361],"slot_index":0}],"properties":{"Node name for S&R":"GrowMask"},"widgets_values":[-10,false]},{"id":1039,"type":"ImageBlend","pos":[-769.9498901367188,220.86917114257812],"size":[210,102],"flags":{"collapsed":true},"order":50,"mode":0,"inputs":[{"name":"image1","localized_name":"image1","type":"IMAGE","link":3606},{"name":"image2","localized_name":"image2","type":"IMAGE","link":3605}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3607],"slot_index":0}],"properties":{"Node name for 
S&R":"ImageBlend"},"widgets_values":[0.5,"multiply"]},{"id":731,"type":"SimpleMath+","pos":[-776.4415893554688,126.82145690917969],"size":[315,98],"flags":{"collapsed":true},"order":33,"mode":0,"inputs":[{"name":"a","localized_name":"a","type":"*","shape":7,"link":2108},{"name":"b","localized_name":"b","type":"*","shape":7,"link":2109},{"name":"c","localized_name":"c","type":"*","shape":7,"link":null}],"outputs":[{"name":"INT","localized_name":"INT","type":"INT","links":null},{"name":"FLOAT","localized_name":"FLOAT","type":"FLOAT","links":[2100],"slot_index":1}],"properties":{"Node name for S&R":"SimpleMath+"},"widgets_values":["a/b"]},{"id":728,"type":"MaskToImage","pos":[-791.0198364257812,176.82147216796875],"size":[176.39999389648438,26],"flags":{"collapsed":true},"order":45,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":2106}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2103,3605],"slot_index":0}],"properties":{"Node name for S&R":"MaskToImage"},"widgets_values":[]},{"id":765,"type":"MaskToImage","pos":[2080.868896484375,-792.6943359375],"size":[182.28543090820312,26],"flags":{"collapsed":true},"order":46,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":5529}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3570],"slot_index":0}],"properties":{"Node name for S&R":"MaskToImage"},"widgets_values":[]},{"id":761,"type":"Image Comparer (rgthree)","pos":[1747.432373046875,-712.1251220703125],"size":[410.4466247558594,447.8973388671875],"flags":{},"order":77,"mode":0,"inputs":[{"name":"image_a","type":"IMAGE","dir":3,"link":2210},{"name":"image_b","type":"IMAGE","dir":3,"link":2200}],"outputs":[],"title":"Compare 
Output","properties":{"comparer_mode":"Slide"},"widgets_values":[[{"name":"A","selected":true,"url":"/api/view?filename=rgthree.compare._temp_lonqd_00061_.png&type=temp&subfolder=&rand=0.1196562401371497"},{"name":"B","selected":true,"url":"/api/view?filename=rgthree.compare._temp_lonqd_00062_.png&type=temp&subfolder=&rand=0.958614793318614"}]],"color":"#232","bgcolor":"#353"},{"id":1569,"type":"ClownGuides_Sync_Advanced","pos":[261.355224609375,-1000.5784912109375],"size":[315,1938],"flags":{"collapsed":true},"order":56,"mode":0,"inputs":[{"name":"guide_masked","localized_name":"guide_masked","type":"LATENT","shape":7,"link":6201},{"name":"guide_unmasked","localized_name":"guide_unmasked","type":"LATENT","shape":7,"link":6202},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":6223},{"name":"mask_sync","localized_name":"mask_sync","type":"MASK","shape":7,"link":6224},{"name":"mask_drift_x","localized_name":"mask_drift_x","type":"MASK","shape":7,"link":6225},{"name":"mask_drift_y","localized_name":"mask_drift_y","type":"MASK","shape":7,"link":6226},{"name":"mask_lure_x","localized_name":"mask_lure_x","type":"MASK","shape":7,"link":6227},{"name":"mask_lure_y","localized_name":"mask_lure_y","type":"MASK","shape":7,"link":6228},{"name":"weights_masked","localized_name":"weights_masked","type":"SIGMAS","shape":7,"link":null},{"name":"weights_unmasked","localized_name":"weights_unmasked","type":"SIGMAS","shape":7,"link":null},{"name":"syncs_masked","localized_name":"syncs_masked","type":"SIGMAS","shape":7,"link":null},{"name":"syncs_unmasked","localized_name":"syncs_unmasked","type":"SIGMAS","shape":7,"link":null},{"name":"drift_xs_masked","localized_name":"drift_xs_masked","type":"SIGMAS","shape":7,"link":null},{"name":"drift_xs_unmasked","localized_name":"drift_xs_unmasked","type":"SIGMAS","shape":7,"link":null},{"name":"drift_ys_masked","localized_name":"drift_ys_masked","type":"SIGMAS","shape":7,"link":null},{"name":"drift_ys_unmasked","localized_n
ame":"drift_ys_unmasked","type":"SIGMAS","shape":7,"link":null},{"name":"lure_xs_masked","localized_name":"lure_xs_masked","type":"SIGMAS","shape":7,"link":null},{"name":"lure_xs_unmasked","localized_name":"lure_xs_unmasked","type":"SIGMAS","shape":7,"link":null},{"name":"lure_ys_masked","localized_name":"lure_ys_masked","type":"SIGMAS","shape":7,"link":null},{"name":"lure_ys_unmasked","localized_name":"lure_ys_unmasked","type":"SIGMAS","shape":7,"link":null},{"name":"drift_x_data","type":"FLOAT","pos":[10,800],"widget":{"name":"drift_x_data"},"link":6239},{"name":"drift_y_guide","type":"FLOAT","pos":[10,1088],"widget":{"name":"drift_y_guide"},"link":6240},{"name":"sync_masked","type":"FLOAT","pos":[10,608],"widget":{"name":"sync_masked"},"link":6241}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[6411],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuides_Sync_Advanced"},"widgets_values":[1,1,"constant","constant",0,0,-1,-1,0,1,"constant","constant",0,0,-1,-1,0.2,0,1,0,"constant","constant",0,0,-1,-1,0,0,0.2,1,0,"constant","constant",0,0,-1,-1,0,0,"constant","constant",0,0,-1,-1,0,0,"constant","constant",0,0,-1,-1,0,"y -> x",false,false,false,false,false,false]},{"id":1571,"type":"Reroute","pos":[141.35520935058594,-1030.5784912109375],"size":[75,26],"flags":{},"order":52,"mode":0,"inputs":[{"name":"","type":"*","link":6222}],"outputs":[{"name":"","type":"MASK","links":[6223,6224,6225,6226,6227,6228,6342,6584],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1368,"type":"Image Comparer (rgthree)","pos":[1744.9150390625,-199.16920471191406],"size":[410.4466247558594,447.8973388671875],"flags":{},"order":74,"mode":0,"inputs":[{"name":"image_a","type":"IMAGE","dir":3,"link":4997},{"name":"image_b","type":"IMAGE","dir":3,"link":5000}],"outputs":[],"title":"Compare 
Patch","properties":{"comparer_mode":"Slide"},"widgets_values":[[{"name":"A","selected":true,"url":"/api/view?filename=rgthree.compare._temp_fyekd_00061_.png&type=temp&subfolder=&rand=0.6117808776963016"},{"name":"B","selected":true,"url":"/api/view?filename=rgthree.compare._temp_fyekd_00062_.png&type=temp&subfolder=&rand=0.2735573488508416"}]],"color":"#232","bgcolor":"#353"},{"id":1673,"type":"Note","pos":[1824.9287109375,-1010.687744140625],"size":[322.34954833984375,88],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Preview of first stage output: sometimes it can be worth manually (or automatically, using DINO, etc.) adjusting your mask for the second stage, based on this output."],"color":"#432","bgcolor":"#653"},{"id":1539,"type":"GrowMask","pos":[573.4215698242188,-1145.86767578125],"size":[214.5684051513672,82],"flags":{},"order":57,"mode":4,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":6342}],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[6343,6344,6345,6346,6347,6348],"slot_index":0}],"properties":{"Node name for S&R":"GrowMask"},"widgets_values":[10,false]},{"id":1383,"type":"Note","pos":[216.7359161376953,340.25775146484375],"size":[291.67218017578125,232.2296142578125],"flags":{},"order":2,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["eta > 0.0 means you are using SDE/ancestral sampling. With this guide mode you will generally want to use bongmath = true.\n\nSamplers such as res_2s and res_3s will be very accurate. Try res_5s and res_8s if you really want to go crazy with it. They run 2x (2s), 3x (3s), etc slower than Euler.\n\nres_2m and 3m will be fast and also good, and run at the same speed as Euler.\n\neta_substep will increase the power of bongmath. 
If it is set to 0.0, you can turn bongmath off without any effect."],"color":"#432","bgcolor":"#653"},{"id":1380,"type":"Note","pos":[544.9375610351562,342.0576477050781],"size":[290.1026611328125,231.5842742919922],"flags":{},"order":3,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Setting denoise to a negative value is equivalent to just scaling it. For example:\n\nDenoise = -0.90 is the same as multiplying every sigma value in the entire schedule by 0.9.\n\nI find this is a lot easier to control than the regular denoise scale. The difference between -0.95 and -0.9 is much more predictable than with 0.95 and 0.9. Most of us have seen how different denoise 0.8 might be with Karras vs. exponential. \n\nTry a denoise between -0.95 and -0.85. "],"color":"#432","bgcolor":"#653"},{"id":759,"type":"ImageCompositeMasked","pos":[1697.19140625,-790.8740844726562],"size":[210,186],"flags":{"collapsed":true},"order":75,"mode":0,"inputs":[{"name":"destination","localized_name":"destination","type":"IMAGE","link":2211},{"name":"source","localized_name":"source","type":"IMAGE","link":2198},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":6447},{"name":"x","type":"INT","pos":[10,76],"widget":{"name":"x"},"link":2206},{"name":"y","type":"INT","pos":[10,100],"widget":{"name":"y"},"link":2207}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2200,4185],"slot_index":0}],"properties":{"Node name for S&R":"ImageCompositeMasked"},"widgets_values":[712,800,false]},{"id":1687,"type":"Note","pos":[-101.33948516845703,339.7750244140625],"size":[286.97723388671875,180.28128051757812],"flags":{},"order":4,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["The cycles node causes the connected sampler to loop between sampling and unsampling steps. 
(Unsampling is running the sampler backwards, where it predicts the noise that would lead to a given output).\n\nWhen unsample_eta is set to -1, it simply uses the same settings for eta as in the connected node. "],"color":"#432","bgcolor":"#653"},{"id":745,"type":"VAEDecode","pos":[1297.53369140625,-791.137939453125],"size":[140,46],"flags":{"collapsed":true},"order":70,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":6478},{"name":"vae","localized_name":"vae","type":"VAE","link":2153}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2201,2241,3568,4997],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[]},{"id":1678,"type":"Note","pos":[-422.92510986328125,-333.6911926269531],"size":[324.0018005371094,113.63665771484375],"flags":{},"order":5,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["ReduxAdvanced is used to help get things on track. Bypass if you're having problems with it disrupting character likeness.\n\nThe SDE Mask ensures SDE noise is used only in the masked area, limiting change in unmasked areas that could lead to seams. 
"],"color":"#432","bgcolor":"#653"},{"id":1572,"type":"ClownGuides_Sync_Advanced","pos":[581.355224609375,-1000.5784912109375],"size":[315,1878],"flags":{"collapsed":true},"order":62,"mode":0,"inputs":[{"name":"guide_masked","localized_name":"guide_masked","type":"LATENT","shape":7,"link":6229},{"name":"guide_unmasked","localized_name":"guide_unmasked","type":"LATENT","shape":7,"link":6230},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":6343},{"name":"mask_sync","localized_name":"mask_sync","type":"MASK","shape":7,"link":6344},{"name":"mask_drift_x","localized_name":"mask_drift_x","type":"MASK","shape":7,"link":6345},{"name":"mask_drift_y","localized_name":"mask_drift_y","type":"MASK","shape":7,"link":6346},{"name":"mask_lure_x","localized_name":"mask_lure_x","type":"MASK","shape":7,"link":6347},{"name":"mask_lure_y","localized_name":"mask_lure_y","type":"MASK","shape":7,"link":6348},{"name":"weights_masked","localized_name":"weights_masked","type":"SIGMAS","shape":7,"link":null},{"name":"weights_unmasked","localized_name":"weights_unmasked","type":"SIGMAS","shape":7,"link":null},{"name":"syncs_masked","localized_name":"syncs_masked","type":"SIGMAS","shape":7,"link":null},{"name":"syncs_unmasked","localized_name":"syncs_unmasked","type":"SIGMAS","shape":7,"link":null},{"name":"drift_xs_masked","localized_name":"drift_xs_masked","type":"SIGMAS","shape":7,"link":null},{"name":"drift_xs_unmasked","localized_name":"drift_xs_unmasked","type":"SIGMAS","shape":7,"link":null},{"name":"drift_ys_masked","localized_name":"drift_ys_masked","type":"SIGMAS","shape":7,"link":null},{"name":"drift_ys_unmasked","localized_name":"drift_ys_unmasked","type":"SIGMAS","shape":7,"link":null},{"name":"lure_xs_masked","localized_name":"lure_xs_masked","type":"SIGMAS","shape":7,"link":null},{"name":"lure_xs_unmasked","localized_name":"lure_xs_unmasked","type":"SIGMAS","shape":7,"link":null},{"name":"lure_ys_masked","localized_name":"lure_ys_masked","type":"SIGMAS","shap
e":7,"link":null},{"name":"lure_ys_unmasked","localized_name":"lure_ys_unmasked","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[6414],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuides_Sync_Advanced"},"widgets_values":[0,1,"constant","constant",0,0,-1,-1,0,1,"constant","constant",0,0,-1,-1,0,0,1,0,"constant","constant",0,0,-1,-1,0,0,0,1,0,"constant","constant",0,0,-1,-1,0,0,"constant","constant",0,0,-1,-1,0,0,"constant","constant",0,0,-1,-1,0,"y -> x",false,false,false,false,false,false]},{"id":1693,"type":"Note","pos":[-1535.57666015625,-641.8590087890625],"size":[276.7918701171875,88],"flags":{},"order":6,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Padding can be very important. Some models/loras/IPadapter embeds etc. are going to respond very differently if the shot is close up vs. farther away."],"color":"#432","bgcolor":"#653"},{"id":1694,"type":"Note","pos":[-441.5133056640625,-999.14990234375],"size":[291.2616882324219,189.98562622070312],"flags":{},"order":7,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Increase character likeness by: \n\nDecreasing \"Similarity\"\nIncreasing \"Drift Toward Target\"\nIncreasing cycles\nIncreasing eta (max 1.0)\nIncreasing denoise\n\nIncrease adherence to the input image by:\n\nDoing the opposite of any of the above\nIncreasing \"Drift Toward Guide\"\nEnabling the ReduxAdvanced 
node\n"],"color":"#432","bgcolor":"#653"},{"id":1277,"type":"SharkOptions_GuideCond_Beta","pos":[575.9444580078125,221.88970947265625],"size":[315,98],"flags":{"collapsed":true},"order":51,"mode":0,"inputs":[{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":5653},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":4650},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[5493],"slot_index":0}],"properties":{"Node name for S&R":"SharkOptions_GuideCond_Beta"},"widgets_values":[1]},{"id":1040,"type":"PreviewImage","pos":[-1267.6248779296875,-30.252229690551758],"size":[304.98114013671875,265.58380126953125],"flags":{},"order":55,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":3607}],"outputs":[],"properties":{"Node name for S&R":"PreviewImage"},"widgets_values":[]},{"id":1698,"type":"Note","pos":[-1623.859375,-355.951416015625],"size":[276.7918701171875,88],"flags":{},"order":8,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Draw a mask over the face in the Load Image node. 
Ideally, try stopping precisely at the hairline, and just above or just below the chin."],"color":"#432","bgcolor":"#653"},{"id":1477,"type":"LoraLoader","pos":[-1684.5245361328125,-845.994140625],"size":[315,126],"flags":{},"order":34,"mode":4,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":5439},{"name":"clip","localized_name":"clip","type":"CLIP","link":5440}],"outputs":[{"name":"MODEL","localized_name":"MODEL","type":"MODEL","links":[6397],"slot_index":0},{"name":"CLIP","localized_name":"CLIP","type":"CLIP","links":[6398],"slot_index":1}],"properties":{"Node name for S&R":"LoraLoader"},"widgets_values":["FLUX/Kirsten_Dunst_Flux_V1.safetensors",1,1]},{"id":1279,"type":"TorchCompileModels","pos":[-2086.55322265625,-1090.6181640625],"size":[285.9945068359375,179.0001983642578],"flags":{},"order":38,"mode":4,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":6397}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[6396],"slot_index":0}],"properties":{"Node name for S&R":"TorchCompileModels"},"widgets_values":["inductor",false,"default",false,64,0]},{"id":1478,"type":"ModelSamplingAdvancedResolution","pos":[-1773.91259765625,-1030.6773681640625],"size":[260.3999938964844,126],"flags":{},"order":54,"mode":4,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":6396},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":5442}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[6383],"slot_index":0}],"properties":{"Node name for 
S&R":"ModelSamplingAdvancedResolution"},"widgets_values":["exponential",1.35,0.85]},{"id":1454,"type":"ClownOptions_Cycles_Beta","pos":[-74.8967514038086,24.043270111083984],"size":[261.7955627441406,202],"flags":{},"order":9,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[6402],"slot_index":0}],"properties":{"Node name for S&R":"ClownOptions_Cycles_Beta"},"widgets_values":[20,1,-1,"none",-1,1,true]},{"id":726,"type":"Mask Bounding Box Aspect Ratio","pos":[-828.6614990234375,-412.50946044921875],"size":[252,250],"flags":{"collapsed":false},"order":40,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","shape":7,"link":5054},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":5021},{"name":"aspect_ratio","type":"FLOAT","pos":[10,204],"widget":{"name":"aspect_ratio"},"link":2100}],"outputs":[{"name":"image","localized_name":"image","type":"IMAGE","links":[2101,2102,3606,3721,4996,6543],"slot_index":0},{"name":"mask","localized_name":"mask","type":"MASK","links":[2106,5529],"slot_index":1},{"name":"mask_blurred","localized_name":"mask_blurred","type":"MASK","links":[6447],"slot_index":2},{"name":"x","localized_name":"x","type":"INT","links":[2206],"slot_index":3},{"name":"y","localized_name":"y","type":"INT","links":[2207],"slot_index":4},{"name":"width","localized_name":"width","type":"INT","links":[2204],"slot_index":5},{"name":"height","localized_name":"height","type":"INT","links":[2205],"slot_index":6}],"properties":{"Node name for S&R":"Mask Bounding Box Aspect 
Ratio"},"widgets_values":[100,40,1.75,false]},{"id":1702,"type":"PulidFluxInsightFaceLoader","pos":[-1150,-1080],"size":[365.4000244140625,58],"flags":{"collapsed":true},"order":10,"mode":0,"inputs":[],"outputs":[{"name":"FACEANALYSIS","localized_name":"FACEANALYSIS","type":"FACEANALYSIS","shape":3,"links":[6526],"slot_index":0}],"properties":{"Node name for S&R":"PulidFluxInsightFaceLoader"},"widgets_values":["CPU"]},{"id":1524,"type":"ReFluxPatcher","pos":[-1486.33251953125,-986.468505859375],"size":[210,82],"flags":{},"order":60,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":6383}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[6547],"slot_index":0}],"properties":{"Node name for S&R":"ReFluxPatcher"},"widgets_values":["float64",true]},{"id":13,"type":"Reroute","pos":[-1346.8087158203125,-863.3270874023438],"size":[75,26],"flags":{},"order":64,"mode":0,"inputs":[{"name":"","type":"*","link":6547}],"outputs":[{"name":"","type":"MODEL","links":[6548],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1703,"type":"PulidFluxModelLoader","pos":[-1140,-970],"size":[315,58],"flags":{"collapsed":true},"order":11,"mode":0,"inputs":[],"outputs":[{"name":"PULIDFLUX","localized_name":"PULIDFLUX","type":"PULIDFLUX","shape":3,"links":[6524],"slot_index":0}],"properties":{"Node name for S&R":"PulidFluxModelLoader"},"widgets_values":["pulid_flux_v0.9.0.safetensors"]},{"id":1688,"type":"Note","pos":[-1527.4205322265625,-1311.8199462890625],"size":[274.47601318359375,104.34856414794922],"flags":{},"order":12,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["ReFluxPatcher is required to use the \"Style\" nodes. 
Different \"Re...Patcher\" nodes are available for many other models, from SD1.5/SDXL to SD3.5, HiDream, AuraFlow, Chroma, WAN, and LTXV."],"color":"#432","bgcolor":"#653"},{"id":1071,"type":"CLIPVisionEncode","pos":[586.1533203125,119.24115753173828],"size":[253.60000610351562,78],"flags":{"collapsed":true},"order":43,"mode":0,"inputs":[{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","link":6552},{"name":"image","localized_name":"image","type":"IMAGE","link":3721}],"outputs":[{"name":"CLIP_VISION_OUTPUT","localized_name":"CLIP_VISION_OUTPUT","type":"CLIP_VISION_OUTPUT","links":[3720],"slot_index":0}],"properties":{"Node name for S&R":"CLIPVisionEncode"},"widgets_values":["center"]},{"id":1073,"type":"CLIPTextEncode","pos":[575.77001953125,186.9269256591797],"size":[263.280517578125,88.73566436767578],"flags":{"collapsed":true},"order":41,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":4157}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[4650,4980],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[""]},{"id":1476,"type":"FluxLoader","pos":[-2094.3544921875,-847.2406005859375],"size":[385.17449951171875,282],"flags":{},"order":13,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[5439],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[5440],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[5447],"slot_index":2},{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","links":[6550,6552],"slot_index":3},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","links":[6551,6553],"slot_index":4}],"properties":{"Node name for 
S&R":"FluxLoader"},"widgets_values":["colossusProjectFlux_v42AIO.safetensors","fp8_e4m3fn_fast",".use_ckpt_clip",".none",".use_ckpt_vae","siglip2-so400m-patch16-512.safetensors","flex1_redux_siglip2_512.safetensors"]},{"id":1716,"type":"Note","pos":[-2101.239013671875,-463.0836486816406],"size":[395.2708740234375,177.91754150390625],"flags":{},"order":14,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["To use the 512x512 Redux models, download and place in the following paths:\n\ncomfy/models/style_models:\nhttps://huggingface.co/ostris/Flex.1-alpha-Redux/blob/main/flex1_redux_siglip2_512.safetensors\n\ncomfy/models/clip_vision:\nhttps://huggingface.co/google/siglip2-so400m-patch16-512/blob/main/model.safetensors\n\nRename the latter as siglip2-so400m-patch16-512.safetensors"],"color":"#432","bgcolor":"#653"},{"id":1701,"type":"PulidFluxEvaClipLoader","pos":[-1145.7685546875,-1024.2314453125],"size":[327.5999755859375,26],"flags":{"collapsed":true},"order":15,"mode":0,"inputs":[],"outputs":[{"name":"EVA_CLIP","localized_name":"EVA_CLIP","type":"EVA_CLIP","shape":3,"links":[6525],"slot_index":0}],"properties":{"Node name for 
S&R":"PulidFluxEvaClipLoader"},"widgets_values":[]},{"id":1548,"type":"ReduxAdvanced","pos":[-69.81456756591797,-498.3502502441406],"size":[248.6250457763672,234],"flags":{},"order":47,"mode":4,"inputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","link":6422},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","link":6551},{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","link":6550},{"name":"image","localized_name":"image","type":"IMAGE","link":6543},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[6421],"slot_index":0},{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":null},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"ReduxAdvanced"},"widgets_values":[3,"area","center crop (square)",1,0.1]},{"id":1072,"type":"StyleModelApply","pos":[596.4773559570312,153.7720947265625],"size":[262,122],"flags":{"collapsed":true},"order":48,"mode":0,"inputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","link":4980},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","link":6553},{"name":"clip_vision_output","localized_name":"clip_vision_output","type":"CLIP_VISION_OUTPUT","link":3720}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[5653],"slot_index":0}],"properties":{"Node name for S&R":"StyleModelApply"},"widgets_values":[1,"multiply"]},{"id":1714,"type":"Note","pos":[-816.8351440429688,-725.0016479492188],"size":[252.3572998046875,162.81890869140625],"flags":{},"order":16,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["The repo for PuLID Flux is currently broken, but the ReFluxPatcher node will repair the issues and make it usable. You must have ReFluxPatcher enabled to use this. 
Aside from that, install as instructed:\n\nhttps://github.com/balazik/ComfyUI-PuLID-Flux\n\n"],"color":"#432","bgcolor":"#653"},{"id":1575,"type":"PrimitiveFloat","pos":[11.355203628540039,-940.5784912109375],"size":[210,58],"flags":{},"order":17,"mode":0,"inputs":[],"outputs":[{"name":"FLOAT","localized_name":"FLOAT","type":"FLOAT","links":[6241],"slot_index":0}],"title":"Similarity","properties":{"Node name for S&R":"PrimitiveFloat"},"widgets_values":[1]},{"id":1573,"type":"PrimitiveFloat","pos":[10.393571853637695,-834.4251708984375],"size":[210,58],"flags":{},"order":18,"mode":0,"inputs":[],"outputs":[{"name":"FLOAT","localized_name":"FLOAT","type":"FLOAT","links":[6239],"slot_index":0}],"title":"Drift Toward Target","properties":{"Node name for S&R":"PrimitiveFloat"},"widgets_values":[0.2]},{"id":1574,"type":"PrimitiveFloat","pos":[11.355203628540039,-720.5784912109375],"size":[210,58],"flags":{},"order":19,"mode":0,"inputs":[],"outputs":[{"name":"FLOAT","localized_name":"FLOAT","type":"FLOAT","links":[6240],"slot_index":0}],"title":"Drift Toward Guide","properties":{"Node name for 
S&R":"PrimitiveFloat"},"widgets_values":[0.2]},{"id":727,"type":"VAEEncodeAdvanced","pos":[-789.0958862304688,67.53204345703125],"size":[262.4812927246094,298],"flags":{"collapsed":true},"order":49,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":2101},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":2102},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":2103},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":3508},{"name":"width","type":"INT","pos":[10,160],"widget":{"name":"width"},"link":2104},{"name":"height","type":"INT","pos":[10,184],"widget":{"name":"height"},"link":2105}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[5373,5715,6201,6202,6229,6230,6412],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":[],"slot_index":1},{"name":"mask","localized_name":"mask","type":"MASK","links":[6222,6360,6569,6570],"slot_index":2},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[5442],"slot_index":3},{"name":"width","localized_name":"width","type":"INT","links":[],"slot_index":4},{"name":"height","localized_name":"height","type":"INT","links":[]}],"properties":{"Node name for S&R":"VAEEncodeAdvanced"},"widgets_values":["false",1344,768,"red",false,"16_channels"]},{"id":1674,"type":"Note","pos":[170.8737030029297,-1390.4803466796875],"size":[322.6287841796875,128.15802001953125],"flags":{},"order":20,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Activate the style nodes if you are having issues with color, detail, light, blurriness or pixelation drifting too far from your source input.\n\nIf end_step is too high, you may get faint halos and an oversharpened 
look."],"color":"#432","bgcolor":"#653"},{"id":1689,"type":"Note","pos":[525.9268798828125,-1349.89794921875],"size":[263.00439453125,88],"flags":{},"order":21,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Expanding the mask for the second pass can sometimes help prevent seams."],"color":"#432","bgcolor":"#653"},{"id":1525,"type":"ClownGuide_Style_Beta","pos":[251.35520935058594,-950.5784912109375],"size":[252.0535430908203,286],"flags":{},"order":61,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":5715},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":6569},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":6411}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[6051],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuide_Style_Beta"},"widgets_values":["positive","scattersort",1,1,"constant",0,-1,false]},{"id":1672,"type":"ClownGuide_Style_Beta","pos":[561.355224609375,-950.5784912109375],"size":[252.0535430908203,286],"flags":{},"order":65,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":6412},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":6570},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":6414}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[6415,6476],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownGuide_Style_Beta"},"widgets_values":["positive","scattersort",1,1,"constant",0,-1,false]},{"id":1516,"type":"ClownOptions_SDE_Mask_Beta","pos":[-68.4439468383789,-163.1180877685547],"size":[252.8383331298828,126],"flags":{},"order":59,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":6361},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[5776],"slot_index":0}],"properties":{"Node name for S&R":"ClownOptions_SDE_Mask_Beta"},"widgets_values":[1,0,false]},{"id":1731,"type":"ClownOptions_SDE_Mask_Beta","pos":[898.4906005859375,-756.2548217773438],"size":[252.8383331298828,126],"flags":{},"order":63,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":6586},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[6585,6587],"slot_index":0}],"properties":{"Node name for S&R":"ClownOptions_SDE_Mask_Beta"},"widgets_values":[1,0,false]},{"id":1730,"type":"MaskEdge","pos":[903.2994384765625,-949.55322265625],"size":[248.64459228515625,130],"flags":{},"order":58,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":6584}],"outputs":[{"name":"edge_mask","localized_name":"edge_mask","type":"MASK","links":[6586],"slot_index":0}],"properties":{"Node name for S&R":"MaskEdge"},"widgets_values":[10,"percent",1,1]},{"id":1677,"type":"Note","pos":[-439.5185241699219,-738.3756713867188],"size":[290.3874816894531,88],"flags":{},"order":22,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Try setting both drift values to 0.0 or 0.2 as a starting 
point.\n"],"color":"#432","bgcolor":"#653"},{"id":1552,"type":"ClownOptions_SDE_Beta","pos":[-271.7193603515625,259.6875915527344],"size":[315,266],"flags":{"collapsed":true},"order":23,"mode":0,"inputs":[{"name":"etas","localized_name":"etas","type":"SIGMAS","shape":7,"link":null},{"name":"etas_substep","localized_name":"etas_substep","type":"SIGMAS","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[],"slot_index":0}],"properties":{"Node name for S&R":"ClownOptions_SDE_Beta"},"widgets_values":["gaussian","gaussian","hard","hard",1,1,-1,"fixed"]},{"id":1726,"type":"ClownOptions_ImplicitSteps_Beta","pos":[-493.06549072265625,258.3205871582031],"size":[300.7710876464844,130],"flags":{"collapsed":true},"order":24,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownOptions_ImplicitSteps_Beta"},"widgets_values":["bongmath","bongmath",10,0]},{"id":1722,"type":"ClownOptions_DetailBoost_Beta","pos":[-302.6524963378906,-24.413410186767578],"size":[210.1761016845703,218],"flags":{"collapsed":false},"order":25,"mode":0,"inputs":[{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"etas","localized_name":"etas","type":"SIGMAS","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[6589,6590,6591],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownOptions_DetailBoost_Beta"},"widgets_values":[1,"model","hard",0.5,3,10]},{"id":1732,"type":"Note","pos":[890.6793823242188,-1148.8226318359375],"size":[290.3854675292969,122.62060546875],"flags":{},"order":26,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["The mask below allows the SDE/ancestral noise used in the last two samplers to only hit the seams around the inpainted area.\n\nTry bypassing the SDE mask and see if you like the results - it lets the entire face be affected by noise."],"color":"#432","bgcolor":"#653"},{"id":1727,"type":"Note","pos":[-453.12371826171875,343.8135681152344],"size":[296.5935363769531,187.9747314453125],"flags":{},"order":27,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["ClownOptions Detail gives a boost to detail a lot like the \"Detail Daemon\" node, though I think with somewhat less risk of mutations and loss of saturation. Change \"weight\", \"eta\", or \"end_step\" to control strength.\n\nImplicit steps can be used in place of \"Cycles\". Try setting steps_to_run to 3 or 4 if you use it.\n\nClownOptions SDE contains extra settings for noise, so you can change the type, amount, etc. 
with more precision."],"color":"#432","bgcolor":"#653"},{"id":1733,"type":"Note","pos":[-819.1915893554688,-1111.3170166015625],"size":[251.92019653320312,88],"flags":{},"order":28,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Try changing the weight or end_at if results look plastic."],"color":"#432","bgcolor":"#653"},{"id":1704,"type":"ApplyPulidFlux","pos":[-805.7684326171875,-986.1819458007812],"size":[219.79336547851562,206],"flags":{},"order":66,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":6548},{"name":"pulid_flux","localized_name":"pulid_flux","type":"PULIDFLUX","link":6524},{"name":"eva_clip","localized_name":"eva_clip","type":"EVA_CLIP","link":6525},{"name":"face_analysis","localized_name":"face_analysis","type":"FACEANALYSIS","link":6526},{"name":"image","localized_name":"image","type":"IMAGE","link":null},{"name":"attn_mask","localized_name":"attn_mask","type":"MASK","shape":7,"link":null}],"outputs":[{"name":"MODEL","localized_name":"MODEL","type":"MODEL","shape":3,"links":[6549],"slot_index":0}],"properties":{"Node name for S&R":"ApplyPulidFlux"},"widgets_values":[1,0,1]},{"id":1737,"type":"Note","pos":[-1184.4395751953125,-1304.4234619140625],"size":[251.92019653320312,88],"flags":{},"order":29,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["The image you choose is very important. 
The face should have its proportions clearly distinguishable."],"color":"#432","bgcolor":"#653"},{"id":1717,"type":"LoadImage","pos":[-603.783203125,-1602.01904296875],"size":[315,314],"flags":{},"order":30,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["pasted/image (812).png","image"]},{"id":1446,"type":"ClownsharKSampler_Beta","pos":[214.812255859375,-508.00537109375],"size":[277.5089111328125,735.1378784179688],"flags":{},"order":67,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":6549},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":6421},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":5373},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":6051},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":5493},{"name":"options 2","type":"OPTIONS","link":5776},{"name":"options 3","type":"OPTIONS","link":6402},{"name":"options 4","type":"OPTIONS","link":6589},{"name":"options 5","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[6380],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":[],"slot_index":1},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharKSampler_Beta","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":[1,"exponential/res_2s","bong_tangent",30,1,0.65,1,100,"fixed","standard",true],"color":"#332922","bgcolor":"#593930"},{"id":1556,"type":"CLIPTextEncode","pos":[-392.6881408691406,-498.2940979003906],"size":[289.0962829589844,113.79679870605469],"flags":{"collapsed":false},"order":42,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":6103}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[6422],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[""],"color":"#2a363b","bgcolor":"#3f5159"},{"id":1707,"type":"LoadImage","pos":[-1272.3699951171875,-406.4196472167969],"size":[315,314],"flags":{},"order":31,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[6619],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":[6620],"slot_index":1}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["clipspace/clipspace-mask-18464655.700000048.png [input]","image"]},{"id":1740,"type":"Note","pos":[-892.4718627929688,-1299.925048828125],"size":[251.92019653320312,88],"flags":{},"order":32,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["PuLID will copy much of the lighting and especially position/angle of the face. 
Keep this in mind."],"color":"#432","bgcolor":"#653"},{"id":1690,"type":"ClownsharkChainsampler_Beta","pos":[865.4187622070312,-518.0064086914062],"size":[281.7781677246094,571.74853515625],"flags":{},"order":69,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":6566},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":6476},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":6587},{"name":"options 2","type":"OPTIONS","link":6591},{"name":"options 3","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[6478],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0,"multistep/res_3m",-1,1,"resample",false]},{"id":1479,"type":"ClownsharkChainsampler_Beta","pos":[536.1533203125,-510.75872802734375],"size":[288.1370544433594,571.74853515625],"flags":{},"order":68,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":6380},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":6415},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":6585},{"name":"options 2","type":"OPTIONS","link":6590},{"name":"options 3","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[6566],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0,"exponential/res_2s",2,1,"resample",true]}],"links":[[2100,731,1,726,2,"FLOAT"],[2101,726,0,727,0,"IMAGE"],[2102,726,0,727,1,"IMAGE"],[2103,728,0,727,2,"IMAGE"],[2104,729,0,727,5,"INT"],[2105,729,1,727,6,"INT"],[2106,726,1,728,0,"MASK"],[2108,729,0,731,0,"*"],[2109,729,1,731,1,"*"],[2153,14,0,745,1,"VAE"],[2198,758,0,759,1,"IMAGE"],[2200,759,0,761,1,"IMAGE"],[2201,745,0,758,0,"IMAGE"],[2204,726,5,758,1,"INT"],[2205,726,6,758,2,"INT"],[2206,726,3,759,3,"INT"],[2207,726,4,759,4,"INT"],[2210,725,0,761,0,"IMAGE"],[2211,725,0,759,0,"IMAGE"],[2241,745,0,744,0,"IMAGE"],[3508,14,0,727,4,"VAE"],[3568,745,0,1022,0,"IMAGE"],[3569,1022,0,1024,0,"IMAGE"],[3570,765,0,1022,1,"IMAGE"],[3605,728,0,1039,1,"IMAGE"],[3606,726,0,1039,0,"IMAGE"],[3607,1039,0,1040,0,"IMAGE"],[3720,1071,0,1072,2,"CLIP_VISION_OUTPUT"],[3721,726,0,1071,1,"IMAGE"],[4157,490,0,1073,0,"CLIP"],[4185,759,0,1162,0,"*"],[4186,1162,0,1161,0,"IMAGE"],[4650,1073,0,1277,1,"CONDITIONING"],[4980,1073,0,1072,0,"CONDITIONING"],[4996,726,0,1369,0,"IMAGE"],[4997,745,0,1368,0,"IMAGE"],[4998,729,0,1369,1,"INT"],[4999,729,1,1369,2,"INT"],[5000,1369,0,1368,1,"IMAGE"],[5021,1407,0,726,1,"MASK"],[5054,725,0,726,0,"IMAGE"],[5373,727,0,1446,3,"LATENT"],[5439,1476,0,1477,0,"MODEL"],[5440,1476,1,1477,1,"CLIP"],[5442,727,3,1478,1,"LATENT"],[5447,1476,2,14,0,"*"],[5493,1277,0,1446,6,"OPTIONS"],[5529,726,1,765,0,"MASK"],[5653,1072,0,1277,0,"CONDITIONING"],[5715,727,0,1525,0,"LATENT"],[5776,1516,0,1446,7,"OPTIONS"],[6051,1525,0,1446,5,"GUIDES"],[6103,490,0,1556,0,"CLIP"],[6201,727,0,1569,0,"LATENT"],[6202,727,0,1569,1,"LATENT"],[6222,727,2,1571,0,"*"],[6223,1571,0,1569,2,"MASK"],[6224,1571,0,1569,3,"MASK"],[6225,1571,0,1569,4,"MASK"],[6226,1571,0,1569,5,"MASK"],[6227,1571,0,1569,6,"MASK"],[6228,1571,0,1569,7,"MASK"],[6229,727,0,1572,0,"LATENT"],[6230,727,0,1572,1,"LATENT"],[6239,1573,0,1569,20,"FLOAT"],[6240,1574,0,1569,21,"FLOAT"],[6241,1575,0,1569,22,"FLOAT"],[6342,1571,0,1539,0,"M
ASK"],[6343,1539,0,1572,2,"MASK"],[6344,1539,0,1572,3,"MASK"],[6345,1539,0,1572,4,"MASK"],[6346,1539,0,1572,5,"MASK"],[6347,1539,0,1572,6,"MASK"],[6348,1539,0,1572,7,"MASK"],[6360,727,2,1667,0,"MASK"],[6361,1667,0,1516,0,"MASK"],[6380,1446,0,1479,4,"LATENT"],[6383,1478,0,1524,0,"MODEL"],[6396,1279,0,1478,0,"MODEL"],[6397,1477,0,1279,0,"MODEL"],[6398,1477,1,490,0,"*"],[6402,1454,0,1446,8,"OPTIONS"],[6411,1569,0,1525,3,"GUIDES"],[6412,727,0,1672,0,"LATENT"],[6414,1572,0,1672,3,"GUIDES"],[6415,1672,0,1479,5,"GUIDES"],[6421,1548,0,1446,1,"CONDITIONING"],[6422,1556,0,1548,0,"CONDITIONING"],[6447,726,2,759,2,"MASK"],[6476,1672,0,1690,5,"GUIDES"],[6478,1690,0,745,0,"LATENT"],[6524,1703,0,1704,1,"PULIDFLUX"],[6525,1701,0,1704,2,"EVA_CLIP"],[6526,1702,0,1704,3,"FACEANALYSIS"],[6543,726,0,1548,3,"IMAGE"],[6547,1524,0,13,0,"*"],[6548,13,0,1704,0,"MODEL"],[6549,1704,0,1446,0,"MODEL"],[6550,1476,3,1548,2,"CLIP_VISION"],[6551,1476,4,1548,1,"STYLE_MODEL"],[6552,1476,3,1071,0,"CLIP_VISION"],[6553,1476,4,1072,1,"STYLE_MODEL"],[6566,1479,0,1690,4,"LATENT"],[6569,727,2,1525,1,"MASK"],[6570,727,2,1672,1,"MASK"],[6584,1571,0,1730,0,"MASK"],[6585,1731,0,1479,6,"OPTIONS"],[6586,1730,0,1731,0,"MASK"],[6587,1731,0,1690,6,"OPTIONS"],[6589,1722,0,1446,9,"OPTIONS"],[6590,1722,0,1479,7,"OPTIONS"],[6591,1722,0,1690,7,"OPTIONS"],[6619,1707,0,725,0,"*"],[6620,1707,1,1407,0,"*"]],"groups":[{"id":1,"title":"Prepare Input","bounding":[-1310.92529296875,-489.52618408203125,755.7755737304688,762.867431640625],"color":"#3f789e","font_size":24,"flags":{}},{"id":2,"title":"Patch and 
Stitch","bounding":[1250.695068359375,-877.5091552734375,1320.4892578125,1148.6859130859375],"color":"#3f789e","font_size":24,"flags":{}},{"id":3,"title":"Loaders","bounding":[-2115.099853515625,-1180.8953857421875,881.3677368164062,646.2952880859375],"color":"#3f789e","font_size":24,"flags":{}},{"id":5,"title":"Sampling","bounding":[-510.548828125,-602.9613037109375,1686.064208984375,874.1248168945312],"color":"#3f789e","font_size":24,"flags":{}},{"id":6,"title":"Guides","bounding":[-37.0714225769043,-1229.123046875,888.9586791992188,587.7683715820312],"color":"#3f789e","font_size":24,"flags":{}},{"id":7,"title":"PuLID","bounding":[-1191.9031982421875,-1177.2020263671875,649.8841552734375,641.718994140625],"color":"#3f789e","font_size":24,"flags":{}}],"config":{},"extra":{"ds":{"scale":1.3310000000000006,"offset":[4741.826990245036,1361.8744550803772]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0,"ue_links":[],"VHS_MetadataImage":true,"VHS_KeepIntermediate":true},"version":0.4} ================================================ FILE: example_workflows/flux faceswap sync.json ================================================ 
{"last_node_id":1698,"last_link_id":6519,"nodes":[{"id":490,"type":"Reroute","pos":[-669.7835083007812,-822.2691040039062],"size":[75,26],"flags":{},"order":28,"mode":0,"inputs":[{"name":"","type":"*","link":6398}],"outputs":[{"name":"","type":"CLIP","links":[4157,6103],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1162,"type":"Reroute","pos":[1930.0975341796875,-817.45556640625],"size":[75,26],"flags":{},"order":66,"mode":0,"inputs":[{"name":"","type":"*","link":4185}],"outputs":[{"name":"","type":"IMAGE","links":[4186],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":744,"type":"SaveImage","pos":[1276.456787109375,-719.9273681640625],"size":[424.53594970703125,455.0760192871094],"flags":{},"order":60,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2241}],"outputs":[],"title":"Save Patch","properties":{"Node name for S&R":"SaveImage","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["ComfyUI"],"color":"#332922","bgcolor":"#593930"},{"id":1022,"type":"ImageBlend","pos":[2313.7607421875,-792.44091796875],"size":[210,102],"flags":{"collapsed":true},"order":61,"mode":0,"inputs":[{"name":"image1","localized_name":"image1","type":"IMAGE","link":3568},{"name":"image2","localized_name":"image2","type":"IMAGE","link":3570}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3569],"slot_index":0}],"properties":{"Node name for S&R":"ImageBlend"},"widgets_values":[0.5,"multiply"]},{"id":729,"type":"SetImageSize","pos":[-812.6932373046875,-86.24114227294922],"size":[210,102],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"width","localized_name":"width","type":"INT","links":[2104,2108,4998],"slot_index":0},{"name":"height","localized_name":"height","type":"INT","links":[2105,2109,4999],"slot_index":1}],"title":"Inpaint Tile Size","properties":{"Node name for 
S&R":"SetImageSize"},"widgets_values":[1024,1024]},{"id":1161,"type":"Image Save","pos":[2186.75634765625,-722.2388916015625],"size":[351.4677734375,796.8805541992188],"flags":{},"order":67,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":4186}],"outputs":[{"name":"images","localized_name":"images","type":"IMAGE","links":null},{"name":"files","localized_name":"files","type":"STRING","links":null}],"properties":{"Node name for S&R":"Image Save"},"widgets_values":["[time(%Y-%m-%d)]","ComfyUI","_",4,"false","jpeg",300,100,"true","false","false","false","true","true","true"],"color":"#232","bgcolor":"#353"},{"id":1024,"type":"PreviewImage","pos":[1286.05859375,-198.6599884033203],"size":[413.7582092285156,445.8081359863281],"flags":{},"order":64,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":3569}],"outputs":[],"properties":{"Node name for S&R":"PreviewImage"},"widgets_values":[],"color":"#332922","bgcolor":"#593930"},{"id":758,"type":"ImageResize+","pos":[1468.4384765625,-790.391845703125],"size":[210,218],"flags":{"collapsed":true},"order":59,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","link":2201},{"name":"width","type":"INT","pos":[10,76],"widget":{"name":"width"},"link":2204},{"name":"height","type":"INT","pos":[10,100],"widget":{"name":"height"},"link":2205}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2198],"slot_index":0},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for 
S&R":"ImageResize+"},"widgets_values":[512,512,"lanczos","stretch","always",0]},{"id":1369,"type":"ImageResize+","pos":[2183.37109375,151.09762573242188],"size":[210,218],"flags":{"collapsed":true},"order":33,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","link":4996},{"name":"width","type":"INT","pos":[10,76],"widget":{"name":"width"},"link":4998},{"name":"height","type":"INT","pos":[10,100],"widget":{"name":"height"},"link":4999}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[5000],"slot_index":0},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"ImageResize+"},"widgets_values":[512,512,"lanczos","stretch","always",0]},{"id":1407,"type":"Reroute","pos":[-914.50390625,-361.0196533203125],"size":[75,26],"flags":{},"order":26,"mode":0,"inputs":[{"name":"","type":"*","link":6519}],"outputs":[{"name":"","type":"MASK","links":[5021],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":725,"type":"Reroute","pos":[-914.8554077148438,-440.6482238769531],"size":[75,26],"flags":{},"order":25,"mode":0,"inputs":[{"name":"","type":"*","link":6518}],"outputs":[{"name":"","type":"IMAGE","links":[2210,2211,5054],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1071,"type":"CLIPVisionEncode","pos":[586.1533203125,119.24115753173828],"size":[253.60000610351562,78],"flags":{"collapsed":true},"order":32,"mode":0,"inputs":[{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","link":5443},{"name":"image","localized_name":"image","type":"IMAGE","link":3721}],"outputs":[{"name":"CLIP_VISION_OUTPUT","localized_name":"CLIP_VISION_OUTPUT","type":"CLIP_VISION_OUTPUT","links":[3720],"slot_index":0}],"properties":{"Node name for 
S&R":"CLIPVisionEncode"},"widgets_values":["center"]},{"id":1575,"type":"PrimitiveFloat","pos":[11.355203628540039,-940.5784912109375],"size":[210,58],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[{"name":"FLOAT","localized_name":"FLOAT","type":"FLOAT","links":[6241],"slot_index":0}],"title":"Similarity","properties":{"Node name for S&R":"PrimitiveFloat"},"widgets_values":[1]},{"id":1654,"type":"LoadImage","pos":[773.8897705078125,1813.0185546875],"size":[315,314],"flags":{},"order":2,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":null},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["7c2a2a772675a224-photo.JPG","image"]},{"id":1478,"type":"ModelSamplingAdvancedResolution","pos":[-1096.887451171875,-1029.6195068359375],"size":[260.3999938964844,126],"flags":{},"order":43,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":6396},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":5442}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[6383],"slot_index":0}],"properties":{"Node name for S&R":"ModelSamplingAdvancedResolution"},"widgets_values":["exponential",1.35,0.85]},{"id":1279,"type":"TorchCompileModels","pos":[-1409.527587890625,-1089.560302734375],"size":[285.9945068359375,179.0001983642578],"flags":{},"order":27,"mode":4,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":6397}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[6396],"slot_index":0}],"properties":{"Node name for 
S&R":"TorchCompileModels"},"widgets_values":["inductor",false,"default",false,64,0]},{"id":14,"type":"Reroute","pos":[-669.7835083007812,-782.2691040039062],"size":[75,26],"flags":{},"order":24,"mode":0,"inputs":[{"name":"","type":"*","link":5447}],"outputs":[{"name":"","type":"VAE","links":[2153,3508,6353],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":13,"type":"Reroute","pos":[-669.7835083007812,-862.2692260742188],"size":[75,26],"flags":{},"order":51,"mode":0,"inputs":[{"name":"","type":"*","link":5845}],"outputs":[{"name":"","type":"MODEL","links":[5846],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1516,"type":"ClownOptions_SDE_Mask_Beta","pos":[-68.4439468383789,-163.1180877685547],"size":[252.8383331298828,126],"flags":{},"order":47,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":6361},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[5776,6016,6477],"slot_index":0}],"properties":{"Node name for S&R":"ClownOptions_SDE_Mask_Beta"},"widgets_values":[1,0,false]},{"id":1667,"type":"GrowMask","pos":[-302.060302734375,-164.22067260742188],"size":[210,82],"flags":{},"order":42,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":6360}],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[6361],"slot_index":0}],"properties":{"Node name for S&R":"GrowMask"},"widgets_values":[-10,false]},{"id":1039,"type":"ImageBlend","pos":[-769.9498901367188,220.86917114257812],"size":[210,102],"flags":{"collapsed":true},"order":39,"mode":0,"inputs":[{"name":"image1","localized_name":"image1","type":"IMAGE","link":3606},{"name":"image2","localized_name":"image2","type":"IMAGE","link":3605}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3607],"slot_index":0}],"properties":{"Node name 
for S&R":"ImageBlend"},"widgets_values":[0.5,"multiply"]},{"id":727,"type":"VAEEncodeAdvanced","pos":[-789.0958862304688,67.53204345703125],"size":[262.4812927246094,298],"flags":{"collapsed":true},"order":38,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":2101},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":2102},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":2103},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":3508},{"name":"width","type":"INT","pos":[10,160],"widget":{"name":"width"},"link":2104},{"name":"height","type":"INT","pos":[10,184],"widget":{"name":"height"},"link":2105}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[5373,5715,6201,6202,6229,6230,6412],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":[],"slot_index":1},{"name":"mask","localized_name":"mask","type":"MASK","links":[6222,6360],"slot_index":2},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[5442],"slot_index":3},{"name":"width","localized_name":"width","type":"INT","links":[],"slot_index":4},{"name":"height","localized_name":"height","type":"INT","links":[]}],"properties":{"Node name for 
S&R":"VAEEncodeAdvanced"},"widgets_values":["false",1344,768,"red",false,"16_channels"]},{"id":731,"type":"SimpleMath+","pos":[-776.4415893554688,126.82145690917969],"size":[315,98],"flags":{"collapsed":true},"order":22,"mode":0,"inputs":[{"name":"a","localized_name":"a","type":"*","shape":7,"link":2108},{"name":"b","localized_name":"b","type":"*","shape":7,"link":2109},{"name":"c","localized_name":"c","type":"*","shape":7,"link":null}],"outputs":[{"name":"INT","localized_name":"INT","type":"INT","links":null},{"name":"FLOAT","localized_name":"FLOAT","type":"FLOAT","links":[2100],"slot_index":1}],"properties":{"Node name for S&R":"SimpleMath+"},"widgets_values":["a/b"]},{"id":728,"type":"MaskToImage","pos":[-791.0198364257812,176.82147216796875],"size":[176.39999389648438,26],"flags":{"collapsed":true},"order":34,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":2106}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2103,3605],"slot_index":0}],"properties":{"Node name for S&R":"MaskToImage"},"widgets_values":[]},{"id":765,"type":"MaskToImage","pos":[2080.868896484375,-792.6943359375],"size":[182.28543090820312,26],"flags":{"collapsed":true},"order":35,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":5529}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3570],"slot_index":0}],"properties":{"Node name for S&R":"MaskToImage"},"widgets_values":[]},{"id":761,"type":"Image Comparer (rgthree)","pos":[1747.432373046875,-712.1251220703125],"size":[410.4466247558594,447.8973388671875],"flags":{},"order":65,"mode":0,"inputs":[{"name":"image_a","type":"IMAGE","dir":3,"link":2210},{"name":"image_b","type":"IMAGE","dir":3,"link":2200}],"outputs":[],"title":"Compare 
Output","properties":{"comparer_mode":"Slide"},"widgets_values":[[{"name":"A","selected":true,"url":"/api/view?filename=rgthree.compare._temp_udooi_00119_.png&type=temp&subfolder=&rand=0.4602348825653009"},{"name":"B","selected":true,"url":"/api/view?filename=rgthree.compare._temp_udooi_00120_.png&type=temp&subfolder=&rand=0.24695456359911838"}]],"color":"#232","bgcolor":"#353"},{"id":1072,"type":"StyleModelApply","pos":[591.9240112304688,151.93089294433594],"size":[262,122],"flags":{"collapsed":true},"order":37,"mode":0,"inputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","link":4980},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","link":5444},{"name":"clip_vision_output","localized_name":"clip_vision_output","type":"CLIP_VISION_OUTPUT","link":3720}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[5653],"slot_index":0}],"properties":{"Node name for S&R":"StyleModelApply"},"widgets_values":[1,"multiply"]},{"id":1073,"type":"CLIPTextEncode","pos":[575.77001953125,186.9269256591797],"size":[263.280517578125,88.73566436767578],"flags":{"collapsed":true},"order":30,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":4157}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[4650,4980],"slot_index":0}],"properties":{"Node name for 
S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[""]},{"id":1569,"type":"ClownGuides_Sync_Advanced","pos":[261.355224609375,-1000.5784912109375],"size":[315,1938],"flags":{"collapsed":true},"order":45,"mode":0,"inputs":[{"name":"guide_masked","localized_name":"guide_masked","type":"LATENT","shape":7,"link":6201},{"name":"guide_unmasked","localized_name":"guide_unmasked","type":"LATENT","shape":7,"link":6202},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":6223},{"name":"mask_sync","localized_name":"mask_sync","type":"MASK","shape":7,"link":6224},{"name":"mask_drift_x","localized_name":"mask_drift_x","type":"MASK","shape":7,"link":6225},{"name":"mask_drift_y","localized_name":"mask_drift_y","type":"MASK","shape":7,"link":6226},{"name":"mask_lure_x","localized_name":"mask_lure_x","type":"MASK","shape":7,"link":6227},{"name":"mask_lure_y","localized_name":"mask_lure_y","type":"MASK","shape":7,"link":6228},{"name":"weights_masked","localized_name":"weights_masked","type":"SIGMAS","shape":7,"link":null},{"name":"weights_unmasked","localized_name":"weights_unmasked","type":"SIGMAS","shape":7,"link":null},{"name":"syncs_masked","localized_name":"syncs_masked","type":"SIGMAS","shape":7,"link":null},{"name":"syncs_unmasked","localized_name":"syncs_unmasked","type":"SIGMAS","shape":7,"link":null},{"name":"drift_xs_masked","localized_name":"drift_xs_masked","type":"SIGMAS","shape":7,"link":null},{"name":"drift_xs_unmasked","localized_name":"drift_xs_unmasked","type":"SIGMAS","shape":7,"link":null},{"name":"drift_ys_masked","localized_name":"drift_ys_masked","type":"SIGMAS","shape":7,"link":null},{"name":"drift_ys_unmasked","localized_name":"drift_ys_unmasked","type":"SIGMAS","shape":7,"link":null},{"name":"lure_xs_masked","localized_name":"lure_xs_masked","type":"SIGMAS","shape":7,"link":null},{"name":"lure_xs_unmasked","localized_name":"lure_xs_unmasked","type":"SIGMAS","shape":7,"link":null},{"name":"lure_ys_masked","localiz
ed_name":"lure_ys_masked","type":"SIGMAS","shape":7,"link":null},{"name":"lure_ys_unmasked","localized_name":"lure_ys_unmasked","type":"SIGMAS","shape":7,"link":null},{"name":"drift_x_data","type":"FLOAT","pos":[10,800],"widget":{"name":"drift_x_data"},"link":6239},{"name":"drift_y_guide","type":"FLOAT","pos":[10,1088],"widget":{"name":"drift_y_guide"},"link":6240},{"name":"sync_masked","type":"FLOAT","pos":[10,608],"widget":{"name":"sync_masked"},"link":6241}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[6411],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuides_Sync_Advanced"},"widgets_values":[1,1,"constant","constant",0,0,-1,-1,0,1,"constant","constant",0,0,-1,-1,0.2,0,1,0,"constant","constant",0,0,-1,-1,0,0,0.2,1,0,"constant","constant",0,0,-1,-1,0,0,"constant","constant",0,0,-1,-1,0,0,"constant","constant",0,0,-1,-1,0,"y -> x",false,false,false,false,false,false]},{"id":1571,"type":"Reroute","pos":[141.35520935058594,-1030.5784912109375],"size":[75,26],"flags":{},"order":41,"mode":0,"inputs":[{"name":"","type":"*","link":6222}],"outputs":[{"name":"","type":"MASK","links":[6223,6224,6225,6226,6227,6228,6342],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1664,"type":"VAEDecode","pos":[1440,-1320],"size":[140,46],"flags":{"collapsed":true},"order":55,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":6354},{"name":"vae","localized_name":"vae","type":"VAE","link":6353}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[6355],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[]},{"id":1368,"type":"Image Comparer 
(rgthree)","pos":[1744.9150390625,-199.16920471191406],"size":[410.4466247558594,447.8973388671875],"flags":{},"order":62,"mode":0,"inputs":[{"name":"image_a","type":"IMAGE","dir":3,"link":4997},{"name":"image_b","type":"IMAGE","dir":3,"link":5000}],"outputs":[],"title":"Compare Patch","properties":{"comparer_mode":"Slide"},"widgets_values":[[{"name":"A","selected":true,"url":"/api/view?filename=rgthree.compare._temp_sgbfj_00119_.png&type=temp&subfolder=&rand=0.4913573783056806"},{"name":"B","selected":true,"url":"/api/view?filename=rgthree.compare._temp_sgbfj_00120_.png&type=temp&subfolder=&rand=0.2366457814945162"}]],"color":"#232","bgcolor":"#353"},{"id":1665,"type":"PreviewImage","pos":[1430,-1270],"size":[343.7617492675781,360.52777099609375],"flags":{},"order":57,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":6355}],"outputs":[],"properties":{"Node name for S&R":"PreviewImage"},"widgets_values":[]},{"id":1673,"type":"Note","pos":[1824.9287109375,-1010.687744140625],"size":[322.34954833984375,88],"flags":{},"order":3,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Preview of first stage output: sometimes it can be worth manually (or automatically, using DINO, etc.) 
adjusting your mask for the second stage, based on this output."],"color":"#432","bgcolor":"#653"},{"id":1539,"type":"GrowMask","pos":[573.4215698242188,-1145.86767578125],"size":[214.5684051513672,82],"flags":{},"order":46,"mode":4,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":6342}],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[6343,6344,6345,6346,6347,6348],"slot_index":0}],"properties":{"Node name for S&R":"GrowMask"},"widgets_values":[10,false]},{"id":1383,"type":"Note","pos":[216.7359161376953,340.25775146484375],"size":[291.67218017578125,232.2296142578125],"flags":{},"order":4,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["eta > 0.0 means you are using SDE/ancestral sampling. With this guide mode you will generally want to use bongmath = true.\n\nSamplers such as res_2s and res_3s will be very accurate. Try res_5s and res_8s if you really want to go crazy with it. They run 2x (2s), 3x (3s), etc slower than Euler.\n\nres_2m and 3m will be fast and also good, and run at the same speed as Euler.\n\neta_substep will increase the power of bongmath. If it is set to 0.0, you can turn bongmath off without any effect."],"color":"#432","bgcolor":"#653"},{"id":1380,"type":"Note","pos":[544.9375610351562,342.0576477050781],"size":[290.1026611328125,231.5842742919922],"flags":{},"order":5,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Setting denoise to a negative value is equivalent to just scaling it. For example:\n\nDenoise = -0.90 is the same as multiplying every sigma value in the entire schedule by 0.9.\n\nI find this is a lot easier to control than the regular denoise scale. The difference between -0.95 and -0.9 is much more predictable than with 0.95 and 0.9. Most of us have seen how different denoise 0.8 might be with Karras vs. exponential. \n\nTry a denoise between -0.95 and -0.85. 
"],"color":"#432","bgcolor":"#653"},{"id":759,"type":"ImageCompositeMasked","pos":[1697.19140625,-790.8740844726562],"size":[210,186],"flags":{"collapsed":true},"order":63,"mode":0,"inputs":[{"name":"destination","localized_name":"destination","type":"IMAGE","link":2211},{"name":"source","localized_name":"source","type":"IMAGE","link":2198},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":6447},{"name":"x","type":"INT","pos":[10,76],"widget":{"name":"x"},"link":2206},{"name":"y","type":"INT","pos":[10,100],"widget":{"name":"y"},"link":2207}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2200,4185],"slot_index":0}],"properties":{"Node name for S&R":"ImageCompositeMasked"},"widgets_values":[712,800,false]},{"id":1552,"type":"ClownOptions_SDE_Beta","pos":[-275.5662841796875,211.60325622558594],"size":[315,266],"flags":{"collapsed":true},"order":6,"mode":0,"inputs":[{"name":"etas","localized_name":"etas","type":"SIGMAS","shape":7,"link":null},{"name":"etas_substep","localized_name":"etas_substep","type":"SIGMAS","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[],"slot_index":0}],"properties":{"Node name for S&R":"ClownOptions_SDE_Beta"},"widgets_values":["gaussian","gaussian","hard","hard",1,1,-1,"fixed"]},{"id":1619,"type":"LoadImage","pos":[79.17283630371094,1820.8131103515625],"size":[315,314],"flags":{},"order":7,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":null},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for 
S&R":"LoadImage"},"widgets_values":["9319202660b0e794-photo.JPG","image"]},{"id":1476,"type":"FluxLoader","pos":[-1417.3287353515625,-846.1827392578125],"size":[385.17449951171875,282],"flags":{},"order":8,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[5439],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[5440],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[5447],"slot_index":2},{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","links":[5443,5993],"slot_index":3},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","links":[5444,5994],"slot_index":4}],"properties":{"Node name for S&R":"FluxLoader"},"widgets_values":["flux1-dev.sft","fp8_e4m3fn_fast","clip_l_flux.safetensors","t5xxl_fp8_e4m3fn_scaled.safetensors","ae.sft","sigclip_vision_patch14_384.safetensors","flux1-redux-dev.safetensors"]},{"id":1687,"type":"Note","pos":[-101.33948516845703,339.7750244140625],"size":[286.97723388671875,180.28128051757812],"flags":{},"order":9,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["The cycles node causes the connected sampler to loop between sampling and unsampling steps. (Unsampling is running the sampler backwards, where it predicts the noise that would lead to a given output).\n\nWhen unsample_eta is set to -1, it simply uses the same settings for eta as in the connected node. 
"],"color":"#432","bgcolor":"#653"},{"id":745,"type":"VAEDecode","pos":[1297.53369140625,-791.137939453125],"size":[140,46],"flags":{"collapsed":true},"order":58,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":6478},{"name":"vae","localized_name":"vae","type":"VAE","link":2153}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2201,2241,3568,4997],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[]},{"id":1689,"type":"Note","pos":[525.9268798828125,-1349.89794921875],"size":[263.00439453125,88],"flags":{},"order":10,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Expanding the mask for the second pass can sometimes help prevent seams."],"color":"#432","bgcolor":"#653"},{"id":1688,"type":"Note","pos":[-838.7593994140625,-1316.05126953125],"size":[274.47601318359375,104.34856414794922],"flags":{},"order":11,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["ReFluxPatcher is required to use the \"Style\" nodes. Different \"Re...Patcher\" nodes are available for many other models, from SD1.5/SDXL to SD3.5, HiDream, AuraFlow, Chroma, WAN, and LTXV."],"color":"#432","bgcolor":"#653"},{"id":1678,"type":"Note","pos":[-422.92510986328125,-333.6911926269531],"size":[324.0018005371094,113.63665771484375],"flags":{},"order":12,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["ReduxAdvanced is used to help get things on track. Bypass if you're having problems with it disrupting character likeness.\n\nThe SDE Mask ensures SDE noise is used only in the masked area, limiting change in unmasked areas that could lead to seams. 
"],"color":"#432","bgcolor":"#653"},{"id":1572,"type":"ClownGuides_Sync_Advanced","pos":[581.355224609375,-1000.5784912109375],"size":[315,1878],"flags":{"collapsed":true},"order":50,"mode":0,"inputs":[{"name":"guide_masked","localized_name":"guide_masked","type":"LATENT","shape":7,"link":6229},{"name":"guide_unmasked","localized_name":"guide_unmasked","type":"LATENT","shape":7,"link":6230},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":6343},{"name":"mask_sync","localized_name":"mask_sync","type":"MASK","shape":7,"link":6344},{"name":"mask_drift_x","localized_name":"mask_drift_x","type":"MASK","shape":7,"link":6345},{"name":"mask_drift_y","localized_name":"mask_drift_y","type":"MASK","shape":7,"link":6346},{"name":"mask_lure_x","localized_name":"mask_lure_x","type":"MASK","shape":7,"link":6347},{"name":"mask_lure_y","localized_name":"mask_lure_y","type":"MASK","shape":7,"link":6348},{"name":"weights_masked","localized_name":"weights_masked","type":"SIGMAS","shape":7,"link":null},{"name":"weights_unmasked","localized_name":"weights_unmasked","type":"SIGMAS","shape":7,"link":null},{"name":"syncs_masked","localized_name":"syncs_masked","type":"SIGMAS","shape":7,"link":null},{"name":"syncs_unmasked","localized_name":"syncs_unmasked","type":"SIGMAS","shape":7,"link":null},{"name":"drift_xs_masked","localized_name":"drift_xs_masked","type":"SIGMAS","shape":7,"link":null},{"name":"drift_xs_unmasked","localized_name":"drift_xs_unmasked","type":"SIGMAS","shape":7,"link":null},{"name":"drift_ys_masked","localized_name":"drift_ys_masked","type":"SIGMAS","shape":7,"link":null},{"name":"drift_ys_unmasked","localized_name":"drift_ys_unmasked","type":"SIGMAS","shape":7,"link":null},{"name":"lure_xs_masked","localized_name":"lure_xs_masked","type":"SIGMAS","shape":7,"link":null},{"name":"lure_xs_unmasked","localized_name":"lure_xs_unmasked","type":"SIGMAS","shape":7,"link":null},{"name":"lure_ys_masked","localized_name":"lure_ys_masked","type":"SIGMAS","shap
e":7,"link":null},{"name":"lure_ys_unmasked","localized_name":"lure_ys_unmasked","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[6414],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuides_Sync_Advanced"},"widgets_values":[0,1,"constant","constant",0,0,-1,-1,0,1,"constant","constant",0,0,-1,-1,0,0,1,0,"constant","constant",0,0,-1,-1,0,0,0,1,0,"constant","constant",0,0,-1,-1,0,0,"constant","constant",0,0,-1,-1,0,0,"constant","constant",0,0,-1,-1,0,"y -> x",false,false,false,false,false,false]},{"id":1674,"type":"Note","pos":[170.8737030029297,-1390.4803466796875],"size":[322.6287841796875,128.15802001953125],"flags":{},"order":13,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Activate the style nodes if you are having issues with color, detail, light, blurriness or pixelation drifting too far from your source input.\n\nIf end_step is too high, you may get faint halos and an oversharpened look."],"color":"#432","bgcolor":"#653"},{"id":1524,"type":"ReFluxPatcher","pos":[-809.3073120117188,-985.41064453125],"size":[210,82],"flags":{},"order":48,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":6383}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[5845],"slot_index":0}],"properties":{"Node name for 
S&R":"ReFluxPatcher"},"widgets_values":["float64",true]},{"id":1690,"type":"ClownsharkChainsampler_Beta","pos":[865.4187622070312,-518.0064086914062],"size":[281.7781677246094,571.74853515625],"flags":{},"order":56,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":6479},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":6476},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":6477},{"name":"options 2","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[6478],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0,"multistep/res_3m",-1,1,"resample",false]},{"id":1479,"type":"ClownsharkChainsampler_Beta","pos":[536.1533203125,-510.75872802734375],"size":[288.1370544433594,571.74853515625],"flags":{},"order":54,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":6380},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":6415},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":6016},{"name":"options 2","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[6479],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0,"exponential/res_2s",2,1,"resample",true]},{"id":1693,"type":"Note","pos":[-858.5514526367188,-640.8011474609375],"size":[276.7918701171875,88],"flags":{},"order":14,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Padding can be very important. Some models/loras/IPadapter embeds etc. are going to respond very differently if the shot is close up vs. 
farther away."],"color":"#432","bgcolor":"#653"},{"id":1525,"type":"ClownGuide_Style_Beta","pos":[251.35520935058594,-950.5784912109375],"size":[252.0535430908203,286],"flags":{},"order":49,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":5715},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":6411}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[6051],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuide_Style_Beta"},"widgets_values":["positive","WCT",1,1,"constant",0,-1,false]},{"id":1672,"type":"ClownGuide_Style_Beta","pos":[561.355224609375,-950.5784912109375],"size":[252.0535430908203,286],"flags":{},"order":52,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":6412},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":6414}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[6415,6476],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuide_Style_Beta"},"widgets_values":["positive","WCT",1,1,"constant",0,5,false]},{"id":726,"type":"Mask Bounding Box Aspect 
Ratio","pos":[-828.6614990234375,-412.50946044921875],"size":[252,250],"flags":{"collapsed":false},"order":29,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","shape":7,"link":5054},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":5021},{"name":"aspect_ratio","type":"FLOAT","pos":[10,204],"widget":{"name":"aspect_ratio"},"link":2100}],"outputs":[{"name":"image","localized_name":"image","type":"IMAGE","links":[2101,2102,3606,3721,4996,5995],"slot_index":0},{"name":"mask","localized_name":"mask","type":"MASK","links":[2106,5529],"slot_index":1},{"name":"mask_blurred","localized_name":"mask_blurred","type":"MASK","links":[6447],"slot_index":2},{"name":"x","localized_name":"x","type":"INT","links":[2206],"slot_index":3},{"name":"y","localized_name":"y","type":"INT","links":[2207],"slot_index":4},{"name":"width","localized_name":"width","type":"INT","links":[2204],"slot_index":5},{"name":"height","localized_name":"height","type":"INT","links":[2205],"slot_index":6}],"properties":{"Node name for S&R":"Mask Bounding Box Aspect Ratio"},"widgets_values":[100,40,1.75,false]},{"id":1677,"type":"Note","pos":[-439.5185241699219,-738.3756713867188],"size":[290.3874816894531,88],"flags":{},"order":15,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Try setting both drift values to 0.0 or 0.2 as a starting point.\n"],"color":"#432","bgcolor":"#653"},{"id":1694,"type":"Note","pos":[-441.5133056640625,-999.14990234375],"size":[291.2616882324219,189.98562622070312],"flags":{},"order":16,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Increase character likeness by: \n\nDecreasing \"Similarity\"\nIncreasing \"Drift Toward Target\"\nIncreasing cycles\nIncreasing eta (max 1.0)\nIncreasing denoise\n\nIncrease adherence to the input image by:\n\nDoing the opposite of any of the above\nIncreasing \"Drift Toward Guide\"\nEnabling the ReduxAdvanced 
node\n"],"color":"#432","bgcolor":"#653"},{"id":1277,"type":"SharkOptions_GuideCond_Beta","pos":[575.9444580078125,221.88970947265625],"size":[315,98],"flags":{"collapsed":true},"order":40,"mode":0,"inputs":[{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":5653},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":4650},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[5493],"slot_index":0}],"properties":{"Node name for S&R":"SharkOptions_GuideCond_Beta"},"widgets_values":[1]},{"id":1548,"type":"ReduxAdvanced","pos":[-69.81456756591797,-498.3502502441406],"size":[248.6250457763672,234],"flags":{},"order":36,"mode":4,"inputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","link":6422},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","link":5994},{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","link":5993},{"name":"image","localized_name":"image","type":"IMAGE","link":5995},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[6421],"slot_index":0},{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":null},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"ReduxAdvanced"},"widgets_values":[3,"area","center crop 
(square)",1,0.1]},{"id":1446,"type":"ClownsharKSampler_Beta","pos":[214.812255859375,-508.00537109375],"size":[277.5089111328125,735.1378784179688],"flags":{},"order":53,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":5846},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":6421},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":5373},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":6051},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":5493},{"name":"options 2","type":"OPTIONS","link":5776},{"name":"options 3","type":"OPTIONS","link":6402},{"name":"options 4","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[6380],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":[6354],"slot_index":1},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":[1,"exponential/res_2s","bong_tangent",30,1,0.55,1,100,"fixed","standard",true],"color":"#332922","bgcolor":"#593930"},{"id":1454,"type":"ClownOptions_Cycles_Beta","pos":[-74.8967514038086,24.043270111083984],"size":[261.7955627441406,202],"flags":{},"order":17,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[6402],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownOptions_Cycles_Beta"},"widgets_values":[20,1,-1,"none",-1,1,true]},{"id":1573,"type":"PrimitiveFloat","pos":[10.393571853637695,-834.4251708984375],"size":[210,58],"flags":{},"order":18,"mode":0,"inputs":[],"outputs":[{"name":"FLOAT","localized_name":"FLOAT","type":"FLOAT","links":[6239],"slot_index":0}],"title":"Drift Toward Target","properties":{"Node name for S&R":"PrimitiveFloat"},"widgets_values":[0.2]},{"id":1574,"type":"PrimitiveFloat","pos":[11.355203628540039,-720.5784912109375],"size":[210,58],"flags":{},"order":19,"mode":0,"inputs":[],"outputs":[{"name":"FLOAT","localized_name":"FLOAT","type":"FLOAT","links":[6240],"slot_index":0}],"title":"Drift Toward Guide","properties":{"Node name for S&R":"PrimitiveFloat"},"widgets_values":[0.2]},{"id":1477,"type":"LoraLoader","pos":[-1007.4993896484375,-844.936279296875],"size":[315,126],"flags":{},"order":23,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":5439},{"name":"clip","localized_name":"clip","type":"CLIP","link":5440}],"outputs":[{"name":"MODEL","localized_name":"MODEL","type":"MODEL","links":[6397],"slot_index":0},{"name":"CLIP","localized_name":"CLIP","type":"CLIP","links":[6398],"slot_index":1}],"properties":{"Node name for S&R":"LoraLoader"},"widgets_values":["FLUX/Kirsten_Dunst_Flux_V1.safetensors",1,1]},{"id":1556,"type":"CLIPTextEncode","pos":[-392.6881408691406,-498.2940979003906],"size":[289.0962829589844,113.79679870605469],"flags":{"collapsed":false},"order":31,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":6103}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[6422],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["kirsten 
dunst"],"color":"#2a363b","bgcolor":"#3f5159"},{"id":1451,"type":"LoadImage","pos":[-1267.7357177734375,-412.5631103515625],"size":[315,314],"flags":{},"order":20,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[6518],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":[6519],"slot_index":1}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["clipspace/clipspace-mask-54212258.30000001.png [input]","image"]},{"id":1040,"type":"PreviewImage","pos":[-1267.6248779296875,-30.252229690551758],"size":[304.98114013671875,265.58380126953125],"flags":{},"order":44,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":3607}],"outputs":[],"properties":{"Node name for S&R":"PreviewImage"},"widgets_values":[]},{"id":1698,"type":"Note","pos":[-1623.859375,-355.951416015625],"size":[276.7918701171875,88],"flags":{},"order":21,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Draw a mask over the face in the Load Image node. 
Ideally, try stopping precisely at the hairline, and just above or just below the chin."],"color":"#432","bgcolor":"#653"}],"links":[[2100,731,1,726,2,"FLOAT"],[2101,726,0,727,0,"IMAGE"],[2102,726,0,727,1,"IMAGE"],[2103,728,0,727,2,"IMAGE"],[2104,729,0,727,5,"INT"],[2105,729,1,727,6,"INT"],[2106,726,1,728,0,"MASK"],[2108,729,0,731,0,"*"],[2109,729,1,731,1,"*"],[2153,14,0,745,1,"VAE"],[2198,758,0,759,1,"IMAGE"],[2200,759,0,761,1,"IMAGE"],[2201,745,0,758,0,"IMAGE"],[2204,726,5,758,1,"INT"],[2205,726,6,758,2,"INT"],[2206,726,3,759,3,"INT"],[2207,726,4,759,4,"INT"],[2210,725,0,761,0,"IMAGE"],[2211,725,0,759,0,"IMAGE"],[2241,745,0,744,0,"IMAGE"],[3508,14,0,727,4,"VAE"],[3568,745,0,1022,0,"IMAGE"],[3569,1022,0,1024,0,"IMAGE"],[3570,765,0,1022,1,"IMAGE"],[3605,728,0,1039,1,"IMAGE"],[3606,726,0,1039,0,"IMAGE"],[3607,1039,0,1040,0,"IMAGE"],[3720,1071,0,1072,2,"CLIP_VISION_OUTPUT"],[3721,726,0,1071,1,"IMAGE"],[4157,490,0,1073,0,"CLIP"],[4185,759,0,1162,0,"*"],[4186,1162,0,1161,0,"IMAGE"],[4650,1073,0,1277,1,"CONDITIONING"],[4980,1073,0,1072,0,"CONDITIONING"],[4996,726,0,1369,0,"IMAGE"],[4997,745,0,1368,0,"IMAGE"],[4998,729,0,1369,1,"INT"],[4999,729,1,1369,2,"INT"],[5000,1369,0,1368,1,"IMAGE"],[5021,1407,0,726,1,"MASK"],[5054,725,0,726,0,"IMAGE"],[5373,727,0,1446,3,"LATENT"],[5439,1476,0,1477,0,"MODEL"],[5440,1476,1,1477,1,"CLIP"],[5442,727,3,1478,1,"LATENT"],[5443,1476,3,1071,0,"CLIP_VISION"],[5444,1476,4,1072,1,"STYLE_MODEL"],[5447,1476,2,14,0,"*"],[5493,1277,0,1446,6,"OPTIONS"],[5529,726,1,765,0,"MASK"],[5653,1072,0,1277,0,"CONDITIONING"],[5715,727,0,1525,0,"LATENT"],[5776,1516,0,1446,7,"OPTIONS"],[5845,1524,0,13,0,"*"],[5846,13,0,1446,0,"MODEL"],[5993,1476,3,1548,2,"CLIP_VISION"],[5994,1476,4,1548,1,"STYLE_MODEL"],[5995,726,0,1548,3,"IMAGE"],[6016,1516,0,1479,6,"OPTIONS"],[6051,1525,0,1446,5,"GUIDES"],[6103,490,0,1556,0,"CLIP"],[6201,727,0,1569,0,"LATENT"],[6202,727,0,1569,1,"LATENT"],[6222,727,2,1571,0,"*"],[6223,1571,0,1569,2,"MASK"],[6224,1571,0,1569,3,"MASK"],[6225,157
1,0,1569,4,"MASK"],[6226,1571,0,1569,5,"MASK"],[6227,1571,0,1569,6,"MASK"],[6228,1571,0,1569,7,"MASK"],[6229,727,0,1572,0,"LATENT"],[6230,727,0,1572,1,"LATENT"],[6239,1573,0,1569,20,"FLOAT"],[6240,1574,0,1569,21,"FLOAT"],[6241,1575,0,1569,22,"FLOAT"],[6342,1571,0,1539,0,"MASK"],[6343,1539,0,1572,2,"MASK"],[6344,1539,0,1572,3,"MASK"],[6345,1539,0,1572,4,"MASK"],[6346,1539,0,1572,5,"MASK"],[6347,1539,0,1572,6,"MASK"],[6348,1539,0,1572,7,"MASK"],[6353,14,0,1664,1,"VAE"],[6354,1446,1,1664,0,"LATENT"],[6355,1664,0,1665,0,"IMAGE"],[6360,727,2,1667,0,"MASK"],[6361,1667,0,1516,0,"MASK"],[6380,1446,0,1479,4,"LATENT"],[6383,1478,0,1524,0,"MODEL"],[6396,1279,0,1478,0,"MODEL"],[6397,1477,0,1279,0,"MODEL"],[6398,1477,1,490,0,"*"],[6402,1454,0,1446,8,"OPTIONS"],[6411,1569,0,1525,3,"GUIDES"],[6412,727,0,1672,0,"LATENT"],[6414,1572,0,1672,3,"GUIDES"],[6415,1672,0,1479,5,"GUIDES"],[6421,1548,0,1446,1,"CONDITIONING"],[6422,1556,0,1548,0,"CONDITIONING"],[6447,726,2,759,2,"MASK"],[6476,1672,0,1690,5,"GUIDES"],[6477,1516,0,1690,6,"OPTIONS"],[6478,1690,0,745,0,"LATENT"],[6479,1479,0,1690,4,"LATENT"],[6518,1451,0,725,0,"*"],[6519,1451,1,1407,0,"*"]],"groups":[{"id":1,"title":"Prepare Input","bounding":[-1310.92529296875,-489.52618408203125,755.7755737304688,762.867431640625],"color":"#3f789e","font_size":24,"flags":{}},{"id":2,"title":"Patch and 
Stitch","bounding":[1250.695068359375,-877.5091552734375,1320.4892578125,1148.6859130859375],"color":"#3f789e","font_size":24,"flags":{}},{"id":3,"title":"Loaders","bounding":[-1438.07421875,-1179.8375244140625,881.3677368164062,646.2952880859375],"color":"#3f789e","font_size":24,"flags":{}},{"id":5,"title":"Sampling","bounding":[-510.548828125,-602.9613037109375,1686.064208984375,874.1248168945312],"color":"#3f789e","font_size":24,"flags":{}},{"id":6,"title":"Guides","bounding":[-37.0714225769043,-1229.123046875,888.9586791992188,587.7683715820312],"color":"#3f789e","font_size":24,"flags":{}}],"config":{},"extra":{"ds":{"scale":1.2100000000000002,"offset":[4241.572246240033,1450.4856076460571]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0,"ue_links":[],"VHS_MetadataImage":true,"VHS_KeepIntermediate":true},"version":0.4} ================================================ FILE: example_workflows/flux faceswap.json ================================================ {"last_node_id":1153,"last_link_id":4163,"nodes":[{"id":758,"type":"ImageResize+","pos":[1987.2191162109375,-351.3092041015625],"size":[210,218],"flags":{"collapsed":true},"order":31,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","link":2201},{"name":"width","type":"INT","pos":[10,76],"widget":{"name":"width"},"link":2204},{"name":"height","type":"INT","pos":[10,100],"widget":{"name":"height"},"link":2205}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2198],"slot_index":0},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for 
S&R":"ImageResize+"},"widgets_values":[512,512,"lanczos","stretch","always",0]},{"id":490,"type":"Reroute","pos":[-693.37158203125,-93.71382904052734],"size":[75,26],"flags":{},"order":10,"mode":0,"inputs":[{"name":"","type":"*","link":4149}],"outputs":[{"name":"","type":"CLIP","links":[4157],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":728,"type":"MaskToImage","pos":[219.2652130126953,854.9601440429688],"size":[176.39999389648438,26],"flags":{"collapsed":true},"order":14,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":2106}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2103,3605],"slot_index":0}],"properties":{"Node name for S&R":"MaskToImage"},"widgets_values":[]},{"id":765,"type":"MaskToImage","pos":[2707.509765625,226.7833709716797],"size":[182.28543090820312,26],"flags":{"collapsed":true},"order":19,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":2233}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3570],"slot_index":0}],"properties":{"Node name for S&R":"MaskToImage"},"widgets_values":[]},{"id":1024,"type":"PreviewImage","pos":[2707.52197265625,-277.8296203613281],"size":[413.7582092285156,445.8081359863281],"flags":{},"order":36,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":3569}],"outputs":[],"properties":{"Node name for S&R":"PreviewImage"},"widgets_values":[],"color":"#332922","bgcolor":"#593930"},{"id":744,"type":"SaveImage","pos":[1807.2188720703125,-291.30926513671875],"size":[424.53594970703125,455.0760192871094],"flags":{},"order":33,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2241}],"outputs":[],"title":"Save Patch","properties":{"Node name for 
S&R":"SaveImage","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["ComfyUI"],"color":"#332922","bgcolor":"#593930"},{"id":1040,"type":"PreviewImage","pos":[-195.9951934814453,694.224609375],"size":[304.98114013671875,265.58380126953125],"flags":{},"order":23,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":3607}],"outputs":[],"properties":{"Node name for S&R":"PreviewImage"},"widgets_values":[]},{"id":731,"type":"SimpleMath+","pos":[219.2652130126953,804.9601440429688],"size":[315,98],"flags":{"collapsed":true},"order":5,"mode":0,"inputs":[{"name":"a","localized_name":"a","type":"*","shape":7,"link":2108},{"name":"b","localized_name":"b","type":"*","shape":7,"link":2109},{"name":"c","localized_name":"c","type":"*","shape":7,"link":null}],"outputs":[{"name":"INT","localized_name":"INT","type":"INT","links":null},{"name":"FLOAT","localized_name":"FLOAT","type":"FLOAT","links":[2100],"slot_index":1}],"properties":{"Node name for S&R":"SimpleMath+"},"widgets_values":["a/b"]},{"id":14,"type":"Reroute","pos":[-693.37158203125,-53.713836669921875],"size":[75,26],"flags":{},"order":7,"mode":0,"inputs":[{"name":"","type":"*","link":4146}],"outputs":[{"name":"","type":"VAE","links":[2153,3508],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1039,"type":"ImageBlend","pos":[219.2652130126953,954.9601440429688],"size":[210,102],"flags":{"collapsed":true},"order":17,"mode":0,"inputs":[{"name":"image1","localized_name":"image1","type":"IMAGE","link":3606},{"name":"image2","localized_name":"image2","type":"IMAGE","link":3605}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3607],"slot_index":0}],"properties":{"Node name for 
S&R":"ImageBlend"},"widgets_values":[0.5,"multiply"]},{"id":1022,"type":"ImageBlend","pos":[2710.7275390625,275.91143798828125],"size":[210,102],"flags":{"collapsed":true},"order":34,"mode":0,"inputs":[{"name":"image1","localized_name":"image1","type":"IMAGE","link":3568},{"name":"image2","localized_name":"image2","type":"IMAGE","link":3570}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3569],"slot_index":0}],"properties":{"Node name for S&R":"ImageBlend"},"widgets_values":[0.5,"multiply"]},{"id":726,"type":"Mask Bounding Box Aspect Ratio","pos":[216.9475860595703,323.4888610839844],"size":[252,250],"flags":{"collapsed":false},"order":11,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","shape":7,"link":2338},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":4158},{"name":"aspect_ratio","type":"FLOAT","pos":[10,204],"widget":{"name":"aspect_ratio"},"link":2100}],"outputs":[{"name":"image","localized_name":"image","type":"IMAGE","links":[2101,2102,2209,3606,3721],"slot_index":0},{"name":"mask","localized_name":"mask","type":"MASK","links":[2106],"slot_index":1},{"name":"mask_blurred","localized_name":"mask_blurred","type":"MASK","links":[3884],"slot_index":2},{"name":"x","localized_name":"x","type":"INT","links":[2206],"slot_index":3},{"name":"y","localized_name":"y","type":"INT","links":[2207],"slot_index":4},{"name":"width","localized_name":"width","type":"INT","links":[2204],"slot_index":5},{"name":"height","localized_name":"height","type":"INT","links":[2205],"slot_index":6}],"properties":{"Node name for S&R":"Mask Bounding Box Aspect Ratio"},"widgets_values":[100,40,1.75,false]},{"id":760,"type":"SaveImage","pos":[1807.2188720703125,218.6908721923828],"size":[418.26055908203125,456.04608154296875],"flags":{},"order":37,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2199}],"outputs":[],"title":"Save 
Output","properties":{},"widgets_values":["ComfyUI"],"color":"#232","bgcolor":"#353"},{"id":761,"type":"Image Comparer (rgthree)","pos":[2257.2197265625,228.6908416748047],"size":[410.4466247558594,447.8973388671875],"flags":{},"order":38,"mode":0,"inputs":[{"name":"image_a","type":"IMAGE","dir":3,"link":2210},{"name":"image_b","type":"IMAGE","dir":3,"link":2200}],"outputs":[],"title":"Compare Output","properties":{"comparer_mode":"Slide"},"widgets_values":[[{"name":"A","selected":true,"url":"/api/view?filename=rgthree.compare._temp_dluyj_00015_.png&type=temp&subfolder=&rand=0.8734695511873163"},{"name":"B","selected":true,"url":"/api/view?filename=rgthree.compare._temp_dluyj_00016_.png&type=temp&subfolder=&rand=0.23774072803641766"}]],"color":"#232","bgcolor":"#353"},{"id":1074,"type":"ClownOptions_SDE_Beta","pos":[790.0368041992188,-161.93728637695312],"size":[315,266],"flags":{"collapsed":true},"order":0,"mode":0,"inputs":[{"name":"etas","localized_name":"etas","type":"SIGMAS","shape":7,"link":null},{"name":"etas_substep","localized_name":"etas_substep","type":"SIGMAS","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownOptions_SDE_Beta"},"widgets_values":["gaussian","gaussian","hard","hard",0.5,0.75,-1,"fixed"]},{"id":13,"type":"Reroute","pos":[-693.37158203125,-133.7138214111328],"size":[75,26],"flags":{},"order":26,"mode":0,"inputs":[{"name":"","type":"*","link":4163}],"outputs":[{"name":"","type":"MODEL","links":[3812],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":745,"type":"VAEDecode","pos":[1818.999755859375,-349.32073974609375],"size":[140,46],"flags":{"collapsed":true},"order":30,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":4031},{"name":"vae","localized_name":"vae","type":"VAE","link":2153}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2201,2208,2241,3568],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[]},{"id":727,"type":"VAEEncodeAdvanced","pos":[219.2652130126953,904.9601440429688],"size":[262.4812927246094,298],"flags":{"collapsed":true},"order":16,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":2101},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":2102},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":2103},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":3508},{"name":"width","type":"INT","pos":[10,160],"widget":{"name":"width"},"link":2104},{"name":"height","type":"INT","pos":[10,184],"widget":{"name":"height"},"link":2105}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[3602,3603,3700,3785,3786,4097],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":[],"slot_index":1},{"name":"mask","localized_name":"mask","type":"MASK","links":[2233,3604,3901],"slot_index":2},{"name":"empty_latent","localized_name":"empty_lat
ent","type":"LATENT","links":[2125],"slot_index":3},{"name":"width","localized_name":"width","type":"INT","links":[],"slot_index":4},{"name":"height","localized_name":"height","type":"INT","links":[]}],"properties":{"Node name for S&R":"VAEEncodeAdvanced"},"widgets_values":["false",1344,768,"red",false,"16_channels"]},{"id":729,"type":"SetImageSize","pos":[257.9150695800781,633.8616333007812],"size":[210,102],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[{"name":"width","localized_name":"width","type":"INT","links":[2104,2108],"slot_index":0},{"name":"height","localized_name":"height","type":"INT","links":[2105,2109],"slot_index":1}],"title":"Inpaint Tile Size","properties":{"Node name for S&R":"SetImageSize"},"widgets_values":[1024,1024]},{"id":1072,"type":"StyleModelApply","pos":[618.7158813476562,-201.9373016357422],"size":[262,122],"flags":{"collapsed":true},"order":15,"mode":0,"inputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","link":3724},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","link":4151},{"name":"clip_vision_output","localized_name":"clip_vision_output","type":"CLIP_VISION_OUTPUT","link":3720}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[4088,4102],"slot_index":0}],"properties":{"Node name for S&R":"StyleModelApply"},"widgets_values":[1,"multiply"]},{"id":1071,"type":"CLIPVisionEncode","pos":[618.708251953125,-160.76882934570312],"size":[253.60000610351562,78],"flags":{"collapsed":true},"order":13,"mode":0,"inputs":[{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","link":4152},{"name":"image","localized_name":"image","type":"IMAGE","link":3721}],"outputs":[{"name":"CLIP_VISION_OUTPUT","localized_name":"CLIP_VISION_OUTPUT","type":"CLIP_VISION_OUTPUT","links":[3720],"slot_index":0}],"properties":{"Node name for 
S&R":"CLIPVisionEncode"},"widgets_values":["center"]},{"id":1152,"type":"FluxLoader","pos":[-1424.1221923828125,-136.28652954101562],"size":[315,282],"flags":{},"order":2,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[4144],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[4150],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[4146],"slot_index":2},{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","links":[4152],"slot_index":3},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","links":[4151],"slot_index":4}],"properties":{"Node name for S&R":"FluxLoader"},"widgets_values":["flux1-dev.sft","fp8_e4m3fn_fast","clip_l_flux.safetensors","t5xxl_fp8_e4m3fn_scaled.safetensors","ae.sft","sigclip_vision_patch14_384.safetensors","flux1-redux-dev.safetensors"]},{"id":1145,"type":"SharkOptions_GuideCond_Beta","pos":[623.8969116210938,-288.85443115234375],"size":[315,98],"flags":{"collapsed":true},"order":18,"mode":0,"inputs":[{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":4088},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":4087},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[4086,4089],"slot_index":0}],"properties":{"Node name for S&R":"SharkOptions_GuideCond_Beta"},"widgets_values":[1]},{"id":762,"type":"Image Comparer (rgthree)","pos":[2254.142822265625,-285.88934326171875],"size":[402.1800842285156,455.1059875488281],"flags":{},"order":32,"mode":0,"inputs":[{"name":"image_a","type":"IMAGE","dir":3,"link":2208},{"name":"image_b","type":"IMAGE","dir":3,"link":2209}],"outputs":[],"title":"Compare Inpaint 
Patch","properties":{"comparer_mode":"Slide"},"widgets_values":[[{"name":"A","selected":true,"url":"/api/view?filename=rgthree.compare._temp_glyrv_00015_.png&type=temp&subfolder=&rand=0.6304345035966803"},{"name":"B","selected":true,"url":"/api/view?filename=rgthree.compare._temp_glyrv_00016_.png&type=temp&subfolder=&rand=0.03317535764596258"}]],"color":"#332922","bgcolor":"#593930"},{"id":1073,"type":"CLIPTextEncode","pos":[618.718017578125,-243.58985900878906],"size":[263.280517578125,88.73566436767578],"flags":{"collapsed":true},"order":12,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":4157}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[3724,4087],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[""]},{"id":759,"type":"ImageCompositeMasked","pos":[2182.82080078125,-351.82415771484375],"size":[210,186],"flags":{"collapsed":true},"order":35,"mode":0,"inputs":[{"name":"destination","localized_name":"destination","type":"IMAGE","link":2211},{"name":"source","localized_name":"source","type":"IMAGE","link":2198},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":3884},{"name":"x","type":"INT","pos":[10,76],"widget":{"name":"x"},"link":2206},{"name":"y","type":"INT","pos":[10,100],"widget":{"name":"y"},"link":2207}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2199,2200],"slot_index":0}],"properties":{"Node name for S&R":"ImageCompositeMasked"},"widgets_values":[712,800,false]},{"id":1102,"type":"LoadImage","pos":[-205.95057678222656,316.025390625],"size":[315,314],"flags":{},"order":3,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[4156],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":[4158],"slot_index":1}],"properties":{"Node name for 
S&R":"LoadImage"},"widgets_values":["clipspace/clipspace-mask-67304674.png [input]","image"]},{"id":725,"type":"Reroute","pos":[126.9476318359375,319.6999206542969],"size":[75,26],"flags":{},"order":8,"mode":0,"inputs":[{"name":"","type":"*","link":4156}],"outputs":[{"name":"","type":"IMAGE","links":[2210,2211,2338],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1038,"type":"ClownGuides_Beta","pos":[-491.9494934082031,-334.2093505859375],"size":[315,450],"flags":{},"order":20,"mode":0,"inputs":[{"name":"guide_masked","localized_name":"guide_masked","type":"LATENT","shape":7,"link":3602},{"name":"guide_unmasked","localized_name":"guide_unmasked","type":"LATENT","shape":7,"link":3603},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":3604},{"name":"weights_masked","localized_name":"weights_masked","type":"SIGMAS","shape":7,"link":null},{"name":"weights_unmasked","localized_name":"weights_unmasked","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[4095],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownGuides_Beta"},"widgets_values":["flow",false,false,1,1,1,1,"constant","constant",0,0,8,8,false],"color":"#2a363b","bgcolor":"#3f5159"},{"id":1069,"type":"ClownsharkChainsampler_Beta","pos":[1011.0429077148438,-95.05850219726562],"size":[315,570],"flags":{},"order":28,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":3711},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":4155},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":4089},{"name":"options 2","type":"OPTIONS","link":4159},{"name":"options 3","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[4104],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0,"exponential/res_3s",1,1,"resample",true],"color":"#2a363b","bgcolor":"#3f5159"},{"id":1066,"type":"ClownsharKSampler_Beta","pos":[620.0368041992188,-101.93733215332031],"size":[340.55120849609375,730],"flags":{},"order":27,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":3812},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":4102},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":3700},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":4096},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":4086},{"name":"options 2","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[3711],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":[],"slot_index":1},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharKSampler_Beta","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":[0,"exponential/res_3s","beta57",30,7,1,1,0,"fixed","standard",false],"color":"#2a363b","bgcolor":"#3f5159"},{"id":1070,"type":"ClownsharkChainsampler_Beta","pos":[1361.5435791015625,-100.98193359375],"size":[315,570],"flags":{},"order":29,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":4104},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":3832},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[4031],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":[],"slot_index":1},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0,"exponential/res_3s",-1,1,"resample",true],"color":"#232","bgcolor":"#353"},{"id":1143,"type":"ClownOptions_Cycles_Beta","pos":[1023.6978149414062,-356.53753662109375],"size":[282.6300964355469,202],"flags":{},"order":4,"mode":4,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[4159],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownOptions_Cycles_Beta"},"widgets_values":[5,1,0,"none",1,1,false],"color":"#2a363b","bgcolor":"#3f5159"},{"id":1153,"type":"LoraLoader","pos":[-1079.3297119140625,-135.3394012451172],"size":[315,126],"flags":{},"order":6,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":4144},{"name":"clip","localized_name":"clip","type":"CLIP","link":4150}],"outputs":[{"name":"MODEL","localized_name":"MODEL","type":"MODEL","links":[4160],"slot_index":0},{"name":"CLIP","localized_name":"CLIP","type":"CLIP","links":[4149],"slot_index":1}],"properties":{"Node name for S&R":"LoraLoader"},"widgets_values":["FLUX/Raura.safetensors",1,1]},{"id":737,"type":"ModelSamplingAdvancedResolution","pos":[-1125.156005859375,-356.12274169921875],"size":[260.3999938964844,126],"flags":{},"order":22,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":4161},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":2125}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[4162],"slot_index":0}],"properties":{"Node name for S&R":"ModelSamplingAdvancedResolution"},"widgets_values":["exponential",1.35,0.85]},{"id":1149,"type":"ReFluxPatcher","pos":[-828.3265380859375,-352.2313232421875],"size":[210,82],"flags":{},"order":25,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":4162}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[4163],"slot_index":0}],"properties":{"Node name for S&R":"ReFluxPatcher"},"widgets_values":["float64",true],"color":"#223","bgcolor":"#335"},{"id":1142,"type":"TorchCompileModels","pos":[-1416.9853515625,-362.2281799316406],"size":[210,178],"flags":{},"order":9,"mode":4,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":4160}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[4161],"slot_index":0}],"properties":{"Node name for 
S&R":"TorchCompileModels"},"widgets_values":["inductor",false,"default",false,64,0]},{"id":1150,"type":"ClownGuide_Style_Beta","pos":[-140.6088409423828,-331.50213623046875],"size":[248.69369506835938,286],"flags":{"collapsed":false},"order":24,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":4097},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":4095}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[4096,4155],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuide_Style_Beta"},"widgets_values":["positive","WCT",1,1,"constant",0,-1,false],"color":"#223","bgcolor":"#335"},{"id":1088,"type":"ClownGuides_Beta","pos":[145.70831298828125,-329.6731872558594],"size":[315,450],"flags":{},"order":21,"mode":0,"inputs":[{"name":"guide_masked","localized_name":"guide_masked","type":"LATENT","shape":7,"link":3785},{"name":"guide_unmasked","localized_name":"guide_unmasked","type":"LATENT","shape":7,"link":3786},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":3901},{"name":"weights_masked","localized_name":"weights_masked","type":"SIGMAS","shape":7,"link":null},{"name":"weights_unmasked","localized_name":"weights_unmasked","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[3832],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownGuides_Beta"},"widgets_values":["inversion",false,false,0,1,1,1,"constant","constant",0,0,30,30,false],"color":"#232","bgcolor":"#353"}],"links":[[2100,731,1,726,2,"FLOAT"],[2101,726,0,727,0,"IMAGE"],[2102,726,0,727,1,"IMAGE"],[2103,728,0,727,2,"IMAGE"],[2104,729,0,727,5,"INT"],[2105,729,1,727,6,"INT"],[2106,726,1,728,0,"MASK"],[2108,729,0,731,0,"*"],[2109,729,1,731,1,"*"],[2125,727,3,737,1,"LATENT"],[2153,14,0,745,1,"VAE"],[2198,758,0,759,1,"IMAGE"],[2199,759,0,760,0,"IMAGE"],[2200,759,0,761,1,"IMAGE"],[2201,745,0,758,0,"IMAGE"],[2204,726,5,758,1,"INT"],[2205,726,6,758,2,"INT"],[2206,726,3,759,3,"INT"],[2207,726,4,759,4,"INT"],[2208,745,0,762,0,"IMAGE"],[2209,726,0,762,1,"IMAGE"],[2210,725,0,761,0,"IMAGE"],[2211,725,0,759,0,"IMAGE"],[2233,727,2,765,0,"MASK"],[2241,745,0,744,0,"IMAGE"],[2338,725,0,726,0,"IMAGE"],[3508,14,0,727,4,"VAE"],[3568,745,0,1022,0,"IMAGE"],[3569,1022,0,1024,0,"IMAGE"],[3570,765,0,1022,1,"IMAGE"],[3602,727,0,1038,0,"LATENT"],[3603,727,0,1038,1,"LATENT"],[3604,727,2,1038,2,"MASK"],[3605,728,0,1039,1,"IMAGE"],[3606,726,0,1039,0,"IMAGE"],[3607,1039,0,1040,0,"IMAGE"],[3700,727,0,1066,3,"LATENT"],[3711,1066,0,1069,4,"LATENT"],[3720,1071,0,1072,2,"CLIP_VISION_OUTPUT"],[3721,726,0,1071,1,"IMAGE"],[3724,1073,0,1072,0,"CONDITIONING"],[3785,727,0,1088,0,"LATENT"],[3786,727,0,1088,1,"LATENT"],[3812,13,0,1066,0,"MODEL"],[3832,1088,0,1070,5,"GUIDES"],[3884,726,2,759,2,"MASK"],[3901,727,2,1088,2,"MASK"],[4031,1070,0,745,0,"LATENT"],[4086,1145,0,1066,6,"OPTIONS"],[4087,1073,0,1145,1,"CONDITIONING"],[4088,1072,0,1145,0,"CONDITIONING"],[4089,1145,0,1069,6,"OPTIONS"],[4095,1038,0,1150,3,"GUIDES"],[4096,1150,0,1066,5,"GUIDES"],[4097,727,0,1150,0,"LATENT"],[4102,1072,0,1066,1,"CONDITIONING"],[4104,1069,0,1070,4,"LATENT"],[4144,1152,0,1153,0,"MODEL"],[4146,1152,2,14,0,"*"],[4149,1153,1,490,0,"*"],[4150,1152,1,1153,1,"CLIP"],[4151,1152,4,1072,1,"STYLE_MODEL"],[4152,1152,3,1071,0,"CLIP_VISION"],[4155,1150,0,1069,5,"GUIDES"],[4156,1102,0,725,0,"*"],[4157,4
90,0,1073,0,"CLIP"],[4158,1102,1,726,1,"MASK"],[4159,1143,0,1069,7,"OPTIONS"],[4160,1153,0,1142,0,"MODEL"],[4161,1142,0,737,0,"MODEL"],[4162,737,0,1149,0,"MODEL"],[4163,1149,0,13,0,"*"]],"groups":[{"id":1,"title":"Prepare Input","bounding":[-240.3173828125,230.5765838623047,755.7755737304688,762.867431640625],"color":"#3f789e","font_size":24,"flags":{}},{"id":2,"title":"Patch and Stitch","bounding":[1762.0626220703125,-449.59136962890625,1387.1339111328125,1156.21923828125],"color":"#3f789e","font_size":24,"flags":{}},{"id":3,"title":"Loaders","bounding":[-1451.647216796875,-453.5611877441406,862.5447998046875,635.2009887695312],"color":"#3f789e","font_size":24,"flags":{}},{"id":5,"title":"Sampling","bounding":[565.7752685546875,-449.1409606933594,1147.30712890625,1118.83447265625],"color":"#3f789e","font_size":24,"flags":{}},{"id":6,"title":"Guides","bounding":[-538.6279296875,-451.06854248046875,1052.895263671875,634.7589721679688],"color":"#3f789e","font_size":24,"flags":{}}],"config":{},"extra":{"ds":{"scale":1.351305709310398,"offset":[2774.203337270875,600.0170992273368]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0,"ue_links":[],"VHS_MetadataImage":true,"VHS_KeepIntermediate":true},"version":0.4} ================================================ FILE: example_workflows/flux inpaint area.json ================================================ 
{"last_node_id":698,"last_link_id":1968,"nodes":[{"id":670,"type":"SaveImage","pos":[5481.20751953125,763.7216186523438],"size":[315,270],"flags":{},"order":21,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":1883}],"outputs":[],"properties":{},"widgets_values":["ComfyUI"]},{"id":663,"type":"VAEEncodeAdvanced","pos":[4030,1370],"size":[262.4812927246094,278],"flags":{},"order":10,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":1957},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":1968}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[1885,1886],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":[],"slot_index":1},{"name":"mask","localized_name":"mask","type":"MASK","links":[],"slot_index":2},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[1854,1869],"slot_index":3},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"VAEEncodeAdvanced"},"widgets_values":["false",1024,1024,"red",false,"16_channels"]},{"id":651,"type":"PreviewImage","pos":[4060,1710],"size":[210,246],"flags":{},"order":11,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":1963}],"outputs":[],"properties":{"Node name for 
S&R":"PreviewImage"},"widgets_values":[]},{"id":624,"type":"CLIPTextEncode","pos":[4329.92578125,1015.7978515625],"size":[306.2455749511719,162.64158630371094],"flags":{},"order":9,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":1966}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[1860],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["a close up shot of a red coffee mug on a wooden table"]},{"id":346,"type":"ModelSamplingAdvancedResolution","pos":[4034.77978515625,820.2175903320312],"size":[260.3999938964844,126],"flags":{},"order":14,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1965},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":1869}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1870],"slot_index":0}],"properties":{"Node name for S&R":"ModelSamplingAdvancedResolution"},"widgets_values":["exponential",1.35,0.85]},{"id":674,"type":"Note","pos":[4999.462890625,1603.108642578125],"size":[378.7174377441406,179.35989379882812],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["eta is the amount of noise added after each step. It allows the model to change things more aggressively. Try comparing 0.0 vs 0.75.\n\nres_2m and res_3m will be sufficient quality samplers in most cases. Try res_2s and res_3s (which are 2x and 3x slower) if you want an extra quality boost.\n\nYou can get away with fewer than 40 steps in most cases, but 40 gives the model more time to correct any errors. 
Mileage may vary, experiment!"],"color":"#432","bgcolor":"#653"},{"id":677,"type":"Note","pos":[3783.440185546875,820.546142578125],"size":[210.66668701171875,91.33430480957031],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["I have found these values often work quite well with img2img work with the beta57 scheduler.\n\n"],"color":"#432","bgcolor":"#653"},{"id":678,"type":"Note","pos":[3748.428466796875,1012.2677612304688],"size":[210,107.33900451660156],"flags":{},"order":2,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["If you wish to inpaint with another model, just replace the model loader and be sure to change CFG to whatever is appropriate for that model.\n\n"],"color":"#432","bgcolor":"#653"},{"id":672,"type":"Note","pos":[3747.097412109375,1187.65576171875],"size":[210,104.00474548339844],"flags":{},"order":3,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Padding will increase or decrease the amount of area included around your mask that will give the model more context."],"color":"#432","bgcolor":"#653"},{"id":637,"type":"Note","pos":[3412.000732421875,1202.6614990234375],"size":[280.681884765625,88],"flags":{},"order":4,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Draw your mask on your image for the area you would like to inpaint."],"color":"#432","bgcolor":"#653"},{"id":658,"type":"Image Comparer 
(rgthree)","pos":[5007.734375,1021.2513427734375],"size":[450.5037841796875,521.7816162109375],"flags":{},"order":20,"mode":0,"inputs":[{"name":"image_a","type":"IMAGE","dir":3,"link":1829},{"name":"image_b","type":"IMAGE","dir":3,"link":1823}],"outputs":[],"properties":{"comparer_mode":"Slide"},"widgets_values":[[{"name":"A","selected":true,"url":"/api/view?filename=rgthree.compare._temp_sxifa_00003_.png&type=temp&subfolder=&rand=0.14849022700275727"},{"name":"B","selected":true,"url":"/api/view?filename=rgthree.compare._temp_sxifa_00004_.png&type=temp&subfolder=&rand=0.8022985498723256"}]]},{"id":673,"type":"Note","pos":[4330.9345703125,1766.158203125],"size":[488.01611328125,234.97633361816406],"flags":{},"order":5,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["The parameters for \"masked\" will affect your inpainting area. \n\nTry changing weight_masked and end_step_masked. Lower values will allow the model to inpaint more aggressively. Higher will use more information from the original image. \n\n *** You can think of these like a \"denoise\" slider! *** \n\n(With lower weight, lower end_step acting like higher denoise).\n\nweight_scheduler_masked will change how quickly the value in weight_masked drops to zero. \"constant\" will never drop. Try linear_quadratic (drops very gradually, then suddenly at the end) or beta57 (drops earlier). 
These can make the inpainting process a bit smoother.\n\nHaving some information from the original image helps the model place objects more accurately, if you are replacing something that is already there."],"color":"#432","bgcolor":"#653"},{"id":617,"type":"ClownsharKSampler_Beta","pos":[4660,1020],"size":[315,690],"flags":{},"order":15,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":1870},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":1860},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1854},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":1884},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[1936],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"multistep/res_3m","bong_tangent",40,-1,1,1,17,"fixed","standard",true]},{"id":619,"type":"VAEDecode","pos":[4830.248046875,919.5529174804688],"size":[140,46],"flags":{},"order":16,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":1936},{"name":"vae","localized_name":"vae","type":"VAE","link":1967}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[1882,1902],"slot_index":0}],"properties":{"Node name for 
S&R":"VAEDecode"},"widgets_values":[]},{"id":638,"type":"LoadImage","pos":[3390,1370],"size":[315,314],"flags":{},"order":6,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[1828,1829,1955],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":[1956],"slot_index":1}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["clipspace/clipspace-mask-147694527.20000002.png [input]","image"]},{"id":667,"type":"ImageResize+","pos":[5008.23974609375,755.5714111328125],"size":[210,218],"flags":{},"order":17,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","link":1882},{"name":"width","type":"INT","pos":[10,76],"widget":{"name":"width"},"link":1949},{"name":"height","type":"INT","pos":[10,100],"widget":{"name":"height"},"link":1950}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[1876],"slot_index":0},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"ImageResize+"},"widgets_values":[512,512,"lanczos","stretch","always",0]},{"id":657,"type":"ImageCompositeMasked","pos":[5242.94482421875,761.7905883789062],"size":[210,186],"flags":{},"order":19,"mode":0,"inputs":[{"name":"destination","localized_name":"destination","type":"IMAGE","link":1828},{"name":"source","localized_name":"source","type":"IMAGE","link":1876},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":1953},{"name":"x","type":"INT","pos":[10,76],"widget":{"name":"x"},"link":1952},{"name":"y","type":"INT","pos":[10,100],"widget":{"name":"y"},"link":1951}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[1823,1883],"slot_index":0}],"properties":{"Node name for 
S&R":"ImageCompositeMasked"},"widgets_values":[712,800,false]},{"id":650,"type":"MaskPreview","pos":[3778.59765625,1707.707763671875],"size":[181.5970001220703,246],"flags":{},"order":12,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":1962}],"outputs":[],"properties":{"Node name for S&R":"MaskPreview"},"widgets_values":[]},{"id":671,"type":"ClownGuides_Beta","pos":[4331.12109375,1240.1927490234375],"size":[303.2622985839844,450],"flags":{},"order":13,"mode":0,"inputs":[{"name":"guide_masked","localized_name":"guide_masked","type":"LATENT","shape":7,"link":1885},{"name":"guide_unmasked","localized_name":"guide_unmasked","type":"LATENT","shape":7,"link":1886},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":1954},{"name":"weights_masked","localized_name":"weights_masked","type":"SIGMAS","shape":7,"link":null},{"name":"weights_unmasked","localized_name":"weights_unmasked","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[1884],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuides_Beta"},"widgets_values":["epsilon",false,true,0.5,1,1,1,"beta57","constant",0,0,10,-1,false]},{"id":679,"type":"Image Comparer (rgthree)","pos":[5488.171875,1085.4603271484375],"size":[402.1800842285156,455.1059875488281],"flags":{},"order":18,"mode":0,"inputs":[{"name":"image_a","type":"IMAGE","dir":3,"link":1964},{"name":"image_b","type":"IMAGE","dir":3,"link":1902}],"outputs":[],"properties":{"comparer_mode":"Slide"},"widgets_values":[[{"name":"A","selected":true,"url":"/api/view?filename=rgthree.compare._temp_ejvlo_00001_.png&type=temp&subfolder=&rand=0.5455521700112449"},{"name":"B","selected":true,"url":"/api/view?filename=rgthree.compare._temp_ejvlo_00002_.png&type=temp&subfolder=&rand=0.8898066636829509"}]]},{"id":676,"type":"Mask Bounding Box Aspect 
Ratio","pos":[3742.82421875,1383.6278076171875],"size":[252,250],"flags":{},"order":8,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","shape":7,"link":1955},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":1956}],"outputs":[{"name":"image","localized_name":"image","type":"IMAGE","links":[1957,1963,1964],"slot_index":0},{"name":"mask","localized_name":"mask","type":"MASK","links":[1954,1962],"slot_index":1},{"name":"mask_blurred","localized_name":"mask_blurred","type":"MASK","links":[1953],"slot_index":2},{"name":"x","localized_name":"x","type":"INT","links":[1952],"slot_index":3},{"name":"y","localized_name":"y","type":"INT","links":[1951],"slot_index":4},{"name":"width","localized_name":"width","type":"INT","links":[1949],"slot_index":5},{"name":"height","localized_name":"height","type":"INT","links":[1950],"slot_index":6}],"properties":{"Node name for S&R":"Mask Bounding Box Aspect Ratio"},"widgets_values":[20,20,1,false]},{"id":615,"type":"FluxLoader","pos":[3992.056396484375,1016.4193725585938],"size":[315,282],"flags":{},"order":7,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1965],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[1966],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[1967,1968],"slot_index":2},{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","links":null},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","links":null}],"properties":{"Node name for 
S&R":"FluxLoader"},"widgets_values":["colossusProjectFlux_v42AIO.safetensors","default",".use_ckpt_clip",".none",".use_ckpt_vae",".none",".none"]}],"links":[[1823,657,0,658,1,"IMAGE"],[1828,638,0,657,0,"IMAGE"],[1829,638,0,658,0,"IMAGE"],[1854,663,3,617,3,"LATENT"],[1860,624,0,617,1,"CONDITIONING"],[1869,663,3,346,1,"LATENT"],[1870,346,0,617,0,"MODEL"],[1876,667,0,657,1,"IMAGE"],[1882,619,0,667,0,"IMAGE"],[1883,657,0,670,0,"IMAGE"],[1884,671,0,617,5,"GUIDES"],[1885,663,0,671,0,"LATENT"],[1886,663,0,671,1,"LATENT"],[1902,619,0,679,1,"IMAGE"],[1936,617,0,619,0,"LATENT"],[1949,676,5,667,1,"INT"],[1950,676,6,667,2,"INT"],[1951,676,4,657,4,"INT"],[1952,676,3,657,3,"INT"],[1953,676,2,657,2,"MASK"],[1954,676,1,671,2,"MASK"],[1955,638,0,676,0,"IMAGE"],[1956,638,1,676,1,"MASK"],[1957,676,0,663,0,"IMAGE"],[1962,676,1,650,0,"MASK"],[1963,676,0,651,0,"IMAGE"],[1964,676,0,679,0,"IMAGE"],[1965,615,0,346,0,"MODEL"],[1966,615,1,624,0,"CLIP"],[1967,615,2,619,1,"VAE"],[1968,615,2,663,4,"VAE"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.4864362802414468,"offset":[-1333.621998147027,-469.4579733585599]},"node_versions":{"comfy-core":"0.3.26","comfyui_controlnet_aux":"1e9eac6377c882da8bb360c7544607036904362c","ComfyUI-VideoHelperSuite":"c36626c6028faca912eafcedbc71f1d342fb4d2a"},"VHS_latentpreview":false,"VHS_latentpreviewrate":0,"VHS_MetadataImage":true,"VHS_KeepIntermediate":true},"version":0.4} ================================================ FILE: example_workflows/flux inpaint bongmath.json ================================================ 
{"last_node_id":1057,"last_link_id":3666,"nodes":[{"id":758,"type":"ImageResize+","pos":[1304.9573974609375,-352.7953796386719],"size":[210,218],"flags":{"collapsed":true},"order":24,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","link":2201},{"name":"width","type":"INT","pos":[10,76],"widget":{"name":"width"},"link":2204},{"name":"height","type":"INT","pos":[10,100],"widget":{"name":"height"},"link":2205}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2198],"slot_index":0},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"ImageResize+"},"widgets_values":[512,512,"lanczos","stretch","always",0]},{"id":759,"type":"ImageCompositeMasked","pos":[1494.957763671875,-352.7953796386719],"size":[210,186],"flags":{"collapsed":true},"order":28,"mode":0,"inputs":[{"name":"destination","localized_name":"destination","type":"IMAGE","link":2211},{"name":"source","localized_name":"source","type":"IMAGE","link":2198},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":2301},{"name":"x","type":"INT","pos":[10,76],"widget":{"name":"x"},"link":2206},{"name":"y","type":"INT","pos":[10,100],"widget":{"name":"y"},"link":2207}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2199,2200],"slot_index":0}],"properties":{"Node name for 
S&R":"ImageCompositeMasked"},"widgets_values":[712,800,false]},{"id":13,"type":"Reroute","pos":[-792.117919921875,-60.3060188293457],"size":[75,26],"flags":{},"order":10,"mode":0,"inputs":[{"name":"","type":"*","link":1964}],"outputs":[{"name":"","type":"MODEL","links":[2317],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":490,"type":"Reroute","pos":[-792.117919921875,-20.30602264404297],"size":[75,26],"flags":{},"order":6,"mode":0,"inputs":[{"name":"","type":"*","link":1965}],"outputs":[{"name":"","type":"CLIP","links":[3656],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":737,"type":"ModelSamplingAdvancedResolution","pos":[-972.117919921875,-330.3060302734375],"size":[260.3999938964844,126],"flags":{},"order":19,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":2318},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":2125}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[3661],"slot_index":0}],"properties":{"Node name for S&R":"ModelSamplingAdvancedResolution"},"widgets_values":["exponential",1.35,0.85]},{"id":786,"type":"TorchCompileModels","pos":[-1262.117919921875,-360.3060302734375],"size":[256.248779296875,178],"flags":{},"order":13,"mode":4,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":2317}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2318],"slot_index":0}],"properties":{"Node name for S&R":"TorchCompileModels"},"widgets_values":["inductor",false,"default",false,64,0]},{"id":664,"type":"ReFluxPatcher","pos":[-857.81005859375,-103.69645690917969],"size":[210,82],"flags":{"collapsed":true},"order":5,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1963}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1964],"slot_index":0}],"properties":{"Node name for 
S&R":"ReFluxPatcher"},"widgets_values":["float64",true]},{"id":14,"type":"Reroute","pos":[-792.117919921875,19.69397735595703],"size":[75,26],"flags":{},"order":7,"mode":0,"inputs":[{"name":"","type":"*","link":1966}],"outputs":[{"name":"","type":"VAE","links":[2153,3508],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":745,"type":"VAEDecode","pos":[1136.7379150390625,-350.8069152832031],"size":[140,46],"flags":{"collapsed":true},"order":23,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":3665},{"name":"vae","localized_name":"vae","type":"VAE","link":2153}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2201,2208,2241,3568],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[]},{"id":663,"type":"FluxLoader","pos":[-1262.117919921875,-130.3060302734375],"size":[374.41741943359375,282],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1963],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[1965],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[1966],"slot_index":2},{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","links":[],"slot_index":3},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","links":[],"slot_index":4}],"properties":{"Node name for S&R":"FluxLoader"},"widgets_values":["colossusProjectFlux_v42AIO.safetensors","fp8_e4m3fn_fast",".use_ckpt_clip",".none",".use_ckpt_vae","sigclip_vision_patch14_384.safetensors","flux1-redux-dev.safetensors"]},{"id":726,"type":"Mask Bounding Box Aspect 
Ratio","pos":[-153.93637084960938,317.7193298339844],"size":[252,250],"flags":{"collapsed":false},"order":12,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","shape":7,"link":2338},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":3659},{"name":"aspect_ratio","type":"FLOAT","pos":[10,204],"widget":{"name":"aspect_ratio"},"link":2100}],"outputs":[{"name":"image","localized_name":"image","type":"IMAGE","links":[2101,2102,2209,3606],"slot_index":0},{"name":"mask","localized_name":"mask","type":"MASK","links":[2106],"slot_index":1},{"name":"mask_blurred","localized_name":"mask_blurred","type":"MASK","links":[2301],"slot_index":2},{"name":"x","localized_name":"x","type":"INT","links":[2206],"slot_index":3},{"name":"y","localized_name":"y","type":"INT","links":[2207],"slot_index":4},{"name":"width","localized_name":"width","type":"INT","links":[2204],"slot_index":5},{"name":"height","localized_name":"height","type":"INT","links":[2205],"slot_index":6}],"properties":{"Node name for S&R":"Mask Bounding Box Aspect Ratio"},"widgets_values":[300,40,1.75,false]},{"id":728,"type":"MaskToImage","pos":[-151.61874389648438,849.1907348632812],"size":[176.39999389648438,26],"flags":{"collapsed":true},"order":14,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":2106}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2103,3605],"slot_index":0}],"properties":{"Node name for S&R":"MaskToImage"},"widgets_values":[]},{"id":762,"type":"Image Comparer (rgthree)","pos":[1584.957763671875,-282.7954406738281],"size":[402.1800842285156,455.1059875488281],"flags":{},"order":25,"mode":0,"inputs":[{"name":"image_a","type":"IMAGE","dir":3,"link":2208},{"name":"image_b","type":"IMAGE","dir":3,"link":2209}],"outputs":[],"title":"Compare Inpaint 
Patch","properties":{"comparer_mode":"Slide"},"widgets_values":[[{"name":"A","selected":true,"url":"/api/view?filename=rgthree.compare._temp_hkrer_00001_.png&type=temp&subfolder=&rand=0.04538135261092524"},{"name":"B","selected":true,"url":"/api/view?filename=rgthree.compare._temp_hkrer_00002_.png&type=temp&subfolder=&rand=0.5206493331921973"}]],"color":"#332922","bgcolor":"#593930"},{"id":765,"type":"MaskToImage","pos":[2025.24755859375,225.29702758789062],"size":[182.28543090820312,26],"flags":{"collapsed":true},"order":17,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":2233}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3570],"slot_index":0}],"properties":{"Node name for S&R":"MaskToImage"},"widgets_values":[]},{"id":1022,"type":"ImageBlend","pos":[2028.46533203125,274.42523193359375],"size":[210,102],"flags":{"collapsed":true},"order":27,"mode":0,"inputs":[{"name":"image1","localized_name":"image1","type":"IMAGE","link":3568},{"name":"image2","localized_name":"image2","type":"IMAGE","link":3570}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3569],"slot_index":0}],"properties":{"Node name for S&R":"ImageBlend"},"widgets_values":[0.5,"overlay"]},{"id":1024,"type":"PreviewImage","pos":[2025.259765625,-279.3157958984375],"size":[413.7582092285156,445.8081359863281],"flags":{},"order":29,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":3569}],"outputs":[],"properties":{"Node name for S&R":"PreviewImage"},"widgets_values":[],"color":"#332922","bgcolor":"#593930"},{"id":760,"type":"SaveImage","pos":[1124.9569091796875,217.20452880859375],"size":[418.26055908203125,456.04608154296875],"flags":{},"order":30,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2199}],"outputs":[],"title":"Save Output","properties":{},"widgets_values":["ComfyUI"],"color":"#232","bgcolor":"#353"},{"id":761,"type":"Image Comparer 
(rgthree)","pos":[1574.957763671875,227.20449829101562],"size":[410.4466247558594,447.8973388671875],"flags":{},"order":31,"mode":0,"inputs":[{"name":"image_a","type":"IMAGE","dir":3,"link":2210},{"name":"image_b","type":"IMAGE","dir":3,"link":2200}],"outputs":[],"title":"Compare Output","properties":{"comparer_mode":"Slide"},"widgets_values":[[{"name":"A","selected":true,"url":"/api/view?filename=rgthree.compare._temp_eoplx_00001_.png&type=temp&subfolder=&rand=0.7495673665351654"},{"name":"B","selected":true,"url":"/api/view?filename=rgthree.compare._temp_eoplx_00002_.png&type=temp&subfolder=&rand=0.17529967707052396"}]],"color":"#232","bgcolor":"#353"},{"id":744,"type":"SaveImage","pos":[1124.9569091796875,-292.7954406738281],"size":[424.53594970703125,455.0760192871094],"flags":{},"order":26,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2241}],"outputs":[],"title":"Save Patch","properties":{"Node name for S&R":"SaveImage","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["ComfyUI"],"color":"#332922","bgcolor":"#593930"},{"id":725,"type":"Reroute","pos":[-243.93637084960938,317.7193298339844],"size":[75,26],"flags":{},"order":9,"mode":0,"inputs":[{"name":"","type":"*","link":3658}],"outputs":[{"name":"","type":"IMAGE","links":[2210,2211,2338],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1040,"type":"PreviewImage","pos":[-566.879150390625,688.4552001953125],"size":[304.98114013671875,265.58380126953125],"flags":{},"order":20,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":3607}],"outputs":[],"properties":{"Node name for 
S&R":"PreviewImage"},"widgets_values":[]},{"id":1039,"type":"ImageBlend","pos":[-151.61874389648438,949.1907348632812],"size":[210,102],"flags":{"collapsed":true},"order":16,"mode":0,"inputs":[{"name":"image1","localized_name":"image1","type":"IMAGE","link":3606},{"name":"image2","localized_name":"image2","type":"IMAGE","link":3605}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3607],"slot_index":0}],"properties":{"Node name for S&R":"ImageBlend"},"widgets_values":[0.5,"overlay"]},{"id":731,"type":"SimpleMath+","pos":[-151.61874389648438,799.1907348632812],"size":[315,98],"flags":{"collapsed":true},"order":8,"mode":0,"inputs":[{"name":"a","localized_name":"a","type":"*","shape":7,"link":2108},{"name":"b","localized_name":"b","type":"*","shape":7,"link":2109},{"name":"c","localized_name":"c","type":"*","shape":7,"link":null}],"outputs":[{"name":"INT","localized_name":"INT","type":"INT","links":null},{"name":"FLOAT","localized_name":"FLOAT","type":"FLOAT","links":[2100],"slot_index":1}],"properties":{"Node name for S&R":"SimpleMath+"},"widgets_values":["a/b"]},{"id":729,"type":"SetImageSize","pos":[-152.42828369140625,628.09228515625],"size":[210,102],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[{"name":"width","localized_name":"width","type":"INT","links":[2104,2108],"slot_index":0},{"name":"height","localized_name":"height","type":"INT","links":[2105,2109],"slot_index":1}],"title":"Inpaint Tile Size","properties":{"Node name for 
S&R":"SetImageSize"},"widgets_values":[1024,1024]},{"id":727,"type":"VAEEncodeAdvanced","pos":[-151.61874389648438,899.1907348632812],"size":[262.4812927246094,298],"flags":{"collapsed":true},"order":15,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":2101},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":2102},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":2103},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":3508},{"name":"width","type":"INT","pos":[10,160],"widget":{"name":"width"},"link":2104},{"name":"height","type":"INT","pos":[10,184],"widget":{"name":"height"},"link":2105}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[3602,3603,3611],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":[],"slot_index":1},{"name":"mask","localized_name":"mask","type":"MASK","links":[2233,3604],"slot_index":2},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[2125,3660],"slot_index":3},{"name":"width","localized_name":"width","type":"INT","links":[],"slot_index":4},{"name":"height","localized_name":"height","type":"INT","links":[]}],"properties":{"Node name for 
S&R":"VAEEncodeAdvanced"},"widgets_values":["false",1344,768,"red",false,"16_channels"]},{"id":1038,"type":"ClownGuides_Beta","pos":[-570,-350],"size":[315,450],"flags":{},"order":18,"mode":0,"inputs":[{"name":"guide_masked","localized_name":"guide_masked","type":"LATENT","shape":7,"link":3602},{"name":"guide_unmasked","localized_name":"guide_unmasked","type":"LATENT","shape":7,"link":3603},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":3604},{"name":"weights_masked","localized_name":"weights_masked","type":"SIGMAS","shape":7,"link":null},{"name":"weights_unmasked","localized_name":"weights_unmasked","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[3609,3641],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuides_Beta"},"widgets_values":["inversion",false,false,0,1,1,1,"constant","constant",0,0,1,-1,false]},{"id":1055,"type":"LoadImage","pos":[-588.5657958984375,310.53521728515625],"size":[315,314],"flags":{},"order":2,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3658],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":[3659],"slot_index":1}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["clipspace/clipspace-mask-264573735.png [input]","image"]},{"id":1041,"type":"ClownGuide_Style_Beta","pos":[-210,-350],"size":[315,286],"flags":{},"order":21,"mode":4,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":3611},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":3609}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownGuide_Style_Beta"},"widgets_values":["positive","WCT",1,1,"constant",0,10,false]},{"id":1056,"type":"CLIPTextEncode","pos":[251.24851989746094,-166.23118591308594],"size":[311.10028076171875,154.46998596191406],"flags":{"collapsed":false},"order":11,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":3656}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[3662],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["a soviet T72 tank driving down the middle of a road in a city, crossing over the crosswalk, aiming its gun at the camera"]},{"id":1043,"type":"ClownOptions_SDE_Beta","pos":[249.46791076660156,47.537593841552734],"size":[315,266],"flags":{},"order":3,"mode":0,"inputs":[{"name":"etas","localized_name":"etas","type":"SIGMAS","shape":7,"link":null},{"name":"etas_substep","localized_name":"etas_substep","type":"SIGMAS","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[3643],"slot_index":0}],"properties":{"Node name for S&R":"ClownOptions_SDE_Beta"},"widgets_values":["gaussian","gaussian","hard","hard",0.5,0.75,-1,"fixed"]},{"id":1018,"type":"ClownOptions_ImplicitSteps_Beta","pos":[611.24853515625,-371.7803649902344],"size":[340.20001220703125,130],"flags":{},"order":4,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[3664],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownOptions_ImplicitSteps_Beta"},"widgets_values":["bongmath","bongmath",2,0]},{"id":1053,"type":"ClownsharKSampler_Beta","pos":[611.24853515625,-181.78033447265625],"size":[340.55120849609375,730],"flags":{},"order":22,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":3661},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":3662},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":3660},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":3641},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":3664},{"name":"options 2","type":"OPTIONS","link":3643},{"name":"options 3","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[3665],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":[],"slot_index":1},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharKSampler_Beta","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":[0.5,"exponential/res_2s","beta57",30,-1,1,1,0,"fixed","standard",true]}],"links":[[1963,663,0,664,0,"MODEL"],[1964,664,0,13,0,"*"],[1965,663,1,490,0,"*"],[1966,663,2,14,0,"*"],[2100,731,1,726,2,"FLOAT"],[2101,726,0,727,0,"IMAGE"],[2102,726,0,727,1,"IMAGE"],[2103,728,0,727,2,"IMAGE"],[2104,729,0,727,5,"INT"],[2105,729,1,727,6,"INT"],[2106,726,1,728,0,"MASK"],[2108,729,0,731,0,"*"],[2109,729,1,731,1,"*"],[2125,727,3,737,1,"LATENT"],[2153,14,0,745,1,"VAE"],[2198,758,0,759,1,"IMAGE"],[2199,759,0,760,0,"IMAGE"],[2200,759,0,761,1,"IMAGE"],[2201,745,0,758,0,"IMAGE"],[2204,726,5,758,1,"INT"],[2205,726,6,758,2,"INT"],[2206,726,3,759,3,"INT"],[2207,726,4,759,4,"INT"],[2208,745,0,762,0,"IMAGE"],[2209,726,0,762,1,"IMAGE"],[2210,725,0,761,0,"IMAGE"],[2211,725,0,759,0,"IMAGE"],[2233,727,2,765,0,"MASK"],[2241,745,0,744,0,"IMAGE"],[2301,726,2,759,2,"MASK"],[2317,13,0,786,0,"MODEL"],[2318,786,0,737,0,"MODEL"],[2338,725,0,726,0,"IMAGE"],[3508,14,0,727,4,"VAE"],[3568,745,0,1022,0,"IMAGE"],[3569,1022,0,1024,0,"IMAGE"],[3570,765,0,1022,1,"IMAGE"],[3602,727,0,1038,0,"LATENT"],[3603,727,0,1038,1,"LATENT"],[3604,727,2,1038,2,"MASK"],[3605,728,0,1039,1,"IMAGE"],[3606,726,0,1039,0,"IMAGE"],[3607,1039,0,1040,0,"IMAGE"],[3609,1038,0,1041,3,"GUIDES"],[3611,727,0,1041,0,"LATENT"],[3641,1038,0,1053,5,"GUIDES"],[3643,1043,0,1053,7,"OPTIONS"],[3656,490,0,1056,0,"CLIP"],[3658,1055,0,725,0,"*"],[3659,1055,1,726,1,"MASK"],[3660,727,3,1053,3,"LATENT"],[3661,737,0,1053,0,"MODEL"],[3662,1056,0,1053,1,"CONDITIONING"],[3664,1018,0,1053,6,"OPTIONS"],[3665,1053,0,745,0,"LATENT"]],"groups":[{"id":1,"title":"Prepare Input","bounding":[-611.2013549804688,224.80706787109375,755.7755737304688,762.867431640625],"color":"#3f789e","font_size":24,"flags":{}},{"id":2,"title":"Patch and 
Stitch","bounding":[1079.80078125,-451.0775451660156,1387.1339111328125,1156.21923828125],"color":"#3f789e","font_size":24,"flags":{}},{"id":3,"title":"Loaders","bounding":[-1311.103515625,-459.84735107421875,645.1646118164062,640.0969848632812],"color":"#3f789e","font_size":24,"flags":{}},{"id":5,"title":"Sampling","bounding":[204.55885314941406,-455.63134765625,812.3118896484375,1071.2481689453125],"color":"#3f789e","font_size":24,"flags":{}},{"id":6,"title":"Guides","bounding":[-611.8231811523438,-457.95751953125,755.8380737304688,634.3353271484375],"color":"#3f789e","font_size":24,"flags":{}}],"config":{},"extra":{"ds":{"scale":1.3072020475058177,"offset":[3303.9392897394673,741.4045019633804]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0,"ue_links":[],"VHS_MetadataImage":true,"VHS_KeepIntermediate":true},"version":0.4} ================================================ FILE: example_workflows/flux inpainting.json ================================================ {"last_node_id":637,"last_link_id":1778,"nodes":[{"id":617,"type":"ClownsharKSampler_Beta","pos":[4647.0654296875,1012.7097778320312],"size":[315,690],"flags":{},"order":9,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":1730},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":1754},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1733},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":1744},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[1756],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"op
tions","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"multistep/res_3m","beta57",40,30,1,1,15,"fixed","standard",true]},{"id":619,"type":"VAEDecode","pos":[5354.6103515625,907.4140014648438],"size":[210,46],"flags":{},"order":11,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":1771},{"name":"vae","localized_name":"vae","type":"VAE","link":1740}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[1765],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode"},"widgets_values":[]},{"id":631,"type":"SaveImage","pos":[5357.8349609375,1012.29443359375],"size":[315,270],"flags":{},"order":12,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":1765}],"outputs":[],"properties":{},"widgets_values":["ComfyUI"]},{"id":624,"type":"CLIPTextEncode","pos":[4233.03955078125,1015.2553100585938],"size":[380.6268615722656,114.73346710205078],"flags":{},"order":3,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":1753}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[1754],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["a weird alien tripod with a purple woman's head on top "]},{"id":615,"type":"FluxLoader","pos":[3883.31982421875,1018.0260620117188],"size":[315,282],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1766],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[1753],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[1723,1740],"slot_index":2},{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","links":null},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","links":null}],"properties":{"Node name for 
S&R":"FluxLoader"},"widgets_values":["colossusProjectFlux_v42AIO.safetensors","default",".use_ckpt_clip",".none",".use_ckpt_vae",".none",".none"]},{"id":346,"type":"ModelSamplingAdvancedResolution","pos":[3940.993408203125,831.2357177734375],"size":[260.3999938964844,126],"flags":{},"order":7,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1766},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":1721}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1730],"slot_index":0}],"properties":{"Node name for S&R":"ModelSamplingAdvancedResolution"},"widgets_values":["exponential",1.35,0.85]},{"id":620,"type":"ClownGuide_Beta","pos":[4355.02392578125,1383.0733642578125],"size":[264.49530029296875,290],"flags":{},"order":6,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":1767},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":1745},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[1744],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownGuide_Beta"},"widgets_values":["flow",false,false,1,1,"constant",0,40,false]},{"id":626,"type":"ClownsharkChainsampler_Beta","pos":[4988.4580078125,1015.6370239257812],"size":[340.20001220703125,509.99993896484375],"flags":{},"order":10,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1756},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":1770},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[1771],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":[],"slot_index":1},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"multistep/res_3m",-1,1,"resample",true]},{"id":422,"type":"VAEEncodeAdvanced","pos":[4080.7021484375,1383.7640380859375],"size":[240.29074096679688,278],"flags":{},"order":4,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":1777},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":1723}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[1767,1772],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":[],"slot_index":1},{"name":"mask","localized_name":"mask","type":"MASK","links":[],"slot_index":2},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[1721,1733],"slot_index":3},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"VAEEncodeAdvanced"},"widgets_values":["false",1024,1024,"red",false,"16_channels"]},{"id":627,"type":"ClownGuide_Beta","pos":[4701.61572265625,1776.4569091796875],"size":[264.49530029296875,290],"flags":{},"order":8,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":1772},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":1778},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[1770],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownGuide_Beta"},"widgets_values":["flow",false,false,1,1,"constant",0,40,false]},{"id":634,"type":"GrowMask","pos":[4102.16650390625,1794.78857421875],"size":[210,82],"flags":{},"order":5,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":1774}],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[1778],"slot_index":0}],"properties":{"Node name for S&R":"GrowMask"},"widgets_values":[20,false]},{"id":621,"type":"LoadImage","pos":[3718.762939453125,1384.687255859375],"size":[319.33538818359375,313.277587890625],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[1777],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":[1745,1774],"slot_index":1}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["clipspace/clipspace-mask-150185841.8.png [input]","image"]},{"id":637,"type":"Note","pos":[3731.639892578125,1771.010009765625],"size":[282.0154113769531,88],"flags":{},"order":2,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Draw your mask on your image for the area you would like to 
inpaint."],"color":"#432","bgcolor":"#653"}],"links":[[1721,422,3,346,1,"LATENT"],[1723,615,2,422,4,"VAE"],[1730,346,0,617,0,"MODEL"],[1733,422,3,617,3,"LATENT"],[1740,615,2,619,1,"VAE"],[1744,620,0,617,5,"GUIDES"],[1745,621,1,620,1,"MASK"],[1753,615,1,624,0,"CLIP"],[1754,624,0,617,1,"CONDITIONING"],[1756,617,0,626,4,"LATENT"],[1765,619,0,631,0,"IMAGE"],[1766,615,0,346,0,"MODEL"],[1767,422,0,620,0,"LATENT"],[1770,627,0,626,5,"GUIDES"],[1771,626,0,619,0,"LATENT"],[1772,422,0,627,0,"LATENT"],[1774,621,1,634,0,"MASK"],[1777,621,0,422,0,"IMAGE"],[1778,634,0,627,1,"MASK"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.3109994191500227,"offset":[-1810.8840558767379,-650.1028379746496]},"node_versions":{"comfy-core":"0.3.26","comfyui_controlnet_aux":"1e9eac6377c882da8bb360c7544607036904362c","ComfyUI-VideoHelperSuite":"c36626c6028faca912eafcedbc71f1d342fb4d2a"},"VHS_latentpreview":false,"VHS_latentpreviewrate":0,"VHS_MetadataImage":true,"VHS_KeepIntermediate":true},"version":0.4} ================================================ FILE: example_workflows/flux regional antiblur.json ================================================ 
{"last_node_id":723,"last_link_id":2096,"nodes":[{"id":13,"type":"Reroute","pos":[1280,-650],"size":[75,26],"flags":{},"order":11,"mode":0,"inputs":[{"name":"","type":"*","link":1964}],"outputs":[{"name":"","type":"MODEL","links":[1967],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":490,"type":"Reroute","pos":[1280,-610],"size":[75,26],"flags":{},"order":8,"mode":0,"inputs":[{"name":"","type":"*","link":1965}],"outputs":[{"name":"","type":"CLIP","links":[1939,2092],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":14,"type":"Reroute","pos":[1280,-570],"size":[75,26],"flags":{},"order":9,"mode":0,"inputs":[{"name":"","type":"*","link":1966}],"outputs":[{"name":"","type":"VAE","links":[18,1328],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":398,"type":"SaveImage","pos":[1379.9996337890625,-267.2835998535156],"size":[341.7508850097656,561.0067749023438],"flags":{},"order":20,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":1329}],"outputs":[],"properties":{"Node name for S&R":"SaveImage","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["ComfyUI"]},{"id":701,"type":"Note","pos":[80,-520],"size":[342.05950927734375,88],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["I usually just lazily draw masks in Load Image nodes (with some random image loaded), but for the sake of reproducibility, here's another approach."],"color":"#432","bgcolor":"#653"},{"id":712,"type":"Note","pos":[-210,-520],"size":[245.76409912109375,91.6677017211914],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["So long as these masks are all the same size, the regional conditioning nodes will handle resizing to the image size for 
you."],"color":"#432","bgcolor":"#653"},{"id":676,"type":"InvertMask","pos":[20,-370],"size":[142.42074584960938,26],"flags":{},"order":10,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":2073}],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[2083],"slot_index":0}],"properties":{"Node name for S&R":"InvertMask"},"widgets_values":[]},{"id":663,"type":"FluxLoader","pos":[630,-720],"size":[374.41741943359375,282],"flags":{},"order":2,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1963],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[1965],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[1966],"slot_index":2},{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","links":[],"slot_index":3},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","links":[],"slot_index":4}],"properties":{"Node name for S&R":"FluxLoader"},"widgets_values":["colossusProjectFlux_v42AIO.safetensors","fp8_e4m3fn_fast",".use_ckpt_clip",".none",".use_ckpt_vae",".none",".none"]},{"id":662,"type":"CLIPTextEncode","pos":[460,-370],"size":[210,88],"flags":{"collapsed":false},"order":12,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":1939}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2094],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["a woman wearing a red flannel shirt and a cute shark plush blue hat"]},{"id":723,"type":"CLIPTextEncode","pos":[460,-240],"size":[210,88],"flags":{"collapsed":false},"order":13,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":2092}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2093],"slot_index":0}],"properties":{"Node name for 
S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["a college campus"]},{"id":7,"type":"VAEEncodeAdvanced","pos":[719.6110229492188,16.752899169921875],"size":[261.2217712402344,279.3136901855469],"flags":{},"order":14,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":null},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":18}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":[],"slot_index":1},{"name":"mask","localized_name":"mask","type":"MASK","links":[],"slot_index":2},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[1399],"slot_index":3},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"VAEEncodeAdvanced","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":["false",1024,1024,"red",false,"16_channels"]},{"id":710,"type":"MaskPreview","pos":[180,-190],"size":[210,246],"flags":{},"order":16,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":2054}],"outputs":[],"properties":{"Node name for S&R":"MaskPreview"},"widgets_values":[]},{"id":664,"type":"ReFluxPatcher","pos":[1040,-720],"size":[210,82],"flags":{},"order":7,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1963}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1964],"slot_index":0}],"properties":{"Node name for 
S&R":"ReFluxPatcher"},"widgets_values":["float64",true]},{"id":397,"type":"VAEDecode","pos":[1382.3662109375,-374.17059326171875],"size":[210,46],"flags":{},"order":19,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":2096},{"name":"vae","localized_name":"vae","type":"VAE","link":1328}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[1329],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[]},{"id":715,"type":"SolidMask","pos":[-220,-370],"size":[210,106],"flags":{},"order":3,"mode":0,"inputs":[],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[2073],"slot_index":0}],"properties":{"Node name for S&R":"SolidMask"},"widgets_values":[1,1024,1024]},{"id":716,"type":"SolidMask","pos":[-220,-220],"size":[210,106],"flags":{},"order":4,"mode":0,"inputs":[],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[2065],"slot_index":0}],"properties":{"Node name for S&R":"SolidMask"},"widgets_values":[1,384,864]},{"id":709,"type":"MaskComposite","pos":[190,-370],"size":[210,126],"flags":{},"order":15,"mode":0,"inputs":[{"name":"destination","localized_name":"destination","type":"MASK","link":2083},{"name":"source","localized_name":"source","type":"MASK","link":2065}],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[2054,2091],"slot_index":0}],"properties":{"Node name for S&R":"MaskComposite"},"widgets_values":[256,160,"add"]},{"id":704,"type":"Note","pos":[101.74818420410156,112.67951965332031],"size":[290.7107238769531,155.35317993164062],"flags":{},"order":5,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["ClownRegionalConditionings:\n\nTry raising or lowering weight, and changing the weight scheduler from beta57 to Karras (weakens more quickly), or to linear quadratic (stronger late).\n\nTry changing region_bleed_start_step (earlier will make the image blend 
together more), and end_step."],"color":"#432","bgcolor":"#653"},{"id":401,"type":"ClownsharKSampler_Beta","pos":[1010,-370],"size":[340.55120849609375,666.8208618164062],"flags":{},"order":18,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":1967},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":2095},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1399},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2096],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharKSampler_Beta","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":[0.5,"multistep/res_2m","bong_tangent",30,-1,1,1,3,"fixed","standard",true]},{"id":722,"type":"ClownRegionalConditioning2","pos":[690,-370],"size":[287.75750732421875,330],"flags":{},"order":17,"mode":0,"inputs":[{"name":"conditioning_masked","localized_name":"conditioning_masked","type":"CONDITIONING","shape":7,"link":2094},{"name":"conditioning_unmasked","localized_name":"conditioning_unmasked","type":"CONDITIONING","shape":7,"link":2093},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":2091},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"region_bleeds","localized_name":"region_bleeds","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","links":[2095],"slot_index":0}],"properties":{"Node name for S&R":"ClownRegionalConditioning2"},"widgets_values":[1,0,0,"constant",0,-1,"boolean_masked",32,false]},{"id":703,"type":"Note","pos":[423.10699462890625,-96.14085388183594],"size":[241.9689483642578,386.7543640136719],"flags":{},"order":6,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["edge_width also creates some overlap around the edges of the mask.\n\nboolean_masked means that the masked area can \"see\" the rest of the image, but the unmasked area cannot. \"boolean\" would mean neither area could see the rest of the image.\n\nTry setting to boolean_unmasked and see what happens!\n\nIf you still have blur, try reducing edge_width (and if you have seams, try increasing it, or setting end_step to something like 20). \n\nAlso verify that you can generate the background prompt alone without blur (if you can't, this won't work). 
And don't get stuck on one seed.\n\nVaguely human-shaped masks also tend to work better than the blocky one used here."],"color":"#432","bgcolor":"#653"}],"links":[[18,14,0,7,4,"VAE"],[1328,14,0,397,1,"VAE"],[1329,397,0,398,0,"IMAGE"],[1399,7,3,401,3,"LATENT"],[1939,490,0,662,0,"CLIP"],[1963,663,0,664,0,"MODEL"],[1964,664,0,13,0,"*"],[1965,663,1,490,0,"*"],[1966,663,2,14,0,"*"],[1967,13,0,401,0,"MODEL"],[2054,709,0,710,0,"MASK"],[2065,716,0,709,1,"MASK"],[2073,715,0,676,0,"MASK"],[2083,676,0,709,0,"MASK"],[2091,709,0,722,2,"MASK"],[2092,490,0,723,0,"CLIP"],[2093,723,0,722,1,"CONDITIONING"],[2094,662,0,722,0,"CONDITIONING"],[2095,722,0,401,1,"CONDITIONING"],[2096,401,0,397,0,"LATENT"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.91943424957756,"offset":[1680.6010824178522,841.7668875984083]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0,"ue_links":[],"VHS_MetadataImage":true,"VHS_KeepIntermediate":true},"version":0.4} ================================================ FILE: example_workflows/flux regional redux (2 zone).json ================================================ 
{"last_node_id":704,"last_link_id":2042,"nodes":[{"id":13,"type":"Reroute","pos":[1300,-790],"size":[75,26],"flags":{},"order":16,"mode":0,"inputs":[{"name":"","type":"*","link":1964}],"outputs":[{"name":"","type":"MODEL","links":[1967],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":490,"type":"Reroute","pos":[1300,-750],"size":[75,26],"flags":{},"order":11,"mode":0,"inputs":[{"name":"","type":"*","link":1965}],"outputs":[{"name":"","type":"CLIP","links":[1706,1939],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":541,"type":"CLIPTextEncode","pos":[692.1508178710938,183.7528839111328],"size":[265.775390625,113.01970672607422],"flags":{},"order":17,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":1706}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[1732],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["blurry, out of focus, shallow depth of field, low quality, bad quality, low detail, mutated, jpeg artifacts, compression artifacts,"]},{"id":14,"type":"Reroute","pos":[1300,-710],"size":[75,26],"flags":{},"order":12,"mode":0,"inputs":[{"name":"","type":"*","link":1966}],"outputs":[{"name":"","type":"VAE","links":[18,1328],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":397,"type":"VAEDecode","pos":[1403.6392822265625,-371.9699401855469],"size":[210,46],"flags":{},"order":31,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":1988},{"name":"vae","localized_name":"vae","type":"VAE","link":1328}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[1329],"slot_index":0}],"properties":{"Node name for 
S&R":"VAEDecode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[]},{"id":680,"type":"Reroute","pos":[1310,-660],"size":[75,26],"flags":{},"order":13,"mode":0,"inputs":[{"name":"","type":"*","link":2001}],"outputs":[{"name":"","type":"CLIP_VISION","links":[2004,2009]}],"properties":{"showOutputText":false,"horizontal":false}},{"id":678,"type":"StyleModelApply","pos":[101.3630142211914,-560.2020874023438],"size":[262,122],"flags":{},"order":24,"mode":0,"inputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","link":2005},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","link":1999},{"name":"clip_vision_output","localized_name":"clip_vision_output","type":"CLIP_VISION_OUTPUT","link":2003}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2002],"slot_index":0}],"properties":{"Node name for S&R":"StyleModelApply"},"widgets_values":[1,"multiply"]},{"id":683,"type":"CLIPVisionEncode","pos":[-170,-220],"size":[253.60000610351562,78],"flags":{},"order":21,"mode":0,"inputs":[{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","link":2009},{"name":"image","localized_name":"image","type":"IMAGE","link":2035}],"outputs":[{"name":"CLIP_VISION_OUTPUT","localized_name":"CLIP_VISION_OUTPUT","type":"CLIP_VISION_OUTPUT","links":[2008]}],"properties":{"Node name for S&R":"CLIPVisionEncode"},"widgets_values":["center"]},{"id":682,"type":"StyleModelApply","pos":[100,-250],"size":[262,122],"flags":{},"order":25,"mode":0,"inputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","link":2006},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","link":2007},{"name":"clip_vision_output","localized_name":"clip_vision_output","type":"CLIP_VISION_OUTPUT","link":2008}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2020],"slot_index":0}],"properties":{"Node name for 
S&R":"StyleModelApply"},"widgets_values":[1,"multiply"]},{"id":681,"type":"CLIPVisionEncode","pos":[-173.92124938964844,-524.1537475585938],"size":[253.60000610351562,78],"flags":{},"order":20,"mode":0,"inputs":[{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","link":2004},{"name":"image","localized_name":"image","type":"IMAGE","link":2028}],"outputs":[{"name":"CLIP_VISION_OUTPUT","localized_name":"CLIP_VISION_OUTPUT","type":"CLIP_VISION_OUTPUT","links":[2003]}],"properties":{"Node name for S&R":"CLIPVisionEncode"},"widgets_values":["center"]},{"id":694,"type":"LoadImage","pos":[-536.0714111328125,-640.6544189453125],"size":[315,314],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2028],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["ChatGPT Image Apr 29, 2025, 07_47_12 PM.png","image"]},{"id":7,"type":"VAEEncodeAdvanced","pos":[696.7778930664062,-164.97328186035156],"size":[261.2217712402344,279.3136901855469],"flags":{},"order":19,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":null},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":18}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":[],"slot_index":1},{"name":"mask","localized_name":"mask","type":"MASK","links":[],"slot_index":2},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[1399],"slot_index":3},{"name":"width","localized_name":"width","type":"INT","links":null}
,{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"VAEEncodeAdvanced","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":["false",768,1344,"red",false,"16_channels"]},{"id":596,"type":"ClownRegionalConditioning","pos":[425.9762268066406,-243.12513732910156],"size":[211.60000610351562,122],"flags":{},"order":27,"mode":0,"inputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","shape":7,"link":null},{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","shape":7,"link":2020},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":2042}],"outputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","links":[1937],"slot_index":0}],"properties":{"Node name for S&R":"ClownRegionalConditioning"},"widgets_values":[false,256]},{"id":401,"type":"ClownsharKSampler_Beta","pos":[1010,-370],"size":[340.55120849609375,666.8208618164062],"flags":{},"order":30,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":1967},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":1735},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":1732},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1399},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[1988],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharKSampler_Beta","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":[0.5,"exponential/res_2s","bong_tangent",20,-1,1,1,109,"fixed","standard",true]},{"id":560,"type":"ClownRegionalConditionings","pos":[676.1644897460938,-499.31219482421875],"size":[278.4758605957031,266],"flags":{},"order":29,"mode":0,"inputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","shape":7,"link":1938},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"region_bleeds","localized_name":"region_bleeds","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","links":[1735],"slot_index":0}],"properties":{"Node name for S&R":"ClownRegionalConditionings"},"widgets_values":[0.5,1,14,"beta57",0,20,"boolean",false]},{"id":690,"type":"LoadImage","pos":[-531.4011840820312,-234.04151916503906],"size":[315,314],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2035],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["ComfyUI_00452_.png","image"]},{"id":676,"type":"InvertMask","pos":[-1270,-450],"size":[140,26],"flags":{},"order":9,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":1990}],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[1991],"slot_index":0}],"properties":{"Node name for S&R":"InvertMask"},"widgets_values":[]},{"id":666,"type":"SolidMask","pos":[-1500,-450],"size":[210,106],"flags":{},"order":2,"mode":0,"inputs":[],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[1990],"slot_index":0}],"properties":{"Node name for 
S&R":"SolidMask"},"widgets_values":[1,1344,768]},{"id":667,"type":"MaskPreview","pos":[-840,-570],"size":[210,246],"flags":{},"order":22,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":1969}],"outputs":[],"properties":{"Node name for S&R":"MaskPreview"},"widgets_values":[]},{"id":670,"type":"MaskPreview","pos":[-840,-280],"size":[210,246],"flags":{},"order":26,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":2041}],"outputs":[],"properties":{"Node name for S&R":"MaskPreview"},"widgets_values":[]},{"id":661,"type":"ClownRegionalConditioning","pos":[411.9298095703125,-539.053955078125],"size":[211.60000610351562,122],"flags":{},"order":28,"mode":0,"inputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","shape":7,"link":1937},{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","shape":7,"link":2002},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":2036}],"outputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","links":[1938],"slot_index":0}],"properties":{"Node name for S&R":"ClownRegionalConditioning"},"widgets_values":[false,256]},{"id":665,"type":"MaskComposite","pos":[-1100,-450],"size":[210,126],"flags":{},"order":15,"mode":0,"inputs":[{"name":"destination","localized_name":"destination","type":"MASK","link":1991},{"name":"source","localized_name":"source","type":"MASK","link":1995}],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[1969,2036,2038],"slot_index":0}],"properties":{"Node name for S&R":"MaskComposite"},"widgets_values":[0,0,"add"]},{"id":700,"type":"MaskFlip+","pos":[-1098.6136474609375,-267.628173828125],"size":[210,58],"flags":{},"order":23,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":2038}],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[2041,2042],"slot_index":0}],"properties":{"Node name for 
S&R":"MaskFlip+"},"widgets_values":["x"]},{"id":668,"type":"SolidMask","pos":[-1502.6644287109375,-289.3330993652344],"size":[210,106],"flags":{},"order":3,"mode":0,"inputs":[],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[1995],"slot_index":0}],"properties":{"Node name for S&R":"SolidMask"},"widgets_values":[1,768,768]},{"id":701,"type":"Note","pos":[-1378.6959228515625,-637.0702514648438],"size":[342.05950927734375,88],"flags":{},"order":4,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["I usually just lazily draw masks in Load Image nodes (with some random image loaded), but for the sake of reproducibility, here's another approach."],"color":"#432","bgcolor":"#653"},{"id":663,"type":"FluxLoader","pos":[654.6221923828125,-858.3792724609375],"size":[374.41741943359375,282],"flags":{},"order":5,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1963],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[1965],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[1966],"slot_index":2},{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","links":[2001],"slot_index":3},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","links":[2000],"slot_index":4}],"properties":{"Node name for S&R":"FluxLoader"},"widgets_values":["colossusProjectFlux_v42AIO.safetensors","fp8_e4m3fn_fast",".use_ckpt_clip",".none",".use_ckpt_vae","sigclip_vision_patch14_384.safetensors","flux1-redux-dev.safetensors"]},{"id":664,"type":"ReFluxPatcher","pos":[1064.7325439453125,-863.0516967773438],"size":[210,82],"flags":{},"order":10,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1963}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1964],"slot_index":0}],"properties":{"Node name for 
S&R":"ReFluxPatcher"},"widgets_values":["float64",true]},{"id":679,"type":"Reroute","pos":[1300,-610],"size":[75,26],"flags":{},"order":14,"mode":0,"inputs":[{"name":"","type":"*","link":2000}],"outputs":[{"name":"","type":"STYLE_MODEL","links":[1999,2007]}],"properties":{"showOutputText":false,"horizontal":false}},{"id":662,"type":"CLIPTextEncode","pos":[-140.3179168701172,-670.337158203125],"size":[210,88],"flags":{"collapsed":false},"order":18,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":1939}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2005,2006],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[""]},{"id":702,"type":"Note","pos":[-1222.3177490234375,-134.59034729003906],"size":[278.04071044921875,88],"flags":{},"order":6,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Note that these masks are overlapping."],"color":"#432","bgcolor":"#653"},{"id":703,"type":"Note","pos":[358.4803466796875,-41.564422607421875],"size":[278.04071044921875,88],"flags":{},"order":7,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["edge_width also creates some overlap around the edges of the mask."],"color":"#432","bgcolor":"#653"},{"id":704,"type":"Note","pos":[324.8023986816406,-781.4505004882812],"size":[290.7107238769531,155.35317993164062],"flags":{},"order":8,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["ClownRegionalConditionings:\n\nTry raising or lowering weight, and changing the weight scheduler from beta57 to Karras (weakens more quickly), or to linear quadratic (stronger late).\n\nTry changing region_bleed_start_step, and 
end_step."],"color":"#432","bgcolor":"#653"},{"id":398,"type":"SaveImage","pos":[1379.9996337890625,-267.2835998535156],"size":[341.7508850097656,561.0067749023438],"flags":{},"order":32,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":1329}],"outputs":[],"properties":{"Node name for S&R":"SaveImage","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["ComfyUI"]}],"links":[[18,14,0,7,4,"VAE"],[1328,14,0,397,1,"VAE"],[1329,397,0,398,0,"IMAGE"],[1399,7,3,401,3,"LATENT"],[1706,490,0,541,0,"CLIP"],[1732,541,0,401,2,"CONDITIONING"],[1735,560,0,401,1,"CONDITIONING"],[1937,596,0,661,0,"COND_REGIONS"],[1938,661,0,560,0,"COND_REGIONS"],[1939,490,0,662,0,"CLIP"],[1963,663,0,664,0,"MODEL"],[1964,664,0,13,0,"*"],[1965,663,1,490,0,"*"],[1966,663,2,14,0,"*"],[1967,13,0,401,0,"MODEL"],[1969,665,0,667,0,"MASK"],[1988,401,0,397,0,"LATENT"],[1990,666,0,676,0,"MASK"],[1991,676,0,665,0,"MASK"],[1995,668,0,665,1,"MASK"],[1999,679,0,678,1,"STYLE_MODEL"],[2000,663,4,679,0,"*"],[2001,663,3,680,0,"*"],[2002,678,0,661,1,"CONDITIONING"],[2003,681,0,678,2,"CLIP_VISION_OUTPUT"],[2004,680,0,681,0,"CLIP_VISION"],[2005,662,0,678,0,"CONDITIONING"],[2006,662,0,682,0,"CONDITIONING"],[2007,679,0,682,1,"STYLE_MODEL"],[2008,683,0,682,2,"CLIP_VISION_OUTPUT"],[2009,680,0,683,0,"CLIP_VISION"],[2020,682,0,596,1,"CONDITIONING"],[2028,694,0,681,1,"IMAGE"],[2035,690,0,683,1,"IMAGE"],[2036,665,0,661,2,"MASK"],[2038,665,0,700,0,"MASK"],[2041,700,0,670,0,"MASK"],[2042,700,0,596,2,"MASK"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.7449402268886907,"offset":[2753.5015634091214,978.5823037629943]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0,"ue_links":[],"VHS_MetadataImage":true,"VHS_KeepIntermediate":true},"version":0.4} ================================================ FILE: example_workflows/flux regional redux (3 zone, nested).json ================================================ 
{"last_node_id":720,"last_link_id":2082,"nodes":[{"id":13,"type":"Reroute","pos":[1300,-790],"size":[75,26],"flags":{},"order":18,"mode":0,"inputs":[{"name":"","type":"*","link":1964}],"outputs":[{"name":"","type":"MODEL","links":[1967],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":490,"type":"Reroute","pos":[1300,-750],"size":[75,26],"flags":{},"order":12,"mode":0,"inputs":[{"name":"","type":"*","link":1965}],"outputs":[{"name":"","type":"CLIP","links":[1706,1939],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":541,"type":"CLIPTextEncode","pos":[692.1508178710938,183.7528839111328],"size":[265.775390625,113.01970672607422],"flags":{},"order":19,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":1706}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[1732],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["blurry, out of focus, shallow depth of field, low quality, bad quality, low detail, mutated, jpeg artifacts, compression 
artifacts,"]},{"id":14,"type":"Reroute","pos":[1300,-710],"size":[75,26],"flags":{},"order":13,"mode":0,"inputs":[{"name":"","type":"*","link":1966}],"outputs":[{"name":"","type":"VAE","links":[18,1328],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":680,"type":"Reroute","pos":[1310,-660],"size":[75,26],"flags":{},"order":14,"mode":0,"inputs":[{"name":"","type":"*","link":2001}],"outputs":[{"name":"","type":"CLIP_VISION","links":[2004,2009,2043]}],"properties":{"showOutputText":false,"horizontal":false}},{"id":678,"type":"StyleModelApply","pos":[101.3630142211914,-560.2020874023438],"size":[262,122],"flags":{},"order":28,"mode":0,"inputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","link":2005},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","link":1999},{"name":"clip_vision_output","localized_name":"clip_vision_output","type":"CLIP_VISION_OUTPUT","link":2003}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2002],"slot_index":0}],"properties":{"Node name for S&R":"StyleModelApply"},"widgets_values":[1,"multiply"]},{"id":681,"type":"CLIPVisionEncode","pos":[-173.92124938964844,-524.1537475585938],"size":[253.60000610351562,78],"flags":{},"order":22,"mode":0,"inputs":[{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","link":2004},{"name":"image","localized_name":"image","type":"IMAGE","link":2082}],"outputs":[{"name":"CLIP_VISION_OUTPUT","localized_name":"CLIP_VISION_OUTPUT","type":"CLIP_VISION_OUTPUT","links":[2003]}],"properties":{"Node name for 
S&R":"CLIPVisionEncode"},"widgets_values":["center"]},{"id":663,"type":"FluxLoader","pos":[654.6221923828125,-858.3792724609375],"size":[374.41741943359375,282],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1963],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[1965],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[1966],"slot_index":2},{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","links":[2001],"slot_index":3},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","links":[2000],"slot_index":4}],"properties":{"Node name for S&R":"FluxLoader"},"widgets_values":["colossusProjectFlux_v42AIO.safetensors","fp8_e4m3fn_fast",".use_ckpt_clip",".none",".use_ckpt_vae","sigclip_vision_patch14_384.safetensors","flux1-redux-dev.safetensors"]},{"id":664,"type":"ReFluxPatcher","pos":[1064.7325439453125,-863.0516967773438],"size":[210,82],"flags":{},"order":11,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1963}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1964],"slot_index":0}],"properties":{"Node name for S&R":"ReFluxPatcher"},"widgets_values":["float64",true]},{"id":679,"type":"Reroute","pos":[1300,-610],"size":[75,26],"flags":{},"order":15,"mode":0,"inputs":[{"name":"","type":"*","link":2000}],"outputs":[{"name":"","type":"STYLE_MODEL","links":[1999,2007,2046]}],"properties":{"showOutputText":false,"horizontal":false}},{"id":662,"type":"CLIPTextEncode","pos":[-140.3179168701172,-670.337158203125],"size":[210,88],"flags":{"collapsed":false},"order":20,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":1939}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2005,2006,2045],"slot_index":0}],"properties":{"Node name for 
S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[""]},{"id":398,"type":"SaveImage","pos":[1379.9996337890625,-267.2835998535156],"size":[341.7508850097656,561.0067749023438],"flags":{},"order":40,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":1329}],"outputs":[],"properties":{"Node name for S&R":"SaveImage","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["ComfyUI"]},{"id":683,"type":"CLIPVisionEncode","pos":[-170,-220],"size":[253.60000610351562,78],"flags":{},"order":23,"mode":0,"inputs":[{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","link":2009},{"name":"image","localized_name":"image","type":"IMAGE","link":2062}],"outputs":[{"name":"CLIP_VISION_OUTPUT","localized_name":"CLIP_VISION_OUTPUT","type":"CLIP_VISION_OUTPUT","links":[2008]}],"properties":{"Node name for S&R":"CLIPVisionEncode"},"widgets_values":["center"]},{"id":682,"type":"StyleModelApply","pos":[100,-250],"size":[262,122],"flags":{},"order":29,"mode":0,"inputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","link":2006},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","link":2007},{"name":"clip_vision_output","localized_name":"clip_vision_output","type":"CLIP_VISION_OUTPUT","link":2008}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2020],"slot_index":0}],"properties":{"Node name for S&R":"StyleModelApply"},"widgets_values":[1,"multiply"]},{"id":706,"type":"CLIPVisionEncode","pos":[-180,180],"size":[253.60000610351562,78],"flags":{},"order":24,"mode":0,"inputs":[{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","link":2043},{"name":"image","localized_name":"image","type":"IMAGE","link":2081}],"outputs":[{"name":"CLIP_VISION_OUTPUT","localized_name":"CLIP_VISION_OUTPUT","type":"CLIP_VISION_OUTPUT","links":[2047]}],"properties":{"Node name for 
S&R":"CLIPVisionEncode"},"widgets_values":["center"]},{"id":7,"type":"VAEEncodeAdvanced","pos":[696.7778930664062,-164.97328186035156],"size":[261.2217712402344,279.3136901855469],"flags":{},"order":21,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":null},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":18}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":[],"slot_index":1},{"name":"mask","localized_name":"mask","type":"MASK","links":[],"slot_index":2},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[1399],"slot_index":3},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"VAEEncodeAdvanced","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":["false",1344,768,"red",false,"16_channels"]},{"id":690,"type":"LoadImage","pos":[-549.7396240234375,-227.43971252441406],"size":[315,314],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2062],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["ComfyUI_00464_.png","image"]},{"id":704,"type":"Note","pos":[324.8023986816406,-781.4505004882812],"size":[290.7107238769531,155.35317993164062],"flags":{},"order":2,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["ClownRegionalConditionings:\n\nTry raising or lowering weight, and changing the 
weight scheduler from beta57 to Karras (weakens more quickly), or to linear quadratic (stronger late).\n\nTry changing region_bleed_start_step (earlier will make the image blend together more), and end_step."],"color":"#432","bgcolor":"#653"},{"id":703,"type":"Note","pos":[384.9622802734375,346.1895751953125],"size":[278.04071044921875,88],"flags":{},"order":3,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["edge_width also creates some overlap around the edges of the mask."],"color":"#432","bgcolor":"#653"},{"id":397,"type":"VAEDecode","pos":[1403.6392822265625,-371.9699401855469],"size":[210,46],"flags":{},"order":39,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":2077},{"name":"vae","localized_name":"vae","type":"VAE","link":1328}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[1329],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[]},{"id":710,"type":"MaskPreview","pos":[-809.6506958007812,-582.2230834960938],"size":[210,246],"flags":{},"order":26,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":2054}],"outputs":[],"properties":{"Node name for S&R":"MaskPreview"},"widgets_values":[]},{"id":715,"type":"SolidMask","pos":[-1501.8455810546875,-483.931884765625],"size":[210,106],"flags":{},"order":4,"mode":0,"inputs":[],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[2064,2073],"slot_index":0}],"properties":{"Node name for S&R":"SolidMask"},"widgets_values":[1,1536,1536]},{"id":667,"type":"MaskPreview","pos":[-800.4617309570312,225.60794067382812],"size":[210,246],"flags":{},"order":31,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":1969}],"outputs":[],"properties":{"Node name for 
S&R":"MaskPreview"},"widgets_values":[]},{"id":676,"type":"InvertMask","pos":[-1225.793212890625,220.8433380126953],"size":[140,26],"flags":{},"order":16,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":2073}],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[1991],"slot_index":0}],"properties":{"Node name for S&R":"InvertMask"},"widgets_values":[]},{"id":719,"type":"MaskPreview","pos":[-806.2830810546875,-181.18017578125],"size":[210,246],"flags":{},"order":34,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":2072}],"outputs":[],"properties":{"Node name for S&R":"MaskPreview"},"widgets_values":[]},{"id":717,"type":"MaskComposite","pos":[-1232.8262939453125,-171.98712158203125],"size":[210,126],"flags":{},"order":27,"mode":0,"inputs":[{"name":"destination","localized_name":"destination","type":"MASK","link":2068},{"name":"source","localized_name":"source","type":"MASK","link":2069}],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[2071],"slot_index":0}],"properties":{"Node name for S&R":"MaskComposite"},"widgets_values":[512,512,"add"]},{"id":718,"type":"SolidMask","pos":[-1510.0887451171875,-5.13049840927124],"size":[210,106],"flags":{},"order":5,"mode":0,"inputs":[],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[2069,2076],"slot_index":0}],"properties":{"Node name for S&R":"SolidMask"},"widgets_values":[1,512,512]},{"id":716,"type":"SolidMask","pos":[-1504.66015625,-322.68243408203125],"size":[210,106],"flags":{},"order":6,"mode":0,"inputs":[],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[2065],"slot_index":0}],"properties":{"Node name for S&R":"SolidMask"},"widgets_values":[1,1024,1024]},{"id":701,"type":"Note","pos":[-1262.5018310546875,-634.6495971679688],"size":[342.05950927734375,88],"flags":{},"order":7,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["I usually just lazily draw 
masks in Load Image nodes (with some random image loaded), but for the sake of reproducibility, here's another approach."],"color":"#432","bgcolor":"#653"},{"id":712,"type":"Note","pos":[-1551.669921875,-639.0407104492188],"size":[245.76409912109375,91.6677017211914],"flags":{},"order":8,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["So long as these masks are all the same size, the regional conditioning nodes will handle resizing to the image size for you."],"color":"#432","bgcolor":"#653"},{"id":720,"type":"InvertMask","pos":[-989.771240234375,-173.28375244140625],"size":[140,26],"flags":{},"order":32,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":2071}],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[2072,2078],"slot_index":0}],"properties":{"Node name for S&R":"InvertMask"},"widgets_values":[]},{"id":709,"type":"MaskComposite","pos":[-1250.3681640625,-473.0709228515625],"size":[210,126],"flags":{},"order":17,"mode":0,"inputs":[{"name":"destination","localized_name":"destination","type":"MASK","link":2064},{"name":"source","localized_name":"source","type":"MASK","link":2065}],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[2054,2068,2079],"slot_index":0}],"properties":{"Node name for S&R":"MaskComposite"},"widgets_values":[256,256,"subtract"]},{"id":665,"type":"MaskComposite","pos":[-1049.337646484375,223.26406860351562],"size":[210,126],"flags":{},"order":25,"mode":0,"inputs":[{"name":"destination","localized_name":"destination","type":"MASK","link":1991},{"name":"source","localized_name":"source","type":"MASK","link":2076}],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[1969,2080],"slot_index":0}],"properties":{"Node name for 
S&R":"MaskComposite"},"widgets_values":[512,512,"add"]},{"id":705,"type":"LoadImage","pos":[-548.5830688476562,-622.7470092773438],"size":[315,314],"flags":{},"order":9,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2082],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["ComfyUI_00479_.png","image"]},{"id":694,"type":"LoadImage","pos":[-545.7549438476562,175.12576293945312],"size":[315,314],"flags":{},"order":10,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2081],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["ChatGPT Image Apr 29, 2025, 08_07_01 PM.png","image"]},{"id":401,"type":"ClownsharKSampler_Beta","pos":[1010,-370],"size":[340.55120849609375,666.8208618164062],"flags":{},"order":38,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":1967},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":1735},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":1732},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1399},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2077],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharKSampler_Beta","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":[0.5,"exponential/res_2s","bong_tangent",30,-1,1,1,109,"fixed","standard",true]},{"id":560,"type":"ClownRegionalConditionings","pos":[676.1644897460938,-499.31219482421875],"size":[278.4758605957031,266],"flags":{},"order":37,"mode":0,"inputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","shape":7,"link":1938},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"region_bleeds","localized_name":"region_bleeds","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","links":[1735],"slot_index":0}],"properties":{"Node name for S&R":"ClownRegionalConditionings"},"widgets_values":[0.5,1,15,"beta57",0,30,"boolean",false]},{"id":707,"type":"StyleModelApply","pos":[95.6487045288086,150],"size":[262,122],"flags":{},"order":30,"mode":0,"inputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","link":2045},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","link":2046},{"name":"clip_vision_output","localized_name":"clip_vision_output","type":"CLIP_VISION_OUTPUT","link":2047}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2048],"slot_index":0}],"properties":{"Node name for 
S&R":"StyleModelApply"},"widgets_values":[1,"multiply"]},{"id":708,"type":"ClownRegionalConditioning","pos":[404.6683044433594,155.1585693359375],"size":[211.60000610351562,122],"flags":{},"order":33,"mode":0,"inputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","shape":7,"link":null},{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","shape":7,"link":2048},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":2080}],"outputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","links":[2050],"slot_index":0}],"properties":{"Node name for S&R":"ClownRegionalConditioning"},"widgets_values":[false,128]},{"id":661,"type":"ClownRegionalConditioning","pos":[409.5088806152344,-556.8058471679688],"size":[211.60000610351562,122],"flags":{},"order":36,"mode":0,"inputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","shape":7,"link":1937},{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","shape":7,"link":2002},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":2079}],"outputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","links":[1938],"slot_index":0}],"properties":{"Node name for S&R":"ClownRegionalConditioning"},"widgets_values":[false,128]},{"id":596,"type":"ClownRegionalConditioning","pos":[407.416748046875,-245.54579162597656],"size":[211.60000610351562,122],"flags":{},"order":35,"mode":0,"inputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","shape":7,"link":2050},{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","shape":7,"link":2020},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":2078}],"outputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","links":[1937],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownRegionalConditioning"},"widgets_values":[false,128]}],"links":[[18,14,0,7,4,"VAE"],[1328,14,0,397,1,"VAE"],[1329,397,0,398,0,"IMAGE"],[1399,7,3,401,3,"LATENT"],[1706,490,0,541,0,"CLIP"],[1732,541,0,401,2,"CONDITIONING"],[1735,560,0,401,1,"CONDITIONING"],[1937,596,0,661,0,"COND_REGIONS"],[1938,661,0,560,0,"COND_REGIONS"],[1939,490,0,662,0,"CLIP"],[1963,663,0,664,0,"MODEL"],[1964,664,0,13,0,"*"],[1965,663,1,490,0,"*"],[1966,663,2,14,0,"*"],[1967,13,0,401,0,"MODEL"],[1969,665,0,667,0,"MASK"],[1991,676,0,665,0,"MASK"],[1999,679,0,678,1,"STYLE_MODEL"],[2000,663,4,679,0,"*"],[2001,663,3,680,0,"*"],[2002,678,0,661,1,"CONDITIONING"],[2003,681,0,678,2,"CLIP_VISION_OUTPUT"],[2004,680,0,681,0,"CLIP_VISION"],[2005,662,0,678,0,"CONDITIONING"],[2006,662,0,682,0,"CONDITIONING"],[2007,679,0,682,1,"STYLE_MODEL"],[2008,683,0,682,2,"CLIP_VISION_OUTPUT"],[2009,680,0,683,0,"CLIP_VISION"],[2020,682,0,596,1,"CONDITIONING"],[2043,680,0,706,0,"CLIP_VISION"],[2045,662,0,707,0,"CONDITIONING"],[2046,679,0,707,1,"STYLE_MODEL"],[2047,706,0,707,2,"CLIP_VISION_OUTPUT"],[2048,707,0,708,1,"CONDITIONING"],[2050,708,0,596,0,"COND_REGIONS"],[2054,709,0,710,0,"MASK"],[2062,690,0,683,1,"IMAGE"],[2064,715,0,709,0,"MASK"],[2065,716,0,709,1,"MASK"],[2068,709,0,717,0,"MASK"],[2069,718,0,717,1,"MASK"],[2071,717,0,720,0,"MASK"],[2072,720,0,719,0,"MASK"],[2073,715,0,676,0,"MASK"],[2076,718,0,665,1,"MASK"],[2077,401,0,397,0,"LATENT"],[2078,720,0,596,2,"MASK"],[2079,709,0,661,2,"MASK"],[2080,665,0,708,2,"MASK"],[2081,694,0,706,1,"IMAGE"],[2082,705,0,681,1,"IMAGE"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.4420993610650337,"offset":[3089.9291694729854,951.347346350063]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0,"ue_links":[],"VHS_MetadataImage":true,"VHS_KeepIntermediate":true},"version":0.4} ================================================ FILE: example_workflows/flux regional redux (3 zone, overlapping).json ================================================ 
{"last_node_id":715,"last_link_id":2063,"nodes":[{"id":13,"type":"Reroute","pos":[1300,-790],"size":[75,26],"flags":{},"order":17,"mode":0,"inputs":[{"name":"","type":"*","link":1964}],"outputs":[{"name":"","type":"MODEL","links":[1967],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":490,"type":"Reroute","pos":[1300,-750],"size":[75,26],"flags":{},"order":12,"mode":0,"inputs":[{"name":"","type":"*","link":1965}],"outputs":[{"name":"","type":"CLIP","links":[1706,1939],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":541,"type":"CLIPTextEncode","pos":[692.1508178710938,183.7528839111328],"size":[265.775390625,113.01970672607422],"flags":{},"order":18,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":1706}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[1732],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["blurry, out of focus, shallow depth of field, low quality, bad quality, low detail, mutated, jpeg artifacts, compression 
artifacts,"]},{"id":14,"type":"Reroute","pos":[1300,-710],"size":[75,26],"flags":{},"order":13,"mode":0,"inputs":[{"name":"","type":"*","link":1966}],"outputs":[{"name":"","type":"VAE","links":[18,1328],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":680,"type":"Reroute","pos":[1310,-660],"size":[75,26],"flags":{},"order":14,"mode":0,"inputs":[{"name":"","type":"*","link":2001}],"outputs":[{"name":"","type":"CLIP_VISION","links":[2004,2009,2043]}],"properties":{"showOutputText":false,"horizontal":false}},{"id":678,"type":"StyleModelApply","pos":[101.3630142211914,-560.2020874023438],"size":[262,122],"flags":{},"order":26,"mode":0,"inputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","link":2005},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","link":1999},{"name":"clip_vision_output","localized_name":"clip_vision_output","type":"CLIP_VISION_OUTPUT","link":2003}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2002],"slot_index":0}],"properties":{"Node name for S&R":"StyleModelApply"},"widgets_values":[1,"multiply"]},{"id":681,"type":"CLIPVisionEncode","pos":[-173.92124938964844,-524.1537475585938],"size":[253.60000610351562,78],"flags":{},"order":21,"mode":0,"inputs":[{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","link":2004},{"name":"image","localized_name":"image","type":"IMAGE","link":2028}],"outputs":[{"name":"CLIP_VISION_OUTPUT","localized_name":"CLIP_VISION_OUTPUT","type":"CLIP_VISION_OUTPUT","links":[2003]}],"properties":{"Node name for S&R":"CLIPVisionEncode"},"widgets_values":["center"]},{"id":676,"type":"InvertMask","pos":[-1270,-450],"size":[140,26],"flags":{},"order":16,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":1990}],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[1991,2051],"slot_index":0}],"properties":{"Node name for 
S&R":"InvertMask"},"widgets_values":[]},{"id":667,"type":"MaskPreview","pos":[-840,-570],"size":[210,246],"flags":{},"order":29,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":1969}],"outputs":[],"properties":{"Node name for S&R":"MaskPreview"},"widgets_values":[]},{"id":661,"type":"ClownRegionalConditioning","pos":[411.9298095703125,-539.053955078125],"size":[211.60000610351562,122],"flags":{},"order":35,"mode":0,"inputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","shape":7,"link":1937},{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","shape":7,"link":2002},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":2036}],"outputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","links":[1938],"slot_index":0}],"properties":{"Node name for S&R":"ClownRegionalConditioning"},"widgets_values":[false,256]},{"id":701,"type":"Note","pos":[-1378.6959228515625,-637.0702514648438],"size":[342.05950927734375,88],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["I usually just lazily draw masks in Load Image nodes (with some random image loaded), but for the sake of reproducibility, here's another approach."],"color":"#432","bgcolor":"#653"},{"id":663,"type":"FluxLoader","pos":[654.6221923828125,-858.3792724609375],"size":[374.41741943359375,282],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1963],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[1965],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[1966],"slot_index":2},{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","links":[2001],"slot_index":3},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","links":[2000],"slot_index":4}],"properties":{"Node name for 
S&R":"FluxLoader"},"widgets_values":["colossusProjectFlux_v42AIO.safetensors","fp8_e4m3fn_fast",".use_ckpt_clip",".none",".use_ckpt_vae","sigclip_vision_patch14_384.safetensors","flux1-redux-dev.safetensors"]},{"id":664,"type":"ReFluxPatcher","pos":[1064.7325439453125,-863.0516967773438],"size":[210,82],"flags":{},"order":11,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1963}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1964],"slot_index":0}],"properties":{"Node name for S&R":"ReFluxPatcher"},"widgets_values":["float64",true]},{"id":679,"type":"Reroute","pos":[1300,-610],"size":[75,26],"flags":{},"order":15,"mode":0,"inputs":[{"name":"","type":"*","link":2000}],"outputs":[{"name":"","type":"STYLE_MODEL","links":[1999,2007,2046]}],"properties":{"showOutputText":false,"horizontal":false}},{"id":662,"type":"CLIPTextEncode","pos":[-140.3179168701172,-670.337158203125],"size":[210,88],"flags":{"collapsed":false},"order":19,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":1939}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2005,2006,2045],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[""]},{"id":398,"type":"SaveImage","pos":[1379.9996337890625,-267.2835998535156],"size":[341.7508850097656,561.0067749023438],"flags":{},"order":39,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":1329}],"outputs":[],"properties":{"Node name for 
S&R":"SaveImage","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["ComfyUI"]},{"id":683,"type":"CLIPVisionEncode","pos":[-170,-220],"size":[253.60000610351562,78],"flags":{},"order":22,"mode":0,"inputs":[{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","link":2009},{"name":"image","localized_name":"image","type":"IMAGE","link":2062}],"outputs":[{"name":"CLIP_VISION_OUTPUT","localized_name":"CLIP_VISION_OUTPUT","type":"CLIP_VISION_OUTPUT","links":[2008]}],"properties":{"Node name for S&R":"CLIPVisionEncode"},"widgets_values":["center"]},{"id":682,"type":"StyleModelApply","pos":[100,-250],"size":[262,122],"flags":{},"order":27,"mode":0,"inputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","link":2006},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","link":2007},{"name":"clip_vision_output","localized_name":"clip_vision_output","type":"CLIP_VISION_OUTPUT","link":2008}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2020],"slot_index":0}],"properties":{"Node name for S&R":"StyleModelApply"},"widgets_values":[1,"multiply"]},{"id":596,"type":"ClownRegionalConditioning","pos":[425.9762268066406,-243.12513732910156],"size":[211.60000610351562,122],"flags":{},"order":34,"mode":0,"inputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","shape":7,"link":2050},{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","shape":7,"link":2020},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":2042}],"outputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","links":[1937],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownRegionalConditioning"},"widgets_values":[false,256]},{"id":706,"type":"CLIPVisionEncode","pos":[-180,180],"size":[253.60000610351562,78],"flags":{},"order":23,"mode":0,"inputs":[{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","link":2043},{"name":"image","localized_name":"image","type":"IMAGE","link":2061}],"outputs":[{"name":"CLIP_VISION_OUTPUT","localized_name":"CLIP_VISION_OUTPUT","type":"CLIP_VISION_OUTPUT","links":[2047]}],"properties":{"Node name for S&R":"CLIPVisionEncode"},"widgets_values":["center"]},{"id":707,"type":"StyleModelApply","pos":[90,150],"size":[262,122],"flags":{},"order":28,"mode":0,"inputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","link":2045},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","link":2046},{"name":"clip_vision_output","localized_name":"clip_vision_output","type":"CLIP_VISION_OUTPUT","link":2047}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2048],"slot_index":0}],"properties":{"Node name for S&R":"StyleModelApply"},"widgets_values":[1,"multiply"]},{"id":708,"type":"ClownRegionalConditioning","pos":[420,160],"size":[211.60000610351562,122],"flags":{},"order":32,"mode":0,"inputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","shape":7,"link":null},{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","shape":7,"link":2048},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":2057}],"outputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","links":[2050],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownRegionalConditioning"},"widgets_values":[false,256]},{"id":665,"type":"MaskComposite","pos":[-1100,-450],"size":[210,126],"flags":{},"order":24,"mode":0,"inputs":[{"name":"destination","localized_name":"destination","type":"MASK","link":1991},{"name":"source","localized_name":"source","type":"MASK","link":1995}],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[1969,2036,2038],"slot_index":0}],"properties":{"Node name for S&R":"MaskComposite"},"widgets_values":[0,0,"add"]},{"id":670,"type":"MaskPreview","pos":[-840.8076782226562,-235.62042236328125],"size":[210,246],"flags":{},"order":33,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":2041}],"outputs":[],"properties":{"Node name for S&R":"MaskPreview"},"widgets_values":[]},{"id":700,"type":"MaskFlip+","pos":[-1099.420166015625,-236.15890502929688],"size":[210,58],"flags":{},"order":30,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":2038}],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[2041,2042],"slot_index":0}],"properties":{"Node name for S&R":"MaskFlip+"},"widgets_values":["x"]},{"id":710,"type":"MaskPreview","pos":[-847.5751953125,166.58413696289062],"size":[210,246],"flags":{},"order":31,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":2054}],"outputs":[],"properties":{"Node name for S&R":"MaskPreview"},"widgets_values":[]},{"id":397,"type":"VAEDecode","pos":[1403.6392822265625,-371.9699401855469],"size":[210,46],"flags":{},"order":38,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":2056},{"name":"vae","localized_name":"vae","type":"VAE","link":1328}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[1329],"slot_index":0}],"properties":{"Node name for 
S&R":"VAEDecode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[]},{"id":401,"type":"ClownsharKSampler_Beta","pos":[1010,-370],"size":[340.55120849609375,666.8208618164062],"flags":{},"order":37,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":1967},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":1735},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":1732},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1399},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2056],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharKSampler_Beta","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":[0.5,"exponential/res_2s","bong_tangent",20,-1,1,1,109,"fixed","standard",true]},{"id":7,"type":"VAEEncodeAdvanced","pos":[696.7778930664062,-164.97328186035156],"size":[261.2217712402344,279.3136901855469],"flags":{},"order":20,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":null},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":18}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":[],"slot_index":1},{"name":"mask","localized_name":"mask","type":"MASK","links":[],"slot_index":2},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[1399],"slot_index":3},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"VAEEncodeAdvanced","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":["false",1344,768,"red",false,"16_channels"]},{"id":694,"type":"LoadImage","pos":[-536.0714111328125,-640.6544189453125],"size":[315,314],"flags":{},"order":3,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2028],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["ChatGPT Image Apr 29, 2025, 08_07_01 
PM.png","image"]},{"id":666,"type":"SolidMask","pos":[-1500,-450],"size":[210,106],"flags":{},"order":4,"mode":0,"inputs":[],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[1990],"slot_index":0}],"properties":{"Node name for S&R":"SolidMask"},"widgets_values":[1,1536,512]},{"id":712,"type":"Note","pos":[-1511.985107421875,-66.87181854248047],"size":[245.76409912109375,91.6677017211914],"flags":{},"order":5,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["So long as these masks are all the same size, the regional conditioning nodes will handle resizing to the image size for you."],"color":"#432","bgcolor":"#653"},{"id":668,"type":"SolidMask","pos":[-1502.6644287109375,-289.3330993652344],"size":[210,106],"flags":{},"order":6,"mode":0,"inputs":[],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[1995],"slot_index":0}],"properties":{"Node name for S&R":"SolidMask"},"widgets_values":[1,512,512]},{"id":690,"type":"LoadImage","pos":[-549.7396240234375,-227.43971252441406],"size":[315,314],"flags":{},"order":7,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2062],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["ComfyUI_00464_.png","image"]},{"id":705,"type":"LoadImage","pos":[-551.003662109375,157.5296173095703],"size":[315,314],"flags":{},"order":8,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2061],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for 
S&R":"LoadImage"},"widgets_values":["ComfyUI_00479_.png","image"]},{"id":560,"type":"ClownRegionalConditionings","pos":[676.1644897460938,-499.31219482421875],"size":[278.4758605957031,266],"flags":{},"order":36,"mode":0,"inputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","shape":7,"link":1938},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"region_bleeds","localized_name":"region_bleeds","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","links":[1735],"slot_index":0}],"properties":{"Node name for S&R":"ClownRegionalConditionings"},"widgets_values":[0.5,1,10,"beta57",0,20,"boolean",false]},{"id":704,"type":"Note","pos":[324.8023986816406,-781.4505004882812],"size":[290.7107238769531,155.35317993164062],"flags":{},"order":9,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["ClownRegionalConditionings:\n\nTry raising or lowering weight, and changing the weight scheduler from beta57 to Karras (weakens more quickly), or to linear quadratic (stronger late).\n\nTry changing region_bleed_start_step (earlier will make the image blend together more), and end_step."],"color":"#432","bgcolor":"#653"},{"id":715,"type":"SolidMask","pos":[-1486.6612548828125,192.47415161132812],"size":[210,106],"flags":{},"order":10,"mode":0,"inputs":[],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[2063],"slot_index":0}],"properties":{"Node name for 
S&R":"SolidMask"},"widgets_values":[1,1280,512]},{"id":709,"type":"MaskComposite","pos":[-1104.1712646484375,170.6186981201172],"size":[210,126],"flags":{},"order":25,"mode":0,"inputs":[{"name":"destination","localized_name":"destination","type":"MASK","link":2051},{"name":"source","localized_name":"source","type":"MASK","link":2063}],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[2054,2057],"slot_index":0}],"properties":{"Node name for S&R":"MaskComposite"},"widgets_values":[128,0,"add"]},{"id":703,"type":"Note","pos":[384.9622802734375,346.1895751953125],"size":[278.04071044921875,88],"flags":{},"order":2,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["edge_width also creates some overlap around the edges of the mask."],"color":"#432","bgcolor":"#653"}],"links":[[18,14,0,7,4,"VAE"],[1328,14,0,397,1,"VAE"],[1329,397,0,398,0,"IMAGE"],[1399,7,3,401,3,"LATENT"],[1706,490,0,541,0,"CLIP"],[1732,541,0,401,2,"CONDITIONING"],[1735,560,0,401,1,"CONDITIONING"],[1937,596,0,661,0,"COND_REGIONS"],[1938,661,0,560,0,"COND_REGIONS"],[1939,490,0,662,0,"CLIP"],[1963,663,0,664,0,"MODEL"],[1964,664,0,13,0,"*"],[1965,663,1,490,0,"*"],[1966,663,2,14,0,"*"],[1967,13,0,401,0,"MODEL"],[1969,665,0,667,0,"MASK"],[1990,666,0,676,0,"MASK"],[1991,676,0,665,0,"MASK"],[1995,668,0,665,1,"MASK"],[1999,679,0,678,1,"STYLE_MODEL"],[2000,663,4,679,0,"*"],[2001,663,3,680,0,"*"],[2002,678,0,661,1,"CONDITIONING"],[2003,681,0,678,2,"CLIP_VISION_OUTPUT"],[2004,680,0,681,0,"CLIP_VISION"],[2005,662,0,678,0,"CONDITIONING"],[2006,662,0,682,0,"CONDITIONING"],[2007,679,0,682,1,"STYLE_MODEL"],[2008,683,0,682,2,"CLIP_VISION_OUTPUT"],[2009,680,0,683,0,"CLIP_VISION"],[2020,682,0,596,1,"CONDITIONING"],[2028,694,0,681,1,"IMAGE"],[2036,665,0,661,2,"MASK"],[2038,665,0,700,0,"MASK"],[2041,700,0,670,0,"MASK"],[2042,700,0,596,2,"MASK"],[2043,680,0,706,0,"CLIP_VISION"],[2045,662,0,707,0,"CONDITIONING"],[2046,679,0,707,1,"STYLE_MODEL"],[2047,706,0,707,2,"CLIP_VISION_OUTPUT"],[204
8,707,0,708,1,"CONDITIONING"],[2050,708,0,596,0,"COND_REGIONS"],[2051,676,0,709,0,"MASK"],[2054,709,0,710,0,"MASK"],[2056,401,0,397,0,"LATENT"],[2057,709,0,708,2,"MASK"],[2061,705,0,706,1,"IMAGE"],[2062,690,0,683,1,"IMAGE"],[2063,715,0,709,1,"MASK"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.5863092971715371,"offset":[2841.6279889989714,922.4028503570233]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0,"ue_links":[],"VHS_MetadataImage":true,"VHS_KeepIntermediate":true},"version":0.4} ================================================ FILE: example_workflows/flux regional redux (3 zones).json ================================================ {"last_node_id":714,"last_link_id":2062,"nodes":[{"id":13,"type":"Reroute","pos":[1300,-790],"size":[75,26],"flags":{},"order":16,"mode":0,"inputs":[{"name":"","type":"*","link":1964}],"outputs":[{"name":"","type":"MODEL","links":[1967],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":490,"type":"Reroute","pos":[1300,-750],"size":[75,26],"flags":{},"order":11,"mode":0,"inputs":[{"name":"","type":"*","link":1965}],"outputs":[{"name":"","type":"CLIP","links":[1706,1939],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":541,"type":"CLIPTextEncode","pos":[692.1508178710938,183.7528839111328],"size":[265.775390625,113.01970672607422],"flags":{},"order":17,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":1706}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[1732],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["blurry, out of focus, shallow depth of field, low quality, bad quality, low detail, mutated, jpeg artifacts, compression 
artifacts,"]},{"id":14,"type":"Reroute","pos":[1300,-710],"size":[75,26],"flags":{},"order":12,"mode":0,"inputs":[{"name":"","type":"*","link":1966}],"outputs":[{"name":"","type":"VAE","links":[18,1328],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":680,"type":"Reroute","pos":[1310,-660],"size":[75,26],"flags":{},"order":13,"mode":0,"inputs":[{"name":"","type":"*","link":2001}],"outputs":[{"name":"","type":"CLIP_VISION","links":[2004,2009,2043]}],"properties":{"showOutputText":false,"horizontal":false}},{"id":678,"type":"StyleModelApply","pos":[101.3630142211914,-560.2020874023438],"size":[262,122],"flags":{},"order":25,"mode":0,"inputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","link":2005},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","link":1999},{"name":"clip_vision_output","localized_name":"clip_vision_output","type":"CLIP_VISION_OUTPUT","link":2003}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2002],"slot_index":0}],"properties":{"Node name for S&R":"StyleModelApply"},"widgets_values":[1,"multiply"]},{"id":681,"type":"CLIPVisionEncode","pos":[-173.92124938964844,-524.1537475585938],"size":[253.60000610351562,78],"flags":{},"order":20,"mode":0,"inputs":[{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","link":2004},{"name":"image","localized_name":"image","type":"IMAGE","link":2028}],"outputs":[{"name":"CLIP_VISION_OUTPUT","localized_name":"CLIP_VISION_OUTPUT","type":"CLIP_VISION_OUTPUT","links":[2003]}],"properties":{"Node name for 
S&R":"CLIPVisionEncode"},"widgets_values":["center"]},{"id":560,"type":"ClownRegionalConditionings","pos":[676.1644897460938,-499.31219482421875],"size":[278.4758605957031,266],"flags":{},"order":35,"mode":0,"inputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","shape":7,"link":1938},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"region_bleeds","localized_name":"region_bleeds","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","links":[1735],"slot_index":0}],"properties":{"Node name for S&R":"ClownRegionalConditionings"},"widgets_values":[0.5,1,14,"beta57",0,20,"boolean",false]},{"id":676,"type":"InvertMask","pos":[-1270,-450],"size":[140,26],"flags":{},"order":15,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":1990}],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[1991,2051],"slot_index":0}],"properties":{"Node name for S&R":"InvertMask"},"widgets_values":[]},{"id":667,"type":"MaskPreview","pos":[-840,-570],"size":[210,246],"flags":{},"order":28,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":1969}],"outputs":[],"properties":{"Node name for S&R":"MaskPreview"},"widgets_values":[]},{"id":661,"type":"ClownRegionalConditioning","pos":[411.9298095703125,-539.053955078125],"size":[211.60000610351562,122],"flags":{},"order":34,"mode":0,"inputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","shape":7,"link":1937},{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","shape":7,"link":2002},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":2036}],"outputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","links":[1938],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownRegionalConditioning"},"widgets_values":[false,256]},{"id":701,"type":"Note","pos":[-1378.6959228515625,-637.0702514648438],"size":[342.05950927734375,88],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["I usually just lazily draw masks in Load Image nodes (with some random image loaded), but for the sake of reproducibility, here's another approach."],"color":"#432","bgcolor":"#653"},{"id":663,"type":"FluxLoader","pos":[654.6221923828125,-858.3792724609375],"size":[374.41741943359375,282],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1963],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[1965],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[1966],"slot_index":2},{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","links":[2001],"slot_index":3},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","links":[2000],"slot_index":4}],"properties":{"Node name for S&R":"FluxLoader"},"widgets_values":["colossusProjectFlux_v42AIO.safetensors","fp8_e4m3fn_fast",".use_ckpt_clip",".none",".use_ckpt_vae","sigclip_vision_patch14_384.safetensors","flux1-redux-dev.safetensors"]},{"id":664,"type":"ReFluxPatcher","pos":[1064.7325439453125,-863.0516967773438],"size":[210,82],"flags":{},"order":10,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1963}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1964],"slot_index":0}],"properties":{"Node name for 
S&R":"ReFluxPatcher"},"widgets_values":["float64",true]},{"id":679,"type":"Reroute","pos":[1300,-610],"size":[75,26],"flags":{},"order":14,"mode":0,"inputs":[{"name":"","type":"*","link":2000}],"outputs":[{"name":"","type":"STYLE_MODEL","links":[1999,2007,2046]}],"properties":{"showOutputText":false,"horizontal":false}},{"id":662,"type":"CLIPTextEncode","pos":[-140.3179168701172,-670.337158203125],"size":[210,88],"flags":{"collapsed":false},"order":18,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":1939}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2005,2006,2045],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[""]},{"id":704,"type":"Note","pos":[324.8023986816406,-781.4505004882812],"size":[290.7107238769531,155.35317993164062],"flags":{},"order":2,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["ClownRegionalConditionings:\n\nTry raising or lowering weight, and changing the weight scheduler from beta57 to Karras (weakens more quickly), or to linear quadratic (stronger late).\n\nTry changing region_bleed_start_step, and end_step."],"color":"#432","bgcolor":"#653"},{"id":398,"type":"SaveImage","pos":[1379.9996337890625,-267.2835998535156],"size":[341.7508850097656,561.0067749023438],"flags":{},"order":38,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":1329}],"outputs":[],"properties":{"Node name for S&R":"SaveImage","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["ComfyUI"]},{"id":703,"type":"Note","pos":[-84.50921630859375,-859.7656860351562],"size":[278.04071044921875,88],"flags":{},"order":3,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["edge_width also creates some overlap around the edges of the 
mask."],"color":"#432","bgcolor":"#653"},{"id":683,"type":"CLIPVisionEncode","pos":[-170,-220],"size":[253.60000610351562,78],"flags":{},"order":21,"mode":0,"inputs":[{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","link":2009},{"name":"image","localized_name":"image","type":"IMAGE","link":2062}],"outputs":[{"name":"CLIP_VISION_OUTPUT","localized_name":"CLIP_VISION_OUTPUT","type":"CLIP_VISION_OUTPUT","links":[2008]}],"properties":{"Node name for S&R":"CLIPVisionEncode"},"widgets_values":["center"]},{"id":682,"type":"StyleModelApply","pos":[100,-250],"size":[262,122],"flags":{},"order":26,"mode":0,"inputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","link":2006},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","link":2007},{"name":"clip_vision_output","localized_name":"clip_vision_output","type":"CLIP_VISION_OUTPUT","link":2008}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2020],"slot_index":0}],"properties":{"Node name for S&R":"StyleModelApply"},"widgets_values":[1,"multiply"]},{"id":596,"type":"ClownRegionalConditioning","pos":[425.9762268066406,-243.12513732910156],"size":[211.60000610351562,122],"flags":{},"order":33,"mode":0,"inputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","shape":7,"link":2050},{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","shape":7,"link":2020},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":2042}],"outputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","links":[1937],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownRegionalConditioning"},"widgets_values":[false,256]},{"id":706,"type":"CLIPVisionEncode","pos":[-180,180],"size":[253.60000610351562,78],"flags":{},"order":22,"mode":0,"inputs":[{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","link":2043},{"name":"image","localized_name":"image","type":"IMAGE","link":2061}],"outputs":[{"name":"CLIP_VISION_OUTPUT","localized_name":"CLIP_VISION_OUTPUT","type":"CLIP_VISION_OUTPUT","links":[2047]}],"properties":{"Node name for S&R":"CLIPVisionEncode"},"widgets_values":["center"]},{"id":707,"type":"StyleModelApply","pos":[90,150],"size":[262,122],"flags":{},"order":27,"mode":0,"inputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","link":2045},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","link":2046},{"name":"clip_vision_output","localized_name":"clip_vision_output","type":"CLIP_VISION_OUTPUT","link":2047}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2048],"slot_index":0}],"properties":{"Node name for S&R":"StyleModelApply"},"widgets_values":[1,"multiply"]},{"id":708,"type":"ClownRegionalConditioning","pos":[420,160],"size":[211.60000610351562,122],"flags":{},"order":31,"mode":0,"inputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","shape":7,"link":null},{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","shape":7,"link":2048},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":2057}],"outputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","links":[2050],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownRegionalConditioning"},"widgets_values":[false,256]},{"id":665,"type":"MaskComposite","pos":[-1100,-450],"size":[210,126],"flags":{},"order":23,"mode":0,"inputs":[{"name":"destination","localized_name":"destination","type":"MASK","link":1991},{"name":"source","localized_name":"source","type":"MASK","link":1995}],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[1969,2036,2038],"slot_index":0}],"properties":{"Node name for S&R":"MaskComposite"},"widgets_values":[0,0,"add"]},{"id":670,"type":"MaskPreview","pos":[-840.8076782226562,-235.62042236328125],"size":[210,246],"flags":{},"order":32,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":2041}],"outputs":[],"properties":{"Node name for S&R":"MaskPreview"},"widgets_values":[]},{"id":700,"type":"MaskFlip+","pos":[-1099.420166015625,-236.15890502929688],"size":[210,58],"flags":{},"order":29,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":2038}],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[2041,2042],"slot_index":0}],"properties":{"Node name for S&R":"MaskFlip+"},"widgets_values":["x"]},{"id":710,"type":"MaskPreview","pos":[-847.5751953125,166.58413696289062],"size":[210,246],"flags":{},"order":30,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":2054}],"outputs":[],"properties":{"Node name for S&R":"MaskPreview"},"widgets_values":[]},{"id":397,"type":"VAEDecode","pos":[1403.6392822265625,-371.9699401855469],"size":[210,46],"flags":{},"order":37,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":2056},{"name":"vae","localized_name":"vae","type":"VAE","link":1328}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[1329],"slot_index":0}],"properties":{"Node name for 
S&R":"VAEDecode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[]},{"id":401,"type":"ClownsharKSampler_Beta","pos":[1010,-370],"size":[340.55120849609375,666.8208618164062],"flags":{},"order":36,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":1967},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":1735},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":1732},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1399},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2056],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharKSampler_Beta","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":[0.5,"exponential/res_2s","bong_tangent",20,-1,1,1,109,"fixed","standard",true]},{"id":7,"type":"VAEEncodeAdvanced","pos":[696.7778930664062,-164.97328186035156],"size":[261.2217712402344,279.3136901855469],"flags":{},"order":19,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":null},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":18}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":[],"slot_index":1},{"name":"mask","localized_name":"mask","type":"MASK","links":[],"slot_index":2},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[1399],"slot_index":3},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"VAEEncodeAdvanced","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":["false",1344,768,"red",false,"16_channels"]},{"id":694,"type":"LoadImage","pos":[-536.0714111328125,-640.6544189453125],"size":[315,314],"flags":{},"order":4,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2028],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["ChatGPT Image Apr 29, 2025, 08_07_01 
PM.png","image"]},{"id":666,"type":"SolidMask","pos":[-1500,-450],"size":[210,106],"flags":{},"order":5,"mode":0,"inputs":[],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[1990],"slot_index":0}],"properties":{"Node name for S&R":"SolidMask"},"widgets_values":[1,1536,512]},{"id":712,"type":"Note","pos":[-1511.985107421875,-66.87181854248047],"size":[245.76409912109375,91.6677017211914],"flags":{},"order":6,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["So long as these masks are all the same size, the regional conditioning nodes will handle resizing to the image size for you."],"color":"#432","bgcolor":"#653"},{"id":709,"type":"MaskComposite","pos":[-1104.1712646484375,170.6186981201172],"size":[210,126],"flags":{},"order":24,"mode":0,"inputs":[{"name":"destination","localized_name":"destination","type":"MASK","link":2051},{"name":"source","localized_name":"source","type":"MASK","link":2060}],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[2054,2057],"slot_index":0}],"properties":{"Node name for S&R":"MaskComposite"},"widgets_values":[512,0,"add"]},{"id":668,"type":"SolidMask","pos":[-1502.6644287109375,-289.3330993652344],"size":[210,106],"flags":{},"order":7,"mode":0,"inputs":[],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[1995,2060],"slot_index":0}],"properties":{"Node name for S&R":"SolidMask"},"widgets_values":[1,512,512]},{"id":690,"type":"LoadImage","pos":[-549.7396240234375,-227.43971252441406],"size":[315,314],"flags":{},"order":8,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2062],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for 
S&R":"LoadImage"},"widgets_values":["ComfyUI_00464_.png","image"]},{"id":705,"type":"LoadImage","pos":[-551.003662109375,157.5296173095703],"size":[315,314],"flags":{},"order":9,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2061],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["ComfyUI_00479_.png","image"]}],"links":[[18,14,0,7,4,"VAE"],[1328,14,0,397,1,"VAE"],[1329,397,0,398,0,"IMAGE"],[1399,7,3,401,3,"LATENT"],[1706,490,0,541,0,"CLIP"],[1732,541,0,401,2,"CONDITIONING"],[1735,560,0,401,1,"CONDITIONING"],[1937,596,0,661,0,"COND_REGIONS"],[1938,661,0,560,0,"COND_REGIONS"],[1939,490,0,662,0,"CLIP"],[1963,663,0,664,0,"MODEL"],[1964,664,0,13,0,"*"],[1965,663,1,490,0,"*"],[1966,663,2,14,0,"*"],[1967,13,0,401,0,"MODEL"],[1969,665,0,667,0,"MASK"],[1990,666,0,676,0,"MASK"],[1991,676,0,665,0,"MASK"],[1995,668,0,665,1,"MASK"],[1999,679,0,678,1,"STYLE_MODEL"],[2000,663,4,679,0,"*"],[2001,663,3,680,0,"*"],[2002,678,0,661,1,"CONDITIONING"],[2003,681,0,678,2,"CLIP_VISION_OUTPUT"],[2004,680,0,681,0,"CLIP_VISION"],[2005,662,0,678,0,"CONDITIONING"],[2006,662,0,682,0,"CONDITIONING"],[2007,679,0,682,1,"STYLE_MODEL"],[2008,683,0,682,2,"CLIP_VISION_OUTPUT"],[2009,680,0,683,0,"CLIP_VISION"],[2020,682,0,596,1,"CONDITIONING"],[2028,694,0,681,1,"IMAGE"],[2036,665,0,661,2,"MASK"],[2038,665,0,700,0,"MASK"],[2041,700,0,670,0,"MASK"],[2042,700,0,596,2,"MASK"],[2043,680,0,706,0,"CLIP_VISION"],[2045,662,0,707,0,"CONDITIONING"],[2046,679,0,707,1,"STYLE_MODEL"],[2047,706,0,707,2,"CLIP_VISION_OUTPUT"],[2048,707,0,708,1,"CONDITIONING"],[2050,708,0,596,0,"COND_REGIONS"],[2051,676,0,709,0,"MASK"],[2054,709,0,710,0,"MASK"],[2056,401,0,397,0,"LATENT"],[2057,709,0,708,2,"MASK"],[2060,668,0,709,1,"MASK"],[2061,705,0,706,1,"IMAGE"],[2062,690,0,683,1,"IMAGE"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.586309297171537,"offset":[2736.1731738476205,939.95
77246808323]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0,"ue_links":[],"VHS_MetadataImage":true,"VHS_KeepIntermediate":true},"version":0.4} ================================================ FILE: example_workflows/flux style antiblur.json ================================================ {"last_node_id":739,"last_link_id":2113,"nodes":[{"id":13,"type":"Reroute","pos":[1280,-650],"size":[75,26],"flags":{},"order":7,"mode":0,"inputs":[{"name":"","type":"*","link":1964}],"outputs":[{"name":"","type":"MODEL","links":[1967],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":490,"type":"Reroute","pos":[1280,-610],"size":[75,26],"flags":{},"order":5,"mode":0,"inputs":[{"name":"","type":"*","link":1965}],"outputs":[{"name":"","type":"CLIP","links":[1939],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":14,"type":"Reroute","pos":[1280,-570],"size":[75,26],"flags":{},"order":6,"mode":0,"inputs":[{"name":"","type":"*","link":1966}],"outputs":[{"name":"","type":"VAE","links":[18,1328],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":398,"type":"SaveImage","pos":[1379.9996337890625,-267.2835998535156],"size":[341.7508850097656,561.0067749023438],"flags":{},"order":13,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":1329}],"outputs":[],"properties":{"Node name for 
S&R":"SaveImage","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["ComfyUI"]},{"id":663,"type":"FluxLoader","pos":[630,-720],"size":[374.41741943359375,282],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1963],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[1965],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[1966],"slot_index":2},{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","links":[],"slot_index":3},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","links":[],"slot_index":4}],"properties":{"Node name for S&R":"FluxLoader"},"widgets_values":["colossusProjectFlux_v42AIO.safetensors","fp8_e4m3fn_fast",".use_ckpt_clip",".none",".use_ckpt_vae",".none",".none"]},{"id":664,"type":"ReFluxPatcher","pos":[1040,-720],"size":[210,82],"flags":{},"order":4,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1963}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1964],"slot_index":0}],"properties":{"Node name for S&R":"ReFluxPatcher"},"widgets_values":["float64",true]},{"id":397,"type":"VAEDecode","pos":[1382.3662109375,-374.17059326171875],"size":[210,46],"flags":{},"order":12,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":2096},{"name":"vae","localized_name":"vae","type":"VAE","link":1328}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[1329],"slot_index":0}],"properties":{"Node name for 
S&R":"VAEDecode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[]},{"id":7,"type":"VAEEncodeAdvanced","pos":[412.2475280761719,-199.0681915283203],"size":[261.2217712402344,279.3136901855469],"flags":{},"order":9,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":2113},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":18}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[2100],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":[],"slot_index":1},{"name":"mask","localized_name":"mask","type":"MASK","links":[],"slot_index":2},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[1399],"slot_index":3},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"VAEEncodeAdvanced","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":["false",1024,1024,"red",false,"16_channels"]},{"id":662,"type":"CLIPTextEncode","pos":[761.3005981445312,-357.2689208984375],"size":[210,102.54972839355469],"flags":{"collapsed":false},"order":8,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":1939}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2098],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["a woman wearing a red flannel shirt and a cute shark plush blue hat, a college campus, brick 
buildings"]},{"id":727,"type":"Note","pos":[412.8926086425781,-351.8606872558594],"size":[272.4425048828125,88],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["This approach can be combined with the regional conditioning anti-blur approach for an even more powerful effect."],"color":"#432","bgcolor":"#653"},{"id":401,"type":"ClownsharKSampler_Beta","pos":[1010,-370],"size":[340.55120849609375,666.8208618164062],"flags":{},"order":11,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":1967},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":2098},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1399},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":2099},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2096],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharKSampler_Beta","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":[0.5,"multistep/res_2m","bong_tangent",30,-1,1,1,7,"fixed","standard",true]},{"id":724,"type":"ClownGuide_Style_Beta","pos":[703.7374267578125,-198.63233947753906],"size":[262.8634033203125,286],"flags":{},"order":10,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":2100},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[2099],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuide_Style_Beta"},"widgets_values":["positive","WCT",1,1,"constant",0,10,false]},{"id":739,"type":"LoadImage","pos":[70.82455444335938,-201.66342163085938],"size":[315,314],"flags":{},"order":3,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2113],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["pasted/image (655).png","image"]},{"id":726,"type":"Note","pos":[415.7740478515625,153.59271240234375],"size":[364.5906677246094,164.38613891601562],"flags":{},"order":2,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["The best style guide images will share the lighting and color composition of your desired scene. Some are just inexplicably ineffective at killing blur. Just gather up a bunch of images to try, you'll find some good ones that can be reused for many things. I'm including the one used here in the example_workflows directory, be sure to check for it.\n\nAnd don't forget to change seeds. Don't optimize for one seed only. Don't get stuck on one seed! 
Sometimes one is just not going to work out for whatever you're doing."],"color":"#432","bgcolor":"#653"}],"links":[[18,14,0,7,4,"VAE"],[1328,14,0,397,1,"VAE"],[1329,397,0,398,0,"IMAGE"],[1399,7,3,401,3,"LATENT"],[1939,490,0,662,0,"CLIP"],[1963,663,0,664,0,"MODEL"],[1964,664,0,13,0,"*"],[1965,663,1,490,0,"*"],[1966,663,2,14,0,"*"],[1967,13,0,401,0,"MODEL"],[2096,401,0,397,0,"LATENT"],[2098,662,0,401,1,"CONDITIONING"],[2099,724,0,401,5,"GUIDES"],[2100,7,0,724,0,"LATENT"],[2113,739,0,7,0,"IMAGE"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.91943424957756,"offset":[1140.4413839969193,798.117449447068]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0,"ue_links":[],"VHS_MetadataImage":true,"VHS_KeepIntermediate":true},"version":0.4} ================================================ FILE: example_workflows/flux style transfer gguf.json ================================================ {"last_node_id":1392,"last_link_id":3739,"nodes":[{"id":13,"type":"Reroute","pos":[13508.9013671875,-109.2831802368164],"size":[75,26],"flags":{},"order":20,"mode":0,"inputs":[{"name":"","type":"*","link":3737}],"outputs":[{"name":"","type":"MODEL","links":[1395],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":14,"type":"Reroute","pos":[13508.9013671875,-29.283178329467773],"size":[75,26],"flags":{},"order":16,"mode":0,"inputs":[{"name":"","type":"*","link":3739}],"outputs":[{"name":"","type":"VAE","links":[18,2696],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":490,"type":"Reroute","pos":[13508.9013671875,-69.28317260742188],"size":[75,26],"flags":{},"order":17,"mode":0,"inputs":[{"name":"","type":"*","link":3738}],"outputs":[{"name":"","type":"CLIP","links":[2881,3581],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1308,"type":"ClownGuide_Style_Beta","pos":[14108.255859375,675.60693359375],"size":[246.31312561035156,286],"flags":{},"order":29,"mode":0,"inputs":[{"name":"guide","loca
lized_name":"guide","type":"LATENT","shape":7,"link":3709},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":3699}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[3604],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuide_Style_Beta"},"widgets_values":["positive","WCT",1,1,"constant",0,-1,false]},{"id":431,"type":"ModelSamplingAdvancedResolution","pos":[13218.9013671875,-309.28314208984375],"size":[260.3999938964844,126],"flags":{},"order":28,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1395},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":1398}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2692],"slot_index":0}],"properties":{"Node name for S&R":"ModelSamplingAdvancedResolution","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":["exponential",1.35,0.85]},{"id":970,"type":"CLIPTextEncode","pos":[13688.255859375,165.60690307617188],"size":[281.9206848144531,109.87118530273438],"flags":{},"order":21,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":2881}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2882,3627],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["blurry, out of focus, shallow depth of field, jpeg artifacts, low quality, bad quality, 
unsharp"]},{"id":1378,"type":"Reroute","pos":[13184.07421875,533.128662109375],"size":[75,26],"flags":{},"order":19,"mode":0,"inputs":[{"name":"","type":"*","link":3721}],"outputs":[{"name":"","type":"IMAGE","links":[3724,3729],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1379,"type":"Reroute","pos":[13185.853515625,168.15780639648438],"size":[75,26],"flags":{},"order":18,"mode":0,"inputs":[{"name":"","type":"*","link":3725}],"outputs":[{"name":"","type":"IMAGE","links":[3726],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":909,"type":"SaveImage","pos":[15220,-259.5838928222656],"size":[457.3382263183594,422.2065124511719],"flags":{},"order":34,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2697}],"outputs":[],"properties":{"Node name for S&R":"SaveImage","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["ComfyUI"]},{"id":7,"type":"VAEEncodeAdvanced","pos":[13400,560],"size":[261.2217712402344,298],"flags":{"collapsed":true},"order":26,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":3688},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":3727},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":18},{"name":"width","type":"INT","pos":[10,160.00003051757812],"widget":{"name":"width"},"link":3732},{"name":"height","type":"INT","pos":[10,184.00003051757812],"widget":{"name":"height"},"link":3733}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[2983,3710],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":[3709],"slot_index":1},{"name":"mask","localized_name":"mask","type":"MASK","links":[],"slot_index":2},{"name":"empty_latent","localized_name":
"empty_latent","type":"LATENT","links":[1398],"slot_index":3},{"name":"width","localized_name":"width","type":"INT","links":[],"slot_index":4},{"name":"height","localized_name":"height","type":"INT","links":[],"slot_index":5}],"properties":{"Node name for S&R":"VAEEncodeAdvanced","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":["false",1344,768,"red",false,"16_channels"]},{"id":1371,"type":"Image Repeat Tile To Size","pos":[13390,500],"size":[210,146],"flags":{"collapsed":true},"order":23,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","link":3726},{"name":"width","type":"INT","pos":[10,36],"widget":{"name":"width"},"link":3730},{"name":"height","type":"INT","pos":[10,60],"widget":{"name":"height"},"link":3731}],"outputs":[{"name":"image","localized_name":"image","type":"IMAGE","links":[3727,3728],"slot_index":0}],"properties":{"Node name for S&R":"Image Repeat Tile To Size"},"widgets_values":[1024,1024,true]},{"id":1380,"type":"SetImageSize","pos":[13380,320],"size":[210,102],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"width","localized_name":"width","type":"INT","links":[3730,3732],"slot_index":0},{"name":"height","localized_name":"height","type":"INT","links":[3731,3733],"slot_index":1}],"properties":{"Node name for S&R":"SetImageSize"},"widgets_values":[1344,768]},{"id":1377,"type":"Image Comparer 
(rgthree)","pos":[15742.4619140625,-253.3526153564453],"size":[461.9190368652344,413.5953369140625],"flags":{},"order":35,"mode":0,"inputs":[{"name":"image_a","type":"IMAGE","dir":3,"link":3720},{"name":"image_b","type":"IMAGE","dir":3,"link":3729}],"outputs":[],"properties":{"comparer_mode":"Slide"},"widgets_values":[[{"name":"A","selected":true,"url":"/api/view?filename=rgthree.compare._temp_clqis_00009_.png&type=temp&subfolder=&rand=0.8606788093916207"},{"name":"B","selected":true,"url":"/api/view?filename=rgthree.compare._temp_clqis_00010_.png&type=temp&subfolder=&rand=0.7775594190958295"}]]},{"id":908,"type":"VAEDecode","pos":[15217.7802734375,-312.1965637207031],"size":[210,46],"flags":{"collapsed":true},"order":33,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":3469},{"name":"vae","localized_name":"vae","type":"VAE","link":2696}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2697,3720],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[]},{"id":1383,"type":"Note","pos":[14428.40234375,580.1749877929688],"size":[261.9539489746094,88],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Samplers like res_2s in this cycling node will also work and are faster. 
res_2m and res_3m are even faster, but sometimes the effect takes longer in wall time to fully kick in."],"color":"#432","bgcolor":"#653"},{"id":1384,"type":"Note","pos":[14793.0322265625,518.4120483398438],"size":[261.9539489746094,88],"flags":{},"order":2,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["res_2m or res_3m can be used here instead and are faster, but are less likely to fully clean up lingering artifacts."],"color":"#432","bgcolor":"#653"},{"id":1385,"type":"Note","pos":[14398.345703125,768.2096557617188],"size":[261.9539489746094,88],"flags":{},"order":3,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["method = AdaIN is faster and uses less memory, but is less accurate. Some prefer the effect."],"color":"#432","bgcolor":"#653"},{"id":1328,"type":"ClownOptions_SDE_Beta","pos":[14186.4755859375,-132.6126251220703],"size":[315,266],"flags":{"collapsed":true},"order":4,"mode":0,"inputs":[{"name":"etas","localized_name":"etas","type":"SIGMAS","shape":7,"link":null},{"name":"etas_substep","localized_name":"etas_substep","type":"SIGMAS","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[3707],"slot_index":0}],"properties":{"Node name for S&R":"ClownOptions_SDE_Beta"},"widgets_values":["gaussian","gaussian","hard","hard",0.5,0.75,-1,"fixed"]},{"id":1381,"type":"Note","pos":[13881.6279296875,-217.62835693359375],"size":[261.9539489746094,88],"flags":{},"order":5,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Increase or decrease \"steps_to_run\" in ClownsharKSampler to change the effective denoise level."],"color":"#432","bgcolor":"#653"},{"id":1382,"type":"Note","pos":[14718.0498046875,-295.4144592285156],"size":[268.1851806640625,124.49711608886719],"flags":{},"order":6,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Increasing cycles 
will increase the amount of change, but take longer.\n\nCycles will rerun the same step over and over, forwards and backwards, iteratively refining an image at a controlled noise level."],"color":"#432","bgcolor":"#653"},{"id":1387,"type":"ReFluxPatcher","pos":[13262.294921875,-130.79653930664062],"size":[210,82],"flags":{},"order":15,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":3736}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[3737],"slot_index":0}],"properties":{"Node name for S&R":"ReFluxPatcher"},"widgets_values":["float64",true]},{"id":1386,"type":"UnetLoaderGGUF","pos":[12817.208984375,-323.9640808105469],"size":[315,58],"flags":{},"order":7,"mode":0,"inputs":[],"outputs":[{"name":"MODEL","localized_name":"MODEL","type":"MODEL","links":[3736],"slot_index":0}],"properties":{"Node name for S&R":"UnetLoaderGGUF"},"widgets_values":["flux1-dev-Q4_K_S.gguf"]},{"id":1389,"type":"VAELoader","pos":[12824.330078125,-56.021827697753906],"size":[315,58],"flags":{},"order":8,"mode":0,"inputs":[],"outputs":[{"name":"VAE","localized_name":"VAE","type":"VAE","links":[3739],"slot_index":0}],"properties":{"Node name for 
S&R":"VAELoader"},"widgets_values":["ae.sft"]},{"id":980,"type":"ClownsharkChainsampler_Beta","pos":[14378.255859375,-64.39308166503906],"size":[340.20001220703125,570],"flags":{},"order":31,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":3626},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":3627},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":3578},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":3604},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":3533},{"name":"options 2","type":"OPTIONS","link":3707},{"name":"options 3","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[3698],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"exponential/res_2s",1,1,"resample",true]},{"id":981,"type":"ClownsharkChainsampler_Beta","pos":[14758.255859375,-64.39308166503906],"size":[340.20001220703125,510],"flags":{},"order":32,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":3698},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[3469],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"exponential/res_2s",-1,1,"resample",true]},{"id":1388,"type":"DualCLIPLoaderGGUF","pos":[12819.8798828125,-213.58253479003906],"size":[315,106],"flags":{},"order":9,"mode":0,"inputs":[],"outputs":[{"name":"CLIP","localized_name":"CLIP","type":"CLIP","links":[3738],"slot_index":0}],"properties":{"Node name for 
S&R":"DualCLIPLoaderGGUF"},"widgets_values":["clip_l_flux.safetensors","t5xxl_fp16.safetensors","flux"]},{"id":907,"type":"ClownsharKSampler_Beta","pos":[14008.255859375,-64.39308166503906],"size":[340.55120849609375,666.8208618164062],"flags":{},"order":30,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":2692},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":3602},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":2882},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":2983},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":3708},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[3578],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":[0.5,"multistep/res_2m","beta57",20,14,1,1,201,"fixed","unsample",true]},{"id":1333,"type":"CLIPTextEncode","pos":[13688.255859375,-44.393089294433594],"size":[280.6252746582031,164.06936645507812],"flags":{"collapsed":false},"order":22,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":3581}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[3602,3626],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["black and white anime cartoon of the inside of a car driving down a creepy 
road"]},{"id":1374,"type":"LoadImage","pos":[12805.896484375,167.56053161621094],"size":[315,314],"flags":{},"order":10,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3725],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"title":"Load Image (Style Guide)","properties":{"Node name for S&R":"LoadImage"},"widgets_values":["ComfyUI_14651_.png","image"]},{"id":1373,"type":"LoadImage","pos":[12810.2314453125,534.0346069335938],"size":[315,314],"flags":{},"order":11,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3721],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"title":"Load Image (Composition)","properties":{"Node name for S&R":"LoadImage"},"widgets_values":["pasted/image (476).png","image"]},{"id":1362,"type":"PreviewImage","pos":[13380,620],"size":[210,246],"flags":{},"order":25,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":3682}],"outputs":[],"properties":{"Node name for S&R":"PreviewImage"},"widgets_values":[]},{"id":1390,"type":"Note","pos":[13148.0439453125,257.643310546875],"size":[210,88],"flags":{},"order":12,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Color Match SOMETIMES helps accelerate style transfer.\n"],"color":"#432","bgcolor":"#653"},{"id":1318,"type":"ClownGuide_Beta","pos":[13828.255859375,675.60693359375],"size":[263.102783203125,290],"flags":{},"order":27,"mode":4,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":3710},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[3699,3708],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownGuide_Beta"},"widgets_values":["inversion",false,false,0.7,1,"constant",0,-1,false]},{"id":1376,"type":"Note","pos":[13710.3271484375,473.56817626953125],"size":[265.1909484863281,137.36415100097656],"flags":{},"order":13,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Increase or decrease weight in ClownGuide to alter adherence to the input image.\n\nFor now, set to low weights or bypass if using any model except HiDream. The HiDream code was adapted so that this composition guide doesn't fight the style guide. Others will be added soon."],"color":"#432","bgcolor":"#653"},{"id":1317,"type":"ClownOptions_Cycles_Beta","pos":[14418.0478515625,-325.06365966796875],"size":[265.2884826660156,202],"flags":{},"order":14,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[3533],"slot_index":0}],"properties":{"Node name for S&R":"ClownOptions_Cycles_Beta"},"widgets_values":[10,1,-1,"none",-1,1,false]},{"id":1350,"type":"ColorMatch","pos":[13380,160],"size":[210,102],"flags":{"collapsed":false},"order":24,"mode":0,"inputs":[{"name":"image_ref","localized_name":"image_ref","type":"IMAGE","link":3728},{"name":"image_target","localized_name":"image_target","type":"IMAGE","link":3724}],"outputs":[{"name":"image","localized_name":"image","type":"IMAGE","links":[3682,3688],"slot_index":0}],"properties":{"Node name for 
S&R":"ColorMatch"},"widgets_values":["mkl",0]}],"links":[[18,14,0,7,4,"VAE"],[1395,13,0,431,0,"MODEL"],[1398,7,3,431,1,"LATENT"],[2692,431,0,907,0,"MODEL"],[2696,14,0,908,1,"VAE"],[2697,908,0,909,0,"IMAGE"],[2881,490,0,970,0,"CLIP"],[2882,970,0,907,2,"CONDITIONING"],[2983,7,0,907,3,"LATENT"],[3469,981,0,908,0,"LATENT"],[3533,1317,0,980,6,"OPTIONS"],[3578,907,0,980,4,"LATENT"],[3581,490,0,1333,0,"CLIP"],[3602,1333,0,907,1,"CONDITIONING"],[3604,1308,0,980,5,"GUIDES"],[3626,1333,0,980,1,"CONDITIONING"],[3627,970,0,980,2,"CONDITIONING"],[3682,1350,0,1362,0,"IMAGE"],[3688,1350,0,7,0,"IMAGE"],[3698,980,0,981,4,"LATENT"],[3699,1318,0,1308,3,"GUIDES"],[3707,1328,0,980,7,"OPTIONS"],[3708,1318,0,907,5,"GUIDES"],[3709,7,1,1308,0,"LATENT"],[3710,7,0,1318,0,"LATENT"],[3720,908,0,1377,0,"IMAGE"],[3721,1373,0,1378,0,"*"],[3724,1378,0,1350,1,"IMAGE"],[3725,1374,0,1379,0,"*"],[3726,1379,0,1371,0,"IMAGE"],[3727,1371,0,7,1,"IMAGE"],[3728,1371,0,1350,0,"IMAGE"],[3729,1378,0,1377,1,"IMAGE"],[3730,1380,0,1371,1,"INT"],[3731,1380,1,1371,2,"INT"],[3732,1380,0,7,5,"INT"],[3733,1380,1,7,6,"INT"],[3736,1386,0,1387,0,"MODEL"],[3737,1387,0,13,0,"*"],[3738,1388,0,490,0,"*"],[3739,1389,0,14,0,"*"]],"groups":[{"id":1,"title":"Model Loaders","bounding":[12796.72265625,-401.9004211425781,822.762451171875,436.0693359375],"color":"#3f789e","font_size":24,"flags":{}},{"id":2,"title":"Sampling","bounding":[13652.6533203125,-402.70721435546875,1470.8076171875,1409.0289306640625],"color":"#3f789e","font_size":24,"flags":{}},{"id":3,"title":"Input Prep","bounding":[12797.1396484375,77.69412231445312,817.4218139648438,820.6239624023438],"color":"#3f789e","font_size":24,"flags":{}},{"id":4,"title":"Save and 
Compare","bounding":[15180.705078125,-399.09112548828125,1050.6468505859375,615.8845825195312],"color":"#3f789e","font_size":24,"flags":{}}],"config":{},"extra":{"ds":{"scale":1.4379222522564015,"offset":[-11124.689104031433,546.0824398349012]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0,"ue_links":[],"VHS_MetadataImage":true,"VHS_KeepIntermediate":true},"version":0.4} ================================================ FILE: example_workflows/flux upscale thumbnail large multistage.json ================================================ {"last_node_id":431,"last_link_id":1176,"nodes":[{"id":361,"type":"CLIPVisionEncode","pos":[860,820],"size":[253.60000610351562,78],"flags":{},"order":17,"mode":0,"inputs":[{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","link":1004},{"name":"image","localized_name":"image","type":"IMAGE","link":1107}],"outputs":[{"name":"CLIP_VISION_OUTPUT","localized_name":"CLIP_VISION_OUTPUT","type":"CLIP_VISION_OUTPUT","links":[1006],"slot_index":0}],"properties":{"Node name for S&R":"CLIPVisionEncode"},"widgets_values":["center"]},{"id":364,"type":"CLIPTextEncode","pos":[899.5093383789062,952.8309936523438],"size":[210,88],"flags":{},"order":14,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":1007}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[1008,1055],"slot_index":0}],"properties":{"Node name for 
S&R":"CLIPTextEncode"},"widgets_values":[""]},{"id":369,"type":"ClownGuide_Style_Beta","pos":[1138.06640625,1574.328857421875],"size":[231.30213928222656,286],"flags":{},"order":25,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":1101},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[1099],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuide_Style_Beta"},"widgets_values":["positive","WCT",1,1,"constant",0,-1,false]},{"id":374,"type":"ClownsharkChainsampler_Beta","pos":[2403.98583984375,1081.333740234375],"size":[274.9878234863281,528.6721801757812],"flags":{},"order":30,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":1134},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1097},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[1088],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"multistep/res_3m",-1,1,"resample",true]},{"id":372,"type":"SaveImage","pos":[2740,1080],"size":[442.38494873046875,530.0809936523438],"flags":{},"order":32,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":1030}],"outputs":[],"properties":{},"widgets_values":["ComfyUI"]},{"id":355,"type":"ModelSamplingAdvancedResolution","pos":[1134.0809326171875,1057.9874267578125],"size":[260.3999938964844,126],"flags":{},"order":24,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1047},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":1111}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1024],"slot_index":0}],"properties":{"Node name for S&R":"ModelSamplingAdvancedResolution"},"widgets_values":["exponential",1.35,0.85]},{"id":368,"type":"ReFluxPatcher","pos":[897.4150390625,1095.9840087890625],"size":[210,82],"flags":{},"order":12,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1022}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1047],"slot_index":0}],"properties":{"Node name for S&R":"ReFluxPatcher"},"widgets_values":["float32",true]},{"id":349,"type":"FluxLoader","pos":[554.6767578125,1099.277099609375],"size":[315,282],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1022,1144],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[1007,1137],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[1029,1038,1058,1155,1164,1168],"slot_index":2},{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","links":[1004,1135],"slot_index":3},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","links":[1009,1172]}],"properties":{"Node name for 
S&R":"FluxLoader"},"widgets_values":["colossusProjectFlux_v42AIO.safetensors","default",".use_ckpt_clip",".none",".use_ckpt_vae","sigclip_vision_patch14_384.safetensors","flux1-redux-dev.safetensors"]},{"id":373,"type":"ClownsharkChainsampler_Beta","pos":[1740,1080],"size":[272.9876403808594,526.665771484375],"flags":{},"order":28,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":1118},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1031},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":1099},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":1044},{"name":"options 2","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[1053],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"multistep/res_3m",1,1,"resample",true]},{"id":370,"type":"ClownsharKSampler_Beta","pos":[1417.3414306640625,1078.0023193359375],"size":[277.65570068359375,627.99951171875],"flags":{},"order":27,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":1024},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":1117},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1102},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[1031],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"multistep/res_3m","beta57",30,14,1,1,0,"fixed","unsample",true]},{"id":380,"type":"ClownsharkChainsampler_Beta","pos":[2078.66015625,1080.6669921875],"size":[263.6514892578125,527.99951171875],"flags":{},"order":29,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1053},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":1051},{"name":"options 2","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[1097],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"multistep/res_3m",1,1,"resample",true]},{"id":403,"type":"Note","pos":[2098.053466796875,680.7237548828125],"size":[215.7804412841797,88],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Raise cycles here if you see halos. It doesn't hurt to go as high as 20. 
(About 20 seconds on a 4090 at 1024x1024)."],"color":"#432","bgcolor":"#653"},{"id":402,"type":"Note","pos":[1755.3779296875,678.1484985351562],"size":[241.524658203125,132.7487030029297],"flags":{},"order":2,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Lower cycles here if you see halos.\n\nThese step(s)/cycle(s) (that use the ClownGuide Style node) are needed to prevent blurring when upscaling tiny thumbnail images."],"color":"#432","bgcolor":"#653"},{"id":382,"type":"ControlNetApplyAdvanced","pos":[1440,830],"size":[210,186],"flags":{},"order":23,"mode":0,"inputs":[{"name":"positive","localized_name":"positive","type":"CONDITIONING","link":1108},{"name":"negative","localized_name":"negative","type":"CONDITIONING","link":1055},{"name":"control_net","localized_name":"control_net","type":"CONTROL_NET","link":1056},{"name":"image","localized_name":"image","type":"IMAGE","link":1112},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":1058}],"outputs":[{"name":"positive","localized_name":"positive","type":"CONDITIONING","links":[1118],"slot_index":0},{"name":"negative","localized_name":"negative","type":"CONDITIONING","links":null}],"properties":{"Node name for S&R":"ControlNetApplyAdvanced"},"widgets_values":[1,0,1]},{"id":404,"type":"Image Repeat Tile To Size","pos":[899.620361328125,1259.9044189453125],"size":[210,106],"flags":{},"order":18,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","link":1123}],"outputs":[{"name":"image","localized_name":"image","type":"IMAGE","links":[1124],"slot_index":0}],"properties":{"Node name for S&R":"Image Repeat Tile To 
Size"},"widgets_values":[1024,1024,false]},{"id":375,"type":"VAEEncodeAdvanced","pos":[1140,1240],"size":[228.90342712402344,278],"flags":{},"order":21,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":1113},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":1124},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":1038}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[1102,1111],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":[1101],"slot_index":1},{"name":"mask","localized_name":"mask","type":"MASK","links":null},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":null,"slot_index":3},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"VAEEncodeAdvanced"},"widgets_values":["false",1024,1024,"red",false,"16_channels"]},{"id":401,"type":"LoadImage","pos":[608.10400390625,1453.0382080078125],"size":[315,314],"flags":{},"order":3,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[1122],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["pasted/image (579).png","image"]},{"id":359,"type":"ControlNetLoader","pos":[596.1650390625,977.5371704101562],"size":[270.0880432128906,58],"flags":{},"order":4,"mode":0,"inputs":[],"outputs":[{"name":"CONTROL_NET","localized_name":"CONTROL_NET","type":"CONTROL_NET","links":[1056,1162],"slot_index":0}],"properties":{"Node name for 
S&R":"ControlNetLoader"},"widgets_values":["flux_tile.safetensors"]},{"id":362,"type":"StyleModelApply","pos":[1138.0474853515625,827.8412475585938],"size":[270.06890869140625,122],"flags":{},"order":20,"mode":0,"inputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","link":1008},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","link":1009},{"name":"clip_vision_output","localized_name":"clip_vision_output","type":"CLIP_VISION_OUTPUT","link":1006}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[1108,1117,1134],"slot_index":0}],"properties":{"Node name for S&R":"StyleModelApply"},"widgets_values":[1,"multiply"]},{"id":408,"type":"CLIPVisionEncode","pos":[3300,810],"size":[253.60000610351562,78],"flags":{},"order":19,"mode":0,"inputs":[{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","link":1135},{"name":"image","localized_name":"image","type":"IMAGE","link":1176}],"outputs":[{"name":"CLIP_VISION_OUTPUT","localized_name":"CLIP_VISION_OUTPUT","type":"CLIP_VISION_OUTPUT","links":[1173],"slot_index":0}],"properties":{"Node name for S&R":"CLIPVisionEncode"},"widgets_values":["center"]},{"id":409,"type":"CLIPTextEncode","pos":[3340,940],"size":[210,88],"flags":{},"order":15,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":1137}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[1161,1171],"slot_index":0}],"properties":{"Node name for 
S&R":"CLIPTextEncode"},"widgets_values":[""]},{"id":410,"type":"ClownGuide_Style_Beta","pos":[3570,1560],"size":[231.30213928222656,286],"flags":{},"order":38,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":1138},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[1147],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuide_Style_Beta"},"widgets_values":["positive","WCT",1,1,"constant",0,-1,false]},{"id":411,"type":"ClownsharkChainsampler_Beta","pos":[4840,1070],"size":[274.9878234863281,528.6721801757812],"flags":{},"order":42,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":1139},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1140},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[1154],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"multistep/res_3m",-1,1,"resample",true]},{"id":412,"type":"SaveImage","pos":[5180,1070],"size":[442.38494873046875,530.0809936523438],"flags":{},"order":44,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":1141}],"outputs":[],"properties":{},"widgets_values":["ComfyUI"]},{"id":413,"type":"ModelSamplingAdvancedResolution","pos":[3570,1050],"size":[260.3999938964844,126],"flags":{},"order":37,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1142},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":1143}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1149],"slot_index":0}],"properties":{"Node name for S&R":"ModelSamplingAdvancedResolution"},"widgets_values":["exponential",1.35,0.85]},{"id":414,"type":"ReFluxPatcher","pos":[3330,1080],"size":[210,82],"flags":{},"order":13,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1144}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1142],"slot_index":0}],"properties":{"Node name for 
S&R":"ReFluxPatcher"},"widgets_values":["float32",true]},{"id":415,"type":"ClownsharkChainsampler_Beta","pos":[4180,1070],"size":[272.9876403808594,526.665771484375],"flags":{},"order":40,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":1145},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1146},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":1147},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":1148},{"name":"options 2","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[1152],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"multistep/res_3m",1,1,"resample",true]},{"id":417,"type":"ClownsharkChainsampler_Beta","pos":[4510,1070],"size":[263.6514892578125,527.99951171875],"flags":{},"order":41,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1152},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":1153},{"name":"options 2","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[1140],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"multistep/res_3m",1,1,"resample",true]},{"id":418,"type":"Note","pos":[4530,670],"size":[215.7804412841797,88],"flags":{},"order":5,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Raise cycles here if you see halos. It doesn't hurt to go as high as 20. 
(About 20 seconds on a 4090 at 1024x1024)."],"color":"#432","bgcolor":"#653"},{"id":419,"type":"Note","pos":[4190,670],"size":[241.524658203125,132.7487030029297],"flags":{},"order":6,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Lower cycles here if you see halos.\n\nThese step(s)/cycle(s) (that use the ClownGuide Style node) are needed to prevent blurring when upscaling tiny thumbnail images."],"color":"#432","bgcolor":"#653"},{"id":420,"type":"VAEDecode","pos":[5180,960],"size":[140,46],"flags":{},"order":43,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":1154},{"name":"vae","localized_name":"vae","type":"VAE","link":1155}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[1141,1169],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode","cnr_id":"comfy-core","ver":"0.3.26","widget_ue_connectable":{}},"widgets_values":[]},{"id":421,"type":"Reroute","pos":[3470,1450],"size":[75,26],"flags":{},"order":34,"mode":0,"inputs":[{"name":"","type":"*","link":1174}],"outputs":[{"name":"","type":"IMAGE","links":[1165,1166,1170]}],"properties":{"showOutputText":false,"horizontal":false}},{"id":425,"type":"ControlNetApplyAdvanced","pos":[3880,820],"size":[210,186],"flags":{},"order":26,"mode":0,"inputs":[{"name":"positive","localized_name":"positive","type":"CONDITIONING","link":1160},{"name":"negative","localized_name":"negative","type":"CONDITIONING","link":1161},{"name":"control_net","localized_name":"control_net","type":"CONTROL_NET","link":1162},{"name":"image","localized_name":"image","type":"IMAGE","link":1175},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":1164}],"outputs":[{"name":"positive","localized_name":"positive","type":"CONDITIONING","links":[1145],"slot_index":0},{"name":"negative","localized_name":"negative","type":"CONDITIONING","links":null}],"properties":{"Node name for 
S&R":"ControlNetApplyAdvanced"},"widgets_values":[1,0,1]},{"id":429,"type":"Image Comparer (rgthree)","pos":[5170,1650],"size":[446.2193603515625,494.8704528808594],"flags":{},"order":45,"mode":0,"inputs":[{"name":"image_a","type":"IMAGE","dir":3,"link":1169},{"name":"image_b","type":"IMAGE","dir":3,"link":1170}],"outputs":[],"properties":{"comparer_mode":"Slide"},"widgets_values":[[{"name":"A","selected":true,"url":"/api/view?filename=rgthree.compare._temp_txgkm_00005_.png&type=temp&subfolder=&rand=0.44944358112719196"},{"name":"B","selected":true,"url":"/api/view?filename=rgthree.compare._temp_txgkm_00006_.png&type=temp&subfolder=&rand=0.15903319456700227"}]]},{"id":430,"type":"StyleModelApply","pos":[3570,820],"size":[270.06890869140625,122],"flags":{},"order":22,"mode":0,"inputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","link":1171},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","link":1172},{"name":"clip_vision_output","localized_name":"clip_vision_output","type":"CLIP_VISION_OUTPUT","link":1173}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[1139,1150,1160],"slot_index":0}],"properties":{"Node name for S&R":"StyleModelApply"},"widgets_values":[1,"multiply"]},{"id":387,"type":"Image Comparer 
(rgthree)","pos":[2732.6875,1661.954833984375],"size":[446.2193603515625,494.8704528808594],"flags":{},"order":33,"mode":0,"inputs":[{"name":"image_a","type":"IMAGE","dir":3,"link":1068},{"name":"image_b","type":"IMAGE","dir":3,"link":1115}],"outputs":[],"properties":{"comparer_mode":"Slide"},"widgets_values":[[{"name":"A","selected":true,"url":"/api/view?filename=rgthree.compare._temp_lvxiv_00017_.png&type=temp&subfolder=&rand=0.23193425033461956"},{"name":"B","selected":true,"url":"/api/view?filename=rgthree.compare._temp_lvxiv_00018_.png&type=temp&subfolder=&rand=0.4600603671403143"}]]},{"id":416,"type":"ClownsharKSampler_Beta","pos":[3850,1070],"size":[277.65570068359375,627.99951171875],"flags":{},"order":39,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":1149},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":1150},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1151},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[1146],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"multistep/res_3m","beta57",30,14,1,1,0,"fixed","unsample",true]},{"id":427,"type":"Image Repeat Tile To 
Size","pos":[3340,1250],"size":[210,106],"flags":{},"order":35,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","link":1165}],"outputs":[{"name":"image","localized_name":"image","type":"IMAGE","links":[1167],"slot_index":0}],"properties":{"Node name for S&R":"Image Repeat Tile To Size"},"widgets_values":[1536,1536,false]},{"id":428,"type":"VAEEncodeAdvanced","pos":[3580,1230],"size":[228.90342712402344,278],"flags":{},"order":36,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":1166},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":1167},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":1168}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[1143,1151],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":[1138],"slot_index":1},{"name":"mask","localized_name":"mask","type":"MASK","links":null},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":null,"slot_index":3},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"VAEEncodeAdvanced"},"widgets_values":["false",1536,1536,"red",false,"16_channels"]},{"id":371,"type":"VAEDecode","pos":[2741.197265625,974.4011840820312],"size":[140,46],"flags":{},"order":31,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":1088},{"name":"vae","localized_name":"vae","type":"VAE","link":1029}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[1030,1068,1174],"slot_index":0}],"properties":{"Node name for 
S&R":"VAEDecode","cnr_id":"comfy-core","ver":"0.3.26","widget_ue_connectable":{}},"widgets_values":[]},{"id":378,"type":"ClownOptions_Cycles_Beta","pos":[1768.675537109375,881.3336791992188],"size":[210,130],"flags":{},"order":7,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[1044]}],"properties":{"Node name for S&R":"ClownOptions_Cycles_Beta"},"widgets_values":[5,1,0.5,1]},{"id":381,"type":"ClownOptions_Cycles_Beta","pos":[2103.203857421875,881.467041015625],"size":[210,130],"flags":{},"order":8,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[1051]}],"properties":{"Node name for S&R":"ClownOptions_Cycles_Beta"},"widgets_values":[5,1,0.5,1]},{"id":426,"type":"ClownOptions_Cycles_Beta","pos":[4200,870],"size":[210,130],"flags":{},"order":9,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[1148]}],"properties":{"Node name for S&R":"ClownOptions_Cycles_Beta"},"widgets_values":[5,1,0.5,1]},{"id":424,"type":"ClownOptions_Cycles_Beta","pos":[4540,870],"size":[210,130],"flags":{},"order":10,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[1153]}],"properties":{"Node name for 
S&R":"ClownOptions_Cycles_Beta"},"widgets_values":[5,1,0.5,1]},{"id":398,"type":"Reroute","pos":[1034.667724609375,1458.654541015625],"size":[75,26],"flags":{},"order":16,"mode":0,"inputs":[{"name":"","type":"*","link":1122}],"outputs":[{"name":"","type":"IMAGE","links":[1107,1112,1113,1115,1123,1175,1176],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":431,"type":"Note","pos":[356.2033386230469,1583.169677734375],"size":[210,88],"flags":{},"order":11,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Used a 384x384 image.\n\nAny size will work."],"color":"#432","bgcolor":"#653"}],"links":[[141,151,0,8,1,"VAE"],[142,151,0,72,1,"VAE"],[143,151,0,35,1,"VAE"],[144,151,0,154,7,"VAE"],[159,151,0,72,1,"VAE"],[160,151,0,157,7,"VAE"],[161,151,0,8,1,"VAE"],[162,151,0,154,7,"VAE"],[163,151,0,72,1,"VAE"],[164,151,0,8,1,"VAE"],[165,151,0,154,7,"VAE"],[171,151,0,8,1,"VAE"],[172,151,0,72,1,"VAE"],[173,151,0,154,7,"VAE"],[174,151,0,157,7,"VAE"],[176,151,0,8,1,"VAE"],[177,151,0,72,1,"VAE"],[178,151,0,154,7,"VAE"],[179,151,0,157,7,"VAE"],[195,151,0,8,1,"VAE"],[196,151,0,72,1,"VAE"],[197,151,0,154,7,"VAE"],[198,151,0,157,7,"VAE"],[199,151,0,160,7,"VAE"],[200,151,0,8,1,"VAE"],[201,151,0,72,1,"VAE"],[202,151,0,154,7,"VAE"],[203,151,0,157,7,"VAE"],[204,151,0,160,7,"VAE"],[217,151,0,8,1,"VAE"],[218,151,0,72,1,"VAE"],[219,151,0,154,7,"VAE"],[220,151,0,157,7,"VAE"],[221,151,0,160,7,"VAE"],[222,151,0,8,1,"VAE"],[223,151,0,72,1,"VAE"],[224,151,0,157,7,"VAE"],[225,151,0,8,1,"VAE"],[226,151,0,72,1,"VAE"],[227,151,0,157,7,"VAE"],[250,151,0,62,1,"VAE"],[251,151,0,157,7,"VAE"],[252,151,0,8,1,"VAE"],[253,151,0,72,1,"VAE"],[254,151,0,62,1,"VAE"],[255,151,0,157,7,"VAE"],[256,151,0,8,1,"VAE"],[257,151,0,72,1,"VAE"],[258,151,0,160,7,"VAE"],[271,151,0,62,1,"VAE"],[272,151,0,157,7,"VAE"],[273,151,0,8,1,"VAE"],[274,151,0,72,1,"VAE"],[275,151,0,160,7,"VAE"],[276,151,0,154,7,"VAE"],[277,151,0,62,1,"VAE"],[278,151,0,157,7,"VAE"],[279,151,0,8,1,"VAE"],[280,1
51,0,72,1,"VAE"],[281,151,0,160,7,"VAE"],[282,151,0,154,7,"VAE"],[294,151,0,157,7,"VAE"],[295,151,0,72,1,"VAE"],[296,151,0,160,7,"VAE"],[297,151,0,154,7,"VAE"],[298,151,0,8,1,"VAE"],[299,151,0,313,1,"VAE"],[300,151,0,62,1,"VAE"],[301,151,0,157,7,"VAE"],[302,151,0,72,1,"VAE"],[303,151,0,160,7,"VAE"],[304,151,0,8,1,"VAE"],[305,151,0,313,1,"VAE"],[306,151,0,62,1,"VAE"],[307,151,0,154,7,"VAE"],[309,151,0,157,7,"VAE"],[310,151,0,72,1,"VAE"],[311,151,0,160,7,"VAE"],[312,151,0,8,1,"VAE"],[313,151,0,313,1,"VAE"],[314,151,0,62,1,"VAE"],[315,151,0,154,7,"VAE"],[316,151,0,157,7,"VAE"],[317,151,0,72,1,"VAE"],[318,151,0,160,7,"VAE"],[319,151,0,8,1,"VAE"],[320,151,0,313,1,"VAE"],[321,151,0,62,1,"VAE"],[322,151,0,154,7,"VAE"],[327,151,0,157,7,"VAE"],[328,151,0,72,1,"VAE"],[329,151,0,8,1,"VAE"],[330,151,0,313,1,"VAE"],[331,151,0,62,1,"VAE"],[332,151,0,154,7,"VAE"],[333,151,0,160,7,"VAE"],[343,151,0,157,7,"VAE"],[344,151,0,72,1,"VAE"],[345,151,0,8,1,"VAE"],[346,151,0,313,1,"VAE"],[347,151,0,62,1,"VAE"],[348,151,0,160,7,"VAE"],[349,151,0,154,7,"VAE"],[351,151,0,157,7,"VAE"],[352,151,0,72,1,"VAE"],[353,151,0,8,1,"VAE"],[354,151,0,313,1,"VAE"],[355,151,0,62,1,"VAE"],[356,151,0,160,7,"VAE"],[357,151,0,154,7,"VAE"],[363,151,0,157,7,"VAE"],[364,151,0,72,1,"VAE"],[365,151,0,8,1,"VAE"],[366,151,0,160,7,"VAE"],[367,151,0,154,7,"VAE"],[368,151,0,62,1,"VAE"],[370,151,0,157,7,"VAE"],[371,151,0,72,1,"VAE"],[372,151,0,8,1,"VAE"],[373,151,0,160,7,"VAE"],[374,151,0,154,7,"VAE"],[375,151,0,62,1,"VAE"],[377,151,0,157,7,"VAE"],[378,151,0,72,1,"VAE"],[379,151,0,8,1,"VAE"],[380,151,0,160,7,"VAE"],[381,151,0,154,7,"VAE"],[382,151,0,62,1,"VAE"],[383,151,0,157,7,"VAE"],[384,151,0,72,1,"VAE"],[385,151,0,8,1,"VAE"],[386,151,0,160,7,"VAE"],[387,151,0,154,7,"VAE"],[388,151,0,62,1,"VAE"],[391,151,0,157,7,"VAE"],[392,151,0,72,1,"VAE"],[393,151,0,8,1,"VAE"],[394,151,0,160,7,"VAE"],[395,151,0,154,7,"VAE"],[396,151,0,62,1,"VAE"],[402,151,0,157,7,"VAE"],[403,151,0,72,1,"VAE"],[404,151,0,8,1,"VAE"],[405,151,0,160,7,"
VAE"],[406,151,0,154,7,"VAE"],[407,151,0,62,1,"VAE"],[408,151,0,157,7,"VAE"],[409,151,0,72,1,"VAE"],[410,151,0,8,1,"VAE"],[411,151,0,160,7,"VAE"],[412,151,0,154,7,"VAE"],[413,151,0,62,1,"VAE"],[421,151,0,157,7,"VAE"],[422,151,0,72,1,"VAE"],[423,151,0,8,1,"VAE"],[424,151,0,160,7,"VAE"],[425,151,0,154,7,"VAE"],[426,151,0,62,1,"VAE"],[427,151,0,157,7,"VAE"],[428,151,0,72,1,"VAE"],[429,151,0,8,1,"VAE"],[430,151,0,160,7,"VAE"],[431,151,0,154,7,"VAE"],[432,151,0,62,1,"VAE"],[1004,349,3,361,0,"CLIP_VISION"],[1006,361,0,362,2,"CLIP_VISION_OUTPUT"],[1007,349,1,364,0,"CLIP"],[1008,364,0,362,0,"CONDITIONING"],[1009,349,4,362,1,"STYLE_MODEL"],[1022,349,0,368,0,"MODEL"],[1024,355,0,370,0,"MODEL"],[1029,349,2,371,1,"VAE"],[1030,371,0,372,0,"IMAGE"],[1031,370,0,373,4,"LATENT"],[1038,349,2,375,4,"VAE"],[1044,378,0,373,6,"OPTIONS"],[1047,368,0,355,0,"MODEL"],[1051,381,0,380,6,"OPTIONS"],[1053,373,0,380,4,"LATENT"],[1055,364,0,382,1,"CONDITIONING"],[1056,359,0,382,2,"CONTROL_NET"],[1058,349,2,382,4,"VAE"],[1068,371,0,387,0,"IMAGE"],[1088,374,0,371,0,"LATENT"],[1097,380,0,374,4,"LATENT"],[1099,369,0,373,5,"GUIDES"],[1101,375,1,369,0,"LATENT"],[1102,375,0,370,3,"LATENT"],[1107,398,0,361,1,"IMAGE"],[1108,362,0,382,0,"CONDITIONING"],[1111,375,0,355,1,"LATENT"],[1112,398,0,382,3,"IMAGE"],[1113,398,0,375,0,"IMAGE"],[1115,398,0,387,1,"IMAGE"],[1117,362,0,370,1,"CONDITIONING"],[1118,382,0,373,1,"CONDITIONING"],[1122,401,0,398,0,"*"],[1123,398,0,404,0,"IMAGE"],[1124,404,0,375,1,"IMAGE"],[1134,362,0,374,1,"CONDITIONING"],[1135,349,3,408,0,"CLIP_VISION"],[1137,349,1,409,0,"CLIP"],[1138,428,1,410,0,"LATENT"],[1139,430,0,411,1,"CONDITIONING"],[1140,417,0,411,4,"LATENT"],[1141,420,0,412,0,"IMAGE"],[1142,414,0,413,0,"MODEL"],[1143,428,0,413,1,"LATENT"],[1144,349,0,414,0,"MODEL"],[1145,425,0,415,1,"CONDITIONING"],[1146,416,0,415,4,"LATENT"],[1147,410,0,415,5,"GUIDES"],[1148,426,0,415,6,"OPTIONS"],[1149,413,0,416,0,"MODEL"],[1150,430,0,416,1,"CONDITIONING"],[1151,428,0,416,3,"LATENT"],[1152,415,0,417
,4,"LATENT"],[1153,424,0,417,6,"OPTIONS"],[1154,411,0,420,0,"LATENT"],[1155,349,2,420,1,"VAE"],[1160,430,0,425,0,"CONDITIONING"],[1161,409,0,425,1,"CONDITIONING"],[1162,359,0,425,2,"CONTROL_NET"],[1164,349,2,425,4,"VAE"],[1165,421,0,427,0,"IMAGE"],[1166,421,0,428,0,"IMAGE"],[1167,427,0,428,1,"IMAGE"],[1168,349,2,428,4,"VAE"],[1169,420,0,429,0,"IMAGE"],[1170,421,0,429,1,"IMAGE"],[1171,409,0,430,0,"CONDITIONING"],[1172,349,4,430,1,"STYLE_MODEL"],[1173,408,0,430,2,"CLIP_VISION_OUTPUT"],[1174,371,0,421,0,"*"],[1175,398,0,425,3,"IMAGE"],[1176,398,0,408,1,"IMAGE"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.3109994191500252,"offset":[916.9662500305632,-478.4961303433991]},"ue_links":[{"downstream":157,"downstream_slot":7,"upstream":"151","upstream_slot":0,"controller":64,"type":"VAE"},{"downstream":154,"downstream_slot":7,"upstream":"151","upstream_slot":0,"controller":64,"type":"VAE"},{"downstream":72,"downstream_slot":1,"upstream":"151","upstream_slot":0,"controller":64,"type":"VAE"},{"downstream":62,"downstream_slot":1,"upstream":"151","upstream_slot":0,"controller":64,"type":"VAE"}],"VHS_latentpreview":false,"VHS_latentpreviewrate":0,"VHS_MetadataImage":true,"VHS_KeepIntermediate":true,"links_added_by_ue":[959,960,961,962],"frontendVersion":"1.18.6"},"version":0.4} ================================================ FILE: example_workflows/flux upscale thumbnail large.json ================================================ 
{"last_node_id":408,"last_link_id":1127,"nodes":[{"id":369,"type":"ClownGuide_Style_Beta","pos":[1138.06640625,1574.328857421875],"size":[231.30213928222656,286],"flags":{},"order":18,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":1101},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[1099],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuide_Style_Beta"},"widgets_values":["positive","WCT",1,1,"constant",0,-1,false]},{"id":374,"type":"ClownsharkChainsampler_Beta","pos":[2403.98583984375,1081.333740234375],"size":[274.9878234863281,528.6721801757812],"flags":{},"order":22,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":1109},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1097},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[1088],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"multistep/res_3m",-1,1,"resample",true]},{"id":372,"type":"SaveImage","pos":[2740,1080],"size":[442.38494873046875,530.0809936523438],"flags":{},"order":24,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":1030}],"outputs":[],"properties":{},"widgets_values":["ComfyUI"]},{"id":355,"type":"ModelSamplingAdvancedResolution","pos":[1134.0809326171875,1057.9874267578125],"size":[260.3999938964844,126],"flags":{},"order":17,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1047},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":1111}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1024],"slot_index":0}],"properties":{"Node name for S&R":"ModelSamplingAdvancedResolution"},"widgets_values":["exponential",1.35,0.85]},{"id":368,"type":"ReFluxPatcher","pos":[897.4150390625,1095.9840087890625],"size":[210,82],"flags":{},"order":9,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1022}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1047],"slot_index":0}],"properties":{"Node name for S&R":"ReFluxPatcher"},"widgets_values":["float32",true]},{"id":349,"type":"FluxLoader","pos":[554.6767578125,1099.277099609375],"size":[315,282],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1022],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[1007],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[1029,1038,1058],"slot_index":2},{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","links":[1004],"slot_index":3},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","links":[1009]}],"properties":{"Node name for 
S&R":"FluxLoader"},"widgets_values":["colossusProjectFlux_v42AIO.safetensors","default",".use_ckpt_clip",".none",".use_ckpt_vae","sigclip_vision_patch14_384.safetensors","flux1-redux-dev.safetensors"]},{"id":387,"type":"Image Comparer (rgthree)","pos":[3228.67529296875,1082.0006103515625],"size":[502.8477478027344,526.1139526367188],"flags":{},"order":25,"mode":0,"inputs":[{"name":"image_a","type":"IMAGE","dir":3,"link":1068},{"name":"image_b","type":"IMAGE","dir":3,"link":1115}],"outputs":[],"properties":{"comparer_mode":"Slide"},"widgets_values":[[{"name":"A","selected":true,"url":"/api/view?filename=rgthree.compare._temp_lvxiv_00003_.png&type=temp&subfolder=&rand=0.3715711256758052"},{"name":"B","selected":true,"url":"/api/view?filename=rgthree.compare._temp_lvxiv_00004_.png&type=temp&subfolder=&rand=0.9911994449338102"}]]},{"id":373,"type":"ClownsharkChainsampler_Beta","pos":[1740,1080],"size":[272.9876403808594,526.665771484375],"flags":{},"order":20,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":1118},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1031},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":1099},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":1044},{"name":"options 2","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[1053],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"multistep/res_3m",1,1,"resample",true]},{"id":370,"type":"ClownsharKSampler_Beta","pos":[1417.3414306640625,1078.0023193359375],"size":[277.65570068359375,627.99951171875],"flags":{},"order":19,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":1024},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":1117},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1102},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[1031],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"multistep/res_3m","beta57",30,14,1,1,0,"fixed","unsample",true]},{"id":382,"type":"ControlNetApplyAdvanced","pos":[1440,830],"size":[210,186],"flags":{},"order":16,"mode":0,"inputs":[{"name":"positive","localized_name":"positive","type":"CONDITIONING","link":1108},{"name":"negative","localized_name":"negative","type":"CONDITIONING","link":1055},{"name":"control_net","localized_name":"control_net","type":"CONTROL_NET","link":1056},{"name":"image","localized_name":"image","type":"IMAGE","link":1112},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":1058}],"outputs":[{"name":"positive","localized_name":"positive","type":"CONDITIONING","links":[1118],"slot_index":0},{"name":"negative","localized_name":"negative","type":"CONDITIONING","links":null}],"properties":{"Node name for S&R":"ControlNetApplyAdvanced"},"widgets_values":[1,0,1]},{"id":380,"type":"ClownsharkChainsampler_Beta","pos":[2078.66015625,1080.6669921875],"size":[263.6514892578125,527.99951171875],"flags":{},"order":21,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1053},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":1051},{"name":"options 
2","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[1097],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"multistep/res_3m",1,1,"resample",true]},{"id":401,"type":"LoadImage","pos":[660.8270874023438,1457.920166015625],"size":[315,314],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[1122],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["pasted/image (579).png","image"]},{"id":371,"type":"VAEDecode","pos":[2741.197265625,974.4011840820312],"size":[140,46],"flags":{},"order":23,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":1088},{"name":"vae","localized_name":"vae","type":"VAE","link":1029}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[1030,1068],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode","cnr_id":"comfy-core","ver":"0.3.26","widget_ue_connectable":{}},"widgets_values":[]},{"id":378,"type":"ClownOptions_Cycles_Beta","pos":[1768.675537109375,881.3336791992188],"size":[210,130],"flags":{},"order":2,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[1044]}],"properties":{"Node name for 
S&R":"ClownOptions_Cycles_Beta"},"widgets_values":[5,1,0.5,1]},{"id":398,"type":"Reroute","pos":[1034.667724609375,1458.654541015625],"size":[75,26],"flags":{},"order":11,"mode":0,"inputs":[{"name":"","type":"*","link":1122}],"outputs":[{"name":"","type":"IMAGE","links":[1107,1112,1113,1115,1123],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":404,"type":"Image Repeat Tile To Size","pos":[899.620361328125,1259.9044189453125],"size":[210,106],"flags":{},"order":13,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","link":1123}],"outputs":[{"name":"image","localized_name":"image","type":"IMAGE","links":[1124],"slot_index":0}],"properties":{"Node name for S&R":"Image Repeat Tile To Size"},"widgets_values":[1536,1536,false]},{"id":375,"type":"VAEEncodeAdvanced","pos":[1140,1240],"size":[228.90342712402344,278],"flags":{},"order":15,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":1113},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":1124},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":1038}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[1102,1111],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":[1101],"slot_index":1},{"name":"mask","localized_name":"mask","type":"MASK","links":null},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":null,"slot_index":3},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for 
S&R":"VAEEncodeAdvanced"},"widgets_values":["false",1536,1536,"red",false,"16_channels"]},{"id":381,"type":"ClownOptions_Cycles_Beta","pos":[2103.203857421875,881.467041015625],"size":[210,130],"flags":{},"order":3,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[1051]}],"properties":{"Node name for S&R":"ClownOptions_Cycles_Beta"},"widgets_values":[20,1,0.5,1]},{"id":403,"type":"Note","pos":[2098.053466796875,680.7237548828125],"size":[215.7804412841797,88],"flags":{},"order":5,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Raise cycles here if you see halos. It doesn't hurt to go as high as 20. Minimum of 5 recommended."],"color":"#432","bgcolor":"#653"},{"id":402,"type":"Note","pos":[1755.3779296875,678.1484985351562],"size":[241.524658203125,132.7487030029297],"flags":{},"order":6,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Lower cycles here if you see halos. 
Minimum of 1 or 2 recommended.\n\nThese step(s)/cycle(s) (that use the ClownGuide Style node) are needed to prevent blurring when upscaling tiny thumbnail images."],"color":"#432","bgcolor":"#653"},{"id":359,"type":"ControlNetLoader","pos":[597.9067993164062,977.3353881835938],"size":[270.0880432128906,58],"flags":{},"order":7,"mode":0,"inputs":[],"outputs":[{"name":"CONTROL_NET","localized_name":"CONTROL_NET","type":"CONTROL_NET","links":[1056],"slot_index":0}],"properties":{"Node name for S&R":"ControlNetLoader"},"widgets_values":["flux_tile.safetensors"]},{"id":362,"type":"StyleModelApply","pos":[1141.4669189453125,829.1477661132812],"size":[270.06890869140625,122],"flags":{},"order":14,"mode":0,"inputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","link":1008},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","link":1009},{"name":"clip_vision_output","localized_name":"clip_vision_output","type":"CLIP_VISION_OUTPUT","link":1006}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[1108,1109,1117],"slot_index":0}],"properties":{"Node name for S&R":"StyleModelApply"},"widgets_values":[1,"multiply"]},{"id":361,"type":"CLIPVisionEncode","pos":[862.2003784179688,825.134765625],"size":[253.60000610351562,78],"flags":{},"order":12,"mode":0,"inputs":[{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","link":1004},{"name":"image","localized_name":"image","type":"IMAGE","link":1107}],"outputs":[{"name":"CLIP_VISION_OUTPUT","localized_name":"CLIP_VISION_OUTPUT","type":"CLIP_VISION_OUTPUT","links":[1006],"slot_index":0}],"properties":{"Node name for 
S&R":"CLIPVisionEncode"},"widgets_values":["center"]},{"id":364,"type":"CLIPTextEncode","pos":[899.5093383789062,952.8309936523438],"size":[210,88],"flags":{},"order":10,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":1007}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[1008,1055],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":[""]},{"id":408,"type":"Note","pos":[583.3265380859375,830.6437377929688],"size":[248.87789916992188,88],"flags":{},"order":8,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Jasper's tile controlnet was used."],"color":"#432","bgcolor":"#653"},{"id":407,"type":"Note","pos":[424.7425537109375,1579.1385498046875],"size":[210,88],"flags":{},"order":4,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Input image was 384x384.\n\nAny size can be used."],"color":"#432","bgcolor":"#653"}],"links":[[141,151,0,8,1,"VAE"],[142,151,0,72,1,"VAE"],[143,151,0,35,1,"VAE"],[144,151,0,154,7,"VAE"],[159,151,0,72,1,"VAE"],[160,151,0,157,7,"VAE"],[161,151,0,8,1,"VAE"],[162,151,0,154,7,"VAE"],[163,151,0,72,1,"VAE"],[164,151,0,8,1,"VAE"],[165,151,0,154,7,"VAE"],[171,151,0,8,1,"VAE"],[172,151,0,72,1,"VAE"],[173,151,0,154,7,"VAE"],[174,151,0,157,7,"VAE"],[176,151,0,8,1,"VAE"],[177,151,0,72,1,"VAE"],[178,151,0,154,7,"VAE"],[179,151,0,157,7,"VAE"],[195,151,0,8,1,"VAE"],[196,151,0,72,1,"VAE"],[197,151,0,154,7,"VAE"],[198,151,0,157,7,"VAE"],[199,151,0,160,7,"VAE"],[200,151,0,8,1,"VAE"],[201,151,0,72,1,"VAE"],[202,151,0,154,7,"VAE"],[203,151,0,157,7,"VAE"],[204,151,0,160,7,"VAE"],[217,151,0,8,1,"VAE"],[218,151,0,72,1,"VAE"],[219,151,0,154,7,"VAE"],[220,151,0,157,7,"VAE"],[221,151,0,160,7,"VAE"],[222,151,0,8,1,"VAE"],[223,151,0,72,1,"VAE"],[224,151,0,157,7,"VAE"],[225,151,0,8,1,"VAE"],[226,151,0,72,1,"VAE"],[227,151,0,157,7,"VAE"],[250,151,0,62,1,"VAE"],[251,151,0,157,7,"VAE"],[252,151,0,8,1,"VAE"],[253,151,0,72,1,"
VAE"],[254,151,0,62,1,"VAE"],[255,151,0,157,7,"VAE"],[256,151,0,8,1,"VAE"],[257,151,0,72,1,"VAE"],[258,151,0,160,7,"VAE"],[271,151,0,62,1,"VAE"],[272,151,0,157,7,"VAE"],[273,151,0,8,1,"VAE"],[274,151,0,72,1,"VAE"],[275,151,0,160,7,"VAE"],[276,151,0,154,7,"VAE"],[277,151,0,62,1,"VAE"],[278,151,0,157,7,"VAE"],[279,151,0,8,1,"VAE"],[280,151,0,72,1,"VAE"],[281,151,0,160,7,"VAE"],[282,151,0,154,7,"VAE"],[294,151,0,157,7,"VAE"],[295,151,0,72,1,"VAE"],[296,151,0,160,7,"VAE"],[297,151,0,154,7,"VAE"],[298,151,0,8,1,"VAE"],[299,151,0,313,1,"VAE"],[300,151,0,62,1,"VAE"],[301,151,0,157,7,"VAE"],[302,151,0,72,1,"VAE"],[303,151,0,160,7,"VAE"],[304,151,0,8,1,"VAE"],[305,151,0,313,1,"VAE"],[306,151,0,62,1,"VAE"],[307,151,0,154,7,"VAE"],[309,151,0,157,7,"VAE"],[310,151,0,72,1,"VAE"],[311,151,0,160,7,"VAE"],[312,151,0,8,1,"VAE"],[313,151,0,313,1,"VAE"],[314,151,0,62,1,"VAE"],[315,151,0,154,7,"VAE"],[316,151,0,157,7,"VAE"],[317,151,0,72,1,"VAE"],[318,151,0,160,7,"VAE"],[319,151,0,8,1,"VAE"],[320,151,0,313,1,"VAE"],[321,151,0,62,1,"VAE"],[322,151,0,154,7,"VAE"],[327,151,0,157,7,"VAE"],[328,151,0,72,1,"VAE"],[329,151,0,8,1,"VAE"],[330,151,0,313,1,"VAE"],[331,151,0,62,1,"VAE"],[332,151,0,154,7,"VAE"],[333,151,0,160,7,"VAE"],[343,151,0,157,7,"VAE"],[344,151,0,72,1,"VAE"],[345,151,0,8,1,"VAE"],[346,151,0,313,1,"VAE"],[347,151,0,62,1,"VAE"],[348,151,0,160,7,"VAE"],[349,151,0,154,7,"VAE"],[351,151,0,157,7,"VAE"],[352,151,0,72,1,"VAE"],[353,151,0,8,1,"VAE"],[354,151,0,313,1,"VAE"],[355,151,0,62,1,"VAE"],[356,151,0,160,7,"VAE"],[357,151,0,154,7,"VAE"],[363,151,0,157,7,"VAE"],[364,151,0,72,1,"VAE"],[365,151,0,8,1,"VAE"],[366,151,0,160,7,"VAE"],[367,151,0,154,7,"VAE"],[368,151,0,62,1,"VAE"],[370,151,0,157,7,"VAE"],[371,151,0,72,1,"VAE"],[372,151,0,8,1,"VAE"],[373,151,0,160,7,"VAE"],[374,151,0,154,7,"VAE"],[375,151,0,62,1,"VAE"],[377,151,0,157,7,"VAE"],[378,151,0,72,1,"VAE"],[379,151,0,8,1,"VAE"],[380,151,0,160,7,"VAE"],[381,151,0,154,7,"VAE"],[382,151,0,62,1,"VAE"],[383,151,0,157,7,"VAE"],[384,1
51,0,72,1,"VAE"],[385,151,0,8,1,"VAE"],[386,151,0,160,7,"VAE"],[387,151,0,154,7,"VAE"],[388,151,0,62,1,"VAE"],[391,151,0,157,7,"VAE"],[392,151,0,72,1,"VAE"],[393,151,0,8,1,"VAE"],[394,151,0,160,7,"VAE"],[395,151,0,154,7,"VAE"],[396,151,0,62,1,"VAE"],[402,151,0,157,7,"VAE"],[403,151,0,72,1,"VAE"],[404,151,0,8,1,"VAE"],[405,151,0,160,7,"VAE"],[406,151,0,154,7,"VAE"],[407,151,0,62,1,"VAE"],[408,151,0,157,7,"VAE"],[409,151,0,72,1,"VAE"],[410,151,0,8,1,"VAE"],[411,151,0,160,7,"VAE"],[412,151,0,154,7,"VAE"],[413,151,0,62,1,"VAE"],[421,151,0,157,7,"VAE"],[422,151,0,72,1,"VAE"],[423,151,0,8,1,"VAE"],[424,151,0,160,7,"VAE"],[425,151,0,154,7,"VAE"],[426,151,0,62,1,"VAE"],[427,151,0,157,7,"VAE"],[428,151,0,72,1,"VAE"],[429,151,0,8,1,"VAE"],[430,151,0,160,7,"VAE"],[431,151,0,154,7,"VAE"],[432,151,0,62,1,"VAE"],[1004,349,3,361,0,"CLIP_VISION"],[1006,361,0,362,2,"CLIP_VISION_OUTPUT"],[1007,349,1,364,0,"CLIP"],[1008,364,0,362,0,"CONDITIONING"],[1009,349,4,362,1,"STYLE_MODEL"],[1022,349,0,368,0,"MODEL"],[1024,355,0,370,0,"MODEL"],[1029,349,2,371,1,"VAE"],[1030,371,0,372,0,"IMAGE"],[1031,370,0,373,4,"LATENT"],[1038,349,2,375,4,"VAE"],[1044,378,0,373,6,"OPTIONS"],[1047,368,0,355,0,"MODEL"],[1051,381,0,380,6,"OPTIONS"],[1053,373,0,380,4,"LATENT"],[1055,364,0,382,1,"CONDITIONING"],[1056,359,0,382,2,"CONTROL_NET"],[1058,349,2,382,4,"VAE"],[1068,371,0,387,0,"IMAGE"],[1088,374,0,371,0,"LATENT"],[1097,380,0,374,4,"LATENT"],[1099,369,0,373,5,"GUIDES"],[1101,375,1,369,0,"LATENT"],[1102,375,0,370,3,"LATENT"],[1107,398,0,361,1,"IMAGE"],[1108,362,0,382,0,"CONDITIONING"],[1109,362,0,374,1,"CONDITIONING"],[1111,375,0,355,1,"LATENT"],[1112,398,0,382,3,"IMAGE"],[1113,398,0,375,0,"IMAGE"],[1115,398,0,387,1,"IMAGE"],[1117,362,0,370,1,"CONDITIONING"],[1118,382,0,373,1,"CONDITIONING"],[1122,401,0,398,0,"*"],[1123,398,0,404,0,"IMAGE"],[1124,404,0,375,1,"IMAGE"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.3109994191500252,"offset":[1512.0539235106066,-356.0468640337415]},"ue_links":[{"downstream":1
57,"downstream_slot":7,"upstream":"151","upstream_slot":0,"controller":64,"type":"VAE"},{"downstream":154,"downstream_slot":7,"upstream":"151","upstream_slot":0,"controller":64,"type":"VAE"},{"downstream":72,"downstream_slot":1,"upstream":"151","upstream_slot":0,"controller":64,"type":"VAE"},{"downstream":62,"downstream_slot":1,"upstream":"151","upstream_slot":0,"controller":64,"type":"VAE"}],"VHS_latentpreview":false,"VHS_latentpreviewrate":0,"VHS_MetadataImage":true,"VHS_KeepIntermediate":true,"links_added_by_ue":[959,960,961,962],"frontendVersion":"1.18.6"},"version":0.4} ================================================ FILE: example_workflows/flux upscale thumbnail widescreen.json ================================================ {"last_node_id":411,"last_link_id":1130,"nodes":[{"id":369,"type":"ClownGuide_Style_Beta","pos":[1138.06640625,1574.328857421875],"size":[231.30213928222656,286],"flags":{},"order":18,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":1101},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[1099],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownGuide_Style_Beta"},"widgets_values":["positive","WCT",1,1,"constant",0,-1,false]},{"id":374,"type":"ClownsharkChainsampler_Beta","pos":[2403.98583984375,1081.333740234375],"size":[274.9878234863281,528.6721801757812],"flags":{},"order":22,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":1109},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1097},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[1088],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"multistep/res_3m",-1,1,"resample",true]},{"id":372,"type":"SaveImage","pos":[2740,1080],"size":[442.38494873046875,530.0809936523438],"flags":{},"order":24,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":1030}],"outputs":[],"properties":{},"widgets_values":["ComfyUI"]},{"id":355,"type":"ModelSamplingAdvancedResolution","pos":[1134.0809326171875,1057.9874267578125],"size":[260.3999938964844,126],"flags":{},"order":17,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1047},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":1111}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1024],"slot_index":0}],"properties":{"Node name for 
S&R":"ModelSamplingAdvancedResolution"},"widgets_values":["exponential",1.35,0.85]},{"id":368,"type":"ReFluxPatcher","pos":[897.4150390625,1095.9840087890625],"size":[210,82],"flags":{},"order":9,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1022}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1047],"slot_index":0}],"properties":{"Node name for S&R":"ReFluxPatcher"},"widgets_values":["float32",true]},{"id":349,"type":"FluxLoader","pos":[554.6767578125,1099.277099609375],"size":[315,282],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1022],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[1007],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[1029,1038,1058],"slot_index":2},{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","links":[1004],"slot_index":3},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","links":[1009]}],"properties":{"Node name for S&R":"FluxLoader"},"widgets_values":["colossusProjectFlux_v42AIO.safetensors","default",".use_ckpt_clip",".none",".use_ckpt_vae","sigclip_vision_patch14_384.safetensors","flux1-redux-dev.safetensors"]},{"id":387,"type":"Image Comparer 
(rgthree)","pos":[3228.67529296875,1082.0006103515625],"size":[502.8477478027344,526.1139526367188],"flags":{},"order":25,"mode":0,"inputs":[{"name":"image_a","type":"IMAGE","dir":3,"link":1068},{"name":"image_b","type":"IMAGE","dir":3,"link":1115}],"outputs":[],"properties":{"comparer_mode":"Slide"},"widgets_values":[[{"name":"A","selected":true,"url":"/api/view?filename=rgthree.compare._temp_klodp_00033_.png&type=temp&subfolder=&rand=0.5892199958912905"},{"name":"B","selected":true,"url":"/api/view?filename=rgthree.compare._temp_klodp_00034_.png&type=temp&subfolder=&rand=0.10900460801823297"}]]},{"id":373,"type":"ClownsharkChainsampler_Beta","pos":[1740,1080],"size":[272.9876403808594,526.665771484375],"flags":{},"order":20,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":1118},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1031},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":1099},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":1044},{"name":"options 2","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[1053],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"multistep/res_3m",1,1,"resample",true]},{"id":382,"type":"ControlNetApplyAdvanced","pos":[1440,830],"size":[210,186],"flags":{},"order":16,"mode":0,"inputs":[{"name":"positive","localized_name":"positive","type":"CONDITIONING","link":1108},{"name":"negative","localized_name":"negative","type":"CONDITIONING","link":1055},{"name":"control_net","localized_name":"control_net","type":"CONTROL_NET","link":1056},{"name":"image","localized_name":"image","type":"IMAGE","link":1112},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":1058}],"outputs":[{"name":"positive","localized_name":"positive","type":"CONDITIONING","links":[1118],"slot_index":0},{"name":"negative","localized_name":"negative","type":"CONDITIONING","links":null}],"properties":{"Node name for S&R":"ControlNetApplyAdvanced"},"widgets_values":[1,0,1]},{"id":380,"type":"ClownsharkChainsampler_Beta","pos":[2078.66015625,1080.6669921875],"size":[263.6514892578125,527.99951171875],"flags":{},"order":21,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1053},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":1051},{"name":"options 2","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[1097],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name 
for S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"multistep/res_3m",1,1,"resample",true]},{"id":371,"type":"VAEDecode","pos":[2741.197265625,974.4011840820312],"size":[140,46],"flags":{},"order":23,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":1088},{"name":"vae","localized_name":"vae","type":"VAE","link":1029}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[1030,1068],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode","cnr_id":"comfy-core","ver":"0.3.26","widget_ue_connectable":{}},"widgets_values":[]},{"id":378,"type":"ClownOptions_Cycles_Beta","pos":[1768.675537109375,881.3336791992188],"size":[210,130],"flags":{},"order":1,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[1044]}],"properties":{"Node name for S&R":"ClownOptions_Cycles_Beta"},"widgets_values":[5,1,0.5,1]},{"id":381,"type":"ClownOptions_Cycles_Beta","pos":[2103.203857421875,881.467041015625],"size":[210,130],"flags":{},"order":2,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[1051]}],"properties":{"Node name for S&R":"ClownOptions_Cycles_Beta"},"widgets_values":[20,1,0.5,1]},{"id":403,"type":"Note","pos":[2098.053466796875,680.7237548828125],"size":[215.7804412841797,88],"flags":{},"order":3,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Raise cycles here if you see halos. It doesn't hurt to go as high as 20. Minimum of 5 recommended."],"color":"#432","bgcolor":"#653"},{"id":402,"type":"Note","pos":[1755.3779296875,678.1484985351562],"size":[241.524658203125,132.7487030029297],"flags":{},"order":4,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Lower cycles here if you see halos. 
Minimum of 1 or 2 recommended.\n\nThese step(s)/cycle(s) (that use the ClownGuide Style node) are needed to prevent blurring when upscaling tiny thumbnail images."],"color":"#432","bgcolor":"#653"},{"id":359,"type":"ControlNetLoader","pos":[597.9067993164062,977.3353881835938],"size":[270.0880432128906,58],"flags":{},"order":5,"mode":0,"inputs":[],"outputs":[{"name":"CONTROL_NET","localized_name":"CONTROL_NET","type":"CONTROL_NET","links":[1056],"slot_index":0}],"properties":{"Node name for S&R":"ControlNetLoader"},"widgets_values":["flux_tile.safetensors"]},{"id":362,"type":"StyleModelApply","pos":[1141.4669189453125,829.1477661132812],"size":[270.06890869140625,122],"flags":{},"order":14,"mode":0,"inputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","link":1008},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","link":1009},{"name":"clip_vision_output","localized_name":"clip_vision_output","type":"CLIP_VISION_OUTPUT","link":1006}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[1108,1109,1117],"slot_index":0}],"properties":{"Node name for S&R":"StyleModelApply"},"widgets_values":[1,"multiply"]},{"id":361,"type":"CLIPVisionEncode","pos":[862.2003784179688,825.134765625],"size":[253.60000610351562,78],"flags":{},"order":12,"mode":0,"inputs":[{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","link":1004},{"name":"image","localized_name":"image","type":"IMAGE","link":1107}],"outputs":[{"name":"CLIP_VISION_OUTPUT","localized_name":"CLIP_VISION_OUTPUT","type":"CLIP_VISION_OUTPUT","links":[1006],"slot_index":0}],"properties":{"Node name for 
S&R":"CLIPVisionEncode"},"widgets_values":["center"]},{"id":364,"type":"CLIPTextEncode","pos":[899.5093383789062,952.8309936523438],"size":[210,88],"flags":{},"order":10,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":1007}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[1008,1055],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":[""]},{"id":408,"type":"Note","pos":[549.5983276367188,826.2056884765625],"size":[294.1452331542969,99.538818359375],"flags":{},"order":6,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Jasper's tile controlnet was used.\n\nhttps://huggingface.co/jasperai/Flux.1-dev-Controlnet-Upscaler/blob/main/diffusion_pytorch_model.safetensors"],"color":"#432","bgcolor":"#653"},{"id":370,"type":"ClownsharKSampler_Beta","pos":[1417.3414306640625,1078.0023193359375],"size":[277.65570068359375,627.99951171875],"flags":{},"order":19,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":1024},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":1117},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1102},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[1031],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"multistep/res_3m","beta57",30,14,1,1,0,"fixed","unsample",true]},{"id":404,"type":"Image Repeat Tile To Size","pos":[899.620361328125,1259.9044189453125],"size":[210,106],"flags":{},"order":13,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","link":1123}],"outputs":[{"name":"image","localized_name":"image","type":"IMAGE","links":[1124],"slot_index":0}],"properties":{"Node name for S&R":"Image Repeat Tile To Size"},"widgets_values":[1792,1024,true]},{"id":375,"type":"VAEEncodeAdvanced","pos":[1140,1240],"size":[228.90342712402344,278],"flags":{},"order":15,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":1113},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":1124},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":1038}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[1102,1111],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":[1101],"slot_index":1},{"name":"mask","localized_name":"mask","type":"MASK","links":null},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":null,"slot_index":3},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for 
S&R":"VAEEncodeAdvanced"},"widgets_values":["image_2",1,1,"red",false,"16_channels"]},{"id":398,"type":"Reroute","pos":[1034.0006103515625,1404.638671875],"size":[75,26],"flags":{},"order":11,"mode":0,"inputs":[{"name":"","type":"*","link":1130}],"outputs":[{"name":"","type":"IMAGE","links":[1107,1112,1113,1115,1123],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":411,"type":"LoadImage","pos":[791.842041015625,1491.6041259765625],"size":[315,314],"flags":{},"order":7,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[1130],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["pasted/image (595).png","image"]},{"id":407,"type":"Note","pos":[552.9491577148438,1493.21923828125],"size":[210.6668243408203,166.69004821777344],"flags":{},"order":8,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Input image was 672x384.\n\nAny size can be used. 
Just be sure to keep the aspect ratio the same, per usual.\n\nBest results will be with minimum size = 384 (height and/or width), due to that being what SigCLIP was trained on (which is what Redux uses)."],"color":"#432","bgcolor":"#653"}],"links":[[141,151,0,8,1,"VAE"],[142,151,0,72,1,"VAE"],[143,151,0,35,1,"VAE"],[144,151,0,154,7,"VAE"],[159,151,0,72,1,"VAE"],[160,151,0,157,7,"VAE"],[161,151,0,8,1,"VAE"],[162,151,0,154,7,"VAE"],[163,151,0,72,1,"VAE"],[164,151,0,8,1,"VAE"],[165,151,0,154,7,"VAE"],[171,151,0,8,1,"VAE"],[172,151,0,72,1,"VAE"],[173,151,0,154,7,"VAE"],[174,151,0,157,7,"VAE"],[176,151,0,8,1,"VAE"],[177,151,0,72,1,"VAE"],[178,151,0,154,7,"VAE"],[179,151,0,157,7,"VAE"],[195,151,0,8,1,"VAE"],[196,151,0,72,1,"VAE"],[197,151,0,154,7,"VAE"],[198,151,0,157,7,"VAE"],[199,151,0,160,7,"VAE"],[200,151,0,8,1,"VAE"],[201,151,0,72,1,"VAE"],[202,151,0,154,7,"VAE"],[203,151,0,157,7,"VAE"],[204,151,0,160,7,"VAE"],[217,151,0,8,1,"VAE"],[218,151,0,72,1,"VAE"],[219,151,0,154,7,"VAE"],[220,151,0,157,7,"VAE"],[221,151,0,160,7,"VAE"],[222,151,0,8,1,"VAE"],[223,151,0,72,1,"VAE"],[224,151,0,157,7,"VAE"],[225,151,0,8,1,"VAE"],[226,151,0,72,1,"VAE"],[227,151,0,157,7,"VAE"],[250,151,0,62,1,"VAE"],[251,151,0,157,7,"VAE"],[252,151,0,8,1,"VAE"],[253,151,0,72,1,"VAE"],[254,151,0,62,1,"VAE"],[255,151,0,157,7,"VAE"],[256,151,0,8,1,"VAE"],[257,151,0,72,1,"VAE"],[258,151,0,160,7,"VAE"],[271,151,0,62,1,"VAE"],[272,151,0,157,7,"VAE"],[273,151,0,8,1,"VAE"],[274,151,0,72,1,"VAE"],[275,151,0,160,7,"VAE"],[276,151,0,154,7,"VAE"],[277,151,0,62,1,"VAE"],[278,151,0,157,7,"VAE"],[279,151,0,8,1,"VAE"],[280,151,0,72,1,"VAE"],[281,151,0,160,7,"VAE"],[282,151,0,154,7,"VAE"],[294,151,0,157,7,"VAE"],[295,151,0,72,1,"VAE"],[296,151,0,160,7,"VAE"],[297,151,0,154,7,"VAE"],[298,151,0,8,1,"VAE"],[299,151,0,313,1,"VAE"],[300,151,0,62,1,"VAE"],[301,151,0,157,7,"VAE"],[302,151,0,72,1,"VAE"],[303,151,0,160,7,"VAE"],[304,151,0,8,1,"VAE"],[305,151,0,313,1,"VAE"],[306,151,0,62,1,"VAE"],[307,151,0,154,7,"VAE"],[309,1
51,0,157,7,"VAE"],[310,151,0,72,1,"VAE"],[311,151,0,160,7,"VAE"],[312,151,0,8,1,"VAE"],[313,151,0,313,1,"VAE"],[314,151,0,62,1,"VAE"],[315,151,0,154,7,"VAE"],[316,151,0,157,7,"VAE"],[317,151,0,72,1,"VAE"],[318,151,0,160,7,"VAE"],[319,151,0,8,1,"VAE"],[320,151,0,313,1,"VAE"],[321,151,0,62,1,"VAE"],[322,151,0,154,7,"VAE"],[327,151,0,157,7,"VAE"],[328,151,0,72,1,"VAE"],[329,151,0,8,1,"VAE"],[330,151,0,313,1,"VAE"],[331,151,0,62,1,"VAE"],[332,151,0,154,7,"VAE"],[333,151,0,160,7,"VAE"],[343,151,0,157,7,"VAE"],[344,151,0,72,1,"VAE"],[345,151,0,8,1,"VAE"],[346,151,0,313,1,"VAE"],[347,151,0,62,1,"VAE"],[348,151,0,160,7,"VAE"],[349,151,0,154,7,"VAE"],[351,151,0,157,7,"VAE"],[352,151,0,72,1,"VAE"],[353,151,0,8,1,"VAE"],[354,151,0,313,1,"VAE"],[355,151,0,62,1,"VAE"],[356,151,0,160,7,"VAE"],[357,151,0,154,7,"VAE"],[363,151,0,157,7,"VAE"],[364,151,0,72,1,"VAE"],[365,151,0,8,1,"VAE"],[366,151,0,160,7,"VAE"],[367,151,0,154,7,"VAE"],[368,151,0,62,1,"VAE"],[370,151,0,157,7,"VAE"],[371,151,0,72,1,"VAE"],[372,151,0,8,1,"VAE"],[373,151,0,160,7,"VAE"],[374,151,0,154,7,"VAE"],[375,151,0,62,1,"VAE"],[377,151,0,157,7,"VAE"],[378,151,0,72,1,"VAE"],[379,151,0,8,1,"VAE"],[380,151,0,160,7,"VAE"],[381,151,0,154,7,"VAE"],[382,151,0,62,1,"VAE"],[383,151,0,157,7,"VAE"],[384,151,0,72,1,"VAE"],[385,151,0,8,1,"VAE"],[386,151,0,160,7,"VAE"],[387,151,0,154,7,"VAE"],[388,151,0,62,1,"VAE"],[391,151,0,157,7,"VAE"],[392,151,0,72,1,"VAE"],[393,151,0,8,1,"VAE"],[394,151,0,160,7,"VAE"],[395,151,0,154,7,"VAE"],[396,151,0,62,1,"VAE"],[402,151,0,157,7,"VAE"],[403,151,0,72,1,"VAE"],[404,151,0,8,1,"VAE"],[405,151,0,160,7,"VAE"],[406,151,0,154,7,"VAE"],[407,151,0,62,1,"VAE"],[408,151,0,157,7,"VAE"],[409,151,0,72,1,"VAE"],[410,151,0,8,1,"VAE"],[411,151,0,160,7,"VAE"],[412,151,0,154,7,"VAE"],[413,151,0,62,1,"VAE"],[421,151,0,157,7,"VAE"],[422,151,0,72,1,"VAE"],[423,151,0,8,1,"VAE"],[424,151,0,160,7,"VAE"],[425,151,0,154,7,"VAE"],[426,151,0,62,1,"VAE"],[427,151,0,157,7,"VAE"],[428,151,0,72,1,"VAE"],[429,151,0,8,1,"VAE
"],[430,151,0,160,7,"VAE"],[431,151,0,154,7,"VAE"],[432,151,0,62,1,"VAE"],[1004,349,3,361,0,"CLIP_VISION"],[1006,361,0,362,2,"CLIP_VISION_OUTPUT"],[1007,349,1,364,0,"CLIP"],[1008,364,0,362,0,"CONDITIONING"],[1009,349,4,362,1,"STYLE_MODEL"],[1022,349,0,368,0,"MODEL"],[1024,355,0,370,0,"MODEL"],[1029,349,2,371,1,"VAE"],[1030,371,0,372,0,"IMAGE"],[1031,370,0,373,4,"LATENT"],[1038,349,2,375,4,"VAE"],[1044,378,0,373,6,"OPTIONS"],[1047,368,0,355,0,"MODEL"],[1051,381,0,380,6,"OPTIONS"],[1053,373,0,380,4,"LATENT"],[1055,364,0,382,1,"CONDITIONING"],[1056,359,0,382,2,"CONTROL_NET"],[1058,349,2,382,4,"VAE"],[1068,371,0,387,0,"IMAGE"],[1088,374,0,371,0,"LATENT"],[1097,380,0,374,4,"LATENT"],[1099,369,0,373,5,"GUIDES"],[1101,375,1,369,0,"LATENT"],[1102,375,0,370,3,"LATENT"],[1107,398,0,361,1,"IMAGE"],[1108,362,0,382,0,"CONDITIONING"],[1109,362,0,374,1,"CONDITIONING"],[1111,375,0,355,1,"LATENT"],[1112,398,0,382,3,"IMAGE"],[1113,398,0,375,0,"IMAGE"],[1115,398,0,387,1,"IMAGE"],[1117,362,0,370,1,"CONDITIONING"],[1118,382,0,373,1,"CONDITIONING"],[1123,398,0,404,0,"IMAGE"],[1124,404,0,375,1,"IMAGE"],[1130,411,0,398,0,"*"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.7449402268886842,"offset":[634.5784677482833,-682.7929436822943]},"ue_links":[{"downstream":157,"downstream_slot":7,"upstream":"151","upstream_slot":0,"controller":64,"type":"VAE"},{"downstream":154,"downstream_slot":7,"upstream":"151","upstream_slot":0,"controller":64,"type":"VAE"},{"downstream":72,"downstream_slot":1,"upstream":"151","upstream_slot":0,"controller":64,"type":"VAE"},{"downstream":62,"downstream_slot":1,"upstream":"151","upstream_slot":0,"controller":64,"type":"VAE"}],"VHS_latentpreview":false,"VHS_latentpreviewrate":0,"VHS_MetadataImage":true,"VHS_KeepIntermediate":true,"links_added_by_ue":[959,960,961,962],"frontendVersion":"1.18.6"},"version":0.4} ================================================ FILE: example_workflows/hidream guide data projection.json 
================================================ {"last_node_id":641,"last_link_id":2035,"nodes":[{"id":628,"type":"LoadImage","pos":[599.166015625,156.38429260253906],"size":[315,314],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2017]},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["ComfyUI_14254_.png","image"]},{"id":632,"type":"ModelSamplingAdvancedResolution","pos":[962.5586547851562,-316.3705139160156],"size":[277.62237548828125,126],"flags":{},"order":6,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":2025},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":2015}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2016],"slot_index":0}],"properties":{"Node name for S&R":"ModelSamplingAdvancedResolution"},"widgets_values":["exponential",1.35,0.85]},{"id":636,"type":"ClownModelLoader","pos":[599.3463745117188,-176.31788635253906],"size":[315,266],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2025],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[2024,2028],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[2026,2027],"slot_index":2}],"properties":{"Node name for 
S&R":"ClownModelLoader"},"widgets_values":["hidream_i1_full_fp8.safetensors","fp8_e4m3fn","clip_l_hidream.safetensors","clip_g_hidream.safetensors","t5xxl_fp8_e4m3fn_scaled.safetensors","llama_3.1_8b_instruct_fp8_scaled.safetensors","hidream","ae.sft"]},{"id":591,"type":"VAEDecode","pos":[1610,-230],"size":[210,46],"flags":{"collapsed":false},"order":8,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","label":"samples","type":"LATENT","link":2030},{"name":"vae","localized_name":"vae","label":"vae","type":"VAE","link":2027}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","label":"IMAGE","type":"IMAGE","shape":3,"links":[2019],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode"},"widgets_values":[]},{"id":633,"type":"SaveImage","pos":[1610,-120],"size":[436.4179382324219,508.5302429199219],"flags":{},"order":9,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2019}],"outputs":[],"properties":{},"widgets_values":["ComfyUI"]},{"id":629,"type":"VAEEncodeAdvanced","pos":[961.6965942382812,242.70477294921875],"size":[278.0284423828125,280.5834045410156],"flags":{},"order":4,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":2017},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":2026}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[2013,2020],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":null},{"name":"mask","localized_name":"mask","type":"MASK","links":null},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[2015]},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localiz
ed_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"VAEEncodeAdvanced"},"widgets_values":["false",1024,1024,"red",false,"16_channels"]},{"id":630,"type":"ClownsharKSampler_Beta","pos":[1271.7001953125,-124.3408432006836],"size":[291.7499084472656,650],"flags":{},"order":7,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":2016},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":2018},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":2029},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":2013},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":2021},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2030],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"multistep/res_3m","beta57",30,-1,1,4,0,"fixed","standard",true]},{"id":637,"type":"CLIPTextEncode","pos":[962.297607421875,99.93917846679688],"size":[278.4529113769531,88],"flags":{"collapsed":false},"order":3,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","label":"clip","type":"CLIP","link":2028}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","label":"CONDITIONING","type":"CONDITIONING","shape":3,"links":[2029],"slot_index":0}],"title":"Positive Prompt","properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["low quality, low detail, blurry, shallow depth of field, mutated, symmetrical, 
generic"]},{"id":107,"type":"CLIPTextEncode","pos":[959.4713745117188,-123.3353500366211],"size":[282.33453369140625,173.58438110351562],"flags":{"collapsed":false},"order":2,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","label":"clip","type":"CLIP","link":2024}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","label":"CONDITIONING","type":"CONDITIONING","shape":3,"links":[2018],"slot_index":0}],"title":"Positive Prompt","properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["the mournful lamentations of of a female rock singer on stage with chaos behind her, her face screaming her sorrowful refrains the despairing cries of anguished screams howling agonized moans, her pained whispers mournful sighs distant echoes across the smoky stage, fading memories of lost loves, forgotten dreams, shattered hopes, crushed spirits, broken hearts"]},{"id":634,"type":"ClownGuide_Beta","pos":[1276.0064697265625,-480.84442138671875],"size":[284.860595703125,290.8609924316406],"flags":{},"order":5,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":2020},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[2021],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownGuide_Beta"},"widgets_values":["data",false,true,1,1,"beta57",0,2,false]}],"links":[[2013,629,0,630,3,"LATENT"],[2015,629,3,632,1,"LATENT"],[2016,632,0,630,0,"MODEL"],[2017,628,0,629,0,"IMAGE"],[2018,107,0,630,1,"CONDITIONING"],[2019,591,0,633,0,"IMAGE"],[2020,629,0,634,0,"LATENT"],[2021,634,0,630,5,"GUIDES"],[2024,636,1,107,0,"CLIP"],[2025,636,0,632,0,"MODEL"],[2026,636,2,629,4,"VAE"],[2027,636,2,591,1,"VAE"],[2028,636,1,637,0,"CLIP"],[2029,637,0,630,2,"CONDITIONING"],[2030,630,0,591,0,"LATENT"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.7985878990923265,"offset":[1686.8845871920696,637.6012821508443]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0},"version":0.4} ================================================ FILE: example_workflows/hidream guide epsilon projection.json ================================================ {"last_node_id":641,"last_link_id":2035,"nodes":[{"id":628,"type":"LoadImage","pos":[599.166015625,156.38429260253906],"size":[315,314],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2017]},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["ComfyUI_14254_.png","image"]},{"id":632,"type":"ModelSamplingAdvancedResolution","pos":[962.5586547851562,-316.3705139160156],"size":[277.62237548828125,126],"flags":{},"order":6,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":2025},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":2015}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2016],"slot_index":0}],"properties":{"Node name for 
S&R":"ModelSamplingAdvancedResolution"},"widgets_values":["exponential",1.35,0.85]},{"id":636,"type":"ClownModelLoader","pos":[599.3463745117188,-176.31788635253906],"size":[315,266],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2025],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[2024,2028],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[2026,2027],"slot_index":2}],"properties":{"Node name for S&R":"ClownModelLoader"},"widgets_values":["hidream_i1_full_fp8.safetensors","fp8_e4m3fn","clip_l_hidream.safetensors","clip_g_hidream.safetensors","t5xxl_fp8_e4m3fn_scaled.safetensors","llama_3.1_8b_instruct_fp8_scaled.safetensors","hidream","ae.sft"]},{"id":591,"type":"VAEDecode","pos":[1610,-230],"size":[210,46],"flags":{"collapsed":false},"order":8,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","label":"samples","type":"LATENT","link":2030},{"name":"vae","localized_name":"vae","label":"vae","type":"VAE","link":2027}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","label":"IMAGE","type":"IMAGE","shape":3,"links":[2019],"slot_index":0}],"properties":{"Node name for 
S&R":"VAEDecode"},"widgets_values":[]},{"id":633,"type":"SaveImage","pos":[1610,-120],"size":[436.4179382324219,508.5302429199219],"flags":{},"order":9,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2019}],"outputs":[],"properties":{},"widgets_values":["ComfyUI"]},{"id":629,"type":"VAEEncodeAdvanced","pos":[961.6965942382812,242.70477294921875],"size":[278.0284423828125,280.5834045410156],"flags":{},"order":4,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":2017},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":2026}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[2013,2020],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":null},{"name":"mask","localized_name":"mask","type":"MASK","links":null},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[2015]},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for 
S&R":"VAEEncodeAdvanced"},"widgets_values":["false",1024,1024,"red",false,"16_channels"]},{"id":630,"type":"ClownsharKSampler_Beta","pos":[1271.7001953125,-124.3408432006836],"size":[291.7499084472656,650],"flags":{},"order":7,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":2016},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":2018},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":2029},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":2013},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":2021},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2030],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"multistep/res_3m","beta57",30,-1,1,4,0,"fixed","standard",true]},{"id":637,"type":"CLIPTextEncode","pos":[962.297607421875,99.93917846679688],"size":[278.4529113769531,88],"flags":{"collapsed":false},"order":3,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","label":"clip","type":"CLIP","link":2028}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","label":"CONDITIONING","type":"CONDITIONING","shape":3,"links":[2029],"slot_index":0}],"title":"Positive Prompt","properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["low quality, low detail, blurry, shallow depth of field, mutated, symmetrical, 
generic"]},{"id":107,"type":"CLIPTextEncode","pos":[959.4713745117188,-123.3353500366211],"size":[282.33453369140625,173.58438110351562],"flags":{"collapsed":false},"order":2,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","label":"clip","type":"CLIP","link":2024}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","label":"CONDITIONING","type":"CONDITIONING","shape":3,"links":[2018],"slot_index":0}],"title":"Positive Prompt","properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["the mournful lamentations of of a female rock singer on stage with chaos behind her, her face screaming her sorrowful refrains the despairing cries of anguished screams howling agonized moans, her pained whispers mournful sighs distant echoes across the smoky stage, fading memories of lost loves, forgotten dreams, shattered hopes, crushed spirits, broken hearts"]},{"id":634,"type":"ClownGuide_Beta","pos":[1276.0064697265625,-480.84442138671875],"size":[284.860595703125,290.8609924316406],"flags":{},"order":5,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":2020},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[2021],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownGuide_Beta"},"widgets_values":["epsilon",false,true,1,1,"beta57",0,6,false]}],"links":[[2013,629,0,630,3,"LATENT"],[2015,629,3,632,1,"LATENT"],[2016,632,0,630,0,"MODEL"],[2017,628,0,629,0,"IMAGE"],[2018,107,0,630,1,"CONDITIONING"],[2019,591,0,633,0,"IMAGE"],[2020,629,0,634,0,"LATENT"],[2021,634,0,630,5,"GUIDES"],[2024,636,1,107,0,"CLIP"],[2025,636,0,632,0,"MODEL"],[2026,636,2,629,4,"VAE"],[2027,636,2,591,1,"VAE"],[2028,636,1,637,0,"CLIP"],[2029,637,0,630,2,"CONDITIONING"],[2030,630,0,591,0,"LATENT"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.7985878990923265,"offset":[1138.2513303928165,621.4269926638877]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0},"version":0.4} ================================================ FILE: example_workflows/hidream guide flow.json ================================================ {"last_node_id":640,"last_link_id":2035,"nodes":[{"id":628,"type":"LoadImage","pos":[599.166015625,156.38429260253906],"size":[315,314],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2017]},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["ComfyUI_14254_.png","image"]},{"id":632,"type":"ModelSamplingAdvancedResolution","pos":[962.5586547851562,-316.3705139160156],"size":[277.62237548828125,126],"flags":{},"order":9,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":2025},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":2015}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2016],"slot_index":0}],"properties":{"Node name for 
S&R":"ModelSamplingAdvancedResolution"},"widgets_values":["exponential",1.35,0.85]},{"id":636,"type":"ClownModelLoader","pos":[599.3463745117188,-176.31788635253906],"size":[315,266],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2025],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[2024,2028,2034],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[2026,2027],"slot_index":2}],"properties":{"Node name for S&R":"ClownModelLoader"},"widgets_values":["hidream_i1_full_fp8.safetensors","fp8_e4m3fn","clip_l_hidream.safetensors","clip_g_hidream.safetensors","t5xxl_fp8_e4m3fn_scaled.safetensors","llama_3.1_8b_instruct_fp8_scaled.safetensors","hidream","ae.sft"]},{"id":591,"type":"VAEDecode","pos":[1610,-230],"size":[210,46],"flags":{"collapsed":false},"order":11,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","label":"samples","type":"LATENT","link":2030},{"name":"vae","localized_name":"vae","label":"vae","type":"VAE","link":2027}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","label":"IMAGE","type":"IMAGE","shape":3,"links":[2019],"slot_index":0}],"properties":{"Node name for 
S&R":"VAEDecode"},"widgets_values":[]},{"id":633,"type":"SaveImage","pos":[1610,-120],"size":[436.4179382324219,508.5302429199219],"flags":{},"order":12,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2019}],"outputs":[],"properties":{},"widgets_values":["ComfyUI"]},{"id":629,"type":"VAEEncodeAdvanced","pos":[961.6965942382812,242.70477294921875],"size":[278.0284423828125,280.5834045410156],"flags":{},"order":6,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":2017},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":2026}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[2013,2020],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":null},{"name":"mask","localized_name":"mask","type":"MASK","links":null},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[2015]},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"VAEEncodeAdvanced"},"widgets_values":["false",1024,1024,"red",false,"16_channels"]},{"id":634,"type":"ClownGuide_Beta","pos":[1276.0064697265625,-480.84442138671875],"size":[284.860595703125,290.8609924316406],"flags":{},"order":8,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":2020},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[2021],"slot_index":0}],"properties":{"Node 
name for S&R":"ClownGuide_Beta"},"widgets_values":["flow",false,false,1,1,"beta57",0,10,false]},{"id":630,"type":"ClownsharKSampler_Beta","pos":[1271.7001953125,-124.3408432006836],"size":[291.7499084472656,650],"flags":{},"order":10,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":2016},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":2018},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":2029},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":2013},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":2021},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":2032},{"name":"options 2","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2030],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"multistep/res_3m","beta57",30,-1,1,4,0,"fixed","standard",true]},{"id":638,"type":"SharkOptions_GuideCond_Beta","pos":[955.9966430664062,585.7319946289062],"size":[284.5923156738281,98],"flags":{},"order":7,"mode":0,"inputs":[{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":2035},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":2033},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[2032],"slot_index":0}],"properties":{"Node name for 
S&R":"SharkOptions_GuideCond_Beta"},"widgets_values":[4]},{"id":637,"type":"CLIPTextEncode","pos":[962.297607421875,99.93917846679688],"size":[278.4529113769531,88],"flags":{"collapsed":false},"order":4,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","label":"clip","type":"CLIP","link":2028}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","label":"CONDITIONING","type":"CONDITIONING","shape":3,"links":[2029,2033],"slot_index":0}],"title":"Negative Prompt","properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["low quality, low detail, blurry, shallow depth of field, mutated, symmetrical, generic"]},{"id":107,"type":"CLIPTextEncode","pos":[959.4713745117188,-123.3353500366211],"size":[282.33453369140625,173.58438110351562],"flags":{"collapsed":false},"order":3,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","label":"clip","type":"CLIP","link":2024}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","label":"CONDITIONING","type":"CONDITIONING","shape":3,"links":[2018],"slot_index":0}],"title":"Positive Prompt","properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["the mournful lamentations of a female rock singer on stage with chaos behind her, her face screaming her sorrowful refrains the despairing cries of anguished screams howling agonized moans, her pained whispers mournful sighs distant echoes across the smoky stage, fading memories of lost loves, forgotten dreams, shattered hopes, crushed spirits, broken hearts"]},{"id":639,"type":"CLIPTextEncode","pos":[599.5145263671875,565.6756591796875],"size":[315.33026123046875,117.94475555419922],"flags":{"collapsed":false},"order":5,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","label":"clip","type":"CLIP","link":2034}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","label":"CONDITIONING","type":"CONDITIONING","shape":3,"links":[2035],"slot_index":0}],"title":"Positive Prompt","properties":{"Node name 
for S&R":"CLIPTextEncode"},"widgets_values":["illustration of a singing clock with huge teeth in a surreal forest with turquoise mountains and a red and yellow sky, ragged trees and a pool of black oil on the ground, dripping paint oozing off the clock"]},{"id":640,"type":"Note","pos":[246.91494750976562,519.0934448242188],"size":[323.0928649902344,167.39759826660156],"flags":{},"order":2,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["With the \"flow\" mode it is usually beneficial to use the supplemental GuideCond node, which allows you to set conditionings for the guide itself. With \"flow\", the guide changes during the sampling process. Without GuideCond in use, it will default to reusing your main prompt, which may result in some loss of adherence to the guide image.\n\n\"Lure\" is the only other mode that will use GuideCond."],"color":"#432","bgcolor":"#653"}],"links":[[2013,629,0,630,3,"LATENT"],[2015,629,3,632,1,"LATENT"],[2016,632,0,630,0,"MODEL"],[2017,628,0,629,0,"IMAGE"],[2018,107,0,630,1,"CONDITIONING"],[2019,591,0,633,0,"IMAGE"],[2020,629,0,634,0,"LATENT"],[2021,634,0,630,5,"GUIDES"],[2024,636,1,107,0,"CLIP"],[2025,636,0,632,0,"MODEL"],[2026,636,2,629,4,"VAE"],[2027,636,2,591,1,"VAE"],[2028,636,1,637,0,"CLIP"],[2029,637,0,630,2,"CONDITIONING"],[2030,630,0,591,0,"LATENT"],[2032,638,0,630,6,"OPTIONS"],[2033,637,0,638,1,"CONDITIONING"],[2034,636,1,639,0,"CLIP"],[2035,639,0,638,0,"CONDITIONING"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.7985878990923265,"offset":[1119.4904101845082,499.1497204604395]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0},"version":0.4} ================================================ FILE: example_workflows/hidream guide fully_pseudoimplicit.json ================================================ 
{"last_node_id":643,"last_link_id":2036,"nodes":[{"id":628,"type":"LoadImage","pos":[599.166015625,156.38429260253906],"size":[315,314],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2017]},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["ComfyUI_14254_.png","image"]},{"id":632,"type":"ModelSamplingAdvancedResolution","pos":[962.5586547851562,-316.3705139160156],"size":[277.62237548828125,126],"flags":{},"order":7,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":2025},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":2015}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2016],"slot_index":0}],"properties":{"Node name for S&R":"ModelSamplingAdvancedResolution"},"widgets_values":["exponential",1.35,0.85]},{"id":636,"type":"ClownModelLoader","pos":[599.3463745117188,-176.31788635253906],"size":[315,266],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2025],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[2024,2028],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[2026,2027],"slot_index":2}],"properties":{"Node name for 
S&R":"ClownModelLoader"},"widgets_values":["hidream_i1_full_fp8.safetensors","fp8_e4m3fn","clip_l_hidream.safetensors","clip_g_hidream.safetensors","t5xxl_fp8_e4m3fn_scaled.safetensors","llama_3.1_8b_instruct_fp8_scaled.safetensors","hidream","ae.sft"]},{"id":591,"type":"VAEDecode","pos":[1610,-230],"size":[210,46],"flags":{"collapsed":false},"order":9,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","label":"samples","type":"LATENT","link":2030},{"name":"vae","localized_name":"vae","label":"vae","type":"VAE","link":2027}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","label":"IMAGE","type":"IMAGE","shape":3,"links":[2019],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode"},"widgets_values":[]},{"id":633,"type":"SaveImage","pos":[1610,-120],"size":[436.4179382324219,508.5302429199219],"flags":{},"order":10,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2019}],"outputs":[],"properties":{},"widgets_values":["ComfyUI"]},{"id":629,"type":"VAEEncodeAdvanced","pos":[961.6965942382812,242.70477294921875],"size":[278.0284423828125,280.5834045410156],"flags":{},"order":5,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":2017},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":2026}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[2013,2020],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":null},{"name":"mask","localized_name":"mask","type":"MASK","links":null},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[2015]},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","locali
zed_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"VAEEncodeAdvanced"},"widgets_values":["false",1024,1024,"red",false,"16_channels"]},{"id":637,"type":"CLIPTextEncode","pos":[962.297607421875,99.93917846679688],"size":[278.4529113769531,88],"flags":{"collapsed":false},"order":4,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","label":"clip","type":"CLIP","link":2028}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","label":"CONDITIONING","type":"CONDITIONING","shape":3,"links":[2029],"slot_index":0}],"title":"Negative Prompt","properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["low quality, low detail, blurry, shallow depth of field, mutated, symmetrical, generic"]},{"id":107,"type":"CLIPTextEncode","pos":[959.4713745117188,-123.3353500366211],"size":[282.33453369140625,173.58438110351562],"flags":{"collapsed":false},"order":3,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","label":"clip","type":"CLIP","link":2024}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","label":"CONDITIONING","type":"CONDITIONING","shape":3,"links":[2018],"slot_index":0}],"title":"Positive Prompt","properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["the mournful lamentations of a female rock singer on stage with chaos behind her, her face screaming her sorrowful refrains the despairing cries of anguished screams howling agonized moans, her pained whispers mournful sighs distant echoes across the smoky stage, fading memories of lost loves, forgotten dreams, shattered hopes, crushed spirits, broken 
hearts"]},{"id":630,"type":"ClownsharKSampler_Beta","pos":[1271.7001953125,-124.3408432006836],"size":[291.7499084472656,650],"flags":{},"order":8,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":2016},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":2018},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":2029},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":2013},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":2021},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2030],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"fully_implicit/gauss-legendre_2s","beta57",30,-1,1,4,0,"fixed","standard",true]},{"id":634,"type":"ClownGuide_Beta","pos":[1276.0064697265625,-480.84442138671875],"size":[284.860595703125,290.8609924316406],"flags":{},"order":6,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":2020},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[2021],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownGuide_Beta"},"widgets_values":["fully_pseudoimplicit",false,false,0.75,1,"linear_quadratic",0,10,false]},{"id":643,"type":"Note","pos":[1599.7352294921875,-422.8976135253906],"size":[258.39599609375,111.11077880859375],"flags":{},"order":2,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["fully_pseudoimplicit only works with \"fully_implicit\" sampler types. With all others, it will revert automatically to pseudoimplicit.\n\npseudoimplicit may, however, be used with \"fully_implicit\" samplers."],"color":"#432","bgcolor":"#653"}],"links":[[2013,629,0,630,3,"LATENT"],[2015,629,3,632,1,"LATENT"],[2016,632,0,630,0,"MODEL"],[2017,628,0,629,0,"IMAGE"],[2018,107,0,630,1,"CONDITIONING"],[2019,591,0,633,0,"IMAGE"],[2020,629,0,634,0,"LATENT"],[2021,634,0,630,5,"GUIDES"],[2024,636,1,107,0,"CLIP"],[2025,636,0,632,0,"MODEL"],[2026,636,2,629,4,"VAE"],[2027,636,2,591,1,"VAE"],[2028,636,1,637,0,"CLIP"],[2029,637,0,630,2,"CONDITIONING"],[2030,630,0,591,0,"LATENT"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.7985878990923265,"offset":[840.6440644823947,678.3605934631012]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0},"version":0.4} ================================================ FILE: example_workflows/hidream guide lure.json ================================================ {"last_node_id":640,"last_link_id":2035,"nodes":[{"id":628,"type":"LoadImage","pos":[599.166015625,156.38429260253906],"size":[315,314],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2017]},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for 
S&R":"LoadImage"},"widgets_values":["ComfyUI_14254_.png","image"]},{"id":632,"type":"ModelSamplingAdvancedResolution","pos":[962.5586547851562,-316.3705139160156],"size":[277.62237548828125,126],"flags":{},"order":9,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":2025},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":2015}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2016],"slot_index":0}],"properties":{"Node name for S&R":"ModelSamplingAdvancedResolution"},"widgets_values":["exponential",1.35,0.85]},{"id":636,"type":"ClownModelLoader","pos":[599.3463745117188,-176.31788635253906],"size":[315,266],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2025],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[2024,2028,2034],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[2026,2027],"slot_index":2}],"properties":{"Node name for S&R":"ClownModelLoader"},"widgets_values":["hidream_i1_full_fp8.safetensors","fp8_e4m3fn","clip_l_hidream.safetensors","clip_g_hidream.safetensors","t5xxl_fp8_e4m3fn_scaled.safetensors","llama_3.1_8b_instruct_fp8_scaled.safetensors","hidream","ae.sft"]},{"id":591,"type":"VAEDecode","pos":[1610,-230],"size":[210,46],"flags":{"collapsed":false},"order":11,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","label":"samples","type":"LATENT","link":2030},{"name":"vae","localized_name":"vae","label":"vae","type":"VAE","link":2027}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","label":"IMAGE","type":"IMAGE","shape":3,"links":[2019],"slot_index":0}],"properties":{"Node name for 
S&R":"VAEDecode"},"widgets_values":[]},{"id":633,"type":"SaveImage","pos":[1610,-120],"size":[436.4179382324219,508.5302429199219],"flags":{},"order":12,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2019}],"outputs":[],"properties":{},"widgets_values":["ComfyUI"]},{"id":629,"type":"VAEEncodeAdvanced","pos":[961.6965942382812,242.70477294921875],"size":[278.0284423828125,280.5834045410156],"flags":{},"order":6,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":2017},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":2026}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[2013,2020],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":null},{"name":"mask","localized_name":"mask","type":"MASK","links":null},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[2015]},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for 
S&R":"VAEEncodeAdvanced"},"widgets_values":["false",1024,1024,"red",false,"16_channels"]},{"id":630,"type":"ClownsharKSampler_Beta","pos":[1271.7001953125,-124.3408432006836],"size":[291.7499084472656,650],"flags":{},"order":10,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":2016},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":2018},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":2029},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":2013},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":2021},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":2032},{"name":"options 2","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2030],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"multistep/res_3m","beta57",30,-1,1,4,0,"fixed","standard",true]},{"id":638,"type":"SharkOptions_GuideCond_Beta","pos":[955.9966430664062,585.7319946289062],"size":[284.5923156738281,98],"flags":{},"order":7,"mode":0,"inputs":[{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":2035},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":2033},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[2032],"slot_index":0}],"properties":{"Node name for 
S&R":"SharkOptions_GuideCond_Beta"},"widgets_values":[4]},{"id":637,"type":"CLIPTextEncode","pos":[962.297607421875,99.93917846679688],"size":[278.4529113769531,88],"flags":{"collapsed":false},"order":4,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","label":"clip","type":"CLIP","link":2028}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","label":"CONDITIONING","type":"CONDITIONING","shape":3,"links":[2029,2033],"slot_index":0}],"title":"Negative Prompt","properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["low quality, low detail, blurry, shallow depth of field, mutated, symmetrical, generic"]},{"id":107,"type":"CLIPTextEncode","pos":[959.4713745117188,-123.3353500366211],"size":[282.33453369140625,173.58438110351562],"flags":{"collapsed":false},"order":3,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","label":"clip","type":"CLIP","link":2024}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","label":"CONDITIONING","type":"CONDITIONING","shape":3,"links":[2018],"slot_index":0}],"title":"Positive Prompt","properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["the mournful lamentations of a female rock singer on stage with chaos behind her, her face screaming her sorrowful refrains the despairing cries of anguished screams howling agonized moans, her pained whispers mournful sighs distant echoes across the smoky stage, fading memories of lost loves, forgotten dreams, shattered hopes, crushed spirits, broken hearts"]},{"id":639,"type":"CLIPTextEncode","pos":[599.5145263671875,565.6756591796875],"size":[315.33026123046875,117.94475555419922],"flags":{"collapsed":false},"order":5,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","label":"clip","type":"CLIP","link":2034}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","label":"CONDITIONING","type":"CONDITIONING","shape":3,"links":[2035],"slot_index":0}],"title":"Positive Prompt","properties":{"Node name 
for S&R":"CLIPTextEncode"},"widgets_values":["illustration of a singing clock with huge teeth in a surreal forest with turquoise mountains and a red and yellow sky, ragged trees and a pool of black oil on the ground, dripping paint oozing off the clock"]},{"id":640,"type":"Note","pos":[245.6206512451172,517.1527709960938],"size":[323.0928649902344,167.39759826660156],"flags":{},"order":2,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["With the \"flow\" mode it is usually beneficial to use the supplemental GuideCond node, which allows you to set conditionings for the guide itself. With \"flow\", the guide changes during the sampling process. Without GuideCond in use, it will default to reusing your main prompt, which may result in some loss of adherence to the guide image.\n\n\"Lure\" is the only other mode that will use GuideCond."],"color":"#432","bgcolor":"#653"},{"id":634,"type":"ClownGuide_Beta","pos":[1276.0064697265625,-480.84442138671875],"size":[284.860595703125,290.8609924316406],"flags":{},"order":8,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":2020},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[2021],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownGuide_Beta"},"widgets_values":["lure",false,false,1,1,"linear_quadratic",0,13,false]}],"links":[[2013,629,0,630,3,"LATENT"],[2015,629,3,632,1,"LATENT"],[2016,632,0,630,0,"MODEL"],[2017,628,0,629,0,"IMAGE"],[2018,107,0,630,1,"CONDITIONING"],[2019,591,0,633,0,"IMAGE"],[2020,629,0,634,0,"LATENT"],[2021,634,0,630,5,"GUIDES"],[2024,636,1,107,0,"CLIP"],[2025,636,0,632,0,"MODEL"],[2026,636,2,629,4,"VAE"],[2027,636,2,591,1,"VAE"],[2028,636,1,637,0,"CLIP"],[2029,637,0,630,2,"CONDITIONING"],[2030,630,0,591,0,"LATENT"],[2032,638,0,630,6,"OPTIONS"],[2033,637,0,638,1,"CONDITIONING"],[2034,636,1,639,0,"CLIP"],[2035,639,0,638,0,"CONDITIONING"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.7985878990923265,"offset":[1342.694620988285,531.4979770514516]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0},"version":0.4} ================================================ FILE: example_workflows/hidream guide pseudoimplicit.json ================================================ {"last_node_id":641,"last_link_id":2035,"nodes":[{"id":628,"type":"LoadImage","pos":[599.166015625,156.38429260253906],"size":[315,314],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2017]},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["ComfyUI_14254_.png","image"]},{"id":632,"type":"ModelSamplingAdvancedResolution","pos":[962.5586547851562,-316.3705139160156],"size":[277.62237548828125,126],"flags":{},"order":6,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":2025},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":2015}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2016],"slot_index":0}],"properties":{"Node name for 
S&R":"ModelSamplingAdvancedResolution"},"widgets_values":["exponential",1.35,0.85]},{"id":636,"type":"ClownModelLoader","pos":[599.3463745117188,-176.31788635253906],"size":[315,266],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2025],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[2024,2028],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[2026,2027],"slot_index":2}],"properties":{"Node name for S&R":"ClownModelLoader"},"widgets_values":["hidream_i1_full_fp8.safetensors","fp8_e4m3fn","clip_l_hidream.safetensors","clip_g_hidream.safetensors","t5xxl_fp8_e4m3fn_scaled.safetensors","llama_3.1_8b_instruct_fp8_scaled.safetensors","hidream","ae.sft"]},{"id":591,"type":"VAEDecode","pos":[1610,-230],"size":[210,46],"flags":{"collapsed":false},"order":8,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","label":"samples","type":"LATENT","link":2030},{"name":"vae","localized_name":"vae","label":"vae","type":"VAE","link":2027}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","label":"IMAGE","type":"IMAGE","shape":3,"links":[2019],"slot_index":0}],"properties":{"Node name for 
S&R":"VAEDecode"},"widgets_values":[]},{"id":633,"type":"SaveImage","pos":[1610,-120],"size":[436.4179382324219,508.5302429199219],"flags":{},"order":9,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2019}],"outputs":[],"properties":{},"widgets_values":["ComfyUI"]},{"id":629,"type":"VAEEncodeAdvanced","pos":[961.6965942382812,242.70477294921875],"size":[278.0284423828125,280.5834045410156],"flags":{},"order":4,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":2017},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":2026}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[2013,2020],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":null},{"name":"mask","localized_name":"mask","type":"MASK","links":null},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[2015]},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for 
S&R":"VAEEncodeAdvanced"},"widgets_values":["false",1024,1024,"red",false,"16_channels"]},{"id":630,"type":"ClownsharKSampler_Beta","pos":[1271.7001953125,-124.3408432006836],"size":[291.7499084472656,650],"flags":{},"order":7,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":2016},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":2018},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":2029},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":2013},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":2021},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2030],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"multistep/res_3m","beta57",30,-1,1,4,0,"fixed","standard",true]},{"id":637,"type":"CLIPTextEncode","pos":[962.297607421875,99.93917846679688],"size":[278.4529113769531,88],"flags":{"collapsed":false},"order":3,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","label":"clip","type":"CLIP","link":2028}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","label":"CONDITIONING","type":"CONDITIONING","shape":3,"links":[2029],"slot_index":0}],"title":"Negative Prompt","properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["low quality, low detail, blurry, shallow depth of field, mutated, symmetrical, 
generic"]},{"id":107,"type":"CLIPTextEncode","pos":[959.4713745117188,-123.3353500366211],"size":[282.33453369140625,173.58438110351562],"flags":{"collapsed":false},"order":2,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","label":"clip","type":"CLIP","link":2024}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","label":"CONDITIONING","type":"CONDITIONING","shape":3,"links":[2018],"slot_index":0}],"title":"Positive Prompt","properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["the mournful lamentations of of a female rock singer on stage with chaos behind her, her face screaming her sorrowful refrains the despairing cries of anguished screams howling agonized moans, her pained whispers mournful sighs distant echoes across the smoky stage, fading memories of lost loves, forgotten dreams, shattered hopes, crushed spirits, broken hearts"]},{"id":634,"type":"ClownGuide_Beta","pos":[1276.0064697265625,-480.84442138671875],"size":[284.860595703125,290.8609924316406],"flags":{},"order":5,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":2020},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[2021],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownGuide_Beta"},"widgets_values":["pseudoimplicit",false,false,0.1,1,"beta57",0,5,false]}],"links":[[2013,629,0,630,3,"LATENT"],[2015,629,3,632,1,"LATENT"],[2016,632,0,630,0,"MODEL"],[2017,628,0,629,0,"IMAGE"],[2018,107,0,630,1,"CONDITIONING"],[2019,591,0,633,0,"IMAGE"],[2020,629,0,634,0,"LATENT"],[2021,634,0,630,5,"GUIDES"],[2024,636,1,107,0,"CLIP"],[2025,636,0,632,0,"MODEL"],[2026,636,2,629,4,"VAE"],[2027,636,2,591,1,"VAE"],[2028,636,1,637,0,"CLIP"],[2029,637,0,630,2,"CONDITIONING"],[2030,630,0,591,0,"LATENT"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.7985878990923265,"offset":[1182.8926069221118,636.9542766363238]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0},"version":0.4} ================================================ FILE: example_workflows/hidream hires fix.json ================================================ {"last_node_id":1358,"last_link_id":3624,"nodes":[{"id":490,"type":"Reroute","pos":[13130,-70],"size":[75,26],"flags":{},"order":13,"mode":0,"inputs":[{"name":"","type":"*","link":3534}],"outputs":[{"name":"","type":"CLIP","links":[2881,3323],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1317,"type":"ClownModelLoader","pos":[12770,-90],"size":[315,266],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[3539],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[3534],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[3535],"slot_index":2}],"properties":{"Node name for 
S&R":"ClownModelLoader"},"widgets_values":["hidream_i1_full_fp8.safetensors","fp8_e4m3fn_fast","clip_l_hidream.safetensors","clip_g_hidream.safetensors","t5xxl_fp16.safetensors","llama_3.1_8b_instruct_fp8_scaled.safetensors","hidream","ae.sft"]},{"id":7,"type":"VAEEncodeAdvanced","pos":[13253.044921875,283.4559020996094],"size":[261.2217712402344,279.3136901855469],"flags":{},"order":18,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":null},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":18}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":[],"slot_index":1},{"name":"mask","localized_name":"mask","type":"MASK","links":[],"slot_index":2},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[3540],"slot_index":3},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for 
S&R":"VAEEncodeAdvanced","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":["false",1536,768,"red",false,"16_channels"]},{"id":13,"type":"Reroute","pos":[13130,-110],"size":[75,26],"flags":{},"order":12,"mode":0,"inputs":[{"name":"","type":"*","link":3539}],"outputs":[{"name":"","type":"MODEL","links":[3548,3597],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1224,"type":"CLIPTextEncode","pos":[13250,-90],"size":[269.0397644042969,155.65545654296875],"flags":{"collapsed":false},"order":17,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":3323}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[3480,3599],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["a cold war era photograph from 1983 of a group of four friends holding up their hands inside an antique living room in a victorian era mansion"]},{"id":970,"type":"CLIPTextEncode","pos":[13253.0546875,116.28263854980469],"size":[261.8798522949219,111.21334838867188],"flags":{},"order":16,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":2881}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2882,3600],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["blurry, out of focus, shallow depth of field, low quality, bad quality, low detail, mutated, jpeg artifacts, compression 
artifacts,"]},{"id":14,"type":"Reroute","pos":[13130,-30],"size":[75,26],"flags":{},"order":14,"mode":0,"inputs":[{"name":"","type":"*","link":3535}],"outputs":[{"name":"","type":"VAE","links":[18,2696],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1322,"type":"ClownsharkChainsampler_Beta","pos":[14503.9365234375,-99.09358978271484],"size":[281.6568603515625,542.124755859375],"flags":{},"order":24,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":3612},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":3610},{"name":"options 2","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[3550],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"multistep/res_3m",-1,4,"resample",true]},{"id":1350,"type":"ClownOptions_Tile_Beta","pos":[14700,540],"size":[210,82],"flags":{},"order":19,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":3614}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[3615],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownOptions_Tile_Beta"},"widgets_values":[1216,832]},{"id":1351,"type":"ClownOptions_Tile_Beta","pos":[14940,540],"size":[210,82],"flags":{},"order":21,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":3615}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[],"slot_index":0}],"properties":{"Node name for S&R":"ClownOptions_Tile_Beta"},"widgets_values":[1152,896]},{"id":1349,"type":"ClownOptions_Tile_Beta","pos":[14470,540],"size":[210,82],"flags":{},"order":15,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":3616}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[3614],"slot_index":0}],"properties":{"Node name for S&R":"ClownOptions_Tile_Beta"},"widgets_values":[1536,768]},{"id":1352,"type":"ClownOptions_Tile_Beta","pos":[14233.716796875,538.3314819335938],"size":[210,82],"flags":{},"order":1,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[3616],"slot_index":0}],"properties":{"Node name for S&R":"ClownOptions_Tile_Beta"},"widgets_values":[2048,1024]},{"id":1353,"type":"ClownOptions_Tile_Beta","pos":[14232.0498046875,680.947998046875],"size":[210,82],"flags":{},"order":2,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[],"slot_index":0}],"properties":{"Node name for S&R":"ClownOptions_Tile_Beta"},"widgets_values":[-1,-1]},{"id":1354,"type":"Note","pos":[14476.6044921875,675.5231323242188],"size":[258.67279052734375,88],"flags":{},"order":3,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["As with the rest of RES4LYF nodes, \"-1\" means \"go to the end\" or \"max value\". 
In this case, that means \"use full image sizes\". So, the node to the left will be equivalent to the one above."],"color":"#432","bgcolor":"#653"},{"id":907,"type":"ClownsharKSampler_Beta","pos":[13550.5615234375,-92.92960357666016],"size":[301.752197265625,657.727294921875],"flags":{},"order":20,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":3548},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":3480},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":2882},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":3540},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[3618],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":[],"slot_index":1},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":[0.5,"multistep/res_3m","bong_tangent",30,15,1,4,4,"fixed","standard",true]},{"id":1355,"type":"LatentUpscale","pos":[13877.537109375,-92.35859680175781],"size":[286.32501220703125,130],"flags":{},"order":22,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":3618}],"outputs":[{"name":"LATENT","localized_name":"LATENT","type":"LATENT","links":[3619],"slot_index":0}],"properties":{"Node name for 
S&R":"LatentUpscale"},"widgets_values":["nearest-exact",2048,1024,"disabled"]},{"id":1345,"type":"ClownOptions_Tile_Beta","pos":[13953.123046875,285.76708984375],"size":[210,82],"flags":{},"order":4,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[3609,3610],"slot_index":0}],"properties":{"Node name for S&R":"ClownOptions_Tile_Beta"},"widgets_values":[1536,768]},{"id":909,"type":"SaveImage","pos":[14811.001953125,-99.0184555053711],"size":[457.3382263183594,422.2065124511719],"flags":{},"order":26,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2697}],"outputs":[],"properties":{"Node name for S&R":"SaveImage","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["ComfyUI"]},{"id":908,"type":"VAEDecode","pos":[14808.998046875,-201.5235595703125],"size":[140,46],"flags":{},"order":25,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":3550},{"name":"vae","localized_name":"vae","type":"VAE","link":2696}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2697],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[]},{"id":1356,"type":"Note","pos":[12793.412109375,-250.5360870361328],"size":[276.617431640625,88],"flags":{},"order":5,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Want to use this workflow with another model? Just hook up a different model! 
You may need to set CFG = 1.0 if you're going to use a distilled model, such as HiDream Dev (or Fast) or Flux Dev."],"color":"#432","bgcolor":"#653"},{"id":1321,"type":"Note","pos":[12769.740234375,239.9431915283203],"size":[345.97113037109375,161.35496520996094],"flags":{},"order":6,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["There are many samplers to try, but res_2m, res_3m, res_2s, and res_3s are very reliable. If you want to push quality a bit higher in exchange for time, you could even try res_5s.\n\nres_2m and res_3m begin with higher order steps (one res_2s step, and two res_3s steps, respectively) to initialize the sampling process. Ultimately, the result is faster convergence in terms of wall time, as fewer steps end up being necessary."],"color":"#432","bgcolor":"#653"},{"id":1347,"type":"Note","pos":[13505.927734375,-326.1947937011719],"size":[348.3962097167969,172.26731872558594],"flags":{},"order":7,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Connect \"Upscale Latent\" directly to the last chainsampler to skip the iterative refinement steps (which is what implicit steps are: they use the output of a step as the input, then re-run it to refine). They help minimize mutations with a \"hires fix\" workflow like this.\n\n\"rebound\" is the highest quality implicit_type, but is also slightly slower.\n\nYou may also use ClownOptions Cycles instead of ClownOptions Implicit Steps."],"color":"#432","bgcolor":"#653"},{"id":1346,"type":"Note","pos":[13898.4658203125,421.6622314453125],"size":[261.7038269042969,363.83868408203125],"flags":{},"order":8,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["If you use tiled sampling, be sure to choose tile sizes that will need to overlap each other, or you might see seams. 
For example, for 2048x1024, it would be unwise to choose 1024x1024 or 512x512 as your only tile size, as 2048 / 1024 = 1.0, 2048 / 512 = 4.0, etc.\n\nThis workflow will upscale to 2048x1024. 2048 is not divisible by 1536, and 1024 is not divisible by 768, thereofer they will have overlapping areas.\n\nIt's best to pick tile sizes that you know the model is trained at, with which you can generate txt2img without hallucination, doubling, mutations, \"grid\" artifacts, etc.\n\nTiled sampling will be slower, but can prevent drifts in luminosity, hue, artifacts around the edge of the image, and mutations, while reducing VRAM use. However, it can also cause parts of the image to look \"out of sync\". You can alternate tile sizes like shown to the right, which can sometimes help."],"color":"#432","bgcolor":"#653"},{"id":1324,"type":"ClownsharkChainsampler_Beta","pos":[14189.3935546875,-89.69397735595703],"size":[285.5440673828125,552.053955078125],"flags":{},"order":23,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":3597},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":3599},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":3600},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":3619},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":3624},{"name":"options 2","type":"OPTIONS","link":3609},{"name":"options 3","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[3612],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"multistep/res_3m",1,4,"resample",true]},{"id":1325,"type":"ClownOptions_ImplicitSteps_Beta","pos":[13884.9677734375,94.86456298828125],"size":[278.0316467285156,130],"flags":{},"order":9,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[],"slot_index":0}],"properties":{"Node name for S&R":"ClownOptions_ImplicitSteps_Beta"},"widgets_values":["rebound","bongmath",10,0]},{"id":1357,"type":"Note","pos":[14184.4599609375,-302.5225830078125],"size":[305.0502014160156,150.26080322265625],"flags":{},"order":10,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["The sampling will appear to froze for a minute at this node, but it is not actually frozen. Reducing implicit_steps or cycles will speed things up.\n\nIf you are willing to use a slower sampler to improve quality, the biggest bang for your buck will be with this first chainsampler. 
Try changing the sampler_name to res_3s, or gauss-legendre_2s.\n"],"color":"#432","bgcolor":"#653"},{"id":1358,"type":"ClownOptions_Cycles_Beta","pos":[13880.7060546875,-310.925537109375],"size":[280.4444274902344,154],"flags":{},"order":11,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[3624],"slot_index":0}],"properties":{"Node name for S&R":"ClownOptions_Cycles_Beta"},"widgets_values":[6,1,0.5,"none",4]}],"links":[[18,14,0,7,4,"VAE"],[2696,14,0,908,1,"VAE"],[2697,908,0,909,0,"IMAGE"],[2881,490,0,970,0,"CLIP"],[2882,970,0,907,2,"CONDITIONING"],[3323,490,0,1224,0,"CLIP"],[3480,1224,0,907,1,"CONDITIONING"],[3534,1317,1,490,0,"*"],[3535,1317,2,14,0,"*"],[3539,1317,0,13,0,"*"],[3540,7,3,907,3,"LATENT"],[3548,13,0,907,0,"MODEL"],[3550,1322,0,908,0,"LATENT"],[3597,13,0,1324,0,"MODEL"],[3599,1224,0,1324,1,"CONDITIONING"],[3600,970,0,1324,2,"CONDITIONING"],[3609,1345,0,1324,7,"OPTIONS"],[3610,1345,0,1322,6,"OPTIONS"],[3612,1324,0,1322,4,"LATENT"],[3614,1349,0,1350,0,"OPTIONS"],[3615,1350,0,1351,0,"OPTIONS"],[3616,1352,0,1349,0,"OPTIONS"],[3618,907,0,1355,0,"LATENT"],[3619,1355,0,1324,4,"LATENT"],[3624,1358,0,1324,6,"OPTIONS"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.9194342495775452,"offset":[-11744.076730306608,403.1731222243355]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0,"ue_links":[],"VHS_MetadataImage":true,"VHS_KeepIntermediate":true},"version":0.4} ================================================ FILE: example_workflows/hidream regional 3 zones.json ================================================ 
{"last_node_id":612,"last_link_id":1834,"nodes":[{"id":13,"type":"Reroute","pos":[580,-180],"size":[75,26],"flags":{},"order":18,"mode":0,"inputs":[{"name":"","type":"*","link":1611}],"outputs":[{"name":"","type":"MODEL","links":[1395],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":404,"type":"VAELoader","pos":[328.6705627441406,5.664919376373291],"size":[210,58],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"VAE","localized_name":"VAE","type":"VAE","links":[1344],"slot_index":0}],"properties":{"Node name for S&R":"VAELoader"},"widgets_values":["ae.sft"]},{"id":402,"type":"QuadrupleCLIPLoader","pos":[130,-170],"size":[407.7720031738281,130],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[{"name":"CLIP","localized_name":"CLIP","type":"CLIP","links":[1552],"slot_index":0}],"properties":{"Node name for S&R":"QuadrupleCLIPLoader"},"widgets_values":["clip_l_hidream.safetensors","clip_g_hidream.safetensors","t5xxl_fp8_e4m3fn_scaled.safetensors","llama_3.1_8b_instruct_fp8_scaled.safetensors"]},{"id":403,"type":"UNETLoader","pos":[216.5030059814453,-297.7170715332031],"size":[320.7802429199219,82],"flags":{},"order":2,"mode":0,"inputs":[],"outputs":[{"name":"MODEL","localized_name":"MODEL","type":"MODEL","links":[1610],"slot_index":0}],"properties":{"Node name for 
S&R":"UNETLoader"},"widgets_values":["hidream_i1_full_fp8.safetensors","fp8_e4m3fn"]},{"id":14,"type":"Reroute","pos":[580,-100],"size":[75,26],"flags":{},"order":7,"mode":0,"inputs":[{"name":"","type":"*","link":1344}],"outputs":[{"name":"","type":"VAE","links":[18,1328],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":431,"type":"ModelSamplingAdvancedResolution","pos":[695.769287109375,-369.69635009765625],"size":[260.3999938964844,126],"flags":{},"order":20,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1395},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":1398}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1680],"slot_index":0}],"properties":{"Node name for S&R":"ModelSamplingAdvancedResolution"},"widgets_values":["exponential",1.35,0.85]},{"id":490,"type":"Reroute","pos":[580.390380859375,-139.51483154296875],"size":[75,26],"flags":{},"order":8,"mode":0,"inputs":[{"name":"","type":"*","link":1552}],"outputs":[{"name":"","type":"CLIP","links":[1559,1691,1693,1707],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":394,"type":"CLIPTextEncode","pos":[694.6102905273438,168.60507202148438],"size":[264.9925842285156,127.11075592041016],"flags":{},"order":14,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":1559}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[1355],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["bright light, well-lit, daylight, monotone, desaturated, professional photography, blurry, out of focus, shallow depth of field, low quality, bad quality, low detail, mutated, jpeg artifacts, compression 
artifacts,"]},{"id":398,"type":"SaveImage","pos":[1387.6151123046875,-268.26824951171875],"size":[603.7825927734375,598.39404296875],"flags":{},"order":23,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":1329}],"outputs":[],"properties":{},"widgets_values":["ComfyUI"]},{"id":608,"type":"ImageToMask","pos":[478.4993896484375,-645.0528564453125],"size":[210,58],"flags":{},"order":12,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","link":1809}],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[1807],"slot_index":0}],"properties":{"Node name for S&R":"ImageToMask"},"widgets_values":["red"]},{"id":397,"type":"VAEDecode","pos":[1388.41064453125,-374.6264953613281],"size":[210,46],"flags":{},"order":22,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":1815},{"name":"vae","localized_name":"vae","type":"VAE","link":1328}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[1329],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode"},"widgets_values":[]},{"id":605,"type":"LoadImage","pos":[-140,-900],"size":[210,314],"flags":{},"order":3,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[1810],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["pasted/image (446).png","image"]},{"id":603,"type":"LoadImage","pos":[-130,-1280],"size":[210,314],"flags":{},"order":4,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[1811],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["pasted/image 
(444).png","image"]},{"id":7,"type":"VAEEncodeAdvanced","pos":[696.7778930664062,-164.97328186035156],"size":[261.2217712402344,279.3136901855469],"flags":{},"order":13,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":null},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":18}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":[],"slot_index":1},{"name":"mask","localized_name":"mask","type":"MASK","links":[],"slot_index":2},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[1398,1399],"slot_index":3},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"VAEEncodeAdvanced"},"widgets_values":["false",1024,2048,"red",false,"16_channels"]},{"id":540,"type":"CLIPTextEncode","pos":[743.9880981445312,-978.6345825195312],"size":[275.3782653808594,125.7564697265625],"flags":{},"order":17,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":1707}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[1814],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["a charcoal drawing of the top of a 
skyscraper"]},{"id":520,"type":"CLIPTextEncode","pos":[740,-790],"size":[275.3782653808594,125.7564697265625],"flags":{},"order":16,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":1693}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[1813],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["a children's messy crayon drawing of the middle floors of a skyscraper"]},{"id":455,"type":"CLIPTextEncode","pos":[740,-600],"size":[285.3899230957031,125.00720977783203],"flags":{"collapsed":false},"order":15,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":1691}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[1812],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["a close up high quality cinematic color photograph of the base of an office building in a city park in wisconsin"]},{"id":606,"type":"ImageToMask","pos":[484.2362976074219,-962.7913818359375],"size":[210,58],"flags":{},"order":11,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","link":1811}],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[1806],"slot_index":0}],"properties":{"Node name for S&R":"ImageToMask"},"widgets_values":["red"]},{"id":607,"type":"ImageToMask","pos":[478.7450256347656,-798.6764526367188],"size":[210,58],"flags":{},"order":10,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","link":1810}],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[1808],"slot_index":0}],"properties":{"Node name for 
S&R":"ImageToMask"},"widgets_values":["red"]},{"id":604,"type":"LoadImage","pos":[-150,-510],"size":[210,314],"flags":{},"order":5,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[1809],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["pasted/image (445).png","image"]},{"id":401,"type":"ClownsharKSampler_Beta","pos":[1010,-370],"size":[340.55120849609375,666.8208618164062],"flags":{},"order":21,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":1680},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":1834},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":1355},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1399},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[1815],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"multistep/res_3m","beta57",20,-1,1,4,86,"fixed","standard",true]},{"id":533,"type":"ClownRegionalConditioning_ABC","pos":[1087.326904296875,-873.5692138671875],"size":[243.60000610351562,390],"flags":{},"order":19,"mode":0,"inputs":[{"name":"conditioning_A","localized_name":"conditioning_A","type":"CONDITIONING","shape":7,"link":1814},{"name":"conditioning_B","localized_name":"conditioning_B","type":"CONDITIONING","shape":7,"link":1813},{"name":"conditioning_C","localized_name":"conditioning_C","type":"CONDITIONING","shape":7,"link":1812},{"name":"mask_A","localized_name":"mask_A","type":"MASK","shape":7,"link":1806},{"name":"mask_B","localized_name":"mask_B","type":"MASK","shape":7,"link":1808},{"name":"mask_C","localized_name":"mask_C","type":"MASK","shape":7,"link":1807},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"region_bleeds","localized_name":"region_bleeds","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","links":[1834],"slot_index":0}],"properties":{"Node name for S&R":"ClownRegionalConditioning_ABC"},"widgets_values":[-0.9,-0.25,0,"constant",0,-1,"boolean",256,false]},{"id":612,"type":"Note","pos":[159.41253662109375,-707.9190063476562],"size":[210,99.94182586669922],"flags":{},"order":6,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["It is critical that each part of the image is covered by one of these masks."],"color":"#432","bgcolor":"#653"},{"id":512,"type":"ReHiDreamPatcher","pos":[212.8125762939453,-444.52001953125],"size":[320.9115295410156,82],"flags":{},"order":9,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1610}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1611],"slot_index":0}],"properties":{"Node name for 
S&R":"ReHiDreamPatcher"},"widgets_values":["float32",true]}],"links":[[18,14,0,7,4,"VAE"],[1328,14,0,397,1,"VAE"],[1329,397,0,398,0,"IMAGE"],[1344,404,0,14,0,"*"],[1355,394,0,401,2,"CONDITIONING"],[1395,13,0,431,0,"MODEL"],[1398,7,3,431,1,"LATENT"],[1399,7,3,401,3,"LATENT"],[1552,402,0,490,0,"*"],[1559,490,0,394,0,"CLIP"],[1610,403,0,512,0,"MODEL"],[1611,512,0,13,0,"*"],[1680,431,0,401,0,"MODEL"],[1691,490,0,455,0,"CLIP"],[1693,490,0,520,0,"CLIP"],[1707,490,0,540,0,"CLIP"],[1806,606,0,533,3,"MASK"],[1807,608,0,533,5,"MASK"],[1808,607,0,533,4,"MASK"],[1809,604,0,608,0,"IMAGE"],[1810,605,0,607,0,"IMAGE"],[1811,603,0,606,0,"IMAGE"],[1812,455,0,533,2,"CONDITIONING"],[1813,520,0,533,1,"CONDITIONING"],[1814,540,0,533,0,"CONDITIONING"],[1815,401,0,397,0,"LATENT"],[1834,533,0,401,1,"CONDITIONING"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.3109994191500227,"offset":[2330.291089462677,1329.1104989082662]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0},"version":0.4} ================================================ FILE: example_workflows/hidream regional antiblur.json ================================================ {"last_node_id":727,"last_link_id":2103,"nodes":[{"id":13,"type":"Reroute","pos":[1280,-650],"size":[75,26],"flags":{},"order":12,"mode":0,"inputs":[{"name":"","type":"*","link":2098}],"outputs":[{"name":"","type":"MODEL","links":[1967],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":14,"type":"Reroute","pos":[1280,-570],"size":[75,26],"flags":{},"order":10,"mode":0,"inputs":[{"name":"","type":"*","link":2100}],"outputs":[{"name":"","type":"VAE","links":[18,1328],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":398,"type":"SaveImage","pos":[1379.9996337890625,-267.2835998535156],"size":[341.7508850097656,561.0067749023438],"flags":{},"order":21,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":1329}],"outputs":[],"properties":{"Node name for 
S&R":"SaveImage","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["ComfyUI"]},{"id":701,"type":"Note","pos":[80,-520],"size":[342.05950927734375,88],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["I usually just lazily draw masks in Load Image nodes (with some random image loaded), but for the sake of reproducibility, here's another approach."],"color":"#432","bgcolor":"#653"},{"id":712,"type":"Note","pos":[-210,-520],"size":[245.76409912109375,91.6677017211914],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["So long as these masks are all the same size, the regional conditioning nodes will handle resizing to the image size for you."],"color":"#432","bgcolor":"#653"},{"id":676,"type":"InvertMask","pos":[20,-370],"size":[142.42074584960938,26],"flags":{},"order":7,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":2073}],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[2083],"slot_index":0}],"properties":{"Node name for S&R":"InvertMask"},"widgets_values":[]},{"id":662,"type":"CLIPTextEncode","pos":[460,-370],"size":[210,88],"flags":{"collapsed":false},"order":13,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":1939}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2094],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["a woman wearing a red flannel shirt and a cute shark plush blue 
hat"]},{"id":7,"type":"VAEEncodeAdvanced","pos":[719.6110229492188,16.752899169921875],"size":[261.2217712402344,279.3136901855469],"flags":{},"order":16,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":null},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":18}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":[],"slot_index":1},{"name":"mask","localized_name":"mask","type":"MASK","links":[],"slot_index":2},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[1399],"slot_index":3},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"VAEEncodeAdvanced","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":["false",1024,1024,"red",false,"16_channels"]},{"id":710,"type":"MaskPreview","pos":[180,-190],"size":[210,246],"flags":{},"order":17,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":2054}],"outputs":[],"properties":{"Node name for S&R":"MaskPreview"},"widgets_values":[]},{"id":397,"type":"VAEDecode","pos":[1382.3662109375,-374.17059326171875],"size":[210,46],"flags":{},"order":20,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":2096},{"name":"vae","localized_name":"vae","type":"VAE","link":1328}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[1329],"slot_index":0}],"properties":{"Node name for 
S&R":"VAEDecode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[]},{"id":715,"type":"SolidMask","pos":[-220,-370],"size":[210,106],"flags":{},"order":2,"mode":0,"inputs":[],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[2073],"slot_index":0}],"properties":{"Node name for S&R":"SolidMask"},"widgets_values":[1,1024,1024]},{"id":716,"type":"SolidMask","pos":[-220,-220],"size":[210,106],"flags":{},"order":3,"mode":0,"inputs":[],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[2065],"slot_index":0}],"properties":{"Node name for S&R":"SolidMask"},"widgets_values":[1,384,864]},{"id":709,"type":"MaskComposite","pos":[190,-370],"size":[210,126],"flags":{},"order":11,"mode":0,"inputs":[{"name":"destination","localized_name":"destination","type":"MASK","link":2083},{"name":"source","localized_name":"source","type":"MASK","link":2065}],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[2054,2091],"slot_index":0}],"properties":{"Node name for S&R":"MaskComposite"},"widgets_values":[256,160,"add"]},{"id":704,"type":"Note","pos":[101.74818420410156,112.67951965332031],"size":[290.7107238769531,155.35317993164062],"flags":{},"order":4,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["ClownRegionalConditionings:\n\nTry raising or lowering weight, and changing the weight scheduler from beta57 to Karras (weakens more quickly), or to linear quadratic (stronger late).\n\nTry changing region_bleed_start_step (earlier will make the image blend together more), and end_step."],"color":"#432","bgcolor":"#653"},{"id":703,"type":"Note","pos":[423.10699462890625,-96.14085388183594],"size":[241.9689483642578,386.7543640136719],"flags":{},"order":5,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["edge_width also creates some overlap around the edges of the mask.\n\nboolean_masked means that the masked area can \"see\" the rest of the image, but the unmasked area cannot. 
\"boolean\" would mean neither area could see the rest of the image.\n\nTry setting to boolean_unmasked and see what happens!\n\nIf you still have blur, try reducing edge_width (and if you have seams, try increasing it, or setting end_step to something like 20). \n\nAlso verify that you can generate the background prompt alone without blur (if you can't, this won't work). And don't get stuck on one seed.\n\nVaguely human-shaped masks also tend to work better than the blocky one used here."],"color":"#432","bgcolor":"#653"},{"id":725,"type":"ReHiDreamPatcher","pos":[1009.8884887695312,-694.5361328125],"size":[210,82],"flags":{},"order":8,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":2097}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2098],"slot_index":0}],"properties":{"Node name for S&R":"ReHiDreamPatcher"},"widgets_values":["float64",true]},{"id":724,"type":"ClownModelLoader","pos":[660.0880126953125,-695.142333984375],"size":[315,266],"flags":{},"order":6,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2097],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[2099],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[2100],"slot_index":2}],"properties":{"Node name for 
S&R":"ClownModelLoader"},"widgets_values":["hidream_i1_full_fp8.safetensors","fp8_e4m3fn_fast","clip_l_hidream.safetensors","clip_g_hidream.safetensors","t5xxl_fp8_e4m3fn_scaled.safetensors","llama_3.1_8b_instruct_fp8_scaled.safetensors","hidream","ae.sft"]},{"id":722,"type":"ClownRegionalConditioning2","pos":[690,-370],"size":[287.75750732421875,330],"flags":{},"order":18,"mode":0,"inputs":[{"name":"conditioning_masked","localized_name":"conditioning_masked","type":"CONDITIONING","shape":7,"link":2094},{"name":"conditioning_unmasked","localized_name":"conditioning_unmasked","type":"CONDITIONING","shape":7,"link":2093},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":2091},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"region_bleeds","localized_name":"region_bleeds","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","links":[2095],"slot_index":0}],"properties":{"Node name for S&R":"ClownRegionalConditioning2"},"widgets_values":[0.9,0.1,0,"constant",0,-1,"boolean_masked",32,false]},{"id":723,"type":"CLIPTextEncode","pos":[460,-240],"size":[210,88],"flags":{"collapsed":false},"order":14,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":2092}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2093],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["a college 
campus"]},{"id":490,"type":"Reroute","pos":[1280,-610],"size":[75,26],"flags":{},"order":9,"mode":0,"inputs":[{"name":"","type":"*","link":2099}],"outputs":[{"name":"","type":"CLIP","links":[1939,2092,2102],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":727,"type":"CLIPTextEncode","pos":[721.318359375,349.4079895019531],"size":[261.8798522949219,111.21334838867188],"flags":{},"order":15,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":2102}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2103],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["blurry, out of focus, shallow depth of field, low quality, bad quality, low detail, mutated, jpeg artifacts, compression artifacts,"]},{"id":401,"type":"ClownsharKSampler_Beta","pos":[1010,-370],"size":[340.55120849609375,666.8208618164062],"flags":{},"order":19,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":1967},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":2095},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":2103},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1399},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2096],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharKSampler_Beta","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":[0.5,"multistep/res_2m","bong_tangent",30,-1,1,4,0,"fixed","standard",true]}],"links":[[18,14,0,7,4,"VAE"],[1328,14,0,397,1,"VAE"],[1329,397,0,398,0,"IMAGE"],[1399,7,3,401,3,"LATENT"],[1939,490,0,662,0,"CLIP"],[1967,13,0,401,0,"MODEL"],[2054,709,0,710,0,"MASK"],[2065,716,0,709,1,"MASK"],[2073,715,0,676,0,"MASK"],[2083,676,0,709,0,"MASK"],[2091,709,0,722,2,"MASK"],[2092,490,0,723,0,"CLIP"],[2093,723,0,722,1,"CONDITIONING"],[2094,662,0,722,0,"CONDITIONING"],[2095,722,0,401,1,"CONDITIONING"],[2096,401,0,397,0,"LATENT"],[2097,724,0,725,0,"MODEL"],[2098,725,0,13,0,"*"],[2099,724,1,490,0,"*"],[2100,724,2,14,0,"*"],[2102,490,0,727,0,"CLIP"],[2103,727,0,401,2,"CONDITIONING"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.91943424957756,"offset":[1345.3511333682184,704.1505917671295]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0,"ue_links":[],"VHS_MetadataImage":true,"VHS_KeepIntermediate":true},"version":0.4} ================================================ FILE: example_workflows/hidream style antiblur.json ================================================ 
{"last_node_id":742,"last_link_id":2119,"nodes":[{"id":13,"type":"Reroute","pos":[1280,-650],"size":[75,26],"flags":{},"order":7,"mode":0,"inputs":[{"name":"","type":"*","link":2115}],"outputs":[{"name":"","type":"MODEL","links":[1967],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":490,"type":"Reroute","pos":[1280,-610],"size":[75,26],"flags":{},"order":5,"mode":0,"inputs":[{"name":"","type":"*","link":2116}],"outputs":[{"name":"","type":"CLIP","links":[1939,2119],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":14,"type":"Reroute","pos":[1280,-570],"size":[75,26],"flags":{},"order":6,"mode":0,"inputs":[{"name":"","type":"*","link":2117}],"outputs":[{"name":"","type":"VAE","links":[18,1328],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":398,"type":"SaveImage","pos":[1379.9996337890625,-267.2835998535156],"size":[341.7508850097656,561.0067749023438],"flags":{},"order":14,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":1329}],"outputs":[],"properties":{"Node name for S&R":"SaveImage","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["ComfyUI"]},{"id":397,"type":"VAEDecode","pos":[1382.3662109375,-374.17059326171875],"size":[210,46],"flags":{},"order":13,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":2096},{"name":"vae","localized_name":"vae","type":"VAE","link":1328}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[1329],"slot_index":0}],"properties":{"Node name for 
S&R":"VAEDecode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[]},{"id":7,"type":"VAEEncodeAdvanced","pos":[412.2475280761719,-199.0681915283203],"size":[261.2217712402344,279.3136901855469],"flags":{},"order":10,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":2113},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":18}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[2100],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":[],"slot_index":1},{"name":"mask","localized_name":"mask","type":"MASK","links":[],"slot_index":2},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[1399],"slot_index":3},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"VAEEncodeAdvanced","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":["false",1024,1024,"red",false,"16_channels"]},{"id":662,"type":"CLIPTextEncode","pos":[761.3005981445312,-357.2689208984375],"size":[210,102.54972839355469],"flags":{"collapsed":false},"order":8,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":1939}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2098],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["a woman wearing a red flannel shirt and a cute shark plush blue hat, a college campus, brick 
buildings"]},{"id":727,"type":"Note","pos":[412.8926086425781,-351.8606872558594],"size":[272.4425048828125,88],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["This approach can be combined with the regional conditioning anti-blur approach for an even more powerful effect."],"color":"#432","bgcolor":"#653"},{"id":724,"type":"ClownGuide_Style_Beta","pos":[703.7374267578125,-198.63233947753906],"size":[262.8634033203125,286],"flags":{},"order":11,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":2100},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[2099],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuide_Style_Beta"},"widgets_values":["positive","WCT",1,1,"constant",0,10,false]},{"id":739,"type":"LoadImage","pos":[70.82455444335938,-201.66342163085938],"size":[315,314],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2113],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["pasted/image (655).png","image"]},{"id":741,"type":"ReHiDreamPatcher","pos":[1000,-680],"size":[210,82],"flags":{},"order":4,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":2114}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2115],"slot_index":0}],"properties":{"Node name for 
S&R":"ReHiDreamPatcher"},"widgets_values":["float64",true]},{"id":740,"type":"ClownModelLoader","pos":[650,-680],"size":[315,266],"flags":{},"order":2,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2114],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[2116],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[2117],"slot_index":2}],"properties":{"Node name for S&R":"ClownModelLoader"},"widgets_values":["hidream_i1_full_fp8.safetensors","fp8_e4m3fn_fast","clip_l_hidream.safetensors","clip_g_hidream.safetensors","t5xxl_fp8_e4m3fn_scaled.safetensors","llama_3.1_8b_instruct_fp8_scaled.safetensors","hidream","ae.sft"]},{"id":401,"type":"ClownsharKSampler_Beta","pos":[1010,-370],"size":[340.55120849609375,666.8208618164062],"flags":{},"order":12,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":1967},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":2098},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":2118},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1399},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":2099},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2096],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharKSampler_Beta","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":[0.5,"multistep/res_2m","bong_tangent",30,-1,1,4,7,"fixed","standard",true]},{"id":742,"type":"CLIPTextEncode","pos":[703.5707397460938,144.26979064941406],"size":[261.8798522949219,111.21334838867188],"flags":{},"order":9,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":2119}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2118],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["blurry, out of focus, shallow depth of field, low quality, bad quality, low detail, mutated, jpeg artifacts, compression artifacts,"]},{"id":726,"type":"Note","pos":[305.74163818359375,169.59754943847656],"size":[364.5906677246094,164.38613891601562],"flags":{},"order":3,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["The best style guide images will share the lighting and color composition of your desired scene. Some are just inexplicably ineffective at killing blur. Just gather up a bunch of images to try, you'll find some good ones that can be reused for many things. I'm including the one used here in the example_workflows directory, be sure to check for it.\n\nAnd don't forget to change seeds. Don't optimize for one seed only. Don't get stuck on one seed! 
Sometimes one is just not going to work out for whatever you're doing."],"color":"#432","bgcolor":"#653"}],"links":[[18,14,0,7,4,"VAE"],[1328,14,0,397,1,"VAE"],[1329,397,0,398,0,"IMAGE"],[1399,7,3,401,3,"LATENT"],[1939,490,0,662,0,"CLIP"],[1967,13,0,401,0,"MODEL"],[2096,401,0,397,0,"LATENT"],[2098,662,0,401,1,"CONDITIONING"],[2099,724,0,401,5,"GUIDES"],[2100,7,0,724,0,"LATENT"],[2113,739,0,7,0,"IMAGE"],[2114,740,0,741,0,"MODEL"],[2115,741,0,13,0,"*"],[2116,740,1,490,0,"*"],[2117,740,2,14,0,"*"],[2118,742,0,401,2,"CONDITIONING"],[2119,490,0,742,0,"CLIP"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.7449402268886909,"offset":[1731.8135682982838,807.2501654184575]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0,"ue_links":[],"VHS_MetadataImage":true,"VHS_KeepIntermediate":true},"version":0.4} ================================================ FILE: example_workflows/hidream style transfer txt2img.json ================================================ {"last_node_id":1385,"last_link_id":3733,"nodes":[{"id":13,"type":"Reroute","pos":[13508.9013671875,-109.2831802368164],"size":[75,26],"flags":{},"order":17,"mode":0,"inputs":[{"name":"","type":"*","link":3686}],"outputs":[{"name":"","type":"MODEL","links":[1395],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":14,"type":"Reroute","pos":[13508.9013671875,-29.283178329467773],"size":[75,26],"flags":{},"order":14,"mode":0,"inputs":[{"name":"","type":"*","link":3671}],"outputs":[{"name":"","type":"VAE","links":[18,2696],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":490,"type":"Reroute","pos":[13508.9013671875,-69.28317260742188],"size":[75,26],"flags":{},"order":13,"mode":0,"inputs":[{"name":"","type":"*","link":3670}],"outputs":[{"name":"","type":"CLIP","links":[2881,3581],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1363,"type":"ReHiDreamPatcher","pos":[13268.9013671875,-109.2831802368164],"size":[210,82],"flags":{}
,"order":12,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":3685}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[3686],"slot_index":0}],"properties":{"Node name for S&R":"ReHiDreamPatcher"},"widgets_values":["float64",true]},{"id":981,"type":"ClownsharkChainsampler_Beta","pos":[14758.255859375,-64.39308166503906],"size":[340.20001220703125,510],"flags":{},"order":29,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":3698},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[3469],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"exponential/res_2s",-1,4,"resample",true]},{"id":1318,"type":"ClownGuide_Beta","pos":[13828.255859375,675.60693359375],"size":[263.102783203125,290],"flags":{},"order":24,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":3710},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[3699,3708],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownGuide_Beta"},"widgets_values":["inversion",false,false,0.7,1,"constant",0,-1,false]},{"id":1333,"type":"CLIPTextEncode","pos":[13688.255859375,-44.393089294433594],"size":[280.6252746582031,164.06936645507812],"flags":{"collapsed":false},"order":19,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":3581}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[3602,3626],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["messy blackboard chalk drawing of the inside of a car driving down a creepy road. colorful chalk with shading that shows the chalk textures from drawing with the side of the chalk\n"]},{"id":1358,"type":"ClownModelLoader","pos":[12828.9013671875,-299.2831726074219],"size":[341.7054443359375,266],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[3685],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[3670],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[3671],"slot_index":2}],"properties":{"Node name for S&R":"ClownModelLoader"},"widgets_values":["hidream_i1_full_fp8.safetensors","fp8_e4m3fn_fast","t5xxl_fp8_e4m3fn_scaled.safetensors","llama_3.1_8b_instruct_fp8_scaled.safetensors","clip_g_hidream.safetensors","clip_l_hidream.safetensors","hidream","ae.sft"]},{"id":431,"type":"ModelSamplingAdvancedResolution","pos":[13218.9013671875,-309.28314208984375],"size":[260.3999938964844,126],"flags":{},"order":25,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1395},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":1398}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2692],"slot_index":0}],"properties":{"Node name for 
S&R":"ModelSamplingAdvancedResolution","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":["exponential",1.35,0.85]},{"id":970,"type":"CLIPTextEncode","pos":[13688.255859375,165.60690307617188],"size":[281.9206848144531,109.87118530273438],"flags":{},"order":18,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":2881}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2882,3627],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["blurry, out of focus, shallow depth of field, jpeg artifacts, low quality, bad quality, unsharp"]},{"id":907,"type":"ClownsharKSampler_Beta","pos":[14008.255859375,-64.39308166503906],"size":[340.55120849609375,666.8208618164062],"flags":{},"order":27,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":2692},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":3602},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":2882},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":2983},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":3708},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[3578],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharKSampler_Beta","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":[0.5,"multistep/res_2m","beta57",20,11,1,1,201,"fixed","unsample",true]},{"id":980,"type":"ClownsharkChainsampler_Beta","pos":[14378.255859375,-64.39308166503906],"size":[340.20001220703125,570],"flags":{},"order":28,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":3626},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":3627},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":3578},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":3604},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":3533},{"name":"options 2","type":"OPTIONS","link":3707},{"name":"options 3","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[3698],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"exponential/res_3s_non-monotonic",1,4,"resample",true]},{"id":1317,"type":"ClownOptions_Cycles_Beta","pos":[14408.255859375,-294.3930969238281],"size":[265.2884826660156,178],"flags":{},"order":1,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[3533],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownOptions_Cycles_Beta"},"widgets_values":[10,1,0.5,"none",-1,4]},{"id":1373,"type":"LoadImage","pos":[12848.2666015625,531.6068115234375],"size":[315,314],"flags":{},"order":2,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3721],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"title":"Load Image (Composition)","properties":{"Node name for S&R":"LoadImage"},"widgets_values":["pasted/image (476).png","image"]},{"id":1374,"type":"LoadImage","pos":[12838.2666015625,171.6068115234375],"size":[315,314],"flags":{},"order":3,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3725],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"title":"Load Image (Style Guide)","properties":{"Node name for S&R":"LoadImage"},"widgets_values":["ComfyUI_14627_.png","image"]},{"id":1378,"type":"Reroute","pos":[13184.07421875,533.128662109375],"size":[75,26],"flags":{},"order":15,"mode":0,"inputs":[{"name":"","type":"*","link":3721}],"outputs":[{"name":"","type":"IMAGE","links":[3724,3729],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1379,"type":"Reroute","pos":[13185.853515625,168.15780639648438],"size":[75,26],"flags":{},"order":16,"mode":0,"inputs":[{"name":"","type":"*","link":3725}],"outputs":[{"name":"","type":"IMAGE","links":[3726],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":909,"type":"SaveImage","pos":[15220,-259.5838928222656],"size":[457.3382263183594,422.2065124511719],"flags":{},"order":31,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2697}],"outputs":[],"properties":{"Node name for 
S&R":"SaveImage","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["ComfyUI"]},{"id":1362,"type":"PreviewImage","pos":[13317.849609375,617.1558837890625],"size":[210,246],"flags":{},"order":22,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":3682}],"outputs":[],"properties":{"Node name for S&R":"PreviewImage"},"widgets_values":[]},{"id":1350,"type":"ColorMatch","pos":[13709.701171875,316.05731201171875],"size":[210,102],"flags":{"collapsed":false},"order":21,"mode":0,"inputs":[{"name":"image_ref","localized_name":"image_ref","type":"IMAGE","link":3728},{"name":"image_target","localized_name":"image_target","type":"IMAGE","link":3724}],"outputs":[{"name":"image","localized_name":"image","type":"IMAGE","links":[3682,3688],"slot_index":0}],"properties":{"Node name for S&R":"ColorMatch"},"widgets_values":["mkl",1]},{"id":7,"type":"VAEEncodeAdvanced","pos":[13343.19140625,556.8784790039062],"size":[261.2217712402344,298],"flags":{"collapsed":true},"order":23,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":3688},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":3727},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":18},{"name":"width","type":"INT","pos":[10,160.00003051757812],"widget":{"name":"width"},"link":3732},{"name":"height","type":"INT","pos":[10,184.00003051757812],"widget":{"name":"height"},"link":3733}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[2983,3710],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":[3709],"slot_index":1},{"name":"mask","localized_name":"mask","type":"MASK","links":[],"slot_index":2},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[1398],"slot_i
ndex":3},{"name":"width","localized_name":"width","type":"INT","links":[],"slot_index":4},{"name":"height","localized_name":"height","type":"INT","links":[],"slot_index":5}],"properties":{"Node name for S&R":"VAEEncodeAdvanced","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":["false",1344,768,"red",false,"16_channels"]},{"id":1371,"type":"Image Repeat Tile To Size","pos":[13329.5947265625,497.8262939453125],"size":[210,146],"flags":{"collapsed":true},"order":20,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","link":3726},{"name":"width","type":"INT","pos":[10,36],"widget":{"name":"width"},"link":3730},{"name":"height","type":"INT","pos":[10,60],"widget":{"name":"height"},"link":3731}],"outputs":[{"name":"image","localized_name":"image","type":"IMAGE","links":[3727,3728],"slot_index":0}],"properties":{"Node name for S&R":"Image Repeat Tile To Size"},"widgets_values":[1024,1024,true]},{"id":1380,"type":"SetImageSize","pos":[13324.7197265625,323.0480041503906],"size":[210,102],"flags":{},"order":4,"mode":0,"inputs":[],"outputs":[{"name":"width","localized_name":"width","type":"INT","links":[3730,3732],"slot_index":0},{"name":"height","localized_name":"height","type":"INT","links":[3731,3733],"slot_index":1}],"properties":{"Node name for S&R":"SetImageSize"},"widgets_values":[1344,768]},{"id":1377,"type":"Image Comparer 
(rgthree)","pos":[15742.4619140625,-253.3526153564453],"size":[461.9190368652344,413.5953369140625],"flags":{},"order":32,"mode":0,"inputs":[{"name":"image_a","type":"IMAGE","dir":3,"link":3720},{"name":"image_b","type":"IMAGE","dir":3,"link":3729}],"outputs":[],"properties":{"comparer_mode":"Slide"},"widgets_values":[[{"name":"A","selected":true,"url":"/api/view?filename=rgthree.compare._temp_pzczy_00003_.png&type=temp&subfolder=&rand=0.543351218901418"},{"name":"B","selected":true,"url":"/api/view?filename=rgthree.compare._temp_pzczy_00004_.png&type=temp&subfolder=&rand=0.38178761627111313"}]]},{"id":908,"type":"VAEDecode","pos":[15217.7802734375,-312.1965637207031],"size":[210,46],"flags":{"collapsed":true},"order":30,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":3469},{"name":"vae","localized_name":"vae","type":"VAE","link":2696}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2697,3720],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[]},{"id":1376,"type":"Note","pos":[13703.0439453125,536.6895751953125],"size":[261.9539489746094,88],"flags":{},"order":5,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Increase or decrease weight in ClownGuide to alter adherence to the input image."],"color":"#432","bgcolor":"#653"},{"id":1383,"type":"Note","pos":[14428.40234375,580.1749877929688],"size":[261.9539489746094,88],"flags":{},"order":6,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Samplers like res_2s in this cycling node will also work and are faster. 
res_2m and res_3m are even faster, but sometimes the effect takes longer in wall time to fully kick in."],"color":"#432","bgcolor":"#653"},{"id":1384,"type":"Note","pos":[14793.0322265625,518.4120483398438],"size":[261.9539489746094,88],"flags":{},"order":7,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["res_2m or res_3m can be used here instead and are faster, but are less likely to fully clean up lingering artifacts."],"color":"#432","bgcolor":"#653"},{"id":1328,"type":"ClownOptions_SDE_Beta","pos":[14186.4755859375,-132.6126251220703],"size":[315,266],"flags":{"collapsed":true},"order":8,"mode":0,"inputs":[{"name":"etas","localized_name":"etas","type":"SIGMAS","shape":7,"link":null},{"name":"etas_substep","localized_name":"etas_substep","type":"SIGMAS","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[3707],"slot_index":0}],"properties":{"Node name for S&R":"ClownOptions_SDE_Beta"},"widgets_values":["gaussian","gaussian","hard","hard",0.5,0.75,-1,"fixed"]},{"id":1381,"type":"Note","pos":[13881.6279296875,-217.62835693359375],"size":[261.9539489746094,88],"flags":{},"order":9,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Increase or decrease \"steps_to_run\" in ClownsharKSampler to change the effective denoise level."],"color":"#432","bgcolor":"#653"},{"id":1382,"type":"Note","pos":[14718.0498046875,-295.4144592285156],"size":[268.1851806640625,124.49711608886719],"flags":{},"order":10,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Increasing cycles will increase the amount of change, but take longer.\n\nCycles will rerun the same step over and over, forwards and backwards, iteratively refining an image at a controlled noise 
level."],"color":"#432","bgcolor":"#653"},{"id":1308,"type":"ClownGuide_Style_Beta","pos":[14108.255859375,675.60693359375],"size":[246.31312561035156,286],"flags":{},"order":26,"mode":4,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":3709},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":3699}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[3604],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuide_Style_Beta"},"widgets_values":["positive","WCT",1,1,"constant",0,-1,false]},{"id":1385,"type":"Note","pos":[14396.5634765625,742.3948364257812],"size":[261.9539489746094,88],"flags":{},"order":11,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["method = AdaIN is faster and uses less memory, but is less accurate. 
Some prefer the effect."],"color":"#432","bgcolor":"#653"}],"links":[[18,14,0,7,4,"VAE"],[1395,13,0,431,0,"MODEL"],[1398,7,3,431,1,"LATENT"],[2692,431,0,907,0,"MODEL"],[2696,14,0,908,1,"VAE"],[2697,908,0,909,0,"IMAGE"],[2881,490,0,970,0,"CLIP"],[2882,970,0,907,2,"CONDITIONING"],[2983,7,0,907,3,"LATENT"],[3469,981,0,908,0,"LATENT"],[3533,1317,0,980,6,"OPTIONS"],[3578,907,0,980,4,"LATENT"],[3581,490,0,1333,0,"CLIP"],[3602,1333,0,907,1,"CONDITIONING"],[3604,1308,0,980,5,"GUIDES"],[3626,1333,0,980,1,"CONDITIONING"],[3627,970,0,980,2,"CONDITIONING"],[3670,1358,1,490,0,"*"],[3671,1358,2,14,0,"*"],[3682,1350,0,1362,0,"IMAGE"],[3685,1358,0,1363,0,"MODEL"],[3686,1363,0,13,0,"*"],[3688,1350,0,7,0,"IMAGE"],[3698,980,0,981,4,"LATENT"],[3699,1318,0,1308,3,"GUIDES"],[3707,1328,0,980,7,"OPTIONS"],[3708,1318,0,907,5,"GUIDES"],[3709,7,1,1308,0,"LATENT"],[3710,7,0,1318,0,"LATENT"],[3720,908,0,1377,0,"IMAGE"],[3721,1373,0,1378,0,"*"],[3724,1378,0,1350,1,"IMAGE"],[3725,1374,0,1379,0,"*"],[3726,1379,0,1371,0,"IMAGE"],[3727,1371,0,7,1,"IMAGE"],[3728,1371,0,1350,0,"IMAGE"],[3729,1378,0,1377,1,"IMAGE"],[3730,1380,0,1371,1,"INT"],[3731,1380,1,1371,2,"INT"],[3732,1380,0,7,5,"INT"],[3733,1380,1,7,6,"INT"]],"groups":[{"id":1,"title":"Model Loaders","bounding":[12796.72265625,-401.9004211425781,822.762451171875,436.0693359375],"color":"#3f789e","font_size":24,"flags":{}},{"id":2,"title":"Sampling","bounding":[13652.6533203125,-402.70721435546875,1470.8076171875,1409.0289306640625],"color":"#3f789e","font_size":24,"flags":{}},{"id":3,"title":"Input Prep","bounding":[12797.1396484375,77.69412231445312,817.4218139648438,820.6239624023438],"color":"#3f789e","font_size":24,"flags":{}},{"id":4,"title":"Save and 
Compare","bounding":[15180.705078125,-399.09112548828125,1050.6468505859375,615.8845825195312],"color":"#3f789e","font_size":24,"flags":{}}],"config":{},"extra":{"ds":{"scale":1.3072020475058237,"offset":[-11012.049075449982,623.0809311059861]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0,"ue_links":[],"VHS_MetadataImage":true,"VHS_KeepIntermediate":true},"version":0.4} ================================================ FILE: example_workflows/hidream style transfer v2.json ================================================ {"last_node_id":1385,"last_link_id":3733,"nodes":[{"id":13,"type":"Reroute","pos":[13508.9013671875,-109.2831802368164],"size":[75,26],"flags":{},"order":17,"mode":0,"inputs":[{"name":"","type":"*","link":3686}],"outputs":[{"name":"","type":"MODEL","links":[1395],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":14,"type":"Reroute","pos":[13508.9013671875,-29.283178329467773],"size":[75,26],"flags":{},"order":14,"mode":0,"inputs":[{"name":"","type":"*","link":3671}],"outputs":[{"name":"","type":"VAE","links":[18,2696],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":490,"type":"Reroute","pos":[13508.9013671875,-69.28317260742188],"size":[75,26],"flags":{},"order":13,"mode":0,"inputs":[{"name":"","type":"*","link":3670}],"outputs":[{"name":"","type":"CLIP","links":[2881,3581],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1363,"type":"ReHiDreamPatcher","pos":[13268.9013671875,-109.2831802368164],"size":[210,82],"flags":{},"order":12,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":3685}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[3686],"slot_index":0}],"properties":{"Node name for 
S&R":"ReHiDreamPatcher"},"widgets_values":["float64",true]},{"id":981,"type":"ClownsharkChainsampler_Beta","pos":[14758.255859375,-64.39308166503906],"size":[340.20001220703125,510],"flags":{},"order":29,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":3698},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[3469],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"exponential/res_2s",-1,4,"resample",true]},{"id":1308,"type":"ClownGuide_Style_Beta","pos":[14108.255859375,675.60693359375],"size":[246.31312561035156,286],"flags":{},"order":26,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":3709},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":3699}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[3604],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownGuide_Style_Beta"},"widgets_values":["positive","WCT",1,1,"constant",0,-1,false]},{"id":1318,"type":"ClownGuide_Beta","pos":[13828.255859375,675.60693359375],"size":[263.102783203125,290],"flags":{},"order":24,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":3710},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[3699,3708],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuide_Beta"},"widgets_values":["inversion",false,false,0.7,1,"constant",0,-1,false]},{"id":1333,"type":"CLIPTextEncode","pos":[13688.255859375,-44.393089294433594],"size":[280.6252746582031,164.06936645507812],"flags":{"collapsed":false},"order":19,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":3581}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[3602,3626],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["messy blackboard chalk drawing of the inside of a car driving down a creepy road. 
colorful chalk with shading that shows the chalk textures from drawing with the side of the chalk\n"]},{"id":1358,"type":"ClownModelLoader","pos":[12828.9013671875,-299.2831726074219],"size":[341.7054443359375,266],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[3685],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[3670],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[3671],"slot_index":2}],"properties":{"Node name for S&R":"ClownModelLoader"},"widgets_values":["hidream_i1_full_fp8.safetensors","fp8_e4m3fn_fast","t5xxl_fp8_e4m3fn_scaled.safetensors","llama_3.1_8b_instruct_fp8_scaled.safetensors","clip_g_hidream.safetensors","clip_l_hidream.safetensors","hidream","ae.sft"]},{"id":431,"type":"ModelSamplingAdvancedResolution","pos":[13218.9013671875,-309.28314208984375],"size":[260.3999938964844,126],"flags":{},"order":25,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1395},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":1398}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2692],"slot_index":0}],"properties":{"Node name for S&R":"ModelSamplingAdvancedResolution","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":["exponential",1.35,0.85]},{"id":970,"type":"CLIPTextEncode","pos":[13688.255859375,165.60690307617188],"size":[281.9206848144531,109.87118530273438],"flags":{},"order":18,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":2881}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2882,3627],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["blurry, out of focus, shallow depth of field, jpeg artifacts, low quality, bad quality, 
unsharp"]},{"id":907,"type":"ClownsharKSampler_Beta","pos":[14008.255859375,-64.39308166503906],"size":[340.55120849609375,666.8208618164062],"flags":{},"order":27,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":2692},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":3602},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":2882},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":2983},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":3708},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[3578],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharKSampler_Beta","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":[0.5,"multistep/res_2m","beta57",20,11,1,1,201,"fixed","unsample",true]},{"id":980,"type":"ClownsharkChainsampler_Beta","pos":[14378.255859375,-64.39308166503906],"size":[340.20001220703125,570],"flags":{},"order":28,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":3626},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":3627},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":3578},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":3604},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":3533},{"name":"options 2","type":"OPTIONS","link":3707},{"name":"options 3","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[3698],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"exponential/res_3s_non-monotonic",1,4,"resample",true]},{"id":1317,"type":"ClownOptions_Cycles_Beta","pos":[14408.255859375,-294.3930969238281],"size":[265.2884826660156,178],"flags":{},"order":2,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[3533],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownOptions_Cycles_Beta"},"widgets_values":[10,1,0.5,"none",-1,4]},{"id":1373,"type":"LoadImage","pos":[12848.2666015625,531.6068115234375],"size":[315,314],"flags":{},"order":3,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3721],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"title":"Load Image (Composition)","properties":{"Node name for S&R":"LoadImage"},"widgets_values":["pasted/image (476).png","image"]},{"id":1374,"type":"LoadImage","pos":[12838.2666015625,171.6068115234375],"size":[315,314],"flags":{},"order":4,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3725],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"title":"Load Image (Style Guide)","properties":{"Node name for S&R":"LoadImage"},"widgets_values":["ComfyUI_14627_.png","image"]},{"id":1378,"type":"Reroute","pos":[13184.07421875,533.128662109375],"size":[75,26],"flags":{},"order":15,"mode":0,"inputs":[{"name":"","type":"*","link":3721}],"outputs":[{"name":"","type":"IMAGE","links":[3724,3729],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1379,"type":"Reroute","pos":[13185.853515625,168.15780639648438],"size":[75,26],"flags":{},"order":16,"mode":0,"inputs":[{"name":"","type":"*","link":3725}],"outputs":[{"name":"","type":"IMAGE","links":[3726],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":909,"type":"SaveImage","pos":[15220,-259.5838928222656],"size":[457.3382263183594,422.2065124511719],"flags":{},"order":31,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2697}],"outputs":[],"properties":{"Node name for 
S&R":"SaveImage","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["ComfyUI"]},{"id":1362,"type":"PreviewImage","pos":[13317.849609375,617.1558837890625],"size":[210,246],"flags":{},"order":22,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":3682}],"outputs":[],"properties":{"Node name for S&R":"PreviewImage"},"widgets_values":[]},{"id":1350,"type":"ColorMatch","pos":[13709.701171875,316.05731201171875],"size":[210,102],"flags":{"collapsed":false},"order":21,"mode":0,"inputs":[{"name":"image_ref","localized_name":"image_ref","type":"IMAGE","link":3728},{"name":"image_target","localized_name":"image_target","type":"IMAGE","link":3724}],"outputs":[{"name":"image","localized_name":"image","type":"IMAGE","links":[3682,3688],"slot_index":0}],"properties":{"Node name for S&R":"ColorMatch"},"widgets_values":["mkl",1]},{"id":7,"type":"VAEEncodeAdvanced","pos":[13343.19140625,556.8784790039062],"size":[261.2217712402344,298],"flags":{"collapsed":true},"order":23,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":3688},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":3727},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":18},{"name":"width","type":"INT","pos":[10,160.00003051757812],"widget":{"name":"width"},"link":3732},{"name":"height","type":"INT","pos":[10,184.00003051757812],"widget":{"name":"height"},"link":3733}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[2983,3710],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":[3709],"slot_index":1},{"name":"mask","localized_name":"mask","type":"MASK","links":[],"slot_index":2},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[1398],"slot_i
ndex":3},{"name":"width","localized_name":"width","type":"INT","links":[],"slot_index":4},{"name":"height","localized_name":"height","type":"INT","links":[],"slot_index":5}],"properties":{"Node name for S&R":"VAEEncodeAdvanced","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":["false",1344,768,"red",false,"16_channels"]},{"id":1371,"type":"Image Repeat Tile To Size","pos":[13329.5947265625,497.8262939453125],"size":[210,146],"flags":{"collapsed":true},"order":20,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","link":3726},{"name":"width","type":"INT","pos":[10,36],"widget":{"name":"width"},"link":3730},{"name":"height","type":"INT","pos":[10,60],"widget":{"name":"height"},"link":3731}],"outputs":[{"name":"image","localized_name":"image","type":"IMAGE","links":[3727,3728],"slot_index":0}],"properties":{"Node name for S&R":"Image Repeat Tile To Size"},"widgets_values":[1024,1024,true]},{"id":1380,"type":"SetImageSize","pos":[13324.7197265625,323.0480041503906],"size":[210,102],"flags":{},"order":5,"mode":0,"inputs":[],"outputs":[{"name":"width","localized_name":"width","type":"INT","links":[3730,3732],"slot_index":0},{"name":"height","localized_name":"height","type":"INT","links":[3731,3733],"slot_index":1}],"properties":{"Node name for S&R":"SetImageSize"},"widgets_values":[1344,768]},{"id":1377,"type":"Image Comparer 
(rgthree)","pos":[15742.4619140625,-253.3526153564453],"size":[461.9190368652344,413.5953369140625],"flags":{},"order":32,"mode":0,"inputs":[{"name":"image_a","type":"IMAGE","dir":3,"link":3720},{"name":"image_b","type":"IMAGE","dir":3,"link":3729}],"outputs":[],"properties":{"comparer_mode":"Slide"},"widgets_values":[[{"name":"A","selected":true,"url":"/api/view?filename=rgthree.compare._temp_pzczy_00001_.png&type=temp&subfolder=&rand=0.2568823425587843"},{"name":"B","selected":true,"url":"/api/view?filename=rgthree.compare._temp_pzczy_00002_.png&type=temp&subfolder=&rand=0.9444625525852213"}]]},{"id":908,"type":"VAEDecode","pos":[15217.7802734375,-312.1965637207031],"size":[210,46],"flags":{"collapsed":true},"order":30,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":3469},{"name":"vae","localized_name":"vae","type":"VAE","link":2696}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2697,3720],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[]},{"id":1376,"type":"Note","pos":[13703.0439453125,536.6895751953125],"size":[261.9539489746094,88],"flags":{},"order":6,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Increase or decrease weight in ClownGuide to alter adherence to the input image."],"color":"#432","bgcolor":"#653"},{"id":1383,"type":"Note","pos":[14428.40234375,580.1749877929688],"size":[261.9539489746094,88],"flags":{},"order":9,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Samplers like res_2s in this cycling node will also work and are faster. 
res_2m and res_3m are even faster, but sometimes the effect takes longer in wall time to fully kick in."],"color":"#432","bgcolor":"#653"},{"id":1384,"type":"Note","pos":[14793.0322265625,518.4120483398438],"size":[261.9539489746094,88],"flags":{},"order":10,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["res_2m or res_3m can be used here instead and are faster, but are less likely to fully clean up lingering artifacts."],"color":"#432","bgcolor":"#653"},{"id":1385,"type":"Note","pos":[14398.345703125,768.2096557617188],"size":[261.9539489746094,88],"flags":{},"order":11,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["method = AdaIN is faster and uses less memory, but is less accurate. Some prefer the effect."],"color":"#432","bgcolor":"#653"},{"id":1328,"type":"ClownOptions_SDE_Beta","pos":[14186.4755859375,-132.6126251220703],"size":[315,266],"flags":{"collapsed":true},"order":1,"mode":0,"inputs":[{"name":"etas","localized_name":"etas","type":"SIGMAS","shape":7,"link":null},{"name":"etas_substep","localized_name":"etas_substep","type":"SIGMAS","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[3707],"slot_index":0}],"properties":{"Node name for S&R":"ClownOptions_SDE_Beta"},"widgets_values":["gaussian","gaussian","hard","hard",0.5,0.75,-1,"fixed"]},{"id":1381,"type":"Note","pos":[13881.6279296875,-217.62835693359375],"size":[261.9539489746094,88],"flags":{},"order":7,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Increase or decrease \"steps_to_run\" in ClownsharKSampler to change the effective denoise level."],"color":"#432","bgcolor":"#653"},{"id":1382,"type":"Note","pos":[14718.0498046875,-295.4144592285156],"size":[268.1851806640625,124.49711608886719],"flags":{},"order":8,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Increasing cycles 
will increase the amount of change, but take longer.\n\nCycles will rerun the same step over and over, forwards and backwards, iteratively refining an image at a controlled noise level."],"color":"#432","bgcolor":"#653"}],"links":[[18,14,0,7,4,"VAE"],[1395,13,0,431,0,"MODEL"],[1398,7,3,431,1,"LATENT"],[2692,431,0,907,0,"MODEL"],[2696,14,0,908,1,"VAE"],[2697,908,0,909,0,"IMAGE"],[2881,490,0,970,0,"CLIP"],[2882,970,0,907,2,"CONDITIONING"],[2983,7,0,907,3,"LATENT"],[3469,981,0,908,0,"LATENT"],[3533,1317,0,980,6,"OPTIONS"],[3578,907,0,980,4,"LATENT"],[3581,490,0,1333,0,"CLIP"],[3602,1333,0,907,1,"CONDITIONING"],[3604,1308,0,980,5,"GUIDES"],[3626,1333,0,980,1,"CONDITIONING"],[3627,970,0,980,2,"CONDITIONING"],[3670,1358,1,490,0,"*"],[3671,1358,2,14,0,"*"],[3682,1350,0,1362,0,"IMAGE"],[3685,1358,0,1363,0,"MODEL"],[3686,1363,0,13,0,"*"],[3688,1350,0,7,0,"IMAGE"],[3698,980,0,981,4,"LATENT"],[3699,1318,0,1308,3,"GUIDES"],[3707,1328,0,980,7,"OPTIONS"],[3708,1318,0,907,5,"GUIDES"],[3709,7,1,1308,0,"LATENT"],[3710,7,0,1318,0,"LATENT"],[3720,908,0,1377,0,"IMAGE"],[3721,1373,0,1378,0,"*"],[3724,1378,0,1350,1,"IMAGE"],[3725,1374,0,1379,0,"*"],[3726,1379,0,1371,0,"IMAGE"],[3727,1371,0,7,1,"IMAGE"],[3728,1371,0,1350,0,"IMAGE"],[3729,1378,0,1377,1,"IMAGE"],[3730,1380,0,1371,1,"INT"],[3731,1380,1,1371,2,"INT"],[3732,1380,0,7,5,"INT"],[3733,1380,1,7,6,"INT"]],"groups":[{"id":1,"title":"Model Loaders","bounding":[12796.72265625,-401.9004211425781,822.762451171875,436.0693359375],"color":"#3f789e","font_size":24,"flags":{}},{"id":2,"title":"Sampling","bounding":[13652.6533203125,-402.70721435546875,1470.8076171875,1409.0289306640625],"color":"#3f789e","font_size":24,"flags":{}},{"id":3,"title":"Input Prep","bounding":[12797.1396484375,77.69412231445312,817.4218139648438,820.6239624023438],"color":"#3f789e","font_size":24,"flags":{}},{"id":4,"title":"Save and 
Compare","bounding":[15180.705078125,-399.09112548828125,1050.6468505859375,615.8845825195312],"color":"#3f789e","font_size":24,"flags":{}}],"config":{},"extra":{"ds":{"scale":1.3072020475058237,"offset":[-10982.673431174471,526.9422127403179]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0,"ue_links":[],"VHS_MetadataImage":true,"VHS_KeepIntermediate":true},"version":0.4} ================================================ FILE: example_workflows/hidream style transfer.json ================================================ {"last_node_id":1317,"last_link_id":3533,"nodes":[{"id":13,"type":"Reroute","pos":[13140,110],"size":[75,26],"flags":{},"order":11,"mode":0,"inputs":[{"name":"","type":"*","link":3509}],"outputs":[{"name":"","type":"MODEL","links":[1395],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":402,"type":"QuadrupleCLIPLoader","pos":[12690,150],"size":[407.7720031738281,130],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"CLIP","localized_name":"CLIP","type":"CLIP","links":[1552],"slot_index":0}],"properties":{"Node name for 
S&R":"QuadrupleCLIPLoader","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["clip_l_hidream.safetensors","clip_g_hidream.safetensors","t5xxl_fp8_e4m3fn_scaled.safetensors","llama_3.1_8b_instruct_fp8_scaled.safetensors"]},{"id":490,"type":"Reroute","pos":[13140,150],"size":[75,26],"flags":{},"order":6,"mode":0,"inputs":[{"name":"","type":"*","link":1552}],"outputs":[{"name":"","type":"CLIP","links":[2881,3323],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":981,"type":"ClownsharkChainsampler_Beta","pos":[14277.9453125,-92.8893051147461],"size":[340.20001220703125,510],"flags":{},"order":17,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":3250},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[3469],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"exponential/res_2s",-1,4,"resample",true]},{"id":908,"type":"VAEDecode","pos":[14640.490234375,-94.68604278564453],"size":[210,46],"flags":{},"order":18,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":3469},{"name":"vae","localized_name":"vae","type":"VAE","link":2696}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2697],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[]},{"id":909,"type":"SaveImage","pos":[14635.966796875,4.407815933227539],"size":[457.3382263183594,422.2065124511719],"flags":{},"order":19,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2697}],"outputs":[],"properties":{"Node name for S&R":"SaveImage","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["ComfyUI"]},{"id":431,"type":"ModelSamplingAdvancedResolution","pos":[13253.2275390625,-90.14451599121094],"size":[260.3999938964844,126],"flags":{},"order":14,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1395},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":1398}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2692],"slot_index":0}],"properties":{"Node name for 
S&R":"ModelSamplingAdvancedResolution","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":["exponential",1.35,0.85]},{"id":14,"type":"Reroute","pos":[13140,190],"size":[75,26],"flags":{},"order":8,"mode":0,"inputs":[{"name":"","type":"*","link":1344}],"outputs":[{"name":"","type":"VAE","links":[18,2696],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":403,"type":"UNETLoader","pos":[12780,20],"size":[320.7802429199219,82],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[{"name":"MODEL","localized_name":"MODEL","type":"MODEL","links":[3508],"slot_index":0}],"properties":{"Node name for S&R":"UNETLoader","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["hidream_i1_full_fp8.safetensors","fp8_e4m3fn"]},{"id":404,"type":"VAELoader","pos":[12887.7998046875,328.069091796875],"size":[210,58],"flags":{},"order":2,"mode":0,"inputs":[],"outputs":[{"name":"VAE","localized_name":"VAE","type":"VAE","links":[1344],"slot_index":0}],"properties":{"Node name for S&R":"VAELoader","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["ae.sft"]},{"id":1308,"type":"ClownGuide_Style_Beta","pos":[13637.08984375,660.7327270507812],"size":[246.31312561035156,286],"flags":{},"order":13,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":3531},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[3530],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownGuide_Style_Beta"},"widgets_values":["positive","WCT",1,1,"constant",0,-1,false]},{"id":980,"type":"ClownsharkChainsampler_Beta","pos":[13918.0234375,-98.65141296386719],"size":[340.20001220703125,570],"flags":{},"order":16,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":2971},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":3530},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":3533},{"name":"options 2","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[3250],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"exponential/res_2s",1,4,"resample",true]},{"id":7,"type":"VAEEncodeAdvanced","pos":[13250.6240234375,672.3837890625],"size":[261.2217712402344,279.3136901855469],"flags":{},"order":12,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":3515},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":3532},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":18}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[2983],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":[3531],"slot_index":1},{"name":"mask","localized_name":"mask","type":"MASK","links":[],"slot_index":2},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[1398],"slot_index":3},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"VAEEncodeAdvanced","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":["false",896,1152,"red",false,"16_channels"]},{"id":1285,"type":"LoadImage","pos":[12887.7626953125,444.2932434082031],"size":[315,314],"flags":{},"order":3,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3515],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["pasted/image 
(544).png","image"]},{"id":907,"type":"ClownsharKSampler_Beta","pos":[13550.5615234375,-92.92960357666016],"size":[340.55120849609375,666.8208618164062],"flags":{},"order":15,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":2692},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":3480},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":2882},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":2983},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2971],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":[0.5,"exponential/res_2s","beta57",20,14,1,4,201,"fixed","unsample",true]},{"id":1309,"type":"LoadImage","pos":[12889.3486328125,815.3554077148438],"size":[315,314],"flags":{},"order":4,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3532],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["ChatGPT Image Apr 29, 2025, 09_18_46 
PM.png","image"]},{"id":1297,"type":"ReHiDreamPatcher","pos":[12779.865234375,-110.67424774169922],"size":[321.6453552246094,82],"flags":{},"order":7,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":3508}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[3509],"slot_index":0}],"properties":{"Node name for S&R":"ReHiDreamPatcher","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":["float32",true]},{"id":1224,"type":"CLIPTextEncode","pos":[13247.2734375,95.37741088867188],"size":[269.0397644042969,155.65545654296875],"flags":{"collapsed":false},"order":10,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":3323}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[3480],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["a gritty illustration of a japanese woman with traditional hair in traditional clothes"]},{"id":970,"type":"CLIPTextEncode","pos":[13257.970703125,316.4944152832031],"size":[261.8798522949219,111.21334838867188],"flags":{},"order":9,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":2881}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2882],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["blurry, out of focus, shallow depth of field, low quality, bad quality, low detail, mutated, jpeg artifacts, compression 
artifacts,"]},{"id":1317,"type":"ClownOptions_Cycles_Beta","pos":[13959.880859375,541.2625122070312],"size":[265.2884826660156,178],"flags":{},"order":5,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[3533],"slot_index":0}],"properties":{"Node name for S&R":"ClownOptions_Cycles_Beta"},"widgets_values":[20,1,0.5,"none",-1,4]}],"links":[[18,14,0,7,4,"VAE"],[1344,404,0,14,0,"*"],[1395,13,0,431,0,"MODEL"],[1398,7,3,431,1,"LATENT"],[1552,402,0,490,0,"*"],[2692,431,0,907,0,"MODEL"],[2696,14,0,908,1,"VAE"],[2697,908,0,909,0,"IMAGE"],[2881,490,0,970,0,"CLIP"],[2882,970,0,907,2,"CONDITIONING"],[2971,907,0,980,4,"LATENT"],[2983,7,0,907,3,"LATENT"],[3250,980,0,981,4,"LATENT"],[3323,490,0,1224,0,"CLIP"],[3469,981,0,908,0,"LATENT"],[3480,1224,0,907,1,"CONDITIONING"],[3508,403,0,1297,0,"MODEL"],[3509,1297,0,13,0,"*"],[3515,1285,0,7,0,"IMAGE"],[3530,1308,0,980,5,"GUIDES"],[3531,7,1,1308,0,"LATENT"],[3532,1309,0,7,1,"IMAGE"],[3533,1317,0,980,6,"OPTIONS"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.7398859252302459,"offset":[-10583.206320408986,234.77974623579652]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0,"ue_links":[],"VHS_MetadataImage":true,"VHS_KeepIntermediate":true},"version":0.4} ================================================ FILE: example_workflows/hidream txt2img.json ================================================ 
{"last_node_id":1321,"last_link_id":3548,"nodes":[{"id":490,"type":"Reroute","pos":[13130,-70],"size":[75,26],"flags":{},"order":3,"mode":0,"inputs":[{"name":"","type":"*","link":3534}],"outputs":[{"name":"","type":"CLIP","links":[2881,3323],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1317,"type":"ClownModelLoader","pos":[12770,-90],"size":[315,266],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[3539],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[3534],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[3535],"slot_index":2}],"properties":{"Node name for S&R":"ClownModelLoader"},"widgets_values":["hidream_i1_full_fp8.safetensors","fp8_e4m3fn_fast","clip_l_hidream.safetensors","clip_g_hidream.safetensors","t5xxl_fp16.safetensors","llama_3.1_8b_instruct_fp8_scaled.safetensors","hidream","ae.sft"]},{"id":14,"type":"Reroute","pos":[13130,-30],"size":[75,26],"flags":{},"order":4,"mode":0,"inputs":[{"name":"","type":"*","link":3535}],"outputs":[{"name":"","type":"VAE","links":[18,2696],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":970,"type":"CLIPTextEncode","pos":[13253.0546875,116.28263854980469],"size":[261.8798522949219,111.21334838867188],"flags":{},"order":5,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":2881}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2882],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["blurry, out of focus, shallow depth of field, low quality, bad quality, low detail, mutated, jpeg artifacts, compression 
artifacts,"]},{"id":7,"type":"VAEEncodeAdvanced","pos":[13253.044921875,283.4559020996094],"size":[261.2217712402344,279.3136901855469],"flags":{},"order":7,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":null},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":18}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":[],"slot_index":1},{"name":"mask","localized_name":"mask","type":"MASK","links":[],"slot_index":2},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[3540],"slot_index":3},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"VAEEncodeAdvanced","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":["false",1344,768,"red",false,"16_channels"]},{"id":1224,"type":"CLIPTextEncode","pos":[13250,-90],"size":[269.0397644042969,155.65545654296875],"flags":{"collapsed":false},"order":6,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":3323}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[3480],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["a cold war era photograph from 1983 of a group of four friends holding up their hands inside an antique living room in a victorian era 
mansion"]},{"id":13,"type":"Reroute","pos":[13130,-110],"size":[75,26],"flags":{},"order":2,"mode":0,"inputs":[{"name":"","type":"*","link":3539}],"outputs":[{"name":"","type":"MODEL","links":[3548],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":909,"type":"SaveImage","pos":[13936.2919921875,12.050485610961914],"size":[457.3382263183594,422.2065124511719],"flags":{},"order":10,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2697}],"outputs":[],"properties":{"Node name for S&R":"SaveImage","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["ComfyUI"]},{"id":908,"type":"VAEDecode","pos":[13934.587890625,-92.61396026611328],"size":[210,46],"flags":{},"order":9,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":3537},{"name":"vae","localized_name":"vae","type":"VAE","link":2696}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2697],"slot_index":0}],"properties":{"Node name for 
S&R":"VAEDecode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[]},{"id":907,"type":"ClownsharKSampler_Beta","pos":[13550.5615234375,-92.92960357666016],"size":[340.55120849609375,666.8208618164062],"flags":{},"order":8,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":3548},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":3480},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":2882},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":3540},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[3537],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":[0.5,"multistep/res_3m","bong_tangent",20,-1,1,4,0,"fixed","standard",true]},{"id":1321,"type":"Note","pos":[12769.740234375,239.9431915283203],"size":[345.97113037109375,161.35496520996094],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["There are many samplers to try, but res_2m, res_3m, res_2s, and res_3s are very reliable. If you want to push quality a bit higher in exchange for time, you could even try res_5s.\n\nres_2m and res_3m begin with higher order steps (one res_2s step, and two res_3s steps, respectively) to initialize the sampling process. 
Ultimately, the result is faster convergence in terms of wall time, as fewer steps end up being necessary."],"color":"#432","bgcolor":"#653"}],"links":[[18,14,0,7,4,"VAE"],[2696,14,0,908,1,"VAE"],[2697,908,0,909,0,"IMAGE"],[2881,490,0,970,0,"CLIP"],[2882,970,0,907,2,"CONDITIONING"],[3323,490,0,1224,0,"CLIP"],[3480,1224,0,907,1,"CONDITIONING"],[3534,1317,1,490,0,"*"],[3535,1317,2,14,0,"*"],[3537,907,0,908,0,"LATENT"],[3539,1317,0,13,0,"*"],[3540,7,3,907,3,"LATENT"],[3548,13,0,907,0,"MODEL"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.9194342495775452,"offset":[-11336.810477400342,443.2870544682993]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0,"ue_links":[],"VHS_MetadataImage":true,"VHS_KeepIntermediate":true},"version":0.4} ================================================ FILE: example_workflows/hidream unsampling data WF.json ================================================ {"last_node_id":637,"last_link_id":2029,"nodes":[{"id":628,"type":"LoadImage","pos":[599.166015625,156.38429260253906],"size":[315,314],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2017]},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for 
S&R":"LoadImage"},"widgets_values":["ComfyUI_14254_.png","image"]},{"id":629,"type":"VAEEncodeAdvanced","pos":[961.6968994140625,123.66181182861328],"size":[278.0284423828125,280.5834045410156],"flags":{},"order":4,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":2017},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":2026}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[2013,2020,2022],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":null},{"name":"mask","localized_name":"mask","type":"MASK","links":null},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[2015]},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"VAEEncodeAdvanced"},"widgets_values":["false",1024,1024,"red",false,"16_channels"]},{"id":632,"type":"ModelSamplingAdvancedResolution","pos":[962.5586547851562,-316.3705139160156],"size":[277.62237548828125,126],"flags":{},"order":7,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":2025},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":2015}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2016],"slot_index":0}],"properties":{"Node name for 
S&R":"ModelSamplingAdvancedResolution"},"widgets_values":["exponential",1.35,0.85]},{"id":633,"type":"SaveImage","pos":[1921.8458251953125,-123.4797134399414],"size":[436.4179382324219,508.5302429199219],"flags":{},"order":11,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2019}],"outputs":[],"properties":{},"widgets_values":["ComfyUI"]},{"id":631,"type":"ClownsharkChainsampler_Beta","pos":[1605.8143310546875,-124.34080505371094],"size":[280.55523681640625,510],"flags":{},"order":9,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":2005},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":2023},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2008],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"multistep/res_3m",-1,5.5,"resample",true]},{"id":634,"type":"ClownGuide_Beta","pos":[1276.0064697265625,-480.84442138671875],"size":[284.860595703125,290.8609924316406],"flags":{},"order":5,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":2020},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[2021],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuide_Beta"},"widgets_values":["data",false,false,0.5,1,"constant",0,-1,false]},{"id":636,"type":"ClownModelLoader","pos":[599.3463745117188,-176.31788635253906],"size":[315,266],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2025],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[2024,2028],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[2026,2027],"slot_index":2}],"properties":{"Node name for S&R":"ClownModelLoader"},"widgets_values":["hidream_i1_full_fp8.safetensors","fp8_e4m3fn","clip_l_hidream.safetensors","clip_g_hidream.safetensors","t5xxl_fp8_e4m3fn_scaled.safetensors","llama_3.1_8b_instruct_fp8_scaled.safetensors","hidream","ae.sft"]},{"id":591,"type":"VAEDecode","pos":[1924.08251953125,-233.2501983642578],"size":[210,46],"flags":{"collapsed":false},"order":10,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","label":"samples","type":"LATENT","link":2008},{"name":"vae","localized_name":"vae","label":"vae","type":"VAE","link":2027}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","label":"IMAGE","type":"IMAGE","shape":3,"links":[2019],"slot_index":0}],"properties":{"Node name for 
S&R":"VAEDecode"},"widgets_values":[]},{"id":630,"type":"ClownsharKSampler_Beta","pos":[1271.7001953125,-124.3408432006836],"size":[291.7499084472656,630],"flags":{},"order":8,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":2016},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":2018},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":2029},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":2013},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":2021},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2005]},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"multistep/res_3m","beta57",60,-1,1,1,0,"fixed","unsample",true]},{"id":107,"type":"CLIPTextEncode","pos":[959.4713745117188,-123.3353500366211],"size":[282.33453369140625,173.58438110351562],"flags":{"collapsed":false},"order":2,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","label":"clip","type":"CLIP","link":2024}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","label":"CONDITIONING","type":"CONDITIONING","shape":3,"links":[2018],"slot_index":0}],"title":"Positive Prompt","properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["the mournful lamentations of of a female rock singer on stage with chaos behind her, her face screaming her sorrowful refrains the despairing cries of anguished screams howling agonized moans, her pained whispers mournful sighs distant echoes across the smoky stage, fading memories of lost 
loves, forgotten dreams, shattered hopes, crushed spirits, broken hearts"]},{"id":637,"type":"CLIPTextEncode","pos":[963.5917358398438,453.83306884765625],"size":[278.4529113769531,88],"flags":{"collapsed":false},"order":3,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","label":"clip","type":"CLIP","link":2028}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","label":"CONDITIONING","type":"CONDITIONING","shape":3,"links":[2029],"slot_index":0}],"title":"Positive Prompt","properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":[""]},{"id":635,"type":"ClownGuide_Beta","pos":[1604.09326171875,-479.9832763671875],"size":[284.860595703125,290.8609924316406],"flags":{},"order":6,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":2022},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[2023],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuide_Beta"},"widgets_values":["data",false,true,0.5,1,"beta57",0,10,false]}],"links":[[2005,630,0,631,4,"LATENT"],[2008,631,0,591,0,"LATENT"],[2013,629,0,630,3,"LATENT"],[2015,629,3,632,1,"LATENT"],[2016,632,0,630,0,"MODEL"],[2017,628,0,629,0,"IMAGE"],[2018,107,0,630,1,"CONDITIONING"],[2019,591,0,633,0,"IMAGE"],[2020,629,0,634,0,"LATENT"],[2021,634,0,630,5,"GUIDES"],[2022,629,0,635,0,"LATENT"],[2023,635,0,631,5,"GUIDES"],[2024,636,1,107,0,"CLIP"],[2025,636,0,632,0,"MODEL"],[2026,636,2,629,4,"VAE"],[2027,636,2,591,1,"VAE"],[2028,636,1,637,0,"CLIP"],[2029,637,0,630,2,"CONDITIONING"]],"groups":[],"config":{},"extra":{"ds":{"scale":2.1762913579017154,"offset":[427.0670817937978,488.9238245904811]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0},"version":0.4} ================================================ FILE: example_workflows/hidream unsampling data.json 
================================================ {"last_node_id":637,"last_link_id":2029,"nodes":[{"id":628,"type":"LoadImage","pos":[599.166015625,156.38429260253906],"size":[315,314],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2017]},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["ComfyUI_14254_.png","image"]},{"id":629,"type":"VAEEncodeAdvanced","pos":[961.6968994140625,123.66181182861328],"size":[278.0284423828125,280.5834045410156],"flags":{},"order":4,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":2017},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":2026}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[2013,2020,2022],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":null},{"name":"mask","localized_name":"mask","type":"MASK","links":null},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[2015]},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for 
S&R":"VAEEncodeAdvanced"},"widgets_values":["false",1024,1024,"red",false,"16_channels"]},{"id":632,"type":"ModelSamplingAdvancedResolution","pos":[962.5586547851562,-316.3705139160156],"size":[277.62237548828125,126],"flags":{},"order":7,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":2025},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":2015}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2016],"slot_index":0}],"properties":{"Node name for S&R":"ModelSamplingAdvancedResolution"},"widgets_values":["exponential",1.35,0.85]},{"id":633,"type":"SaveImage","pos":[1921.8458251953125,-123.4797134399414],"size":[436.4179382324219,508.5302429199219],"flags":{},"order":11,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2019}],"outputs":[],"properties":{},"widgets_values":["ComfyUI"]},{"id":631,"type":"ClownsharkChainsampler_Beta","pos":[1605.8143310546875,-124.34080505371094],"size":[280.55523681640625,510],"flags":{},"order":9,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":2005},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":2023},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2008],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"multistep/res_3m",-1,5.5,"resample",true]},{"id":634,"type":"ClownGuide_Beta","pos":[1276.0064697265625,-480.84442138671875],"size":[284.860595703125,290.8609924316406],"flags":{},"order":5,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":2020},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[2021],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuide_Beta"},"widgets_values":["data",false,false,0.5,1,"constant",0,-1,false]},{"id":636,"type":"ClownModelLoader","pos":[599.3463745117188,-176.31788635253906],"size":[315,266],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2025],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[2024,2028],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[2026,2027],"slot_index":2}],"properties":{"Node name for S&R":"ClownModelLoader"},"widgets_values":["hidream_i1_full_fp8.safetensors","fp8_e4m3fn","clip_l_hidream.safetensors","clip_g_hidream.safetensors","t5xxl_fp8_e4m3fn_scaled.safetensors","llama_3.1_8b_instruct_fp8_scaled.safetensors","hidream","ae.sft"]},{"id":591,"type":"VAEDecode","pos":[1924.08251953125,-233.2501983642578],"size":[210,46],"flags":{"collapsed":false},"order":10,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","label":"samples","type":"LATENT","link":2008},{"name":"vae","localized_name":"vae","label":"vae","type":"VAE","link":2027}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","label":"IMAGE","type":"IMAGE","shape":3,"links":[2019],"slot_index":0}],"properties":{"Node name for 
S&R":"VAEDecode"},"widgets_values":[]},{"id":630,"type":"ClownsharKSampler_Beta","pos":[1271.7001953125,-124.3408432006836],"size":[291.7499084472656,630],"flags":{},"order":8,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":2016},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":2018},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":2029},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":2013},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":2021},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2005]},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"multistep/res_3m","beta57",60,-1,1,1,0,"fixed","unsample",true]},{"id":107,"type":"CLIPTextEncode","pos":[959.4713745117188,-123.3353500366211],"size":[282.33453369140625,173.58438110351562],"flags":{"collapsed":false},"order":2,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","label":"clip","type":"CLIP","link":2024}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","label":"CONDITIONING","type":"CONDITIONING","shape":3,"links":[2018],"slot_index":0}],"title":"Positive Prompt","properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["the mournful lamentations of of a female rock singer on stage with chaos behind her, her face screaming her sorrowful refrains the despairing cries of anguished screams howling agonized moans, her pained whispers mournful sighs distant echoes across the smoky stage, fading memories of lost 
loves, forgotten dreams, shattered hopes, crushed spirits, broken hearts"]},{"id":637,"type":"CLIPTextEncode","pos":[963.5917358398438,453.83306884765625],"size":[278.4529113769531,88],"flags":{"collapsed":false},"order":3,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","label":"clip","type":"CLIP","link":2028}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","label":"CONDITIONING","type":"CONDITIONING","shape":3,"links":[2029],"slot_index":0}],"title":"Positive Prompt","properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":[""]},{"id":635,"type":"ClownGuide_Beta","pos":[1604.09326171875,-479.9832763671875],"size":[284.860595703125,290.8609924316406],"flags":{},"order":6,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":2022},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[2023],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuide_Beta"},"widgets_values":["data",false,true,0.5,1,"beta57",0,10,false]}],"links":[[2005,630,0,631,4,"LATENT"],[2008,631,0,591,0,"LATENT"],[2013,629,0,630,3,"LATENT"],[2015,629,3,632,1,"LATENT"],[2016,632,0,630,0,"MODEL"],[2017,628,0,629,0,"IMAGE"],[2018,107,0,630,1,"CONDITIONING"],[2019,591,0,633,0,"IMAGE"],[2020,629,0,634,0,"LATENT"],[2021,634,0,630,5,"GUIDES"],[2022,629,0,635,0,"LATENT"],[2023,635,0,631,5,"GUIDES"],[2024,636,1,107,0,"CLIP"],[2025,636,0,632,0,"MODEL"],[2026,636,2,629,4,"VAE"],[2027,636,2,591,1,"VAE"],[2028,636,1,637,0,"CLIP"],[2029,637,0,630,2,"CONDITIONING"]],"groups":[],"config":{},"extra":{"ds":{"scale":2.1762913579017154,"offset":[427.0670817937978,488.9238245904811]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0},"version":0.4} ================================================ FILE: example_workflows/hidream unsampling 
pseudoimplicit.json ================================================ {"last_node_id":637,"last_link_id":2029,"nodes":[{"id":628,"type":"LoadImage","pos":[599.166015625,156.38429260253906],"size":[315,314],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2017]},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["ComfyUI_14254_.png","image"]},{"id":629,"type":"VAEEncodeAdvanced","pos":[961.6968994140625,123.66181182861328],"size":[278.0284423828125,280.5834045410156],"flags":{},"order":4,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":2017},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":2026}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[2013,2020,2022],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":null},{"name":"mask","localized_name":"mask","type":"MASK","links":null},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[2015]},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for 
S&R":"VAEEncodeAdvanced"},"widgets_values":["false",1024,1024,"red",false,"16_channels"]},{"id":632,"type":"ModelSamplingAdvancedResolution","pos":[962.5586547851562,-316.3705139160156],"size":[277.62237548828125,126],"flags":{},"order":7,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":2025},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":2015}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2016],"slot_index":0}],"properties":{"Node name for S&R":"ModelSamplingAdvancedResolution"},"widgets_values":["exponential",1.35,0.85]},{"id":633,"type":"SaveImage","pos":[1921.8458251953125,-123.4797134399414],"size":[436.4179382324219,508.5302429199219],"flags":{},"order":11,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2019}],"outputs":[],"properties":{},"widgets_values":["ComfyUI"]},{"id":591,"type":"VAEDecode","pos":[1924.08251953125,-233.2501983642578],"size":[210,46],"flags":{"collapsed":false},"order":10,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","label":"samples","type":"LATENT","link":2008},{"name":"vae","localized_name":"vae","label":"vae","type":"VAE","link":2027}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","label":"IMAGE","type":"IMAGE","shape":3,"links":[2019],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode"},"widgets_values":[]},{"id":107,"type":"CLIPTextEncode","pos":[959.4713745117188,-123.3353500366211],"size":[282.33453369140625,173.58438110351562],"flags":{"collapsed":false},"order":2,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","label":"clip","type":"CLIP","link":2024}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","label":"CONDITIONING","type":"CONDITIONING","shape":3,"links":[2018],"slot_index":0}],"title":"Positive Prompt","properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["the mournful lamentations of of a female rock singer on 
stage with chaos behind her, her face screaming her sorrowful refrains the despairing cries of anguished screams howling agonized moans, her pained whispers mournful sighs distant echoes across the smoky stage, fading memories of lost loves, forgotten dreams, shattered hopes, crushed spirits, broken hearts"]},{"id":637,"type":"CLIPTextEncode","pos":[963.5917358398438,453.83306884765625],"size":[278.4529113769531,88],"flags":{"collapsed":false},"order":3,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","label":"clip","type":"CLIP","link":2028}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","label":"CONDITIONING","type":"CONDITIONING","shape":3,"links":[2029],"slot_index":0}],"title":"Positive Prompt","properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":[""]},{"id":636,"type":"ClownModelLoader","pos":[599.3463745117188,-176.31788635253906],"size":[315,266],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2025],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[2024,2028],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[2026,2027],"slot_index":2}],"properties":{"Node name for 
S&R":"ClownModelLoader"},"widgets_values":["hidream_i1_full_fp8.safetensors","fp8_e4m3fn_fast","clip_l_hidream.safetensors","clip_g_hidream.safetensors","t5xxl_fp8_e4m3fn_scaled.safetensors","llama_3.1_8b_instruct_fp8_scaled.safetensors","hidream","ae.sft"]},{"id":630,"type":"ClownsharKSampler_Beta","pos":[1271.7001953125,-124.3408432006836],"size":[291.7499084472656,630],"flags":{},"order":8,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":2016},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":2018},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":2029},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":2013},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":2021},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2005]},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"multistep/res_3m","beta57",30,-1,1,1,0,"fixed","unsample",true]},{"id":631,"type":"ClownsharkChainsampler_Beta","pos":[1605.8143310546875,-124.34080505371094],"size":[280.55523681640625,510],"flags":{},"order":9,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":2005},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":2023},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2008],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"multistep/res_3m",-1,4,"resample",true]},{"id":634,"type":"ClownGuide_Beta","pos":[1276.0064697265625,-480.84442138671875],"size":[284.860595703125,290.8609924316406],"flags":{},"order":5,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":2020},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[2021],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownGuide_Beta"},"widgets_values":["pseudoimplicit",false,false,0.5,1,"beta57",0,30,false]},{"id":635,"type":"ClownGuide_Beta","pos":[1604.09326171875,-479.9832763671875],"size":[284.860595703125,290.8609924316406],"flags":{},"order":6,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":2022},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[2023],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuide_Beta"},"widgets_values":["pseudoimplicit",false,false,0.5,1,"beta57",0,4,false]}],"links":[[2005,630,0,631,4,"LATENT"],[2008,631,0,591,0,"LATENT"],[2013,629,0,630,3,"LATENT"],[2015,629,3,632,1,"LATENT"],[2016,632,0,630,0,"MODEL"],[2017,628,0,629,0,"IMAGE"],[2018,107,0,630,1,"CONDITIONING"],[2019,591,0,633,0,"IMAGE"],[2020,629,0,634,0,"LATENT"],[2021,634,0,630,5,"GUIDES"],[2022,629,0,635,0,"LATENT"],[2023,635,0,631,5,"GUIDES"],[2024,636,1,107,0,"CLIP"],[2025,636,0,632,0,"MODEL"],[2026,636,2,629,4,"VAE"],[2027,636,2,591,1,"VAE"],[2028,636,1,637,0,"CLIP"],[2029,637,0,630,2,"CONDITIONING"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.7449402268886909,"offset":[544.7968662691544,737.2296697550046]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0},"version":0.4} ================================================ FILE: example_workflows/hidream unsampling.json ================================================ {"last_node_id":637,"last_link_id":2029,"nodes":[{"id":628,"type":"LoadImage","pos":[599.166015625,156.38429260253906],"size":[315,314],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2017]},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for 
S&R":"LoadImage"},"widgets_values":["ComfyUI_14254_.png","image"]},{"id":629,"type":"VAEEncodeAdvanced","pos":[961.6968994140625,123.66181182861328],"size":[278.0284423828125,280.5834045410156],"flags":{},"order":4,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":2017},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":2026}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[2013,2020,2022],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":null},{"name":"mask","localized_name":"mask","type":"MASK","links":null},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[2015]},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"VAEEncodeAdvanced"},"widgets_values":["false",1024,1024,"red",false,"16_channels"]},{"id":632,"type":"ModelSamplingAdvancedResolution","pos":[962.5586547851562,-316.3705139160156],"size":[277.62237548828125,126],"flags":{},"order":7,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":2025},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":2015}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2016],"slot_index":0}],"properties":{"Node name for 
S&R":"ModelSamplingAdvancedResolution"},"widgets_values":["exponential",1.35,0.85]},{"id":633,"type":"SaveImage","pos":[1921.8458251953125,-123.4797134399414],"size":[436.4179382324219,508.5302429199219],"flags":{},"order":11,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2019}],"outputs":[],"properties":{},"widgets_values":["ComfyUI"]},{"id":631,"type":"ClownsharkChainsampler_Beta","pos":[1605.8143310546875,-124.34080505371094],"size":[280.55523681640625,510],"flags":{},"order":9,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":2005},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":2023},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2008],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"multistep/res_3m",-1,5.5,"resample",true]},{"id":591,"type":"VAEDecode","pos":[1924.08251953125,-233.2501983642578],"size":[210,46],"flags":{"collapsed":false},"order":10,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","label":"samples","type":"LATENT","link":2008},{"name":"vae","localized_name":"vae","label":"vae","type":"VAE","link":2027}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","label":"IMAGE","type":"IMAGE","shape":3,"links":[2019],"slot_index":0}],"properties":{"Node 
name for S&R":"VAEDecode"},"widgets_values":[]},{"id":630,"type":"ClownsharKSampler_Beta","pos":[1271.7001953125,-124.3408432006836],"size":[291.7499084472656,630],"flags":{},"order":8,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":2016},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":2018},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":2029},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":2013},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":2021},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2005]},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"multistep/res_3m","beta57",60,-1,1,1,0,"fixed","unsample",true]},{"id":107,"type":"CLIPTextEncode","pos":[959.4713745117188,-123.3353500366211],"size":[282.33453369140625,173.58438110351562],"flags":{"collapsed":false},"order":2,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","label":"clip","type":"CLIP","link":2024}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","label":"CONDITIONING","type":"CONDITIONING","shape":3,"links":[2018],"slot_index":0}],"title":"Positive Prompt","properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["the mournful lamentations of of a female rock singer on stage with chaos behind her, her face screaming her sorrowful refrains the despairing cries of anguished screams howling agonized moans, her pained whispers mournful sighs distant echoes across the smoky stage, fading memories of 
lost loves, forgotten dreams, shattered hopes, crushed spirits, broken hearts"]},{"id":637,"type":"CLIPTextEncode","pos":[963.5917358398438,453.83306884765625],"size":[278.4529113769531,88],"flags":{"collapsed":false},"order":3,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","label":"clip","type":"CLIP","link":2028}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","label":"CONDITIONING","type":"CONDITIONING","shape":3,"links":[2029],"slot_index":0}],"title":"Positive Prompt","properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":[""]},{"id":636,"type":"ClownModelLoader","pos":[599.3463745117188,-176.31788635253906],"size":[315,266],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2025],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[2024,2028],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[2026,2027],"slot_index":2}],"properties":{"Node name for S&R":"ClownModelLoader"},"widgets_values":["hidream_i1_full_fp8.safetensors","fp8_e4m3fn_fast","clip_l_hidream.safetensors","clip_g_hidream.safetensors","t5xxl_fp8_e4m3fn_scaled.safetensors","llama_3.1_8b_instruct_fp8_scaled.safetensors","hidream","ae.sft"]},{"id":634,"type":"ClownGuide_Beta","pos":[1276.0064697265625,-480.84442138671875],"size":[284.860595703125,290.8609924316406],"flags":{},"order":5,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":2020},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[2021],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownGuide_Beta"},"widgets_values":["inversion",false,false,0.5,1,"constant",0,-1,false]},{"id":635,"type":"ClownGuide_Beta","pos":[1604.09326171875,-479.9832763671875],"size":[284.860595703125,290.8609924316406],"flags":{},"order":6,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":2022},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[2023],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuide_Beta"},"widgets_values":["inversion",false,false,1,1,"beta57",0,15,false]}],"links":[[2005,630,0,631,4,"LATENT"],[2008,631,0,591,0,"LATENT"],[2013,629,0,630,3,"LATENT"],[2015,629,3,632,1,"LATENT"],[2016,632,0,630,0,"MODEL"],[2017,628,0,629,0,"IMAGE"],[2018,107,0,630,1,"CONDITIONING"],[2019,591,0,633,0,"IMAGE"],[2020,629,0,634,0,"LATENT"],[2021,634,0,630,5,"GUIDES"],[2022,629,0,635,0,"LATENT"],[2023,635,0,631,5,"GUIDES"],[2024,636,1,107,0,"CLIP"],[2025,636,0,632,0,"MODEL"],[2026,636,2,629,4,"VAE"],[2027,636,2,591,1,"VAE"],[2028,636,1,637,0,"CLIP"],[2029,637,0,630,2,"CONDITIONING"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.7449402268886909,"offset":[802.8733998149229,690.5491177830577]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0},"version":0.4} ================================================ FILE: example_workflows/intro to clownsampling.json ================================================ 
{"last_node_id":876,"last_link_id":2046,"nodes":[{"id":453,"type":"VAEDecode","pos":[-303.0476379394531,3073.681640625],"size":[210,46],"flags":{"collapsed":false},"order":228,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":1923},{"name":"vae","localized_name":"vae","type":"VAE","link":1940,"slot_index":1}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","shape":3,"links":[1365],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode"},"widgets_values":[]},{"id":606,"type":"LoraLoader","pos":[-2194.87353515625,3180.94482421875],"size":[359.7619323730469,126],"flags":{},"order":177,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1890},{"name":"clip","localized_name":"clip","type":"CLIP","link":1939}],"outputs":[{"name":"MODEL","localized_name":"MODEL","type":"MODEL","links":[1904],"slot_index":0},{"name":"CLIP","localized_name":"CLIP","type":"CLIP","links":[1937,1938],"slot_index":1}],"properties":{"Node name for S&R":"LoraLoader"},"widgets_values":["csbw_cascade_dark_ema.safetensors",1,1]},{"id":454,"type":"SaveImage","pos":[-303.3555603027344,3184.454345703125],"size":[753.4503784179688,734.7869262695312],"flags":{},"order":229,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":1365}],"outputs":[],"properties":{},"widgets_values":["ComfyUI"]},{"id":625,"type":"SharkOptions_UltraCascade_Latent_Beta","pos":[-648.0692138671875,3944.1982421875],"size":[310.79998779296875,82],"flags":{},"order":0,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[1948],"slot_index":0}],"properties":{"Node name for 
S&R":"SharkOptions_UltraCascade_Latent_Beta"},"widgets_values":[1536,1536]},{"id":626,"type":"SharkOptions_UltraCascade_Latent_Beta","pos":[-1372.7569580078125,3947.591064453125],"size":[310.79998779296875,82],"flags":{},"order":1,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[1951],"slot_index":0}],"properties":{"Node name for S&R":"SharkOptions_UltraCascade_Latent_Beta"},"widgets_values":[24,24]},{"id":624,"type":"SharkOptions_UltraCascade_Latent_Beta","pos":[-1013.2625732421875,3947.5908203125],"size":[310.79998779296875,82],"flags":{},"order":2,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[1947],"slot_index":0}],"properties":{"Node name for S&R":"SharkOptions_UltraCascade_Latent_Beta"},"widgets_values":[36,36]},{"id":609,"type":"UNETLoader","pos":[-1020.5138549804688,3045.097412109375],"size":[356.544677734375,82],"flags":{},"order":3,"mode":0,"inputs":[],"outputs":[{"name":"MODEL","localized_name":"MODEL","type":"MODEL","links":[1926],"slot_index":0}],"properties":{"Node name for S&R":"UNETLoader"},"widgets_values":["stage_b_lite_CSBW_v1.1.safetensors","default"]},{"id":621,"type":"VAELoader","pos":[-637.3134765625,3068.5341796875],"size":[294.6280212402344,58],"flags":{},"order":4,"mode":0,"inputs":[],"outputs":[{"name":"VAE","localized_name":"VAE","type":"VAE","links":[1940],"slot_index":0}],"properties":{"Node name for S&R":"VAELoader"},"widgets_values":["stage_a_ft_hq.safetensors"]},{"id":620,"type":"CLIPLoader","pos":[-2564.87353515625,3272.8349609375],"size":[344.635498046875,98],"flags":{},"order":5,"mode":0,"inputs":[],"outputs":[{"name":"CLIP","localized_name":"CLIP","type":"CLIP","links":[1939],"slot_index":0}],"properties":{"Node name for 
S&R":"CLIPLoader"},"widgets_values":["cascade_text_encoder.safetensors","stable_cascade","default"]},{"id":627,"type":"Note","pos":[-1381.849365234375,4086.07421875],"size":[331.63720703125,415.29815673828125],"flags":{},"order":6,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Stage C: the original Stable Cascade version. \n\nStable Cascade latents are actually quite small: typically, a 1024x1024 image will be generated from a stage C latent that is only 24x24 (for comparison, with SDXL or SD1.5, the dimensions are 128x128). \n\n\"Compression\" is just a shorthand method of determining these dimensions, such as 24x24 (1024 / 42 = 24.38, which means a \"compression\" of 42).\n\nThis poses a problem though: Cascade was only trained on a handful of resolutions. The difference between 24x24 and 25x25 is a significant drop in quality and coherence. Therefore, it is best to just set these dimensions directly.\n\nThe best trained resolutions are:\n\n24x24 > 32x32\n30x16 > 40x24 \n\n48x24 also works, but seems to result in more doubling problems than the others.\n\n\n"],"color":"#432","bgcolor":"#653"},{"id":628,"type":"Note","pos":[-1012.45947265625,4084.7783203125],"size":[331.63720703125,415.29815673828125],"flags":{},"order":7,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Stage UP: a patched version of Stable Cascade stage C (\"UltraPixel\"). \n\nThe key with these dimensions is to keep the aspect ratio the same as the stage C latent. Typically, best results are with a 1.5x upscale. 2.0x works, but will result in somewhat more issues with doubling, and can be a lot slower. However, the detail level will also be very high.\n\nSome viable resolutions are listed below. 
Asterisks signify ones that have been verified to work particularly well.\n\n32x32\n36x36 **\n40x40\n42x42\n48x48 *\n\n40x24\n50x30\n60x36 **\n70x42\n80x48 *\n\n72x36 \n80x40 *\n96x48 (very slow!)\n\n\n\n"],"color":"#432","bgcolor":"#653"},{"id":632,"type":"CheckpointLoaderSimple","pos":[-1073.474609375,2726.673583984375],"size":[452.7829895019531,102.89583587646484],"flags":{},"order":8,"mode":0,"inputs":[],"outputs":[{"name":"MODEL","localized_name":"MODEL","type":"MODEL","links":null},{"name":"CLIP","localized_name":"CLIP","type":"CLIP","links":null},{"name":"VAE","localized_name":"VAE","type":"VAE","links":null}],"properties":{"Node name for S&R":"CheckpointLoaderSimple"},"widgets_values":["cascade_B-lite_refined_CSBW_v1.1.safetensors"]},{"id":633,"type":"Note","pos":[-1075.468994140625,2892.701416015625],"size":[457.5304870605469,94.27093505859375],"flags":{},"order":9,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["This is the stage B lite CSBW finetune (model only).\n\nhttps://huggingface.co/ClownsharkBatwing/Cascade_Stage_B_CSBW_Refined/blob/main/stage_b_lite_CSBW_v1.1.safetensors"],"color":"#432","bgcolor":"#653"},{"id":634,"type":"Note","pos":[-575.989501953125,2895.603271484375],"size":[547.0546875,91.47331237792969],"flags":{},"order":10,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["This is a finetune of stage A. You will get a sharper image, but in images with large white areas, small circular grey halos are sometimes visible.\n\nhttps://huggingface.co/madebyollin/stage-a-ft-hq/blob/main/stage_a_ft_hq.safetensors"],"color":"#432","bgcolor":"#653"},{"id":630,"type":"Note","pos":[-3309.3076171875,3048.958984375],"size":[717.709228515625,165.61032104492188],"flags":{},"order":11,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["I recommend the BF16 version of stage C. There is no visible difference vs. 
the full precision weights, and it halves the disk space requirements.\n\nhttps://huggingface.co/stabilityai/stable-cascade/blob/main/stage_c_bf16.safetensors\n\nIMPORTANT: The original UltraPixel \"safetensors\" is not a safetensors at all - it is a PICKLE, where they lazily (at best) changed the file extension to \".safetensors\"!\n\nI converted it to a real safetensors file, and it's available below:\n\nhttps://huggingface.co/ClownsharkBatwing/ultrapixel_convert/blob/main/ultrapixel_t2i.safetensors"],"color":"#432","bgcolor":"#653"},{"id":584,"type":"UltraCascade_Loader","pos":[-2564.4580078125,3133.043212890625],"size":[345.5117492675781,82.95540618896484],"flags":{},"order":12,"mode":0,"inputs":[],"outputs":[{"name":"MODEL","localized_name":"MODEL","type":"MODEL","shape":3,"links":[1890],"slot_index":0}],"properties":{"Node name for S&R":"UltraCascade_Loader"},"widgets_values":["stage_c_bf16.safetensors","ultrapixel_t2i.safetensors"]},{"id":635,"type":"Note","pos":[-3307.105712890625,3272.173095703125],"size":[715.61083984375,89.37511444091797],"flags":{},"order":13,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Any clip G will do. The Cascade version is available at:\n\nhttps://huggingface.co/stabilityai/stable-cascade/blob/main/text_encoder/model.bf16.safetensors\n\n"],"color":"#432","bgcolor":"#653"},{"id":636,"type":"Note","pos":[-3306.760009765625,3418.6708984375],"size":[715.61083984375,113.57872772216797],"flags":{},"order":14,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["The LORA was trained with OneTrainer (https://github.com/Nerogar/OneTrainer) on some of my own SDXL generations. It has deep colors and is strong with wacky paint, illustration, and vector art styles. 
\n\nCascade learns extremely quickly and is very adept with artistic styles (it knows many artist names).\n\nhttps://huggingface.co/ClownsharkBatwing/CSBW_Style/blob/main/csbw_cascade_dark_ema.safetensors\n"],"color":"#432","bgcolor":"#653"},{"id":629,"type":"Note","pos":[-647.965087890625,4084.8818359375],"size":[331.63720703125,415.29815673828125],"flags":{},"order":15,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Stage B: the Stable Cascade superresolution model.\n\nAs with stage UP, the key with these dimensions is to keep the aspect ratio the same as the prior latents. Theoretically, any resolution may be used, though some odd distortions can occur when the ideal upscale ratio is not used. It's not entirely clear what those ratios are, so some experimentation may be necessary. \n\nSome resolutions that work particularly well are:\n\n1536x1536 *\n2048x2048 *\n\n1600x960\n2560x1536 **\n2880x1792 *\n3200x1920\n\nIf you use stage B lite, you can hit 4k resolutions without even using more than 12GB of VRAM.\n\nIt's highly recommended to use the CSBW finetune of stage B, as it fixes many of the severe artifact problems the original release had.\n\nNote: CFG is not needed for this stage!"],"color":"#432","bgcolor":"#653"},{"id":637,"type":"Note","pos":[-1838.5732421875,2922.63671875],"size":[457.5304870605469,94.27093505859375],"flags":{},"order":16,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Perturbed attention guidance (PAG) makes an enormous difference with Stable Cascade stages C and UP. 
Like CFG, it will double the runtime."],"color":"#432","bgcolor":"#653"},{"id":598,"type":"CLIPTextEncode","pos":[-1811.0350341796875,3205.474853515625],"size":[351.592529296875,173.00360107421875],"flags":{},"order":201,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":1937}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[1907,1911,1914],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["impasto oil painting by Yayoi Kusama and Lisa Frank, thick paint textures, tunning contrasts at night with stylish roughly drawn thick black lines, a nuclear explosion destroying a city, its towering wide glowing nuclear mushroom cloud enveloping the entire skyline, the nuclear fireball lighting up the dark sky"]},{"id":601,"type":"UltraCascade_PerturbedAttentionGuidance","pos":[-1808.5911865234375,3084.306884765625],"size":[344.3999938964844,58],"flags":{},"order":200,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1904}],"outputs":[{"name":"MODEL","localized_name":"MODEL","type":"MODEL","links":[1909,1910],"slot_index":0}],"properties":{"Node name for S&R":"UltraCascade_PerturbedAttentionGuidance"},"widgets_values":[3]},{"id":599,"type":"CLIPTextEncode","pos":[-1814.4205322265625,3435.57763671875],"size":[356.2470703125,110.6326904296875],"flags":{},"order":202,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":1938}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[1908,1912,1915],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["low quality, bad quality, low detail, blurry, unsharp"]},{"id":631,"type":"Note","pos":[-1557.671142578125,2725.4599609375],"size":[457.5304870605469,94.27093505859375],"flags":{},"order":17,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["This is a checkpoint that, for 
convenience, includes the stage B lite CSBW finetune, clip G, and stage A (the FT_HQ finetune).\n\nhttps://huggingface.co/ClownsharkBatwing/CSBW_Style/blob/main/cascade_B-lite_refined_CSBW_v1.1.safetensors"],"color":"#432","bgcolor":"#653"},{"id":649,"type":"Note","pos":[2011.257080078125,3860],"size":[282.2704772949219,88],"flags":{},"order":18,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Since \"steps_to_run\" is set to -1,\nthis will run all remaining steps."],"color":"#432","bgcolor":"#653"},{"id":648,"type":"Note","pos":[1661.257080078125,3860],"size":[283.8087463378906,88],"flags":{},"order":19,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Runs the next 10 steps (out of 30)."],"color":"#432","bgcolor":"#653"},{"id":657,"type":"ClownsharKSampler_Beta","pos":[1710,3140],"size":[296.93646240234375,418],"flags":{},"order":20,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"exponential/res_3s","beta57",30,-1,1,5.5,0,"fixed","standard",true]},{"id":680,"type":"ClownSampler_Beta","pos":[1050,3140],"size":[283.6876220703125,174],"flags":{},"order":21,"mode":0,"inputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"sampler","localized_name":"sampler","type":"SAMPLER","links":[1973]}],"properties":{"Node name for S&R":"ClownSampler_Beta"},"widgets_values":[0.5,"exponential/res_3s",-1,"fixed",true]},{"id":685,"type":"Note","pos":[3440,5450],"size":[280.6243896484375,109.73818969726562],"flags":{},"order":22,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["NOTE: \"epsilon_scales\" is currently unused, but exists as a placeholder. \n\n\"frame_weights\" is for video models such as Hunyuan. This is for use with guides."],"color":"#432","bgcolor":"#653"},{"id":713,"type":"Note","pos":[4574.66552734375,4613.29833984375],"size":[280.0735168457031,88],"flags":{},"order":23,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["INPAINTING TIP: Try using the settings to the right with a feathered mask."],"color":"#432","bgcolor":"#653"},{"id":670,"type":"SigmasSchedulePreview","pos":[3850,5410],"size":[315,270],"flags":{},"order":24,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null}],"outputs":[],"properties":{"Node name for S&R":"SigmasSchedulePreview"},"widgets_values":["hard",0.25,1,1,1,"beta57",30,2.1,0]},{"id":654,"type":"BetaSamplingScheduler","pos":[1420,2780],"size":[210,106],"flags":{},"order":25,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":null}],"outputs":[{"name":"SIGMAS","localized_name":"SIGMAS","type":"SIGMAS","links":null}],"properties":{"Node name for 
S&R":"BetaSamplingScheduler"},"widgets_values":[20,0.5,0.7]},{"id":653,"type":"Note","pos":[1390,2940],"size":[252.12789916992188,117.73304748535156],"flags":{},"order":26,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["\"beta57\" is equivalent to the BetaSamplingScheduler node above. I have found the results to be generally superior to the default \"beta\" (where the values are both set to 0.60).\n\n\n"],"color":"#432","bgcolor":"#653"},{"id":643,"type":"Note","pos":[751.2572021484375,4100],"size":[507.688720703125,165.58355712890625],"flags":{},"order":27,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["\"steps_to_run\": When set to -1, it will run all steps per usual. \n\nIf set to a positive value, it will run that number of steps, and then stop and pass the latent off to the next sampler node.\n\nIf the next sampler node's \"sampler_mode\" is set to \"resample\", it will then continue where the first one left off. \n\nThis even works with multistep samplers, as it carries its \"momentum\" from one node to the next. 
This is not the case for \"KSampler (Advanced)\", or any other sampler nodes that I'm aware of."],"color":"#432","bgcolor":"#653"},{"id":724,"type":"CLIPTextEncode","pos":[990,4960],"size":[210,88],"flags":{},"order":28,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":null}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[1982,1983],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":[""]},{"id":722,"type":"ClownGuide_Beta","pos":[1250,5430],"size":[315,290],"flags":{},"order":29,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[1984],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuide_Beta"},"widgets_values":["epsilon",false,false,0.5,1,"constant",0,1000,false]},{"id":720,"type":"ClownsharKSampler_Beta","pos":[1260,4940],"size":[315,418],"flags":{},"order":178,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":1982},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":1983},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":1984},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":null},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized
_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta"},"widgets_values":[0,"multistep/res_2m","beta57",30,-1,1,1,0,"fixed","unsample",true]},{"id":721,"type":"ClownsharKSampler_Beta","pos":[1620,4940],"size":[315,418],"flags":{},"order":183,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":1985},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":null},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta"},"widgets_values":[0,"multistep/res_2m","beta57",30,-1,1,5.5,-1,"fixed","resample",true]},{"id":727,"type":"Note","pos":[890,5170],"size":[333.3896179199219,108.9758071899414],"flags":{},"order":30,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["UNSAMPLER SETTINGS: \n\nEta should usually be 0.0. \nCFG should be 1.0, and used with an empty prompt.\n\nDenoise < 1.0 can help with adherence to the unsampled image."],"color":"#432","bgcolor":"#653"},{"id":731,"type":"Note","pos":[1980,5160],"size":[364.70263671875,103.89823150634766],"flags":{},"order":31,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Ensure resampler denoise matches the unsampler denoise.\n\nLow eta values can be used here (try 0.1 to 0.25). 
Sometimes they can actually improve adherence to the unsampled image."],"color":"#432","bgcolor":"#653"},{"id":733,"type":"Note","pos":[880,5530],"size":[339.3138122558594,133.51815795898438],"flags":{},"order":32,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Typical guide settings for unsampling/resampling with a rectified flow model (AuraFlow, SD3.5, Flux) are to the right.\n\nThis will generally NOT work well with UNSAMPLING SD1.5, SDXL, Cascade, etc.! (These guide nodes however work great as regular guides with these models!)"],"color":"#432","bgcolor":"#653"},{"id":659,"type":"Note","pos":[1427.1387939453125,3646.506591796875],"size":[352.2813415527344,88],"flags":{},"order":33,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["All of the configurations above will have the same output (and runtime) as the chained samplers below."],"color":"#432","bgcolor":"#653"},{"id":610,"type":"ClownsharKSampler_Beta","pos":[-1373.449462890625,3188.063232421875],"size":[311.41375732421875,693.9824829101562],"flags":{},"order":213,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":1909},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":1907},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":1908},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":1951},{"name":"options 
2","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[1949],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"exponential/res_3s","beta57",30,-1,1,5.5,1,"fixed","standard",true]},{"id":612,"type":"ClownsharKSampler_Beta","pos":[-1014.779296875,3187.209228515625],"size":[314.421142578125,693.9824829101562],"flags":{},"order":221,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":1910},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":1911},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":1912},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1949},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":1947},{"name":"options 2","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[1950],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"exponential/res_3s","beta57",30,-1,1,5.5,-1,"fixed","standard",true]},{"id":613,"type":"ClownsharKSampler_Beta","pos":[-648.0813598632812,3185.39013671875],"size":[309.2452087402344,691.814208984375],"flags":{},"order":226,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":1926},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":1914},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":1915},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1950},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":1948},{"name":"options 2","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[1923],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"exponential/res_3s","beta57",30,-1,1,1,-1,"fixed","standard",true]},{"id":716,"type":"Note","pos":[4574.66552734375,4963.29833984375],"size":[270.65277099609375,108.61186218261719],"flags":{},"order":34,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["INPAINTING TIP: Try using the settings to the right with a feathered mask, and \"end_step\" set to the number of sampling steps (or less). 
This will allow the entire image to change slightly to help heal any seams that may appear."],"color":"#432","bgcolor":"#653"},{"id":714,"type":"ClownGuide_Beta","pos":[4874.66552734375,4503.29833984375],"size":[257.2991638183594,290],"flags":{},"order":35,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":null}],"properties":{"Node name for S&R":"ClownGuide_Beta"},"widgets_values":["epsilon",false,false,1,1,"constant",0,1000,false]},{"id":715,"type":"ClownGuide_Beta","pos":[4874.66552734375,4863.29833984375],"size":[254.67617797851562,290],"flags":{},"order":36,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":null}],"properties":{"Node name for S&R":"ClownGuide_Beta"},"widgets_values":["epsilon",false,true,1,1,"beta57",0,40,false]},{"id":709,"type":"Note","pos":[5570,4480],"size":[280.0735168457031,88],"flags":{},"order":37,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Note: The \"guide_masked\" latent image will control the region that is \"masked out\"! 
And vice versa with \"guide_unmasked\"."],"color":"#432","bgcolor":"#653"},{"id":688,"type":"ClownGuides_Beta","pos":[5542.884765625,3927.678955078125],"size":[315,450],"flags":{},"order":38,"mode":0,"inputs":[{"name":"guide_masked","localized_name":"guide_masked","type":"LATENT","shape":7,"link":null},{"name":"guide_unmasked","localized_name":"guide_unmasked","type":"LATENT","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights_masked","localized_name":"weights_masked","type":"SIGMAS","shape":7,"link":null},{"name":"weights_unmasked","localized_name":"weights_unmasked","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[1977],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuides_Beta"},"widgets_values":["epsilon",false,true,0.75,0.75,1,1,"beta57","constant",0,0,15,15,false]},{"id":707,"type":"ClownGuide_Beta","pos":[5206.15283203125,3929.6015625],"size":[315,290],"flags":{},"order":39,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":null}],"properties":{"Node name for S&R":"ClownGuide_Beta"},"widgets_values":["pseudoimplicit",false,false,0.15,1,"beta57",5,15,false]},{"id":706,"type":"Note","pos":[5228.6552734375,4270.94677734375],"size":[280.0735168457031,88],"flags":{},"order":40,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Tip: Try a delayed start (start_step > 0), like shown above with pseudoimplicit, for wacky 
results!"],"color":"#432","bgcolor":"#653"},{"id":711,"type":"Note","pos":[5570,4610],"size":[280.0735168457031,88],"flags":{},"order":41,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Tip: I recommend drawing your masks on random load image nodes, for convenience."],"color":"#432","bgcolor":"#653"},{"id":712,"type":"Note","pos":[5546.97216796875,3772.719970703125],"size":[308.80828857421875,88],"flags":{},"order":42,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["INPAINTING TIP: Use ClownRegionalConditioning and ClownGuides together with the same mask!"],"color":"#432","bgcolor":"#653"},{"id":744,"type":"Note","pos":[7538.74755859375,3817.72216796875],"size":[337.9170227050781,389.18304443359375],"flags":{},"order":43,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["It can be confusing at first, trying to understand which area is affected by which conditioning or mask. I suggest starting with prompts like \"blue ice\" and \"red fire\" with region_bleed = 0.0 to clear things up.\n\nTO THE LEFT:\n\nThe two nodes to the left will automatically create an unmasked area, based on what areas are not masked by mask, mask_A, or mask_B.\n\nAs an example:\n\nPositive_A will affect the area masked by \"mask_A\".\n\nPositive_B will affect the area masked by \"mask_B\".\n\nPositive_unmasked will affect the area that is not masked by \"mask_A\" or \"mask_B\".\n\nTO THE RIGHT:\n\nThese two nodes give you manual control over the area for each prompt. This is especially useful for temporal attention with video models like WAN. 
The risk is if you fail to ensure every part of the image (or frame) is masked by one of the masks, you'll end up with an unconditioned area that will look like pure noise."],"color":"#432","bgcolor":"#653"},{"id":756,"type":"TemporalCrossAttnMask","pos":[7017.90087890625,4790.6005859375],"size":[210,82],"flags":{},"order":44,"mode":0,"inputs":[],"outputs":[{"name":"temporal_mask","localized_name":"temporal_mask","type":"MASK","links":[1988]}],"properties":{"Node name for S&R":"TemporalCrossAttnMask"},"widgets_values":[1,65]},{"id":757,"type":"TemporalCrossAttnMask","pos":[7017.12060546875,4954.5556640625],"size":[210,82],"flags":{},"order":45,"mode":0,"inputs":[],"outputs":[{"name":"temporal_mask","localized_name":"temporal_mask","type":"MASK","links":[1989],"slot_index":0}],"properties":{"Node name for S&R":"TemporalCrossAttnMask"},"widgets_values":[65,133]},{"id":758,"type":"Note","pos":[7644.2734375,4659.04248046875],"size":[275.73828125,88],"flags":{},"order":46,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Sometimes it is beneficial to allow self-attention masks to overlap slightly. 
This is similar to the \"edge_width\" parameter above, except it overlaps frames, not spatial components (areas of the image)."],"color":"#432","bgcolor":"#653"},{"id":751,"type":"TemporalSplitAttnMask","pos":[7668.654296875,4808.83642578125],"size":[210,130],"flags":{},"order":47,"mode":0,"inputs":[],"outputs":[{"name":"temporal_mask","localized_name":"temporal_mask","type":"MASK","links":[1986],"slot_index":0}],"properties":{"Node name for S&R":"TemporalSplitAttnMask"},"widgets_values":[1,69,1,65]},{"id":753,"type":"TemporalSplitAttnMask","pos":[7668.654296875,4998.83642578125],"size":[210,130],"flags":{},"order":48,"mode":0,"inputs":[],"outputs":[{"name":"temporal_mask","localized_name":"temporal_mask","type":"MASK","links":[1987],"slot_index":0}],"properties":{"Node name for S&R":"TemporalSplitAttnMask"},"widgets_values":[61,133,65,133]},{"id":749,"type":"Note","pos":[6907.34326171875,4623.6328125],"size":[280.0735168457031,88],"flags":{},"order":49,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["The advanced version of the WAN patcher can set a sliding self-attention window. The \"size\" is the number of latent frames (which is 1/4th the number of frames in the final output)."],"color":"#432","bgcolor":"#653"},{"id":689,"type":"Note","pos":[4562.91796875,3811.044189453125],"size":[494.1324462890625,535.6380004882812],"flags":{},"order":50,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Guides are a way of controlling the image generation process without denoising an image directly, but by steering the denoising process itself. This can mimic many of the benefits of unsampling, without the need to spend extra time unsampling the image.\n\nThere are two main guide modes:\n\n\"Epsilon\" can be used in conjunction with unsampling/resampling workflows to dramatically improve results with rectified flow models (AuraFlow, SD3.5, Flux). It can also be used directly. 
It works by modifying the noise prediction made by the model to align with the guide image.\n\n\"Pseudoimplicit\" works by lying to the model about the state of the denoising process, so that it generates a noise prediction that strongly aligns with the guide image. \"Fully_pseudoimplicit\" is only supported with \"fully_implicit\" and \"diag_implicit\" samplers (all others will default back to pseudoimplicit).\n\nChannelwise and projection modes can have a dramatic effect. I especially recommend trying epsilon with these modes, though they are quite interesting with pseudoimplicit as well. \"projection_mode\" can result in some issues with image details if used for the entire sampling process.\n\nCUTOFF:\n\nFlux has extremely strong self-attention, and has issues with getting \"stuck\" if the guide strength is too high (or used for too many steps), which results in an output that looks nearly identical to the guide. \"cutoff\" does a crude check for how similar the image is to the guide - if it exceeds that value, it will turn off the guide for that step. Try setting to 0.5 or 0.6 when using Flux.\n\nWEIGHT SCHEDULERS:\n\nThese control the weight at each step. For example, with the settings shown:\n\n * the \"unmasked\" region will have a weight of 0.75 for the first 15 steps, then 0.0 for every step after that\n\n * the \"masked\" region will start with a weight of 0.75 for the first step, gradually declining until reaching 0.0 after 15 steps (and remaining at 0.0)\n"],"color":"#432","bgcolor":"#653"},{"id":726,"type":"Note","pos":[5630,5410],"size":[257.97479248046875,159.16941833496094],"flags":{},"order":51,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["VAEEncodeAdvanced is a quality of life node for convenience when using multiple guides.\n\nNote: the mask input is for a black and white image. 
It is there for convenience with converting any masks you may have saved as black and white images into masks you can use in a workflow."],"color":"#432","bgcolor":"#653"},{"id":691,"type":"ClownsharKSampler_Beta","pos":[5894.66552734375,3923.29833984375],"size":[274.2724609375,418],"flags":{},"order":179,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":1977},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":null},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"multistep/res_2m","beta57",30,-1,1,5.5,0,"fixed","standard",true]},{"id":695,"type":"ModelSamplingAdvancedResolution","pos":[8110,3210],"size":[260.3999938964844,126],"flags":{},"order":215,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1980},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":null}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":null}],"properties":{"Node name for S&R":"ModelSamplingAdvancedResolution"},"widgets_values":["exponential",1.35,0.85]},{"id":701,"type":"Note","pos":[7080,3060],"size":[327.4920959472656,88],"flags":{},"order":52,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Loader nodes are 
provided for convenience with Flux and SD3.5. The Flux loader can also load the Redux (and ClipVision) models for you."],"color":"#432","bgcolor":"#653"},{"id":694,"type":"FluxGuidanceDisable","pos":[7790,3210],"size":[210,82],"flags":{},"order":208,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1979}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1980],"slot_index":0}],"properties":{"Node name for S&R":"FluxGuidanceDisable"},"widgets_values":[true,true]},{"id":699,"type":"Note","pos":[7760,3030],"size":[253.01846313476562,112.91952514648438],"flags":{},"order":53,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["This disables \"Flux Guidance\" (which is actually NOT disabled by setting to 1.0 or 0.0). It can be helpful in many cases where you wish to banish the \"Flux look\" to the bottom of a creepy old well in Transylvania."],"color":"#432","bgcolor":"#653"},{"id":660,"type":"ClownsharKSampler_Beta","pos":[3907.121826171875,3512.491943359375],"size":[293.78173828125,618],"flags":{},"order":205,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":1962},{"name":"options 2","type":"OPTIONS","link":1963},{"name":"options 3","type":"OPTIONS","link":1991},{"name":"options 4","type":"OPTIONS","link":1968},{"name":"options 5","type":"OPTIONS","link":1971},{"name":"options 6","type":"OPTIONS","link":1972},{"name":"options 
7","type":"OPTIONS","link":1974},{"name":"options 8","type":"OPTIONS","link":1990},{"name":"options 9","type":"OPTIONS","link":2003},{"name":"options 10","type":"OPTIONS","link":2007},{"name":"options 11","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"exponential/res_3s","beta57",30,-1,1,5.5,-1,"fixed","standard",true]},{"id":647,"type":"Note","pos":[1321.257080078125,3860],"size":[288.0400390625,88],"flags":{},"order":54,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Runs the first 7 steps (out of 30)."],"color":"#432","bgcolor":"#653"},{"id":640,"type":"ClownsharKSampler_Beta","pos":[1321.257080078125,4010],"size":[296.93646240234375,418],"flags":{},"order":55,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[1952],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"exponential/res_3s","beta57",30,7,1,5.5,0,"fixed","standard",true]},{"id":641,"type":"ClownsharKSampler_Beta","pos":[1671.257080078125,4010],"size":[288.4732666015625,418],"flags":{},"order":182,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1952},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[1953],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"exponential/res_3s","beta57",30,10,1,5.5,-1,"fixed","resample",true]},{"id":642,"type":"ClownsharKSampler_Beta","pos":[2021.257080078125,4010],"size":[291.5506286621094,422.6160888671875],"flags":{},"order":203,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1953},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"exponential/res_3s","beta57",30,-1,1,5.5,-1,"fixed","resample",true]},{"id":729,"type":"Note","pos":[1666.1065673828125,4786.38134765625],"size":[210,88],"flags":{},"order":56,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":[" RESAMPLER NODE"],"color":"#432","bgcolor":"#653"},{"id":650,"type":"Note","pos":[1771.257080078125,4480],"size":[453.94183349609375,144.25192260742188],"flags":{},"order":57,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["IMPORTANT: sampler_mode is set to \"resample\"!\n\nALSO: seed is set to -1!\n\nThis means \"continue where the last sampler left off\", as in, use the next available unused seed.\n\nIf you set it to another value, the noise sampler that is used at every step might reuse a seed, which 
can cause the image to burn.\n\n"],"color":"#432","bgcolor":"#653"},{"id":738,"type":"ReHiDreamPatcher","pos":[6490,4280],"size":[210,82],"flags":{},"order":58,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":null}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":null}],"properties":{"Node name for S&R":"ReHiDreamPatcher"},"widgets_values":["float32",true]},{"id":739,"type":"ReSD35Patcher","pos":[6490,4420],"size":[210,82],"flags":{},"order":59,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":null}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":null}],"properties":{"Node name for S&R":"ReSD35Patcher"},"widgets_values":["float32",true]},{"id":740,"type":"ReAuraPatcher","pos":[6490,4560],"size":[210,82],"flags":{},"order":60,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":null}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":null,"slot_index":0}],"properties":{"Node name for S&R":"ReAuraPatcher"},"widgets_values":[true,true]},{"id":741,"type":"ReWanPatcher","pos":[6490,4680],"size":[210,58],"flags":{},"order":61,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":null}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":null}],"properties":{"Node name for 
S&R":"ReWanPatcher"},"widgets_values":[true]},{"id":658,"type":"ClownsharKSampler_Beta","pos":[2070,3140],"size":[296.93646240234375,418],"flags":{},"order":62,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"exponential/res_3s","beta57",30,10000,1,5.5,0,"fixed","standard",true]},{"id":734,"type":"Note","pos":[1830,2950],"size":[433.063232421875,101.85264587402344],"flags":{},"order":63,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["NOTE: steps_to_run = -1 means to run all steps (the usual default approach for any sampler).\n\nOn the right, steps_to_run > steps, so it will run all the way till the end, just like on the left. 
This is the approach traditionally used in KSampler (Advanced)."],"color":"#432","bgcolor":"#653"},{"id":765,"type":"Note","pos":[-3299.93603515625,2699.88427734375],"size":[389.86285400390625,98.29244232177734],"flags":{},"order":64,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["THESE NODES ARE NOT REQUIRED TO USE RES4LYF!!!\n\nThese descriptions are included only out of a desire to consolidate all CSBW node documentation into one location."],"color":"#432","bgcolor":"#653"},{"id":728,"type":"Note","pos":[1311.5924072265625,4784.7666015625],"size":[213.4912109375,88],"flags":{},"order":65,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":[" UNSAMPLER NODE"],"color":"#432","bgcolor":"#653"},{"id":723,"type":"ClownGuide_Beta","pos":[1620,5430],"size":[315,290],"flags":{},"order":66,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[1985],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuide_Beta"},"widgets_values":["epsilon",false,true,0.75,1,"beta57",0,10,false]},{"id":766,"type":"Note","pos":[1980.62939453125,5502.46337890625],"size":[366.45068359375,97.77838134765625],"flags":{},"order":67,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Tip: \"projection\" and \"channelwise\" modes can increase the intensity of the effect with epsilon and data guide modes. Sometimes, this effect is very desirable. 
It's worth experimenting with."],"color":"#432","bgcolor":"#653"},{"id":732,"type":"Note","pos":[1981.25732421875,5003.00537109375],"size":[361.62445068359375,90.62290954589844],"flags":{},"order":68,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Tip: multistep samplers usually adhere to unsampled images more effectively than others."],"color":"#432","bgcolor":"#653"},{"id":768,"type":"ClownGuides_Beta","pos":[4877.3896484375,5235.57373046875],"size":[333.3587951660156,450],"flags":{},"order":69,"mode":0,"inputs":[{"name":"guide_masked","localized_name":"guide_masked","type":"LATENT","shape":7,"link":null},{"name":"guide_unmasked","localized_name":"guide_unmasked","type":"LATENT","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights_masked","localized_name":"weights_masked","type":"SIGMAS","shape":7,"link":null},{"name":"weights_unmasked","localized_name":"weights_unmasked","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":null}],"properties":{"Node name for S&R":"ClownGuides_Beta"},"widgets_values":["lure",false,false,1,1,1,1,"linear_quadratic","constant",0,0,8,-1,false]},{"id":769,"type":"Note","pos":[4576.1689453125,5281.28271484375],"size":[266.2802734375,135.71385192871094],"flags":{},"order":70,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["INPAINTING TIP: Try using the settings to the right with your input image connected to both the guide_masked and guide_unmasked inputs. 
Adjust \"end_step_masked\" to change the strength of the inpainting effect (or weight_masked, or weight_scheduler_masked)."],"color":"#432","bgcolor":"#653"},{"id":725,"type":"VAEEncodeAdvanced","pos":[5930,5410],"size":[255.3756103515625,278],"flags":{},"order":71,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":null},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":null}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":null},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":null},{"name":"mask","localized_name":"mask","type":"MASK","links":null},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":null},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"VAEEncodeAdvanced"},"widgets_values":["false",1024,1024,"red",false,"16_channels"]},{"id":735,"type":"ClownGuide_Beta","pos":[5147.18896484375,4861.30419921875],"size":[254.67617797851562,290],"flags":{},"order":72,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[1992],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownGuide_Beta"},"widgets_values":["flow",false,false,1,1,"constant",0,40,false]},{"id":770,"type":"ClownsharKSampler_Beta","pos":[5451.8759765625,4763.7705078125],"size":[315,438],"flags":{},"order":184,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":1992},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":1994},{"name":"options 2","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":null},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"multistep/res_2m","beta57",30,-1,1,5.5,0,"randomize","standard",true]},{"id":772,"type":"SharkOptions_GuideCond_Beta","pos":[5230.13525390625,5239.04736328125],"size":[210,98],"flags":{},"order":73,"mode":0,"inputs":[{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[1994]}],"properties":{"Node name for 
S&R":"SharkOptions_GuideCond_Beta"},"widgets_values":[5.5]},{"id":773,"type":"Note","pos":[5229.7958984375,5385.55810546875],"size":[272.242919921875,112.58575439453125],"flags":{},"order":74,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["When using \"flow\" mode a second set of conditionings can be added that will be used to evolve the guide itself to sync up better with your image during sampling. Try describing the guide with some creative liberties to bend things in the desired stylistic direction."],"color":"#432","bgcolor":"#653"},{"id":774,"type":"Note","pos":[4576.4013671875,5501.66796875],"size":[266.2802734375,135.71385192871094],"flags":{},"order":75,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["TIP: ClownGuides allows you to use multiple input images, each with their own separate schedule and strength settings. There's a lot of creative possibilities here, especially when combined with regional conditioning sharing the same mask!"],"color":"#432","bgcolor":"#653"},{"id":746,"type":"ClownRegionalConditioning_AB","pos":[7918.16943359375,3816.63427734375],"size":[248.7556610107422,350],"flags":{},"order":76,"mode":0,"inputs":[{"name":"conditioning_A","localized_name":"conditioning_A","type":"CONDITIONING","shape":7,"link":null},{"name":"conditioning_B","localized_name":"conditioning_B","type":"CONDITIONING","shape":7,"link":null},{"name":"mask_A","localized_name":"mask_A","type":"MASK","shape":7,"link":null},{"name":"mask_B","localized_name":"mask_B","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"region_bleeds","localized_name":"region_bleeds","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","links":null}],"properties":{"Node name for 
S&R":"ClownRegionalConditioning_AB"},"widgets_values":[1,0,0,"constant",0,-1,"boolean",128,false]},{"id":747,"type":"ClownRegionalConditioning_ABC","pos":[7916.001953125,4221.97314453125],"size":[250.51895141601562,390],"flags":{},"order":77,"mode":0,"inputs":[{"name":"conditioning_A","localized_name":"conditioning_A","type":"CONDITIONING","shape":7,"link":null},{"name":"conditioning_B","localized_name":"conditioning_B","type":"CONDITIONING","shape":7,"link":null},{"name":"conditioning_C","localized_name":"conditioning_C","type":"CONDITIONING","shape":7,"link":null},{"name":"mask_A","localized_name":"mask_A","type":"MASK","shape":7,"link":null},{"name":"mask_B","localized_name":"mask_B","type":"MASK","shape":7,"link":null},{"name":"mask_C","localized_name":"mask_C","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"region_bleeds","localized_name":"region_bleeds","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","links":null}],"properties":{"Node name for 
S&R":"ClownRegionalConditioning_ABC"},"widgets_values":[1,0,0,"constant",0,100,"boolean",128,false]},{"id":743,"type":"ClownRegionalConditioning3","pos":[7224.5439453125,4216.19189453125],"size":[287.20001220703125,370],"flags":{},"order":78,"mode":0,"inputs":[{"name":"conditioning_A","localized_name":"conditioning_A","type":"CONDITIONING","shape":7,"link":null},{"name":"conditioning_B","localized_name":"conditioning_B","type":"CONDITIONING","shape":7,"link":null},{"name":"conditioning_unmasked","localized_name":"conditioning_unmasked","type":"CONDITIONING","shape":7,"link":null},{"name":"mask_A","localized_name":"mask_A","type":"MASK","shape":7,"link":null},{"name":"mask_B","localized_name":"mask_B","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"region_bleeds","localized_name":"region_bleeds","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","links":null}],"properties":{"Node name for S&R":"ClownRegionalConditioning3"},"widgets_values":[1,0,0,"constant",0,100,"boolean",128,false]},{"id":754,"type":"ClownRegionalConditioning_AB","pos":[7261.39306640625,4816.611328125],"size":[248.7556610107422,350],"flags":{},"order":180,"mode":0,"inputs":[{"name":"conditioning_A","localized_name":"conditioning_A","type":"CONDITIONING","shape":7,"link":null},{"name":"conditioning_B","localized_name":"conditioning_B","type":"CONDITIONING","shape":7,"link":null},{"name":"mask_A","localized_name":"mask_A","type":"MASK","shape":7,"link":1988},{"name":"mask_B","localized_name":"mask_B","type":"MASK","shape":7,"link":1989},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"region_bleeds","localized_name":"region_bleeds","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","links":null}],"properties":{"Node name for 
S&R":"ClownRegionalConditioning_AB"},"widgets_values":[1,0,0,"constant",0,-1,"boolean",128,false]},{"id":752,"type":"ClownRegionalConditioning_AB","pos":[7918.654296875,4798.83642578125],"size":[248.7556610107422,350],"flags":{},"order":181,"mode":0,"inputs":[{"name":"conditioning_A","localized_name":"conditioning_A","type":"CONDITIONING","shape":7,"link":null},{"name":"conditioning_B","localized_name":"conditioning_B","type":"CONDITIONING","shape":7,"link":null},{"name":"mask_A","localized_name":"mask_A","type":"MASK","shape":7,"link":1986},{"name":"mask_B","localized_name":"mask_B","type":"MASK","shape":7,"link":1987},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"region_bleeds","localized_name":"region_bleeds","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","links":null}],"properties":{"Node name for S&R":"ClownRegionalConditioning_AB"},"widgets_values":[1,0,0,"constant",0,-1,"boolean",128,false]},{"id":777,"type":"ClownRegionalConditioning2","pos":[7226.02978515625,3817.949462890625],"size":[287.20001220703125,330],"flags":{},"order":79,"mode":0,"inputs":[{"name":"conditioning_masked","localized_name":"conditioning_masked","type":"CONDITIONING","shape":7,"link":null},{"name":"conditioning_unmasked","localized_name":"conditioning_unmasked","type":"CONDITIONING","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"region_bleeds","localized_name":"region_bleeds","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","links":null}],"properties":{"Node name for 
S&R":"ClownRegionalConditioning2"},"widgets_values":[1,0,0,"constant",0,-1,"boolean",128,false]},{"id":705,"type":"Note","pos":[6811.22021484375,3808.38671875],"size":[379.8222351074219,549.5839233398438],"flags":{},"order":80,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["\"weight\" affects how strongly the attention mask is applied, which controls how well the masked and unmasked regions are separated. \n\n\"region_bleed\" affects how much the regions can \"talk\" with each other (via self-attention). region_bleed=0.0 will ensure the strongest possible separation, but higher values can help prevent visible seams from forming along the edges of the masked areas.\n\n\nWEIGHT SCHEDULER:\n\nThis controls the weight (strength of separation of the regions) at each step. For example, with the settings shown, the weight will begin at 1.70, and gradually decline before reaching 0.0 after 10 steps (and remaining at 0.0).\n\n\"mask_type\" currently only has the \"gradient\" option, but others may be added later. \n\nYes, this does mean you can use masks with gradients (so you can feather and blur them if you wish)!\n\nMASK_TYPE:\n\nThere are options here that are a bit like causal attention in LLMs. For example, \"boolean_masked\" means the masked area can \"see\" the entire image (via self-attention), while the unmasked area cannot \"see\" the masked area. This is very useful with Flux if you wish to generate a character close to the camera but have an unblurred background. Place the character in the masked area, describe only the background in the unmasked area, select \"boolean_masked\" and set region_bleed = 0.0. \n\nEDGE_WIDTH:\n\nThis creates overlapping self-attention at the boundaries between masked and unmasked areas. This helps to conceal seams. 
Try values like 50 or 150 to start, and watch the preview.\n\n"],"color":"#432","bgcolor":"#653"},{"id":776,"type":"ClownRegionalConditionings","pos":[9220.0224609375,3819.96826171875],"size":[238.2400665283203,266],"flags":{},"order":222,"mode":0,"inputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","shape":7,"link":2000},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"region_bleeds","localized_name":"region_bleeds","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","links":null}],"properties":{"Node name for S&R":"ClownRegionalConditionings"},"widgets_values":[0.9,0.25,0,"constant",0,-1,"boolean",false]},{"id":782,"type":"Note","pos":[8261.9765625,4016.659423828125],"size":[272.1261291503906,131.35166931152344],"flags":{},"order":81,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["\"Spineless\" mode means that the region \"has no spine\" and is susceptible to influence by other regions (via self-attention). This is comparable to the \"boolean_masked\" etc. modes in the nodes to the left. For example, \"boolean_masked\" sets the masked area to \"spineless\"."],"color":"#432","bgcolor":"#653"},{"id":785,"type":"Note","pos":[8574.7734375,4022.382080078125],"size":[210,97.39286804199219],"flags":{},"order":82,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Unlimited regions may be set using these nodes. Up to 12 regions have been successfully tested in a single workflow."],"color":"#432","bgcolor":"#653"},{"id":786,"type":"Note","pos":[8979.33203125,4020.87939453125],"size":[212.89056396484375,176.87088012695312],"flags":{},"order":83,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["The risk of manual control over all masks is that you miss an area (some part ends up not being covered by any mask) which means it then has no conditioning. 
\n\nThis is easily avoided by simply not hooking up a mask to the final node. It will use any remaining unmasked area as the final mask."],"color":"#432","bgcolor":"#653"},{"id":755,"type":"Note","pos":[7241.16015625,4666.3251953125],"size":[275.73828125,88],"flags":{},"order":84,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["If sliding self-attention is used, only cross-attention needs to be masked."],"color":"#432","bgcolor":"#653"},{"id":748,"type":"ReWanPatcherAdvanced","pos":[6702.76953125,4816.7041015625],"size":[279.3623352050781,214],"flags":{},"order":85,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":null}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":null}],"properties":{"Node name for S&R":"ReWanPatcherAdvanced"},"widgets_values":["all","all",true,"standard",60]},{"id":750,"type":"Note","pos":[6444.103515625,4815.1484375],"size":[225.1619873046875,212.99703979492188],"flags":{},"order":86,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Sliding self-attention is useful for generating sequences where the conditioning changes from one frame to another, or for reducing VRAM requirements and reducing inference time when generating long sequences. At least 601 frames can be generated in one shot on a RTX 4090 with the above settings.\n\nThere are two modes: standard and circular. 
Circular allows the first frame to \"see\" the last frame, whereas standard does not."],"color":"#432","bgcolor":"#653"},{"id":742,"type":"Note","pos":[6420.9990234375,3806.16064453125],"size":[345.86224365234375,263.46356201171875],"flags":{},"order":87,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Regional conditioning requires you to patch the model with a \"Re\" (for Regional) patcher (shown below) and to use the beta versions of either ClownSampler + SharkSampler, or ClownSharkSampler.\n\nFully compatible with Flux Redux, CFG, etc.\n\nHiDream notes:\nRegional negative conditioning is currently supported with HiDream and is useful for controlling styles (i.e., \"photo\" in a region that should be a painting, and vice versa). \n\nWith HiDream, weight and region_bleed may also be set to negative values. The effect in terms of strength is the same for -0.9 vs 0.9, but it will change whether it operates on initial or final blocks in the model. The effect can be quite different.\n"],"color":"#432","bgcolor":"#653"},{"id":737,"type":"ReFluxPatcher","pos":[6490,4140],"size":[210,82],"flags":{},"order":88,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":null}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":null}],"properties":{"Node name for S&R":"ReFluxPatcher"},"widgets_values":["float32",true]},{"id":778,"type":"ClownRegionalConditioning","pos":[8500,3820],"size":[211.60000610351562,122],"flags":{},"order":185,"mode":0,"inputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","shape":7,"link":1995},{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null}],"outputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","links":[1996],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownRegionalConditioning"},"widgets_values":[false,128]},{"id":775,"type":"ClownRegionalConditioning","pos":[8260,3820],"size":[211.60000610351562,122],"flags":{},"order":89,"mode":0,"inputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","shape":7,"link":null},{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null}],"outputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","links":[1995],"slot_index":0}],"properties":{"Node name for S&R":"ClownRegionalConditioning"},"widgets_values":[true,128]},{"id":779,"type":"ClownRegionalConditioning","pos":[8740.115234375,3820.114990234375],"size":[211.60000610351562,122],"flags":{},"order":204,"mode":0,"inputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","shape":7,"link":1996},{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null}],"outputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","links":[1999],"slot_index":0}],"properties":{"Node name for S&R":"ClownRegionalConditioning"},"widgets_values":[false,128]},{"id":783,"type":"ClownRegionalConditioning","pos":[8975.990234375,3820.1171875],"size":[211.60000610351562,122],"flags":{},"order":214,"mode":0,"inputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","shape":7,"link":1999},{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null}],"outputs":[{"name":"cond_regions","localized_name":"cond_regions","type":"COND_REGIONS","links":[2000],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownRegionalConditioning"},"widgets_values":[false,128]},{"id":651,"type":"Note","pos":[1041.0577392578125,2843.751953125],"size":[304.6747741699219,235.28672790527344],"flags":{},"order":90,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["SDE NOISE:\n\n\"eta\" represents how much noise the sampler adds after each step. If set to 0.0, the samplers will be \"ODEs\". If set to > 0.0, they will be \"SDEs\" and/or \"ancestral\". \n\nThe math has been carefully designed for both variance preserving and exploding models: results are particularly good with SD1.5, SDXL, Stable Cascade, Auraflow, SD3.5 Medium, and Flux. \n\nIn most cases, using eta will result in gains in quality and coherence when using at least 20 sampling steps. Best results are with 30 or more. \n"],"color":"#432","bgcolor":"#653"},{"id":638,"type":"Note","pos":[690,2842.6943359375],"size":[321.5638427734375,270.1020202636719],"flags":{},"order":91,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["SPEED:\n\nAll multistep samplers, like Euler, use one model call per step. Therefore, they run at the same speed.\n\nAll others have the number of model calls per step listed at the end of the name in terms of \"stages\" (abbreviated \"s\").\n\nTherefore, \"res_2s\" has 2 stages, and uses 2 model calls per step. Each step will take 2x as long as a Euler step. \"ralston_3s\" will take 3x as long.\n\nImplicit samplers benefit enormously from an extra model call to initialize each step. Therefore, \"gauss-legendre_2s\" will actually use 3 model calls per step.\n\n"],"color":"#432","bgcolor":"#653"},{"id":681,"type":"Note","pos":[691.0578002929688,3171.1572265625],"size":[320.96875,168.8627166748047],"flags":{},"order":92,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["IMPORTANT: the seed here is set to -1! \n\nThis means \"use the next available seed\" (which will be the most recently used seed + 1).\n\nThis setting is irrelevant if eta = 0.0. 
It is only used for SDE sampling (where noise is added after each step, the amount of which is controlled by \"eta\").\n\n"],"color":"#432","bgcolor":"#653"},{"id":801,"type":"Note","pos":[631.0161743164062,2693.894775390625],"size":[602.4559326171875,93.21308135986328],"flags":{},"order":93,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["MANY COMPLEX WORKFLOWS BECOME MUCH SIMPLER WHEN USING RES4LYF NODES.\n\nA great emphasis has been placed, during the design of these nodes, on usability - ensuring they are not just more powerful than the default KSampler nodes, and don't just provide superior results, but are also ultimately easier to use, encouraging experimentation. "],"color":"#432","bgcolor":"#653"},{"id":655,"type":"Note","pos":[2114.199951171875,2702.9755859375],"size":[321.8917236328125,108.77723693847656],"flags":{},"order":94,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["BONGMath is an algorithm unique to RES4LYF that vastly improves sampling quality and coherence in most cases, with little to no effect on sampling speed.\n\nIt has no effect when eta is set to 0. 
\n"],"color":"#432","bgcolor":"#653"},{"id":667,"type":"Note","pos":[2630,2720],"size":[242.25900268554688,198.10833740234375],"flags":{},"order":96,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["OPTIONS NODES:\n\nThese can be connected directly to ClownSampler, ClownSharkSampler, and SharkSampler, to control a variety of advanced parameters.\n\nSHARKoptions may be connected to SHARKsampler or clownSHARKsampler.\n\nCLOWNoptions may be connected to CLOWNsampler or CLOWNsharksampler."],"color":"#432","bgcolor":"#653"},{"id":669,"type":"Note","pos":[2894.58544921875,3200.53369140625],"size":[478.7455139160156,399.50189208984375],"flags":{},"order":97,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["ClownOptions_SDE controls the noise that is added after each step (or substep).\n\nNOISE TYPES:\n\nBrownian can give very good results, and is the \"correct\" noise type to use from a mathematical perspective. It does, however, result in a burned image with BONGMath when using many of the higher order samplers (the issue is with \"non-monotonic\" ones, which are typically those \n whose names end with \"5s\" or greater).\n\nNOISE MODES:\n\nThe \"noise mode\" controls how much noise is actually used after each step. The list is roughly sorted in order of strength (hard at the top being the strongest, hard_var at the bottom being the weakest - and the only one that uses \"mathematically correct\" scaling). \n\n\"Sinusoidal\" begins very weak, then becomes strong in the middle of the sampling process before losing strength again.\n\nThe \"soft\" noise types begin very strong, and drop off extremely rapidly.\n\nSUBSTEPS:\n\nAny sampler that is not euler or ddim uses information from multiple model calls (\"stages\") to predict the step. Multistep samplers reuse previous steps as \"stages\", whereas the rest make new model calls. \n\nThe settings for \"substep\" control these intermediate \"substeps\". 
If eta_substep is set to 0, BONGMath will have no effect."],"color":"#432","bgcolor":"#653"},{"id":661,"type":"ClownOptions_SDE_Beta","pos":[3414.8017578125,3262.863037109375],"size":[315,266],"flags":{},"order":98,"mode":0,"inputs":[{"name":"etas","localized_name":"etas","type":"SIGMAS","shape":7,"link":null},{"name":"etas_substep","localized_name":"etas_substep","type":"SIGMAS","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[1963],"slot_index":0}],"properties":{"Node name for S&R":"ClownOptions_SDE_Beta"},"widgets_values":["gaussian","gaussian","hard","hard",0.5,0.5,-1,"randomize"]},{"id":677,"type":"Note","pos":[2900,3650],"size":[471.3785095214844,160.20542907714844],"flags":{},"order":99,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Overshoot > 0 causes the sampler to \"overshoot\" the step size, and then scale backwards to where it was really supposed to end. This is what all other SDE and ancestral sampler implementations do, though I have found it to adversely affect accuracy, especially with high eta values (> 0.7), resulting in softened, simplified images with little detail.\n\nHowever, careful use can soften images and deepen colors with pleasant results.\n\nTo mimic the behavior of the typical SDE and ancestral sampler implementations, set these settings to match those in ClownOptions_SDE."],"color":"#432","bgcolor":"#653"},{"id":683,"type":"Note","pos":[2900,5230],"size":[481.8527526855469,325.1487731933594],"flags":{},"order":100,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["RES4LYF heavily emphasizes giving you control over the sampling process!\n\nYou will often see these green \"sigmas\" inputs that aren't really for sigmas. These are used to control parameters on a step-by-step basis. 
\n\nIMPORTANT: The values used in the input here are multiplied by the value in ClownSampler/SharkSampler/ClownsharKSampler!\n\nFor example, the KarrasSchedule connected below creates a list of numbers:\n\n1.0, 1.0, 1.0, 1.0, 1.0\n\n(The rest is automatically filled in with 0.0.)\n\nThese are then multiplied by the value for \"eta\" (0.5) in the connected ClownsharKSampler node:\n\n0.5, 0.5, 0.5, 0.5, 0.5\n\nThe result is the sampler sets \"eta\" to 0.5 for the first 5 steps, and then 0.0 for every step after that. \n\nTry connecting something like the BetaScheduler while using \"beta57\" as your sampling scheduler!"],"color":"#432","bgcolor":"#653"},{"id":676,"type":"ClownOptions_StepSize_Beta","pos":[3420,3660],"size":[316.0789794921875,130],"flags":{},"order":101,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[1968],"slot_index":0}],"properties":{"Node name for S&R":"ClownOptions_StepSize_Beta"},"widgets_values":["hard","hard",0,0]},{"id":668,"type":"Note","pos":[2900,2720],"size":[476.7748718261719,425.3497314453125],"flags":{},"order":102,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["SharkOptions controls the initial noise generated before starting the sampling process. \n\nNOISE TYPES:\n\nPerlin is great with Stable Cascade, and with Flux will frequently result in a less blurred image (and a somewhat less saturated look, which can be helpful for realism).\n\nThe \"color\" noise modes have more low frequency information (structure), brown being greater than pink. White is neutral, while blue and especially violet have extra high frequency information (details).\n\nhires-pyramid-bicubic can generate exceptionally sharp images in many cases. The other pyramid noise types, and studentt, are often great for painterly styles.\n\nOTHER OPTIONS:\n\n\"noise_stdev\" increases the \"size\" of the noise. 
Values around 1.05 to 1.1 can have a wonderful effect with some painterly styles, with a boost in saturation.\n\n\"denoise_alt\" overrides the denoise setting. It has a very different effect that can often be easier to control when doing img2img generations with rectified flow models. (It scales the sigmas schedule, instead of slicing it).\n\n\"channelwise_cfg\" changes the cfg method used to one that can be very good with models that use a 16 channel VAE (SD3.5, Flux). Setting a negative value in the \"cfg\" box in any ClownsharKSampler or SharkSampler node is equivalent to using this toggle (for example, cfg = -2.0 is the same as setting cfg = 2.0, and channelwise_cfg = true)."],"color":"#432","bgcolor":"#653"},{"id":666,"type":"SharkOptions_Beta","pos":[3413.490478515625,2880],"size":[257.98193359375,130],"flags":{},"order":103,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[1962]}],"properties":{"Node name for S&R":"SharkOptions_Beta"},"widgets_values":["gaussian",1,1,false]},{"id":684,"type":"KarrasScheduler","pos":[3190,5610],"size":[210,130],"flags":{},"order":104,"mode":0,"inputs":[],"outputs":[{"name":"SIGMAS","localized_name":"SIGMAS","type":"SIGMAS","links":[1975,1976],"slot_index":0}],"properties":{"Node name for S&R":"KarrasScheduler"},"widgets_values":[5,1,1,1]},{"id":687,"type":"Note","pos":[2910,5610],"size":[257.2243957519531,88],"flags":{},"order":105,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Tip: use SigmasPreview very heavily so that you can *see* what's going on!"],"color":"#432","bgcolor":"#653"},{"id":671,"type":"Note","pos":[3820,5080],"size":[363.5062255859375,260.6607971191406],"flags":{},"order":106,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["This node can be used to visualize the effect of different noise parameters on how much noise is actually 
added (or removed) during the sampling process.\n\nDelta (Δ) signifies change. So for example, Δt = step size.\n\nThe most important thing to look at is the original sigma (σ) schedule vs σup. \n\nThe difference between σ (white line) and σup (red line above) is how much noise is added by the sampler after each step. If the two overlap, you aren't adding noise, and it's in ODE mode (eta = 0.0).\n\nThe most important thing to try here is higher or lower eta values, and different noise_modes. Try comparing \"hard\" vs \"soft\" vs \"hard_var\" with eta = 0.5."],"color":"#432","bgcolor":"#653"},{"id":686,"type":"SigmasPreview","pos":[3430,5610],"size":[289.7076110839844,128.47837829589844],"flags":{},"order":187,"mode":0,"inputs":[{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","link":1976}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":null}],"properties":{"Node name for S&R":"SigmasPreview"},"widgets_values":[false]},{"id":682,"type":"ClownOptions_Automation_Beta","pos":[3430,5250],"size":[284.9833984375,146],"flags":{},"order":186,"mode":0,"inputs":[{"name":"etas","localized_name":"etas","type":"SIGMAS","shape":7,"link":1975},{"name":"etas_substep","localized_name":"etas_substep","type":"SIGMAS","shape":7,"link":null},{"name":"s_noises","localized_name":"s_noises","type":"SIGMAS","shape":7,"link":null},{"name":"s_noises_substep","localized_name":"s_noises_substep","type":"SIGMAS","shape":7,"link":null},{"name":"epsilon_scales","localized_name":"epsilon_scales","type":"SIGMAS","shape":7,"link":null},{"name":"frame_weights","localized_name":"frame_weights","type":"SIGMAS","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[1974]}],"properties":{"Node name for 
S&R":"ClownOptions_Automation_Beta"},"widgets_values":[]},{"id":673,"type":"Note","pos":[2900.961181640625,4977.04736328125],"size":[480.85333251953125,190.63368225097656],"flags":{},"order":107,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["This option node can be very useful for SAVING TIME! \n\n\"swap_below_error\" is a tolerance threshold where, if the total error at each step falls below the value in the box, it will switch to the sampler specified here.\n\n\"log_err_to_console\" will print these values at each step to the terminal/console/cmd.exe window where ComfyUI is running. This is essential if you wish to choose a reasonable value for \"swap_below_err\".\n\n\"swap_at_step\" will switch after the step specified, no matter what. This is equivalent to chaining two samplers together as shown to the left - it's just more convenient and compact.\n"],"color":"#432","bgcolor":"#653"},{"id":665,"type":"ClownOptions_SwapSampler_Beta","pos":[3430.416015625,5008.54541015625],"size":[287.92266845703125,130],"flags":{},"order":108,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[1972],"slot_index":0}],"properties":{"Node name for S&R":"ClownOptions_SwapSampler_Beta"},"widgets_values":["multistep/res_3m",0,30,false]},{"id":798,"type":"ClownOptions_Momentum_Beta","pos":[3433.12158203125,4837.14990234375],"size":[286.6007995605469,58],"flags":{},"order":109,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[2003],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownOptions_Momentum_Beta"},"widgets_values":[0.5]},{"id":803,"type":"Note","pos":[2904.318359375,4827.84912109375],"size":[481.74639892578125,88],"flags":{},"order":110,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Momentum can be used to accelerate convergence in some cases. Use carefully.\n\nMay be best used with chained workflows, with momentum applied only to some portion of early steps."],"color":"#432","bgcolor":"#653"},{"id":672,"type":"Note","pos":[2904.900634765625,4490.91796875],"size":[483.2145690917969,270.3226623535156],"flags":{},"order":111,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Implicit steps refine the sampling process by feeding the output of each step back into its input and rerunning it. This means setting either to \"1\" will increase the runtime 2x, as you're doubling the number of steps. \n\nThey can drastically increase quality. In some cases, results can actually be improved by cutting the step count in half, and running with implicit_steps=1 or implicit_substeps=1 (which results in an equivalent runtime).\n\nWith the other samplers, rebound will add one extra model call per step. \n\nBongmath and predictor-corrector can have significantly different effects. Rebound can as well (but also adds 1 model call per implicit step or substep).\n\nTRUE IMPLICIT SAMPLERS:\n\nIt is recommended to use \"implicit_steps\" with the \"fully_implicit\" samplers, and \"implicit_substeps\" with the \"diag_implicit\" samplers. Both of these sampler types will ignore the \"implicit_type\" settings.\n"],"color":"#432","bgcolor":"#653"},{"id":675,"type":"Note","pos":[2906.080322265625,4199.57763671875],"size":[474.05108642578125,230.29006958007812],"flags":{},"order":112,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["ClownOptions_DetailBoost aims to BREAK the noise scaling math that was so carefully prepared by ClownOptions_SDE. 
Most users see this as a replacement for the \"Detail Daemon\" node.\n\nTry experimenting with different methods: ones that end with \"normal\" will attempt to preserve luminosity in the image after the adjustments to the noise are made.\n\nIt is worth trying \"sinusoidal\" mode as well, as this is designed to be strongest at middle steps.\n\nEta will increase the intensity of the effect. \n\nIt seems to be best to not have this start on the first step (step 0), and to have it end no more than halfway (end_step = 1/2 of total steps or less). With method = \"model\", this seems to add a lot of detail without losing saturation, increasing luminosity, or triggering mutations."],"color":"#432","bgcolor":"#653"},{"id":763,"type":"ClownOptions_DetailBoost_Beta","pos":[3436.449462890625,4207.4345703125],"size":[282.9725646972656,218],"flags":{},"order":113,"mode":0,"inputs":[{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"etas","localized_name":"etas","type":"SIGMAS","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[1991],"slot_index":0}],"properties":{"Node name for S&R":"ClownOptions_DetailBoost_Beta"},"widgets_values":[1,"model","hard",0.5,3,10]},{"id":761,"type":"Note","pos":[2905.32470703125,3869.700439453125],"size":[471.6525573730469,266.9491882324219],"flags":{},"order":114,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["ClownOptions_SigmaScaling aims to BREAK the noise scaling math that was so carefully prepared by ClownOptions_SDE.\n\n\"noise_anchor_sde\" can make the image look much dirtier with lower values. Try 0.5 for starters, especially with any non-multistep sampler.\n\n\"s_noise\" increases the \"size\" of the noise added with each step. Values around 1.05-1.1 can considerably boost saturation in painterly images. 
BONGMath is particularly good when this is set to values > 1.0.\n\n\"s_noise_substep\" is not compatible with BONGMath. You will get a terrible image if you use them together.\n\n\"lying\" is equivalent to the popular \"lying sigmas\" approach. Like \"noise_anchor\", values < 1.0 will increase the \"dirty\" look. Try starting with 0.95.\n\n\"lying_inv\" will do the opposite of \"lying\". If you find your images look \"dried out\" or desaturated when using lying, try setting this to a similar value, and have it start at a later step, as shown below."],"color":"#432","bgcolor":"#653"},{"id":760,"type":"ClownOptions_SigmaScaling_Beta","pos":[3436.542724609375,3886.986328125],"size":[272.21429443359375,454],"flags":{},"order":115,"mode":0,"inputs":[{"name":"s_noises","localized_name":"s_noises","type":"SIGMAS","shape":7,"link":null},{"name":"s_noises_substep","localized_name":"s_noises_substep","type":"SIGMAS","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[1990],"slot_index":0}],"properties":{"Node name for S&R":"ClownOptions_SigmaScaling_Beta"},"widgets_values":[1,1,1,0.9500000000000001,0.9500000000000001,2,8]},{"id":678,"type":"Note","pos":[3783.1923828125,4263.46484375],"size":[363.2837219238281,88],"flags":{},"order":116,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Tip: if your results are too noisy,\ntry setting \"overshoot\" in \"ClownOptions StepSize\" to the same value as \"eta\" used in \"ClownOptions SDE\"! 
\n\n(Default eta is 0.50 if \"ClownOptions SDE\" is not used)."],"color":"#432","bgcolor":"#653"},{"id":806,"type":"ClownsharkChainsampler_Beta","pos":[5163.4833984375,2735.41357421875],"size":[262.0870056152344,298],"flags":{},"order":206,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":2005},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":null},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"multistep/res_2m",-1,5.5,"resample",true]},{"id":804,"type":"ClownsharKSampler_Beta","pos":[4552.6552734375,2734.24609375],"size":[268.7583312988281,418],"flags":{},"order":117,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2004],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"multistep/res_2m","beta57",20,14,1,5.5,0,"fixed","unsample",true]},{"id":805,"type":"ClownsharkChainsampler_Beta","pos":[4861.5244140625,2737.638671875],"size":[262.0870056152344,318],"flags":{},"order":188,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":2004},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":2006},{"name":"options 2","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2005],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"multistep/res_2m",1,5.5,"resample",true]},{"id":808,"type":"Note","pos":[4806.57275390625,3320.429443359375],"size":[384.6367492675781,194.1151580810547],"flags":{},"order":118,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["ClownOptions Cycles causes the sampler node to rerun after completion, while reversing the sampling mode (resample becomes unsample, unsample becomes resample). \n\n1.0 cycles implies it returns to where it began. \n\n1.5 cycles implies it returns to where it began, then reverses direction and reruns one last time - so it would end at the end of the step.\n\nThis often has VERY good results with unsampling workflows, various img2img workflows, style transfer, etc. 
With 1 steps_to_run, it's a lot like \"ClownOptions Implicit\", though results are often better."],"color":"#432","bgcolor":"#653"},{"id":717,"type":"Note","pos":[2635.989501953125,4579.111328125],"size":[237.44444274902344,91.61251831054688],"flags":{},"order":119,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["INPAINTING TIP: Implicit steps can really help, especially with seams! They also can have a significant impact on unsampling, and guides in general."],"color":"#432","bgcolor":"#653"},{"id":662,"type":"ClownOptions_ImplicitSteps_Beta","pos":[3440.08251953125,4551.392578125],"size":[286.5861511230469,130],"flags":{},"order":120,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[1971],"slot_index":0}],"properties":{"Node name for S&R":"ClownOptions_ImplicitSteps_Beta"},"widgets_values":["bongmath","bongmath",0,0]},{"id":811,"type":"ClownOptions_Cycles_Beta","pos":[3860.137451171875,4546.62158203125],"size":[261.53253173828125,202],"flags":{},"order":121,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[2007],"slot_index":0}],"properties":{"Node name for S&R":"ClownOptions_Cycles_Beta"},"widgets_values":[5,1,0.5,1,-1,1,false]},{"id":812,"type":"Note","pos":[3846.717041015625,4748.94482421875],"size":[308.37188720703125,88],"flags":{},"order":122,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["This node is closely related to ImplicitSteps! 
It is explained in more detail in the \"Cyclosampling\" group above and to the right (northeast)."],"color":"#432","bgcolor":"#653"},{"id":807,"type":"ClownOptions_Cycles_Beta","pos":[4863.31494140625,3128.078125],"size":[261.53253173828125,202],"flags":{},"order":123,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[2006]}],"properties":{"Node name for S&R":"ClownOptions_Cycles_Beta"},"widgets_values":[5,1,0.5,5.5,-1,1,false]},{"id":818,"type":"Note","pos":[5294.46533203125,3122.492431640625],"size":[309.0342712402344,192.40728759765625],"flags":{},"order":124,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["eta_decay_scale multiplies the eta by that value after each cycle. This can help the cyclosampling process converge on an output. \n\nFor example, if you started with an eta of 0.5, and eta_decay_scale is set to 0.9, the following etas will be used for successive cycles:\n\n0.5\n0.45 (0.5 * 0.9)\n0.405 (0.5 * 0.9 * 0.9)\n0.3645 (0.5 * 0.9 * 0.9 * 0.9)"],"color":"#432","bgcolor":"#653"},{"id":809,"type":"ClownsharkChainsampler_Beta","pos":[5704.94384765625,2739.097900390625],"size":[262.0870056152344,318],"flags":{},"order":190,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":2009},{"name":"options 
2","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"multistep/res_2m",5,5.5,"resample",true],"color":"#2a363b","bgcolor":"#3f5159"},{"id":814,"type":"ClownsharkChainsampler_Beta","pos":[6042.30615234375,2736.216064453125],"size":[262.0870056152344,318],"flags":{},"order":125,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null},{"name":"options 2","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2010],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"multistep/res_2m",5,0.5,"resample",true],"color":"#2a363b","bgcolor":"#3f5159"},{"id":815,"type":"ClownsharkChainsampler_Beta","pos":[6327.12060546875,2732.67724609375],"size":[262.0870056152344,318],"flags":{},"order":189,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":2010},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null},{"name":"options 2","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2011],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.25,"multistep/res_2m",5,4,"unsample",true],"color":"#233","bgcolor":"#355"},{"id":817,"type":"ClownsharkChainsampler_Beta","pos":[6619.41552734375,2730.05029296875],"size":[262.0870056152344,318],"flags":{},"order":207,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":2011},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null},{"name":"options 2","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"multistep/res_2m",5,0.5,"resample",true],"color":"#2a363b","bgcolor":"#3f5159"},{"id":810,"type":"Note","pos":[5840.63916015625,3313.710693359375],"size":[333.3376770019531,103.0768051147461],"flags":{},"order":126,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["The two setups above are equivalent. \n\nI suggest running \"ClownOptions Cycles\" with 10 steps_to_run and just watching the progress bar. 
It's easier to understand visually."],"color":"#2a363b","bgcolor":"#3f5159"},{"id":813,"type":"ClownOptions_Cycles_Beta","pos":[5706.35546875,3124.623291015625],"size":[261.53253173828125,202],"flags":{},"order":127,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[2009],"slot_index":0}],"properties":{"Node name for S&R":"ClownOptions_Cycles_Beta"},"widgets_values":[1,1,0.25,4,-1,1,false],"color":"#233","bgcolor":"#355"},{"id":816,"type":"Note","pos":[6323.08056640625,3110.669189453125],"size":[274.8790588378906,88],"flags":{},"order":128,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Compare these settings to the \"ClownOptions Cycles\" node to the left (eta 0.25, cfg 4.0).\n"],"color":"#233","bgcolor":"#355"},{"id":692,"type":"ReFluxPatcher","pos":[7490,3210],"size":[210,82],"flags":{},"order":191,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1978}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1979],"slot_index":0}],"properties":{"Node name for S&R":"ReFluxPatcher"},"widgets_values":["float32",true]},{"id":820,"type":"Note","pos":[7770,3350],"size":[251.27003479003906,88],"flags":{},"order":129,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["THIS NODE IS NOT 
REQUIRED!\n\nEXPERIMENTAL!"],"color":"#432","bgcolor":"#653"},{"id":693,"type":"FluxLoader","pos":[7090,3210],"size":[315,282],"flags":{},"order":130,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1978],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":null},{"name":"vae","localized_name":"vae","type":"VAE","links":null},{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","links":null},{"name":"style_model","localized_name":"style_model","type":"STYLE_MODEL","links":null}],"properties":{"Node name for S&R":"FluxLoader"},"widgets_values":["consolidated_s6700.safetensors","default",".use_ckpt_clip",".none",".use_ckpt_vae","sigclip_vision_patch14_384.safetensors","flux1-redux-dev.safetensors"]},{"id":819,"type":"ClownModelLoader","pos":[7090,2740],"size":[315,266],"flags":{},"order":131,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":null},{"name":"clip","localized_name":"clip","type":"CLIP","links":null},{"name":"vae","localized_name":"vae","type":"VAE","links":null}],"properties":{"Node name for S&R":"ClownModelLoader"},"widgets_values":["hidream_i1_full_fp8.safetensors","fp8_e4m3fn","clip_g_hidream.safetensors","clip_l_hidream.safetensors","t5xxl_fp8_e4m3fn_scaled.safetensors","llama_3.1_8b_instruct_fp8_scaled.safetensors","hidream","ae.sft"]},{"id":698,"type":"Note","pos":[8098.36279296875,3386.5087890625],"size":[282.65814208984375,92.654541015625],"flags":{},"order":132,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["This node is similar to ModelSamplingAdvanced, except it uses the dimensions of the latent image to set the shift value."],"color":"#432","bgcolor":"#653"},{"id":795,"type":"Image Sharpen 
FS","pos":[8812.0927734375,2744.748046875],"size":[210,106],"flags":{},"order":133,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":null}],"outputs":[{"name":"image","localized_name":"image","type":"IMAGE","links":null}],"properties":{"Node name for S&R":"Image Sharpen FS"},"widgets_values":["hard","median",6]},{"id":821,"type":"Note","pos":[8531.4560546875,2746.428466796875],"size":[211.12799072265625,95.03887939453125],"flags":{},"order":134,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Unique method for sharpening images. Can add a lot of \"pop\" to SDXL and AuraFlow outputs that otherwise look soft due to the 4 channel VAE."],"color":"#432","bgcolor":"#653"},{"id":796,"type":"Image Grain Add","pos":[8819.529296875,2974.022216796875],"size":[210,58],"flags":{},"order":135,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","link":null}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":null}],"properties":{"Node name for S&R":"Image Grain Add"},"widgets_values":[0.5]},{"id":822,"type":"Note","pos":[8537.251953125,2954.097412109375],"size":[210.33291625976562,88],"flags":{},"order":136,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Very useful with ClipVision, IPadapter, etc. 
for avoiding blurry or blown out/oversaturated outputs."],"color":"#432","bgcolor":"#653"},{"id":797,"type":"Image Repeat Tile To Size","pos":[8819.0595703125,3152.188720703125],"size":[210,106],"flags":{},"order":137,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","link":null}],"outputs":[{"name":"image","localized_name":"image","type":"IMAGE","links":null}],"properties":{"Node name for S&R":"Image Repeat Tile To Size"},"widgets_values":[1024,1024,true]},{"id":823,"type":"Note","pos":[8541.458984375,3162.56103515625],"size":[229.4075927734375,156.3510284423828],"flags":{},"order":138,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Use in conjunction with \"ClownGuide Style\" when upscaling to prevent blurry outputs. \n\nWhen used wisely (not applied to all steps), this can improve results dramatically.\n\nConnect your original (unresized) input image to this node."],"color":"#432","bgcolor":"#653"},{"id":828,"type":"PreviewImage","pos":[9950,2830],"size":[210,26],"flags":{},"order":216,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2015}],"outputs":[],"properties":{"Node name for S&R":"PreviewImage"},"widgets_values":[]},{"id":829,"type":"PreviewImage","pos":[9950,3220],"size":[210,26],"flags":{},"order":218,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2018}],"outputs":[],"properties":{"Node name for S&R":"PreviewImage"},"widgets_values":[]},{"id":825,"type":"Frequency Separation Hard 
Light","pos":[9990,3070],"size":[260.3999938964844,66],"flags":{},"order":217,"mode":0,"inputs":[{"name":"high_pass","localized_name":"high_pass","type":"IMAGE","shape":7,"link":2016},{"name":"original","localized_name":"original","type":"IMAGE","shape":7,"link":null},{"name":"low_pass","localized_name":"low_pass","type":"IMAGE","shape":7,"link":2017}],"outputs":[{"name":"high_pass","localized_name":"high_pass","type":"IMAGE","links":null},{"name":"original","localized_name":"original","type":"IMAGE","links":[2019],"slot_index":1},{"name":"low_pass","localized_name":"low_pass","type":"IMAGE","links":null}],"properties":{"Node name for S&R":"Frequency Separation Hard Light"},"widgets_values":[]},{"id":830,"type":"PreviewImage","pos":[10300,3090],"size":[210,26],"flags":{},"order":223,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2019}],"outputs":[],"properties":{"Node name for S&R":"PreviewImage"},"widgets_values":[]},{"id":831,"type":"Note","pos":[9205.6611328125,2745.06982421875],"size":[293.7847900390625,149.87860107421875],"flags":{},"order":139,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Unique frequency separation method. Try combining the low pass layer from one image, and the high pass layer from another, such as with faces with carefully matched overlapping alignment, or other scenes. 
Better at transferring compositional information such as lighting and hue than the frequency separation method in Photoshop."],"color":"#432","bgcolor":"#653"},{"id":827,"type":"Image Median Blur","pos":[9443.4658203125,3153.701416015625],"size":[210,58],"flags":{},"order":192,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2013}],"outputs":[{"name":"image","localized_name":"image","type":"IMAGE","links":[2014],"slot_index":0}],"properties":{"Node name for S&R":"Image Median Blur"},"widgets_values":[40]},{"id":832,"type":"Image Gaussian Blur","pos":[9442.099609375,3277.42578125],"size":[210,58],"flags":{},"order":140,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":null}],"outputs":[{"name":"image","localized_name":"image","type":"IMAGE","links":null}],"properties":{"Node name for S&R":"Image Gaussian Blur"},"widgets_values":[40]},{"id":824,"type":"Frequency Separation Hard Light","pos":[9680,3070],"size":[260.3999938964844,66],"flags":{},"order":209,"mode":0,"inputs":[{"name":"high_pass","localized_name":"high_pass","type":"IMAGE","shape":7,"link":null},{"name":"original","localized_name":"original","type":"IMAGE","shape":7,"link":2012},{"name":"low_pass","localized_name":"low_pass","type":"IMAGE","shape":7,"link":2014}],"outputs":[{"name":"high_pass","localized_name":"high_pass","type":"IMAGE","links":[2015,2016],"slot_index":0},{"name":"original","localized_name":"original","type":"IMAGE","links":null},{"name":"low_pass","localized_name":"low_pass","type":"IMAGE","links":[2017,2018],"slot_index":2}],"properties":{"Node name for S&R":"Frequency Separation Hard Light"},"widgets_values":[]},{"id":833,"type":"Note","pos":[9539.69921875,2749.275146484375],"size":[255.63558959960938,88],"flags":{},"order":141,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Median blur is edge-aware, and usually gives better results than gaussian blur, if images are carefully aligned 
first."],"color":"#432","bgcolor":"#653"},{"id":840,"type":"Frequency Separation Hard Light","pos":[11160,3190],"size":[260.3999938964844,66],"flags":{},"order":211,"mode":0,"inputs":[{"name":"high_pass","localized_name":"high_pass","type":"IMAGE","shape":7,"link":null},{"name":"original","localized_name":"original","type":"IMAGE","shape":7,"link":2025},{"name":"low_pass","localized_name":"low_pass","type":"IMAGE","shape":7,"link":2026}],"outputs":[{"name":"high_pass","localized_name":"high_pass","type":"IMAGE","links":[],"slot_index":0},{"name":"original","localized_name":"original","type":"IMAGE","links":null},{"name":"low_pass","localized_name":"low_pass","type":"IMAGE","links":[2022],"slot_index":2}],"properties":{"Node name for S&R":"Frequency Separation Hard Light"},"widgets_values":[]},{"id":838,"type":"Image Median Blur","pos":[10920,3270],"size":[210,58],"flags":{},"order":194,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2024}],"outputs":[{"name":"image","localized_name":"image","type":"IMAGE","links":[2026],"slot_index":0}],"properties":{"Node name for S&R":"Image Median Blur"},"widgets_values":[40]},{"id":842,"type":"Image Median Blur","pos":[10930,3010],"size":[210,58],"flags":{},"order":193,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2031}],"outputs":[{"name":"image","localized_name":"image","type":"IMAGE","links":[2032],"slot_index":0}],"properties":{"Node name for S&R":"Image Median Blur"},"widgets_values":[40]},{"id":826,"type":"LoadImage","pos":[9212.66796875,3090.664306640625],"size":[210,314],"flags":{},"order":142,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2012,2013],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for 
S&R":"LoadImage"},"widgets_values":["00107-496528661.png","image"]},{"id":843,"type":"LoadImage","pos":[10690,2920],"size":[210,314],"flags":{},"order":143,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2030,2031],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["00109-3396456281.png","image"]},{"id":835,"type":"Frequency Separation Hard Light","pos":[11480,3030],"size":[260.3999938964844,66],"flags":{},"order":219,"mode":0,"inputs":[{"name":"high_pass","localized_name":"high_pass","type":"IMAGE","shape":7,"link":2033},{"name":"original","localized_name":"original","type":"IMAGE","shape":7,"link":null},{"name":"low_pass","localized_name":"low_pass","type":"IMAGE","shape":7,"link":2022}],"outputs":[{"name":"high_pass","localized_name":"high_pass","type":"IMAGE","links":null},{"name":"original","localized_name":"original","type":"IMAGE","links":[2023],"slot_index":1},{"name":"low_pass","localized_name":"low_pass","type":"IMAGE","links":null}],"properties":{"Node name for S&R":"Frequency Separation Hard Light"},"widgets_values":[]},{"id":837,"type":"PreviewImage","pos":[11800,3050],"size":[210,26],"flags":{},"order":224,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2023}],"outputs":[],"properties":{"Node name for S&R":"PreviewImage"},"widgets_values":[]},{"id":841,"type":"Frequency Separation Hard 
Light","pos":[11160,2910],"size":[260.3999938964844,66],"flags":{},"order":210,"mode":0,"inputs":[{"name":"high_pass","localized_name":"high_pass","type":"IMAGE","shape":7,"link":null},{"name":"original","localized_name":"original","type":"IMAGE","shape":7,"link":2030},{"name":"low_pass","localized_name":"low_pass","type":"IMAGE","shape":7,"link":2032}],"outputs":[{"name":"high_pass","localized_name":"high_pass","type":"IMAGE","links":[2033],"slot_index":0},{"name":"original","localized_name":"original","type":"IMAGE","links":null},{"name":"low_pass","localized_name":"low_pass","type":"IMAGE","links":[],"slot_index":2}],"properties":{"Node name for S&R":"Frequency Separation Hard Light"},"widgets_values":[]},{"id":836,"type":"LoadImage","pos":[10680,3210],"size":[213.1792755126953,314],"flags":{},"order":144,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2024,2025],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["00107-496528661.png","image"]},{"id":844,"type":"Note","pos":[10287.0224609375,3208.888916015625],"size":[255.63558959960938,88],"flags":{},"order":145,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["This will output the original image."],"color":"#432","bgcolor":"#653"},{"id":845,"type":"Note","pos":[11723.419921875,2862.596923828125],"size":[285.8372497558594,88.79490661621094],"flags":{},"order":146,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["This will combine high frequency (detail) information from the first image with the low frequency (color, hue, lighting) information from the second image."],"color":"#432","bgcolor":"#653"},{"id":794,"type":"Image Get Color 
Swatches","pos":[8580,5060],"size":[295.6000061035156,26],"flags":{},"order":147,"mode":0,"inputs":[{"name":"image_color_swatches","localized_name":"image_color_swatches","type":"IMAGE","link":null}],"outputs":[{"name":"color_swatches","localized_name":"color_swatches","type":"COLOR_SWATCHES","links":[2034],"slot_index":0}],"properties":{"Node name for S&R":"Image Get Color Swatches"},"widgets_values":[]},{"id":848,"type":"Note","pos":[8750,4900],"size":[328.192138671875,88],"flags":{},"order":148,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["This configuration is equivalent to \"Masks From Colors\"."],"color":"#432","bgcolor":"#653"},{"id":792,"type":"Masks From Color Swatches","pos":[8900,5040],"size":[315,46],"flags":{},"order":195,"mode":0,"inputs":[{"name":"image_color_mask","localized_name":"image_color_mask","type":"IMAGE","link":null},{"name":"color_swatches","localized_name":"color_swatches","type":"COLOR_SWATCHES","link":2034}],"outputs":[{"name":"masks","localized_name":"masks","type":"MASK","links":null}],"properties":{"Node name for S&R":"Masks From Color Swatches"},"widgets_values":[]},{"id":851,"type":"Masks Unpack 8","pos":[9280,4590],"size":[140,166],"flags":{},"order":149,"mode":0,"inputs":[{"name":"masks","localized_name":"masks","type":"MASK","link":null}],"outputs":[{"name":"masks","localized_name":"masks","type":"MASK","links":null},{"name":"masks","localized_name":"masks","type":"MASK","links":null},{"name":"masks","localized_name":"masks","type":"MASK","links":null},{"name":"masks","localized_name":"masks","type":"MASK","links":null},{"name":"masks","localized_name":"masks","type":"MASK","links":null},{"name":"masks","localized_name":"masks","type":"MASK","links":null},{"name":"masks","localized_name":"masks","type":"MASK","links":null},{"name":"masks","localized_name":"masks","type":"MASK","links":null}],"properties":{"Node name for S&R":"Masks Unpack 8"},"widgets_values":[]},{"id":852,"type":"Masks Unpack 
4","pos":[9280,4430],"size":[140,86],"flags":{},"order":150,"mode":0,"inputs":[{"name":"masks","localized_name":"masks","type":"MASK","link":null}],"outputs":[{"name":"masks","localized_name":"masks","type":"MASK","links":null},{"name":"masks","localized_name":"masks","type":"MASK","links":null},{"name":"masks","localized_name":"masks","type":"MASK","links":null},{"name":"masks","localized_name":"masks","type":"MASK","links":null}],"properties":{"Node name for S&R":"Masks Unpack 4"},"widgets_values":[]},{"id":849,"type":"Note","pos":[8590,4370],"size":[296.4569396972656,149.35540771484375],"flags":{},"order":151,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["image_color_swatches:\n\nThis is an image with colors drawn one at a time, top to bottom. It will set the order of the masks outputted in the connected \"unpack\" node to be the same as the order they appear in the \"swatches\" image.\n\nNote: white is the background and is ignored!"],"color":"#432","bgcolor":"#653"},{"id":793,"type":"Masks From Colors","pos":[8910,4430],"size":[330,46],"flags":{},"order":152,"mode":0,"inputs":[{"name":"image_color_swatches","localized_name":"image_color_swatches","type":"IMAGE","link":null},{"name":"image_color_mask","localized_name":"image_color_mask","type":"IMAGE","link":null}],"outputs":[{"name":"masks","localized_name":"masks","type":"MASK","links":[2036,2037],"slot_index":0}],"properties":{"Node name for S&R":"Masks From Colors"},"widgets_values":[]},{"id":855,"type":"MaskPreview+","pos":[8970,4590],"size":[210,26],"flags":{},"order":197,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":2037}],"outputs":[],"properties":{"Node name for S&R":"MaskPreview+"},"widgets_values":[]},{"id":854,"type":"Note","pos":[8600,4580],"size":[284.8203125,146.1818084716797],"flags":{},"order":153,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["image_color_mask:\n\nDraw a mask using the same colors used in the swatches. 
\n\nI *strongly* suggest using Mask Preview as shown to the right to get a feel for this."],"color":"#432","bgcolor":"#653"},{"id":847,"type":"MaskFromRGBCMYBW+","pos":[8300,4640],"size":[224.02476501464844,246],"flags":{},"order":154,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","link":null}],"outputs":[{"name":"red","localized_name":"red","type":"MASK","links":null},{"name":"green","localized_name":"green","type":"MASK","links":null},{"name":"blue","localized_name":"blue","type":"MASK","links":null},{"name":"cyan","localized_name":"cyan","type":"MASK","links":null},{"name":"magenta","localized_name":"magenta","type":"MASK","links":null},{"name":"yellow","localized_name":"yellow","type":"MASK","links":null},{"name":"black","localized_name":"black","type":"MASK","links":null},{"name":"white","localized_name":"white","type":"MASK","links":null}],"properties":{"Node name for S&R":"MaskFromRGBCMYBW+"},"widgets_values":[0.15,0.15,0.15]},{"id":846,"type":"Note","pos":[8270,4370],"size":[286.9356994628906,206.45907592773438],"flags":{},"order":155,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["These nodes can be useful in situations where you want to composite complex masks with many regions, without overlap, and without missing areas. They allow these to be made easily in an editor such as MSPaint.\n\nThey are somewhat similar in function to the node shown below from ComfyUI Essentials. They will all get the job done. The only advantage of the \"Masks From Colors\" nodes is that any color may be used, theoretically allowing dozens of zones to be drawn. 
For 8 or fewer zones (most cases) either may be used."],"color":"#432","bgcolor":"#653"},{"id":850,"type":"Masks Unpack 16","pos":[9280,4840],"size":[140,326],"flags":{},"order":196,"mode":0,"inputs":[{"name":"masks","localized_name":"masks","type":"MASK","link":2036}],"outputs":[{"name":"masks","localized_name":"masks","type":"MASK","links":null},{"name":"masks","localized_name":"masks","type":"MASK","links":null},{"name":"masks","localized_name":"masks","type":"MASK","links":null},{"name":"masks","localized_name":"masks","type":"MASK","links":null},{"name":"masks","localized_name":"masks","type":"MASK","links":null},{"name":"masks","localized_name":"masks","type":"MASK","links":null},{"name":"masks","localized_name":"masks","type":"MASK","links":null},{"name":"masks","localized_name":"masks","type":"MASK","links":null},{"name":"masks","localized_name":"masks","type":"MASK","links":null},{"name":"masks","localized_name":"masks","type":"MASK","links":null},{"name":"masks","localized_name":"masks","type":"MASK","links":null},{"name":"masks","localized_name":"masks","type":"MASK","links":null},{"name":"masks","localized_name":"masks","type":"MASK","links":null},{"name":"masks","localized_name":"masks","type":"MASK","links":null},{"name":"masks","localized_name":"masks","type":"MASK","links":null},{"name":"masks","localized_name":"masks","type":"MASK","links":null}],"properties":{"Node name for S&R":"Masks Unpack 16"},"widgets_values":[]},{"id":858,"type":"VAEEncode","pos":[12693.298828125,3026.815673828125],"size":[140,46],"flags":{},"order":212,"mode":0,"inputs":[{"name":"pixels","localized_name":"pixels","type":"IMAGE","link":2039},{"name":"vae","localized_name":"vae","type":"VAE","link":null}],"outputs":[{"name":"LATENT","localized_name":"LATENT","type":"LATENT","links":[2040],"slot_index":0}],"properties":{"Node name for 
S&R":"VAEEncode"},"widgets_values":[]},{"id":862,"type":"Note","pos":[12226.9638671875,2733.324951171875],"size":[307.74560546875,219.57456970214844],"flags":{},"order":156,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["ConditioningBatch4 (and 8) apply conditioning to each tile in the order received by the sampler. This is very useful for ensuring coherent results. This WF avoids the creation of seams, and is efficient, only requiring 4 tiles for upscales. \n\nConditioningBatch4 currently is not supported by the negative conditioning input. Use a standard negative prompt (or nothing).\n\nIf you separate the tiles individually, you should be able to use Flux Redux conditioning for each of the ConditioningBatch4 inputs."],"color":"#432","bgcolor":"#653"},{"id":860,"type":"VAEDecode","pos":[13233.298828125,2776.815673828125],"size":[140,46],"flags":{},"order":225,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":2041},{"name":"vae","localized_name":"vae","type":"VAE","link":null}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2042],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode"},"widgets_values":[]},{"id":853,"type":"ConditioningBatch4","pos":[12613.298828125,2746.815673828125],"size":[228.39999389648438,86],"flags":{},"order":157,"mode":0,"inputs":[{"name":"conditioning_0","localized_name":"conditioning_0","type":"CONDITIONING","link":null},{"name":"conditioning_1","localized_name":"conditioning_1","type":"CONDITIONING","shape":7,"link":null},{"name":"conditioning_2","localized_name":"conditioning_2","type":"CONDITIONING","shape":7,"link":null},{"name":"conditioning_3","localized_name":"conditioning_3","type":"CONDITIONING","shape":7,"link":null}],"outputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","links":[2038],"slot_index":0}],"properties":{"Node name for 
S&R":"ConditioningBatch4"},"widgets_values":[]},{"id":856,"type":"ClownsharKSampler_Beta","pos":[12883.298828125,2776.815673828125],"size":[315,418],"flags":{},"order":220,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":2038},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":2043},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":2040},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2041],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"multistep/res_2m","beta57",30,-1,1,5.5,0,"randomize","standard",true]},{"id":863,"type":"CLIPTextEncode","pos":[12610.970703125,2882.634033203125],"size":[229.78173828125,88],"flags":{},"order":158,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":null}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2043]}],"properties":{"Node name for 
S&R":"CLIPTextEncode"},"widgets_values":[""]},{"id":864,"type":"ImageResize+","pos":[12208.77734375,3038.4794921875],"size":[210,218],"flags":{},"order":159,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","link":null}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2044]},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"ImageResize+"},"widgets_values":[1792,1792,"nearest","stretch","always",0]},{"id":857,"type":"ImageTile+","pos":[12453.298828125,3036.815673828125],"size":[210,234],"flags":{},"order":198,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","link":2044}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2039],"slot_index":0},{"name":"tile_width","localized_name":"tile_width","type":"INT","links":null},{"name":"tile_height","localized_name":"tile_height","type":"INT","links":null},{"name":"overlap_x","localized_name":"overlap_x","type":"INT","links":null},{"name":"overlap_y","localized_name":"overlap_y","type":"INT","links":null}],"properties":{"Node name for S&R":"ImageTile+"},"widgets_values":[2,2,0,128,128]},{"id":859,"type":"ImageUntile+","pos":[13413.298828125,2776.815673828125],"size":[210,130],"flags":{},"order":227,"mode":0,"inputs":[{"name":"tiles","localized_name":"tiles","type":"IMAGE","link":2042}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":null}],"properties":{"Node name for S&R":"ImageUntile+"},"widgets_values":[128,128,2,2]},{"id":868,"type":"Note","pos":[9664.4189453125,3771.01416015625],"size":[284.8223571777344,176.87057495117188],"flags":{},"order":160,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Styles are supported for:\n\nHiDream (outstanding results)\n\nFlux (best results are with style loras, as the base model is severely lacking understanding of 
styles)\n\nWAN \n\nUse of the \"Re...\" patcher nodes is required, as custom model code is used."],"color":"#432","bgcolor":"#653"},{"id":869,"type":"TorchCompileModels","pos":[7795.904296875,2736.478515625],"size":[247.29759216308594,178],"flags":{},"order":161,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":null}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":null}],"properties":{"Node name for S&R":"TorchCompileModels"},"widgets_values":["inductor",false,"default",false,64,0]},{"id":703,"type":"SD35Loader","pos":[7438.462890625,2740.95849609375],"size":[315,218],"flags":{},"order":162,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":null},{"name":"clip","localized_name":"clip","type":"CLIP","links":null},{"name":"vae","localized_name":"vae","type":"VAE","links":null}],"properties":{"Node name for S&R":"SD35Loader"},"widgets_values":["sd3.5_medium.safetensors","default",".use_ckpt_clip",".none",".none",".use_ckpt_vae"]},{"id":700,"type":"Note","pos":[7480,3060],"size":[225.09121704101562,88],"flags":{},"order":163,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["This node must be used when using regional conditioning with Flux. 
"],"color":"#432","bgcolor":"#653"},{"id":870,"type":"Note","pos":[8087.9755859375,2736.00830078125],"size":[291.73583984375,88],"flags":{},"order":164,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Generic compile node for many models."],"color":"#432","bgcolor":"#653"},{"id":696,"type":"ModelSamplingAdvanced","pos":[8136.62109375,3062.419677734375],"size":[210,82],"flags":{},"order":165,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":null}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":null}],"properties":{"Node name for S&R":"ModelSamplingAdvanced"},"widgets_values":["exponential",3]},{"id":697,"type":"Note","pos":[8088.63037109375,2876.072998046875],"size":[299.7002868652344,122.62284851074219],"flags":{},"order":166,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["ModelSamplingAdvanced many different models, including AuraFlow, SD3.5, Flux, and more, including video models.\n\nWhen \"scaling\" is set to \"exponential\" it uses the method employed by Flux, which is actually quite good with SD3.5. 
\"linear\" is the default method used by SD3.5."],"color":"#432","bgcolor":"#653"},{"id":788,"type":"ClownGuide_Style_Beta","pos":[9993.591796875,4034.93017578125],"size":[243.85076904296875,286],"flags":{},"order":167,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":null}],"properties":{"Node name for S&R":"ClownGuide_Style_Beta"},"widgets_values":["positive","WCT",1,1,"constant",0,15,false]},{"id":865,"type":"Note","pos":[9667.3056640625,4012.013427734375],"size":[283.76544189453125,448.7384338378906],"flags":{},"order":168,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["ClownGuide style: the settings shown are the ones you will generally use. WCT is the more accurate of the two methods. If you have issues, you can fall back to AdaIN. \n\nIt is best to use this on the first 1/2 of steps or so. Be sure to provide some information about the style in the prompt for best results \"cel-shaded anime illustration of...\" \"\"gritty illustration of....\" \"analog photo of\".\n\nThe mask current has no effect, but is there as a placeholder as regional style methods are under development.\n\n\nIf you are using CFG = 1.0 (typical with distilled models such as Flux or HiDream Dev), synweight has no effect and can be ignored.\n\nSynweight simply applies the same style to the opposite conditioning (so if apply_to = positive, and synweight is at 0.5, it will use it at 0.5 strength on the negative). In the vast majority of cases, it's best to leave synweight at the default. Occasionally, setting it to 0.5 or 0.0 can be helpful, but it can result in burning the image due to issues with CFG. 
\n\nStandard guides may be inputted into this node, if you wish to use them together.\n"],"color":"#432","bgcolor":"#653"},{"id":871,"type":"Note","pos":[9669.4208984375,4527.666015625],"size":[277.0335998535156,102.53260040283203],"flags":{},"order":169,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["ClownGuide Mean: somewhat similar effect, but does not require the \"Re...\" patcher nodes and works with all models. Effect is typically considerably less precise."],"color":"#432","bgcolor":"#653"},{"id":866,"type":"Note","pos":[10292.3017578125,4024.706787109375],"size":[284.8223571777344,176.87057495117188],"flags":{},"order":170,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["BLUR KILLER TIP:\n\nWhen generating photography with Flux or HiDream (where blur can be frustratingly difficult to avoid), try using a style guide for the first 1/3rd of steps that is a sharp photograph with similar lighting/hues to what you are aiming for. You might need to try a handful of photos before landing on a \"hit\", but the right one will eliminate blur 100%, even with a close up portrait photograph.\n"],"color":"#432","bgcolor":"#653"},{"id":872,"type":"ClownGuide_AdaIN_MMDiT_Beta","pos":[10680,3810],"size":[246.13087463378906,430],"flags":{},"order":171,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":null}],"properties":{"Node name for 
S&R":"ClownGuide_AdaIN_MMDiT_Beta"},"widgets_values":[1,"constant","","","20","0.5",0,15,false]},{"id":874,"type":"ClownGuide_AttnInj_MMDiT_Beta","pos":[10990,3810],"size":[272.0969543457031,718],"flags":{},"order":172,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":null}],"properties":{"Node name for S&R":"ClownGuide_AttnInj_MMDiT_Beta"},"widgets_values":[1,"constant","0,1,3","1.0","20","0.5",0,0,1,0,0,0,0,0,0,0,0,0,0,15,false]},{"id":873,"type":"Note","pos":[10590,4300],"size":[348.2928771972656,313.42919921875],"flags":{},"order":173,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["ClownGuide AdaIN and AttnInj:\n\nAdvanced experimental nodes for HiDream. Very strong effect and can be used together with all other guide nodes.\n\nBest used like a monkey in a missile silo. Start pushing buttons and you'll win eventually!\n\nList the blocks you wish the effect to be applied to, and the weight of the effect on that block, in the same order. \"all\" will use all blocks of that type, and if only one weight is listed, it will use that for all blocks listed.\n\nThere are 16 double blocks, and 32 single blocks. Each is numbered beginning at 0. For example, the following block numberings are equivalent for double_blocks:\n\nall\n0-15\n0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15\n"],"color":"#432","bgcolor":"#653"},{"id":875,"type":"Note","pos":[10980,4590],"size":[301.1705017089844,233.60943603515625],"flags":{},"order":174,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Injects calculated attention from the guide into the main sampling process. 
This will carry over some compositional information, as well as lighting. It can be very interesting in combination with ClownGuide Style or ClownGuide AdaIN (MMDiT).\n\nimg_v will have the most color/style information with the least effect on composition.\n\nimg_k will increase the amount of compositional information.\n\nimg_q will increase the compositional information to the point where it can begin looking more like a traditional guide mode."],"color":"#432","bgcolor":"#653"},{"id":867,"type":"ClownGuide_Mean_Beta","pos":[9997.337890625,4526.90869140625],"size":[241.34442138671875,238],"flags":{},"order":175,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":null}],"properties":{"Node name for S&R":"ClownGuide_Mean_Beta"},"widgets_values":[1,1,"constant",0,15,false]},{"id":679,"type":"SharkSampler_Beta","pos":[1370,3140],"size":[285.713623046875,386],"flags":{},"order":199,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sampler","localized_name":"sampler","type":"SAMPLER","shape":7,"link":1973},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":2046},{"name":"options 
2","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":null},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"SharkSampler_Beta"},"widgets_values":["beta57",30,-1,1,5.5,0,"fixed","standard"]},{"id":876,"type":"SharkOptions_GuiderInput","pos":[1051.8299560546875,3379.638427734375],"size":[282.30291748046875,46],"flags":{},"order":176,"mode":0,"inputs":[{"name":"guider","localized_name":"guider","type":"GUIDER","link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[2046],"slot_index":0}],"properties":{"Node name for S&R":"SharkOptions_GuiderInput"}},{"id":802,"type":"Note","pos":[690.13916015625,3410.965576171875],"size":[321.8917236328125,108.77723693847656],"flags":{},"order":95,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Typically, SharkSampler slots into workflows where SamplerCustom would have been used.\n\nSharkOptions GuiderInput allows it to be used like SamplerCustomAdvanced, with any guider input of your choosing. 
It may also be used with ClownSharkSampler."],"color":"#432","bgcolor":"#653"}],"links":[[1365,453,0,454,0,"IMAGE"],[1890,584,0,606,0,"MODEL"],[1904,606,0,601,0,"MODEL"],[1907,598,0,610,1,"CONDITIONING"],[1908,599,0,610,2,"CONDITIONING"],[1909,601,0,610,0,"MODEL"],[1910,601,0,612,0,"MODEL"],[1911,598,0,612,1,"CONDITIONING"],[1912,599,0,612,2,"CONDITIONING"],[1914,598,0,613,1,"CONDITIONING"],[1915,599,0,613,2,"CONDITIONING"],[1923,613,0,453,0,"LATENT"],[1926,609,0,613,0,"MODEL"],[1937,606,1,598,0,"CLIP"],[1938,606,1,599,0,"CLIP"],[1939,620,0,606,1,"CLIP"],[1940,621,0,453,1,"VAE"],[1947,624,0,612,6,"OPTIONS"],[1948,625,0,613,6,"OPTIONS"],[1949,610,0,612,3,"LATENT"],[1950,612,0,613,3,"LATENT"],[1951,626,0,610,6,"OPTIONS"],[1952,640,0,641,3,"LATENT"],[1953,641,0,642,3,"LATENT"],[1962,666,0,660,6,"OPTIONS"],[1963,661,0,660,7,"OPTIONS"],[1968,676,0,660,9,"OPTIONS"],[1971,662,0,660,10,"OPTIONS"],[1972,665,0,660,11,"OPTIONS"],[1973,680,0,679,3,"SAMPLER"],[1974,682,0,660,12,"OPTIONS"],[1975,684,0,682,0,"SIGMAS"],[1976,684,0,686,0,"SIGMAS"],[1977,688,0,691,5,"GUIDES"],[1978,693,0,692,0,"MODEL"],[1979,692,0,694,0,"MODEL"],[1980,694,0,695,0,"MODEL"],[1982,724,0,720,1,"CONDITIONING"],[1983,724,0,720,2,"CONDITIONING"],[1984,722,0,720,5,"GUIDES"],[1985,723,0,721,5,"GUIDES"],[1986,751,0,752,2,"MASK"],[1987,753,0,752,3,"MASK"],[1988,756,0,754,2,"MASK"],[1989,757,0,754,3,"MASK"],[1990,760,0,660,13,"OPTIONS"],[1991,763,0,660,8,"OPTIONS"],[1992,735,0,770,5,"GUIDES"],[1994,772,0,770,6,"OPTIONS"],[1995,775,0,778,0,"COND_REGIONS"],[1996,778,0,779,0,"COND_REGIONS"],[1999,779,0,783,0,"COND_REGIONS"],[2000,783,0,776,0,"COND_REGIONS"],[2003,798,0,660,14,"OPTIONS"],[2004,804,0,805,4,"LATENT"],[2005,805,0,806,4,"LATENT"],[2006,807,0,805,6,"OPTIONS"],[2007,811,0,660,15,"OPTIONS"],[2009,813,0,809,6,"OPTIONS"],[2010,814,0,815,4,"LATENT"],[2011,815,0,817,4,"LATENT"],[2012,826,0,824,1,"IMAGE"],[2013,826,0,827,0,"IMAGE"],[2014,827,0,824,2,"IMAGE"],[2015,824,0,828,0,"IMAGE"],[2016,824,0,825,0,"IMAGE"]
,[2017,824,2,825,2,"IMAGE"],[2018,824,2,829,0,"IMAGE"],[2019,825,1,830,0,"IMAGE"],[2022,840,2,835,2,"IMAGE"],[2023,835,1,837,0,"IMAGE"],[2024,836,0,838,0,"IMAGE"],[2025,836,0,840,1,"IMAGE"],[2026,838,0,840,2,"IMAGE"],[2030,843,0,841,1,"IMAGE"],[2031,843,0,842,0,"IMAGE"],[2032,842,0,841,2,"IMAGE"],[2033,841,0,835,0,"IMAGE"],[2034,794,0,792,1,"COLOR_SWATCHES"],[2036,793,0,850,0,"MASK"],[2037,793,0,855,0,"MASK"],[2038,853,0,856,1,"CONDITIONING"],[2039,857,0,858,0,"IMAGE"],[2040,858,0,856,3,"LATENT"],[2041,856,0,860,0,"LATENT"],[2042,860,0,859,0,"IMAGE"],[2043,863,0,856,2,"CONDITIONING"],[2044,864,0,857,0,"IMAGE"],[2046,876,0,679,6,"OPTIONS"]],"groups":[{"id":1,"title":"UNSAMPLING SETUP","bounding":[727.6610717773438,4702.3486328125,1679.59423828125,1066.484619140625],"color":"#3f789e","font_size":24,"flags":{}},{"id":2,"title":"CHAINED SAMPLER SETUP","bounding":[726.7158203125,3763.15576171875,1680.4798583984375,894.4533081054688],"color":"#3f789e","font_size":24,"flags":{}},{"id":3,"title":"INTRODUCTION TO CLOWNSAMPLING","bounding":[603.1690063476562,2607.28857421875,1866.77099609375,983.0913696289062],"color":"#3f789e","font_size":24,"flags":{}},{"id":5,"title":"OPTIONS AND AUTOMATION","bounding":[2599.417236328125,2632.92578125,1724.455078125,3136.83154296875],"color":"#3f789e","font_size":24,"flags":{}},{"id":6,"title":"GUIDES","bounding":[4494.4521484375,3692.791259765625,1757.123291015625,2078.85498046875],"color":"#3f789e","font_size":24,"flags":{}},{"id":7,"title":"LOADERS AND PATCHERS","bounding":[7042.78515625,2636.466552734375,1379.5494384765625,916.3328247070312],"color":"#3f789e","font_size":24,"flags":{}},{"id":8,"title":"ULTRACASCADE","bounding":[-3341.39892578125,2604.916748046875,3831.125244140625,1936.0570068359375],"color":"#3f789e","font_size":24,"flags":{}},{"id":9,"title":"REGIONAL CONDITIONING","bounding":[6335.28125,3695.2412109375,3155.973388671875,1510.357421875],"color":"#3f789e","font_size":24,"flags":{}},{"id":10,"title":"Cyclosampling 
(looping a sampler node)","bounding":[4505.0283203125,2635.9287109375,2438.49853515625,916.0137939453125],"color":"#3f789e","font_size":24,"flags":{}},{"id":11,"title":"Miscellaneous Image Nodes","bounding":[8475.5224609375,2640.341552734375,610.4679565429688,912.4259033203125],"color":"#3f789e","font_size":24,"flags":{}},{"id":12,"title":"Frequency Separation","bounding":[9150.265625,2642.074462890625,2909.011474609375,911.339111328125],"color":"#3f789e","font_size":24,"flags":{}},{"id":13,"title":"Tiled Upscales with Tiled Conditioning","bounding":[12136.822265625,2643.802490234375,1541.95556640625,664.3409423828125],"color":"#3f789e","font_size":24,"flags":{}},{"id":14,"title":"Style Transfer","bounding":[9615.3837890625,3690.478515625,2453.906494140625,1514.1885986328125],"color":"#3f789e","font_size":24,"flags":{}}],"config":{},"extra":{"ds":{"scale":1.6105100000000008,"offset":[1291.1169756467461,-2415.779581669771]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0},"version":0.4} ================================================ FILE: example_workflows/sd35 medium unsampling data.json ================================================ {"last_node_id":635,"last_link_id":2023,"nodes":[{"id":627,"type":"SD35Loader","pos":[602.6103515625,-123.47957611083984],"size":[315,218],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2014],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[2010],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[2011,2012],"slot_index":2}],"properties":{"Node name for 
S&R":"SD35Loader"},"widgets_values":["sd3.5_medium.safetensors","default","clip_l_sd35.safetensors","clip_g_sd35.safetensors","t5xxl_fp16.safetensors","sd35_vae.safetensors"]},{"id":628,"type":"LoadImage","pos":[599.166015625,156.38429260253906],"size":[315,314],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2017]},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["ComfyUI_14254_.png","image"]},{"id":107,"type":"CLIPTextEncode","pos":[959.4713745117188,-123.3353500366211],"size":[282.33453369140625,173.58438110351562],"flags":{"collapsed":false},"order":2,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","label":"clip","type":"CLIP","link":2010}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","label":"CONDITIONING","type":"CONDITIONING","shape":3,"links":[2018],"slot_index":0}],"title":"Positive Prompt","properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["the mournful lamentations of of a female rock singer on stage with chaos behind her, her face screaming her sorrowful refrains the despairing cries of anguished screams howling agonized moans, her pained whispers mournful sighs distant echoes across the smoky stage, fading memories of lost loves, forgotten dreams, shattered hopes, crushed spirits, broken 
hearts"]},{"id":629,"type":"VAEEncodeAdvanced","pos":[961.6968994140625,123.66181182861328],"size":[278.0284423828125,280.5834045410156],"flags":{},"order":3,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":2017},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":2012}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[2013,2020,2022],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":null},{"name":"mask","localized_name":"mask","type":"MASK","links":null},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[2015]},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"VAEEncodeAdvanced"},"widgets_values":["false",1024,1024,"red",false,"16_channels"]},{"id":632,"type":"ModelSamplingAdvancedResolution","pos":[962.5586547851562,-316.3705139160156],"size":[277.62237548828125,126],"flags":{},"order":6,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":2014},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":2015}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2016],"slot_index":0}],"properties":{"Node name for 
S&R":"ModelSamplingAdvancedResolution"},"widgets_values":["exponential",1.35,0.85]},{"id":591,"type":"VAEDecode","pos":[1924.08251953125,-233.2501983642578],"size":[210,46],"flags":{"collapsed":false},"order":9,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","label":"samples","type":"LATENT","link":2008},{"name":"vae","localized_name":"vae","label":"vae","type":"VAE","link":2011}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","label":"IMAGE","type":"IMAGE","shape":3,"links":[2019],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode"},"widgets_values":[]},{"id":633,"type":"SaveImage","pos":[1921.8458251953125,-123.4797134399414],"size":[436.4179382324219,508.5302429199219],"flags":{},"order":10,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2019}],"outputs":[],"properties":{},"widgets_values":["ComfyUI"]},{"id":631,"type":"ClownsharkChainsampler_Beta","pos":[1605.8143310546875,-124.34080505371094],"size":[280.55523681640625,510],"flags":{},"order":8,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":2005},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":2023},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2008],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"multistep/res_3m",-1,5.5,"resample",true]},{"id":630,"type":"ClownsharKSampler_Beta","pos":[1271.7001953125,-124.3408432006836],"size":[291.7499084472656,630],"flags":{},"order":7,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":2016},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":2018},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":2013},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":2021},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2005]},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"multistep/res_3m","beta57",60,-1,1,1,0,"fixed","unsample",true]},{"id":634,"type":"ClownGuide_Beta","pos":[1276.0064697265625,-480.84442138671875],"size":[284.860595703125,290.8609924316406],"flags":{},"order":4,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":2020},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[2021],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownGuide_Beta"},"widgets_values":["data",false,false,0.5,1,"constant",0,-1,false]},{"id":635,"type":"ClownGuide_Beta","pos":[1604.09326171875,-479.9832763671875],"size":[284.860595703125,290.8609924316406],"flags":{},"order":5,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":2022},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[2023],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuide_Beta"},"widgets_values":["data",false,true,0.35,0.35,"beta57",0,12,false]}],"links":[[2005,630,0,631,4,"LATENT"],[2008,631,0,591,0,"LATENT"],[2010,627,1,107,0,"CLIP"],[2011,627,2,591,1,"VAE"],[2012,627,2,629,4,"VAE"],[2013,629,0,630,3,"LATENT"],[2014,627,0,632,0,"MODEL"],[2015,629,3,632,1,"LATENT"],[2016,632,0,630,0,"MODEL"],[2017,628,0,629,0,"IMAGE"],[2018,107,0,630,1,"CONDITIONING"],[2019,591,0,633,0,"IMAGE"],[2020,629,0,634,0,"LATENT"],[2021,634,0,630,5,"GUIDES"],[2022,629,0,635,0,"LATENT"],[2023,635,0,631,5,"GUIDES"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.7985878990923265,"offset":[672.6014509912476,552.1175843760627]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0},"version":0.4} ================================================ FILE: example_workflows/sd35 medium unsampling.json ================================================ {"last_node_id":635,"last_link_id":2023,"nodes":[{"id":627,"type":"SD35Loader","pos":[602.6103515625,-123.47957611083984],"size":[315,218],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2014],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[2010],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[2011,2012],"slot_index":2}],"properties":{"Node name for 
S&R":"SD35Loader"},"widgets_values":["sd3.5_medium.safetensors","default","clip_l_sd35.safetensors","clip_g_sd35.safetensors","t5xxl_fp16.safetensors","sd35_vae.safetensors"]},{"id":628,"type":"LoadImage","pos":[599.166015625,156.38429260253906],"size":[315,314],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2017]},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["ComfyUI_14254_.png","image"]},{"id":107,"type":"CLIPTextEncode","pos":[959.4713745117188,-123.3353500366211],"size":[282.33453369140625,173.58438110351562],"flags":{"collapsed":false},"order":2,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","label":"clip","type":"CLIP","link":2010}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","label":"CONDITIONING","type":"CONDITIONING","shape":3,"links":[2018],"slot_index":0}],"title":"Positive Prompt","properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["the mournful lamentations of of a female rock singer on stage with chaos behind her, her face screaming her sorrowful refrains the despairing cries of anguished screams howling agonized moans, her pained whispers mournful sighs distant echoes across the smoky stage, fading memories of lost loves, forgotten dreams, shattered hopes, crushed spirits, broken 
hearts"]},{"id":629,"type":"VAEEncodeAdvanced","pos":[961.6968994140625,123.66181182861328],"size":[278.0284423828125,280.5834045410156],"flags":{},"order":3,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":2017},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":2012}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[2013,2020,2022],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":null},{"name":"mask","localized_name":"mask","type":"MASK","links":null},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[2015]},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"VAEEncodeAdvanced"},"widgets_values":["false",1024,1024,"red",false,"16_channels"]},{"id":632,"type":"ModelSamplingAdvancedResolution","pos":[962.5586547851562,-316.3705139160156],"size":[277.62237548828125,126],"flags":{},"order":6,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":2014},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":2015}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2016],"slot_index":0}],"properties":{"Node name for 
S&R":"ModelSamplingAdvancedResolution"},"widgets_values":["exponential",1.35,0.85]},{"id":634,"type":"ClownGuide_Beta","pos":[1276.0064697265625,-480.84442138671875],"size":[284.860595703125,290.8609924316406],"flags":{},"order":4,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":2020},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[2021],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuide_Beta"},"widgets_values":["epsilon",false,false,0.5,1,"constant",0,-1,false]},{"id":633,"type":"SaveImage","pos":[1921.8458251953125,-123.4797134399414],"size":[436.4179382324219,508.5302429199219],"flags":{},"order":10,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2019}],"outputs":[],"properties":{},"widgets_values":["ComfyUI"]},{"id":631,"type":"ClownsharkChainsampler_Beta","pos":[1605.8143310546875,-124.34080505371094],"size":[280.55523681640625,510],"flags":{},"order":8,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":2005},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":2023},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2008],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"optio
ns","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"multistep/res_3m",-1,5.5,"resample",true]},{"id":630,"type":"ClownsharKSampler_Beta","pos":[1271.7001953125,-124.3408432006836],"size":[291.7499084472656,630],"flags":{},"order":7,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":2016},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":2018},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":2013},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":2021},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2005]},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"multistep/res_3m","beta57",60,-1,1,1,0,"fixed","unsample",true]},{"id":635,"type":"ClownGuide_Beta","pos":[1604.09326171875,-479.9832763671875],"size":[284.860595703125,290.8609924316406],"flags":{},"order":5,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":2022},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[2023],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownGuide_Beta"},"widgets_values":["epsilon",false,true,0.5,1,"beta57",0,25,false]},{"id":591,"type":"VAEDecode","pos":[1924.08251953125,-233.2501983642578],"size":[140,46],"flags":{"collapsed":false},"order":9,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","label":"samples","type":"LATENT","link":2008},{"name":"vae","localized_name":"vae","label":"vae","type":"VAE","link":2011}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","label":"IMAGE","type":"IMAGE","shape":3,"links":[2019],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode"},"widgets_values":[]}],"links":[[2005,630,0,631,4,"LATENT"],[2008,631,0,591,0,"LATENT"],[2010,627,1,107,0,"CLIP"],[2011,627,2,591,1,"VAE"],[2012,627,2,629,4,"VAE"],[2013,629,0,630,3,"LATENT"],[2014,627,0,632,0,"MODEL"],[2015,629,3,632,1,"LATENT"],[2016,632,0,630,0,"MODEL"],[2017,628,0,629,0,"IMAGE"],[2018,107,0,630,1,"CONDITIONING"],[2019,591,0,633,0,"IMAGE"],[2020,629,0,634,0,"LATENT"],[2021,634,0,630,5,"GUIDES"],[2022,629,0,635,0,"LATENT"],[2023,635,0,631,5,"GUIDES"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.635079908265751,"offset":[1291.723098320105,628.7383473687522]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0},"version":0.4} ================================================ FILE: example_workflows/sdxl regional antiblur.json ================================================ 
{"last_node_id":730,"last_link_id":2113,"nodes":[{"id":13,"type":"Reroute","pos":[1280,-650],"size":[75,26],"flags":{},"order":12,"mode":0,"inputs":[{"name":"","type":"*","link":2098}],"outputs":[{"name":"","type":"MODEL","links":[1967],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":490,"type":"Reroute","pos":[1280,-610],"size":[75,26],"flags":{},"order":9,"mode":0,"inputs":[{"name":"","type":"*","link":2099}],"outputs":[{"name":"","type":"CLIP","links":[1939,2092,2112],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":14,"type":"Reroute","pos":[1280,-570],"size":[75,26],"flags":{},"order":10,"mode":0,"inputs":[{"name":"","type":"*","link":2100}],"outputs":[{"name":"","type":"VAE","links":[18,1328],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":398,"type":"SaveImage","pos":[1379.9996337890625,-267.2835998535156],"size":[341.7508850097656,561.0067749023438],"flags":{},"order":21,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":1329}],"outputs":[],"properties":{"Node name for S&R":"SaveImage","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["ComfyUI"]},{"id":701,"type":"Note","pos":[80,-520],"size":[342.05950927734375,88],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["I usually just lazily draw masks in Load Image nodes (with some random image loaded), but for the sake of reproducibility, here's another approach."],"color":"#432","bgcolor":"#653"},{"id":712,"type":"Note","pos":[-210,-520],"size":[245.76409912109375,91.6677017211914],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["So long as these masks are all the same size, the regional conditioning nodes will handle resizing to the image size for 
you."],"color":"#432","bgcolor":"#653"},{"id":676,"type":"InvertMask","pos":[20,-370],"size":[142.42074584960938,26],"flags":{},"order":7,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":2073}],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[2083],"slot_index":0}],"properties":{"Node name for S&R":"InvertMask"},"widgets_values":[]},{"id":7,"type":"VAEEncodeAdvanced","pos":[719.6110229492188,16.752899169921875],"size":[261.2217712402344,279.3136901855469],"flags":{},"order":16,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":null},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":18}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":[],"slot_index":1},{"name":"mask","localized_name":"mask","type":"MASK","links":[],"slot_index":2},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[1399],"slot_index":3},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"VAEEncodeAdvanced","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":["false",1024,1024,"red",false,"16_channels"]},{"id":710,"type":"MaskPreview","pos":[180,-190],"size":[210,246],"flags":{},"order":17,"mode":0,"inputs":[{"name":"mask","localized_name":"mask","type":"MASK","link":2054}],"outputs":[],"properties":{"Node name for 
S&R":"MaskPreview"},"widgets_values":[]},{"id":397,"type":"VAEDecode","pos":[1382.3662109375,-374.17059326171875],"size":[210,46],"flags":{},"order":20,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":2096},{"name":"vae","localized_name":"vae","type":"VAE","link":1328}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[1329],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[]},{"id":715,"type":"SolidMask","pos":[-220,-370],"size":[210,106],"flags":{},"order":2,"mode":0,"inputs":[],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[2073],"slot_index":0}],"properties":{"Node name for S&R":"SolidMask"},"widgets_values":[1,1024,1024]},{"id":716,"type":"SolidMask","pos":[-220,-220],"size":[210,106],"flags":{},"order":3,"mode":0,"inputs":[],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[2065],"slot_index":0}],"properties":{"Node name for S&R":"SolidMask"},"widgets_values":[1,384,864]},{"id":709,"type":"MaskComposite","pos":[190,-370],"size":[210,126],"flags":{},"order":11,"mode":0,"inputs":[{"name":"destination","localized_name":"destination","type":"MASK","link":2083},{"name":"source","localized_name":"source","type":"MASK","link":2065}],"outputs":[{"name":"MASK","localized_name":"MASK","type":"MASK","links":[2054,2091],"slot_index":0}],"properties":{"Node name for S&R":"MaskComposite"},"widgets_values":[256,160,"add"]},{"id":704,"type":"Note","pos":[101.74818420410156,112.67951965332031],"size":[290.7107238769531,155.35317993164062],"flags":{},"order":4,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["ClownRegionalConditionings:\n\nTry raising or lowering weight, and changing the weight scheduler from beta57 to Karras (weakens more quickly), or to linear quadratic (stronger late).\n\nTry changing region_bleed_start_step (earlier will make the image blend together more), 
and end_step."],"color":"#432","bgcolor":"#653"},{"id":722,"type":"ClownRegionalConditioning2","pos":[690,-370],"size":[287.75750732421875,330],"flags":{},"order":18,"mode":0,"inputs":[{"name":"conditioning_masked","localized_name":"conditioning_masked","type":"CONDITIONING","shape":7,"link":2094},{"name":"conditioning_unmasked","localized_name":"conditioning_unmasked","type":"CONDITIONING","shape":7,"link":2093},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":2091},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"region_bleeds","localized_name":"region_bleeds","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"conditioning","localized_name":"conditioning","type":"CONDITIONING","links":[2095],"slot_index":0}],"properties":{"Node name for S&R":"ClownRegionalConditioning2"},"widgets_values":[1,0,0,"constant",0,-1,"boolean_masked",32,false]},{"id":703,"type":"Note","pos":[423.10699462890625,-96.14085388183594],"size":[241.9689483642578,386.7543640136719],"flags":{},"order":5,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["edge_width also creates some overlap around the edges of the mask.\n\nboolean_masked means that the masked area can \"see\" the rest of the image, but the unmasked area cannot. \"boolean\" would mean neither area could see the rest of the image.\n\nTry setting to boolean_unmasked and see what happens!\n\nIf you still have blur, try reducing edge_width (and if you have seams, try increasing it, or setting end_step to something like 20). \n\nAlso verify that you can generate the background prompt alone without blur (if you can't, this won't work). 
And don't get stuck on one seed.\n\nVaguely human-shaped masks also tend to work better than the blocky one used here."],"color":"#432","bgcolor":"#653"},{"id":725,"type":"ReSDPatcher","pos":[1012.9199829101562,-651.4929809570312],"size":[210,82],"flags":{},"order":8,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":2097}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[2098],"slot_index":0}],"properties":{"Node name for S&R":"ReSDPatcher"},"widgets_values":["float64",true]},{"id":724,"type":"CheckpointLoaderSimple","pos":[549.1465454101562,-653.311767578125],"size":[416.2424011230469,98],"flags":{},"order":6,"mode":0,"inputs":[],"outputs":[{"name":"MODEL","localized_name":"MODEL","type":"MODEL","links":[2097],"slot_index":0},{"name":"CLIP","localized_name":"CLIP","type":"CLIP","links":[2099],"slot_index":1},{"name":"VAE","localized_name":"VAE","type":"VAE","links":[2100],"slot_index":2}],"properties":{"Node name for S&R":"CheckpointLoaderSimple"},"widgets_values":["_SDXL_/juggernautXL_v9Rundiffusionphoto2.safetensors"]},{"id":730,"type":"CLIPTextEncode","pos":[712.8302612304688,358.5015869140625],"size":[273.04931640625,94.66851806640625],"flags":{"collapsed":false},"order":15,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":2112}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2113],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["low quality, low detail, blurry, unsharp, low resolution, jpeg artifacts"]},{"id":662,"type":"CLIPTextEncode","pos":[460,-370],"size":[210,88],"flags":{"collapsed":false},"order":13,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":1939}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2094],"slot_index":0}],"properties":{"Node name for 
S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["a woman wearing a red flannel shirt and a cute shark plush blue hat"]},{"id":723,"type":"CLIPTextEncode","pos":[460,-240],"size":[210,88],"flags":{"collapsed":false},"order":14,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":2092}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2093],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["a photo from the ground of a college campus"]},{"id":401,"type":"ClownsharKSampler_Beta","pos":[1010,-370],"size":[340.55120849609375,666.8208618164062],"flags":{},"order":19,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":1967},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":2095},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":2113},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1399},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[2096],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharKSampler_Beta","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":[0.5,"exponential/res_3s","karras",60,-1,1,7,2,"fixed","standard",true]}],"links":[[18,14,0,7,4,"VAE"],[1328,14,0,397,1,"VAE"],[1329,397,0,398,0,"IMAGE"],[1399,7,3,401,3,"LATENT"],[1939,490,0,662,0,"CLIP"],[1967,13,0,401,0,"MODEL"],[2054,709,0,710,0,"MASK"],[2065,716,0,709,1,"MASK"],[2073,715,0,676,0,"MASK"],[2083,676,0,709,0,"MASK"],[2091,709,0,722,2,"MASK"],[2092,490,0,723,0,"CLIP"],[2093,723,0,722,1,"CONDITIONING"],[2094,662,0,722,0,"CONDITIONING"],[2095,722,0,401,1,"CONDITIONING"],[2096,401,0,397,0,"LATENT"],[2097,724,0,725,0,"MODEL"],[2098,725,0,13,0,"*"],[2099,724,1,490,0,"*"],[2100,724,2,14,0,"*"],[2112,490,0,730,0,"CLIP"],[2113,730,0,401,2,"CONDITIONING"]],"groups":[],"config":{},"extra":{"ds":{"scale":2.322515441988848,"offset":[1367.132902556087,589.0262767308418]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0,"ue_links":[],"VHS_MetadataImage":true,"VHS_KeepIntermediate":true},"version":0.4} ================================================ FILE: example_workflows/sdxl style transfer.json ================================================ 
{"last_node_id":1394,"last_link_id":3744,"nodes":[{"id":13,"type":"Reroute","pos":[13508.9013671875,-109.2831802368164],"size":[75,26],"flags":{},"order":18,"mode":0,"inputs":[{"name":"","type":"*","link":3741}],"outputs":[{"name":"","type":"MODEL","links":[3740],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":14,"type":"Reroute","pos":[13508.9013671875,-29.283178329467773],"size":[75,26],"flags":{},"order":16,"mode":0,"inputs":[{"name":"","type":"*","link":3744}],"outputs":[{"name":"","type":"VAE","links":[18,2696],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":490,"type":"Reroute","pos":[13508.9013671875,-69.28317260742188],"size":[75,26],"flags":{},"order":15,"mode":0,"inputs":[{"name":"","type":"*","link":3743}],"outputs":[{"name":"","type":"CLIP","links":[2881,3581],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1308,"type":"ClownGuide_Style_Beta","pos":[14108.255859375,675.60693359375],"size":[246.31312561035156,286],"flags":{},"order":26,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":3709},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":3699}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[3604],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownGuide_Style_Beta"},"widgets_values":["positive","WCT",1,1,"constant",0,-1,false]},{"id":970,"type":"CLIPTextEncode","pos":[13688.255859375,165.60690307617188],"size":[281.9206848144531,109.87118530273438],"flags":{},"order":19,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":2881}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2882,3627],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["blurry, out of focus, shallow depth of field, jpeg artifacts, low quality, bad quality, unsharp"]},{"id":1378,"type":"Reroute","pos":[13184.07421875,533.128662109375],"size":[75,26],"flags":{},"order":13,"mode":0,"inputs":[{"name":"","type":"*","link":3721}],"outputs":[{"name":"","type":"IMAGE","links":[3724,3729],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1379,"type":"Reroute","pos":[13185.853515625,168.15780639648438],"size":[75,26],"flags":{},"order":17,"mode":0,"inputs":[{"name":"","type":"*","link":3725}],"outputs":[{"name":"","type":"IMAGE","links":[3726],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":909,"type":"SaveImage","pos":[15220,-259.5838928222656],"size":[457.3382263183594,422.2065124511719],"flags":{},"order":31,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2697}],"outputs":[],"properties":{"Node name for 
S&R":"SaveImage","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["ComfyUI"]},{"id":7,"type":"VAEEncodeAdvanced","pos":[13400,560],"size":[261.2217712402344,298],"flags":{"collapsed":true},"order":24,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":3688},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":3727},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":18},{"name":"width","type":"INT","pos":[10,160.00003051757812],"widget":{"name":"width"},"link":3732},{"name":"height","type":"INT","pos":[10,184.00003051757812],"widget":{"name":"height"},"link":3733}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[2983,3710],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":[3709],"slot_index":1},{"name":"mask","localized_name":"mask","type":"MASK","links":[],"slot_index":2},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[],"slot_index":3},{"name":"width","localized_name":"width","type":"INT","links":[],"slot_index":4},{"name":"height","localized_name":"height","type":"INT","links":[],"slot_index":5}],"properties":{"Node name for S&R":"VAEEncodeAdvanced","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":["false",1344,768,"red",false,"16_channels"]},{"id":1371,"type":"Image Repeat Tile To 
Size","pos":[13390,500],"size":[210,146],"flags":{"collapsed":true},"order":21,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","link":3726},{"name":"width","type":"INT","pos":[10,36],"widget":{"name":"width"},"link":3730},{"name":"height","type":"INT","pos":[10,60],"widget":{"name":"height"},"link":3731}],"outputs":[{"name":"image","localized_name":"image","type":"IMAGE","links":[3727,3728],"slot_index":0}],"properties":{"Node name for S&R":"Image Repeat Tile To Size"},"widgets_values":[1024,1024,true]},{"id":1380,"type":"SetImageSize","pos":[13380,320],"size":[210,102],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"width","localized_name":"width","type":"INT","links":[3730,3732],"slot_index":0},{"name":"height","localized_name":"height","type":"INT","links":[3731,3733],"slot_index":1}],"properties":{"Node name for S&R":"SetImageSize"},"widgets_values":[1344,768]},{"id":1377,"type":"Image Comparer (rgthree)","pos":[15742.4619140625,-253.3526153564453],"size":[461.9190368652344,413.5953369140625],"flags":{},"order":32,"mode":0,"inputs":[{"name":"image_a","type":"IMAGE","dir":3,"link":3720},{"name":"image_b","type":"IMAGE","dir":3,"link":3729}],"outputs":[],"properties":{"comparer_mode":"Slide"},"widgets_values":[[{"name":"A","selected":true,"url":"/api/view?filename=rgthree.compare._temp_ogxbu_00017_.png&type=temp&subfolder=&rand=0.8732033562598724"},{"name":"B","selected":true,"url":"/api/view?filename=rgthree.compare._temp_ogxbu_00018_.png&type=temp&subfolder=&rand=0.08327234118228466"}]]},{"id":908,"type":"VAEDecode","pos":[15217.7802734375,-312.1965637207031],"size":[210,46],"flags":{"collapsed":true},"order":30,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":3469},{"name":"vae","localized_name":"vae","type":"VAE","link":2696}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2697,3720],"slot_index":0}],"properties":{"Node name for 
S&R":"VAEDecode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[]},{"id":1383,"type":"Note","pos":[14428.40234375,580.1749877929688],"size":[261.9539489746094,88],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Samplers like res_2s in this cycling node will also work and are faster. res_2m and res_3m are even faster, but sometimes the effect takes longer in wall time to fully kick in."],"color":"#432","bgcolor":"#653"},{"id":1384,"type":"Note","pos":[14793.0322265625,518.4120483398438],"size":[261.9539489746094,88],"flags":{},"order":2,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["res_2m or res_3m can be used here instead and are faster, but are less likely to fully clean up lingering artifacts."],"color":"#432","bgcolor":"#653"},{"id":1385,"type":"Note","pos":[14398.345703125,768.2096557617188],"size":[261.9539489746094,88],"flags":{},"order":3,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["method = AdaIN is faster and uses less memory, but is less accurate. 
Some prefer the effect."],"color":"#432","bgcolor":"#653"},{"id":1328,"type":"ClownOptions_SDE_Beta","pos":[14186.4755859375,-132.6126251220703],"size":[315,266],"flags":{"collapsed":true},"order":4,"mode":0,"inputs":[{"name":"etas","localized_name":"etas","type":"SIGMAS","shape":7,"link":null},{"name":"etas_substep","localized_name":"etas_substep","type":"SIGMAS","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[3707],"slot_index":0}],"properties":{"Node name for S&R":"ClownOptions_SDE_Beta"},"widgets_values":["gaussian","gaussian","hard","hard",0.5,0.75,-1,"fixed"]},{"id":1381,"type":"Note","pos":[13881.6279296875,-217.62835693359375],"size":[261.9539489746094,88],"flags":{},"order":5,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Increase or decrease \"steps_to_run\" in ClownsharKSampler to change the effective denoise level."],"color":"#432","bgcolor":"#653"},{"id":1382,"type":"Note","pos":[14718.0498046875,-295.4144592285156],"size":[268.1851806640625,124.49711608886719],"flags":{},"order":6,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Increasing cycles will increase the amount of change, but take longer.\n\nCycles will rerun the same step over and over, forwards and backwards, iteratively refining an image at a controlled noise level."],"color":"#432","bgcolor":"#653"},{"id":1373,"type":"LoadImage","pos":[12810.2314453125,534.0346069335938],"size":[315,314],"flags":{},"order":7,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3721],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"title":"Load Image (Composition)","properties":{"Node name for S&R":"LoadImage"},"widgets_values":["pasted/image 
(476).png","image"]},{"id":1362,"type":"PreviewImage","pos":[13380,620],"size":[210,246],"flags":{},"order":23,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":3682}],"outputs":[],"properties":{"Node name for S&R":"PreviewImage"},"widgets_values":[]},{"id":1390,"type":"Note","pos":[13148.0439453125,257.643310546875],"size":[210,88],"flags":{},"order":8,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Color Match SOMETIMES helps accelerate style transfer.\n"],"color":"#432","bgcolor":"#653"},{"id":1376,"type":"Note","pos":[13710.3271484375,473.56817626953125],"size":[265.1909484863281,137.36415100097656],"flags":{},"order":9,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Increase or decrease weight in ClownGuide to alter adherence to the input image.\n\nFor now, set to low weights or bypass if using any model except HiDream. The HiDream code was adapted so that this composition guide doesn't fight the style guide. 
Others will be added soon."],"color":"#432","bgcolor":"#653"},{"id":1350,"type":"ColorMatch","pos":[13380,160],"size":[210,102],"flags":{"collapsed":false},"order":22,"mode":0,"inputs":[{"name":"image_ref","localized_name":"image_ref","type":"IMAGE","link":3728},{"name":"image_target","localized_name":"image_target","type":"IMAGE","link":3724}],"outputs":[{"name":"image","localized_name":"image","type":"IMAGE","links":[3682,3688],"slot_index":0}],"properties":{"Node name for S&R":"ColorMatch"},"widgets_values":["mkl",0]},{"id":981,"type":"ClownsharkChainsampler_Beta","pos":[14758.255859375,-64.39308166503906],"size":[340.20001220703125,510],"flags":{},"order":29,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":3698},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[3469],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"exponential/res_2s",-1,4,"resample",true]},{"id":1393,"type":"ReSDPatcher","pos":[13246.306640625,-162.28057861328125],"size":[210,82],"flags":{},"order":14,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":3742}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[3741],"slot_index":0}],"properties":{"Node name for S&R":"ReSDPatcher"},"widgets_values":["float64",true]},{"id":1394,"type":"CheckpointLoaderSimple","pos":[12837.810546875,-94.67196655273438],"size":[375.491943359375,98],"flags":{},"order":10,"mode":0,"inputs":[],"outputs":[{"name":"MODEL","localized_name":"MODEL","type":"MODEL","links":[3742],"slot_index":0},{"name":"CLIP","localized_name":"CLIP","type":"CLIP","links":[3743],"slot_index":1},{"name":"VAE","localized_name":"VAE","type":"VAE","links":[3744],"slot_index":2}],"properties":{"Node name for S&R":"CheckpointLoaderSimple"},"widgets_values":["_SDXL_/zavychromaxl_v70.safetensors"]},{"id":1374,"type":"LoadImage","pos":[12805.896484375,167.56053161621094],"size":[315,314],"flags":{},"order":11,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3725],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"title":"Load Image (Style Guide)","properties":{"Node name for S&R":"LoadImage"},"widgets_values":["ChatGPT Image May 13, 2025, 09_18_45 AM.png","image"]},{"id":1333,"type":"CLIPTextEncode","pos":[13688.255859375,-44.393089294433594],"size":[280.6252746582031,164.06936645507812],"flags":{"collapsed":false},"order":20,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":3581}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[3602,3626],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["the inside of a car driving down 
a creepy road"]},{"id":1318,"type":"ClownGuide_Beta","pos":[13828.255859375,675.60693359375],"size":[263.102783203125,290],"flags":{},"order":25,"mode":4,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":3710},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[3699,3708],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuide_Beta"},"widgets_values":["inversion",false,false,0.25,1,"constant",0,-1,false]},{"id":1317,"type":"ClownOptions_Cycles_Beta","pos":[14418.0478515625,-325.06365966796875],"size":[265.2884826660156,202],"flags":{},"order":12,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[3533],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownOptions_Cycles_Beta"},"widgets_values":[10,1,-1,"none",-1,4,false]},{"id":907,"type":"ClownsharKSampler_Beta","pos":[14008.255859375,-64.39308166503906],"size":[340.55120849609375,666.8208618164062],"flags":{},"order":27,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":3740},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":3602},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":2882},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":2983},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":3708},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[3578],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharKSampler_Beta","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":[0.5,"exponential/res_2s","beta57",20,14,1,4,201,"fixed","unsample",true]},{"id":980,"type":"ClownsharkChainsampler_Beta","pos":[14378.255859375,-64.39308166503906],"size":[340.20001220703125,570],"flags":{},"order":28,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":3626},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":3627},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":3578},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":3604},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":3533},{"name":"options 2","type":"OPTIONS","link":3707},{"name":"options 3","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[3698],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"exponential/res_2s",1,4,"resample",true]}],"links":[[18,14,0,7,4,"VAE"],[2696,14,0,908,1,"VAE"],[2697,908,0,909,0,"IMAGE"],[2881,490,0,970,0,"CLIP"],[2882,970,0,907,2,"CONDITIONING"],[2983,7,0,907,3,"LATENT"],[3469,981,0,908,0,"LATENT"],[3533,1317,0,980,6,"OPTIONS"],[3578,907,0,980,4,"LATENT"],[3581,490,0,1333,0,"CLIP"],[3602,1333,0,907,1,"CONDITIONING"],[3604,1308,0,980,5,"GUIDES"],[3626,1333,0,980,1,"CONDITIONING"],[3627,970,0,980,2,"CONDITIONING"],[3682,1350,0,1362,0,"IMAGE"],[3688,1350,0,7,0,"IMAGE"],[3698,980,0,981,4,"LATENT"],[3699,1318,0,1308,3,"GUIDES"],[3707,1328,0,980,7,"OPTIONS"],[3708,1318,0,907,5,"GUIDES"],[3709,7,1,1308,0,"LATENT"],[3710,7,0,1318,0,"LATENT"],[3720,908,0,1377,0,"IMAGE"],[3721,1373,0,1378,0,"*"],[3724,1378,0,1350,1,"IMAGE"],[3725,1374,0,1379,0,"*"],[3726,1379,0,1371,0,"IMAGE"],[3727,1371,0,7,1,"IMAGE"],[3728,1371,0,1350,0,"IMAGE"],[3729,1378,0,1377,1,"IMAGE"],[3730,1380,0,1371,1,"INT"],[3731,1380,1,1371,2,"INT"],[3732,1380,0,7,5,"INT"],[3733,1380,1,7,6,"INT"],[3740,13,0,907,0,"MODEL"],[3741,1393,0,13,0,"*"],[3742,1394,0,1393,0,"MODEL"],[3743,1394,1,490,0,"*"],[3744,1394,2,14,0,"*"]],"groups":[{"id":1,"title":"Model Loaders","bounding":[12796.72265625,-401.9004211425781,822.762451171875,436.0693359375],"color":"#3f789e","font_size":24,"flags":{}},{"id":2,"title":"Sampling","bounding":[13652.6533203125,-402.70721435546875,1470.8076171875,1409.0289306640625],"color":"#3f789e","font_size":24,"flags":{}},{"id":3,"title":"Input Prep","bounding":[12797.1396484375,77.69412231445312,817.4218139648438,820.6239624023438],"color":"#3f789e","font_size":24,"flags":{}},{"id":4,"title":"Save and 
Compare","bounding":[15180.705078125,-399.09112548828125,1050.6468505859375,615.8845825195312],"color":"#3f789e","font_size":24,"flags":{}}],"config":{},"extra":{"ds":{"scale":1.486436280241595,"offset":[-10958.961513232216,457.651089011118]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0,"ue_links":[],"VHS_MetadataImage":true,"VHS_KeepIntermediate":true},"version":0.4} ================================================ FILE: example_workflows/style transfer.json ================================================ {"last_node_id":1408,"last_link_id":3768,"nodes":[{"id":14,"type":"Reroute","pos":[13508.9013671875,-29.283178329467773],"size":[75,26],"flags":{},"order":22,"mode":0,"inputs":[{"name":"","type":"*","link":3737}],"outputs":[{"name":"","type":"VAE","links":[18,2696,3767],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":490,"type":"Reroute","pos":[13508.9013671875,-69.28317260742188],"size":[75,26],"flags":{},"order":21,"mode":0,"inputs":[{"name":"","type":"*","link":3736}],"outputs":[{"name":"","type":"CLIP","links":[2881,3581],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":970,"type":"CLIPTextEncode","pos":[13688.255859375,165.60690307617188],"size":[281.9206848144531,109.87118530273438],"flags":{},"order":25,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":2881}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[2882,3627],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["blurry, out of focus, shallow depth of field, jpeg artifacts, low quality, bad quality, 
unsharp"]},{"id":1379,"type":"Reroute","pos":[13185.853515625,168.15780639648438],"size":[75,26],"flags":{},"order":23,"mode":0,"inputs":[{"name":"","type":"*","link":3747}],"outputs":[{"name":"","type":"IMAGE","links":[3726],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":909,"type":"SaveImage","pos":[15220,-259.5838928222656],"size":[457.3382263183594,422.2065124511719],"flags":{},"order":39,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":2697}],"outputs":[],"properties":{"Node name for S&R":"SaveImage","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["ComfyUI"]},{"id":1380,"type":"SetImageSize","pos":[13324.7197265625,323.0480041503906],"size":[210,102],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"width","localized_name":"width","type":"INT","links":[3730,3732],"slot_index":0},{"name":"height","localized_name":"height","type":"INT","links":[3731,3733],"slot_index":1}],"properties":{"Node name for S&R":"SetImageSize"},"widgets_values":[1344,768]},{"id":1377,"type":"Image Comparer 
(rgthree)","pos":[15742.4619140625,-253.3526153564453],"size":[461.9190368652344,413.5953369140625],"flags":{},"order":40,"mode":0,"inputs":[{"name":"image_a","type":"IMAGE","dir":3,"link":3720},{"name":"image_b","type":"IMAGE","dir":3,"link":3768}],"outputs":[],"properties":{"comparer_mode":"Slide"},"widgets_values":[[{"name":"A","selected":true,"url":"/api/view?filename=rgthree.compare._temp_zdjno_00005_.png&type=temp&subfolder=&rand=0.40554525758657745"},{"name":"B","selected":true,"url":"/api/view?filename=rgthree.compare._temp_zdjno_00006_.png&type=temp&subfolder=&rand=0.28640062579003533"}]]},{"id":908,"type":"VAEDecode","pos":[15217.7802734375,-312.1965637207031],"size":[210,46],"flags":{"collapsed":true},"order":38,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":3469},{"name":"vae","localized_name":"vae","type":"VAE","link":2696}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[2697,3720],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":[]},{"id":1383,"type":"Note","pos":[14428.40234375,580.1749877929688],"size":[261.9539489746094,88],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Samplers like res_2s in this cycling node will also work and are faster. 
res_2m and res_3m are even faster, but sometimes the effect takes longer in wall time to fully kick in."],"color":"#432","bgcolor":"#653"},{"id":1384,"type":"Note","pos":[14793.0322265625,518.4120483398438],"size":[261.9539489746094,88],"flags":{},"order":2,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["res_2m or res_3m can be used here instead and are faster, but are less likely to fully clean up lingering artifacts."],"color":"#432","bgcolor":"#653"},{"id":1328,"type":"ClownOptions_SDE_Beta","pos":[14186.4755859375,-132.6126251220703],"size":[315,266],"flags":{"collapsed":true},"order":3,"mode":0,"inputs":[{"name":"etas","localized_name":"etas","type":"SIGMAS","shape":7,"link":null},{"name":"etas_substep","localized_name":"etas_substep","type":"SIGMAS","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[3707],"slot_index":0}],"properties":{"Node name for S&R":"ClownOptions_SDE_Beta"},"widgets_values":["gaussian","gaussian","hard","hard",0.5,0.75,-1,"fixed"]},{"id":1381,"type":"Note","pos":[13881.6279296875,-217.62835693359375],"size":[261.9539489746094,88],"flags":{},"order":4,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Increase or decrease \"steps_to_run\" in ClownsharKSampler to change the effective denoise level."],"color":"#432","bgcolor":"#653"},{"id":1385,"type":"Note","pos":[14429.50390625,729.0418701171875],"size":[261.9539489746094,88],"flags":{},"order":5,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["method = AdaIN is faster and uses less memory, but is less accurate. 
Some prefer the effect."],"color":"#432","bgcolor":"#653"},{"id":1386,"type":"ClownModelLoader","pos":[12855.7509765625,-269.1963806152344],"size":[335.2314453125,266],"flags":{},"order":6,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[3734],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[3736],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[3737],"slot_index":2}],"properties":{"Node name for S&R":"ClownModelLoader"},"widgets_values":["sd3.5_medium.safetensors","default","clip_g_sd35.safetensors","clip_l_sd35.safetensors","t5xxl_fp16.safetensors",".none","sd3","sd35_vae.safetensors"]},{"id":1378,"type":"Reroute","pos":[13184.07421875,533.128662109375],"size":[75,26],"flags":{},"order":24,"mode":0,"inputs":[{"name":"","type":"*","link":3751}],"outputs":[{"name":"","type":"IMAGE","links":[3742],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1333,"type":"CLIPTextEncode","pos":[13688.255859375,-44.393089294433594],"size":[280.6252746582031,164.06936645507812],"flags":{"collapsed":false},"order":26,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":3581}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[3602,3626],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode","cnr_id":"comfy-core","ver":"0.3.29"},"widgets_values":["evil blacklight mountains by a frozen lake at night at night, wild dangerous looking illustration ,dark pop art style, glowing inverted blackness, 
nothing"]},{"id":980,"type":"ClownsharkChainsampler_Beta","pos":[14378.255859375,-64.39308166503906],"size":[340.20001220703125,570],"flags":{},"order":36,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":3626},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":3627},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":3578},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":3763},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":3533},{"name":"options 2","type":"OPTIONS","link":3707},{"name":"options 3","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[3698],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"exponential/res_5s",1,7,"resample",true]},{"id":907,"type":"ClownsharKSampler_Beta","pos":[14008.255859375,-64.39308166503906],"size":[340.55120849609375,666.8208618164062],"flags":{},"order":35,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":3765},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":3602},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":2882},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":2983},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":3708},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[3578],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharKSampler_Beta","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":[0.5,"multistep/res_2m","beta57",20,14,1,1,202,"fixed","unsample",true]},{"id":981,"type":"ClownsharkChainsampler_Beta","pos":[14758.255859375,-64.39308166503906],"size":[340.20001220703125,510],"flags":{},"order":37,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":3698},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[3469],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"exponential/res_5s",-1,7,"resample",true]},{"id":1373,"type":"LoadImage","pos":[12835.318359375,168.2541046142578],"size":[315,314],"flags":{},"order":7,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3747],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"title":"Load Image (Composition)","properties":{"Node name for 
S&R":"LoadImage"},"widgets_values":["ComfyUI_00492_.png","image"]},{"id":431,"type":"ModelSamplingAdvancedResolution","pos":[13212.6708984375,-154.3930206298828],"size":[260.3999938964844,126],"flags":{},"order":31,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":3735},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":1398}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[3764],"slot_index":0}],"properties":{"Node name for S&R":"ModelSamplingAdvancedResolution","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":["exponential",1.35,0.85]},{"id":13,"type":"Reroute","pos":[13508.9013671875,-109.2831802368164],"size":[75,26],"flags":{},"order":33,"mode":0,"inputs":[{"name":"","type":"*","link":3764}],"outputs":[{"name":"","type":"MODEL","links":[3765],"slot_index":0}],"properties":{"showOutputText":false,"horizontal":false}},{"id":1387,"type":"ReSD35Patcher","pos":[13242.98046875,-303.1613464355469],"size":[210,82],"flags":{},"order":20,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":3734}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[3735],"slot_index":0}],"properties":{"Node name for S&R":"ReSD35Patcher"},"widgets_values":["float64",true]},{"id":1308,"type":"ClownGuide_Style_Beta","pos":[14122.4169921875,684.2660522460938],"size":[246.31312561035156,286],"flags":{},"order":32,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":3709},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":3740}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[3762],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownGuide_Style_Beta"},"widgets_values":["positive","scattersort",1,1,"constant",0,-1,false]},{"id":1389,"type":"ClownGuide_Style_TileSize","pos":[14761.21484375,704.8385009765625],"size":[223.3114471435547,106],"flags":{},"order":34,"mode":0,"inputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":3762}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[3763],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuide_Style_TileSize"},"widgets_values":[256,192,64]},{"id":1400,"type":"Note","pos":[14773.240234375,866.2615966796875],"size":[298.4509582519531,104.02301025390625],"flags":{},"order":8,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Your image dimensions need to be neatly divisible by these tile dimensions or you will get an error. This node currently will only have an effect with \"scattersort\". It will cause the image to follow your style reference's composition as well."],"color":"#432","bgcolor":"#653"},{"id":1376,"type":"Note","pos":[13703.93359375,509.9842529296875],"size":[261.9539489746094,88],"flags":{},"order":9,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Increase or decrease weight in ClownGuide to alter adherence to the input image."],"color":"#432","bgcolor":"#653"},{"id":1318,"type":"ClownGuide_Beta","pos":[13823.8046875,679.1676025390625],"size":[263.102783203125,290],"flags":{},"order":29,"mode":4,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":3710},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[3708,3740],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownGuide_Beta"},"widgets_values":["inversion",false,false,0.5,1,"constant",0,-1,false]},{"id":1401,"type":"Note","pos":[13818.6318359375,1056.417724609375],"size":[271.7456970214844,88],"flags":{},"order":10,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["This bypassed node can improve adherence to the composition, but the tradeoff is less movement with the style."],"color":"#432","bgcolor":"#653"},{"id":1402,"type":"Note","pos":[14120.05859375,1058.747314453125],"size":[271.7456970214844,88],"flags":{},"order":11,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["WCT is slower, but also an excellent style mode."],"color":"#432","bgcolor":"#653"},{"id":1390,"type":"LoadImage","pos":[12836.228515625,550.88427734375],"size":[315,314],"flags":{},"order":12,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3751],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"title":"Load Image (Style Guide)","properties":{"Node name for S&R":"LoadImage"},"widgets_values":["6a985aaa-8a95-4382-97a9-91cdf96f43d3-Moraine_Lake_Dennis_Frates_Alamy_Stock_Photo.jpg","image"]},{"id":1403,"type":"Note","pos":[12890.732421875,-557.807373046875],"size":[271.7456970214844,88],"flags":{},"order":13,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["If you wish to use another model, just load it in the ClownModelLoader (which is an efficiency node) or via your usual loader nodes. There is a Flux loader specifically for loading Redux as well. 
"],"color":"#432","bgcolor":"#653"},{"id":1405,"type":"Note","pos":[12480.912109375,-186.05596923828125],"size":[271.7456970214844,88],"flags":{},"order":14,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["If you load the wrong clip, you may get some very strange errors from ComfyUI about an \"attn_mask\" etc."],"color":"#432","bgcolor":"#653"},{"id":1404,"type":"Note","pos":[13214.4140625,-591.9750366210938],"size":[561.9423828125,149.42193603515625],"flags":{},"order":15,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["You will need to use the appropriate patcher node to use other models.\n\nSD1.5, SDXL: ReSDPatcher\nStable Cascade: natively supported by https://github.com/ClownsharkBatwing/UltraCascade\nSD3.5: ReSD3.5Patcher\nFlux: ReFluxPatcher\nHiDream: ReHiDreamPatcher\nAuraFlow: ReAuraPatcher\nWAN: ReWanPatcher\nLTXV: ReLTXVPatcher"],"color":"#432","bgcolor":"#653"},{"id":1406,"type":"Note","pos":[14420.2861328125,-528.9069213867188],"size":[261.9539489746094,88],"flags":{},"order":16,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["res_5s is a very high quality sampler that can really help SD3.5M become a much more coherent model. It is slow, however. 
Try res_2s or even res_2m if you want more speed."],"color":"#322","bgcolor":"#533"},{"id":1371,"type":"Image Repeat Tile To Size","pos":[13345.26171875,497.8262939453125],"size":[210,146],"flags":{"collapsed":true},"order":27,"mode":4,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","link":3726},{"name":"width","type":"INT","pos":[10,36],"widget":{"name":"width"},"link":3730},{"name":"height","type":"INT","pos":[10,60],"widget":{"name":"height"},"link":3731}],"outputs":[{"name":"image","localized_name":"image","type":"IMAGE","links":[3727],"slot_index":0}],"properties":{"Node name for S&R":"Image Repeat Tile To Size"},"widgets_values":[1024,1024,true]},{"id":1407,"type":"Note","pos":[13314.3076171875,171.45277404785156],"size":[271.7456970214844,88],"flags":{},"order":17,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Enable the bypassed ImageRepeatToTile node if you're using Flux and getting blurry outputs."],"color":"#432","bgcolor":"#653"},{"id":1382,"type":"Note","pos":[14718.0498046875,-295.4144592285156],"size":[288.7483215332031,156.81048583984375],"flags":{},"order":18,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Increasing cycles will increase the amount of change, but take longer.\n\nCycles will rerun the same step over and over, forwards and backwards, iteratively refining an image at a controlled noise level.\n\nTry reducing cycles if you want to stay very close to the original composition."],"color":"#432","bgcolor":"#653"},{"id":1317,"type":"ClownOptions_Cycles_Beta","pos":[14418.048828125,-327.3294982910156],"size":[265.2884826660156,202],"flags":{},"order":19,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[3533],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownOptions_Cycles_Beta"},"widgets_values":[5,1,0.5,"none",-1,7,true]},{"id":7,"type":"VAEEncodeAdvanced","pos":[13343.19140625,556.8784790039062],"size":[261.2217712402344,298],"flags":{"collapsed":false},"order":28,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":3742},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":3727},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":18},{"name":"width","type":"INT","pos":[10,160],"widget":{"name":"width"},"link":3732},{"name":"height","type":"INT","pos":[10,184],"widget":{"name":"height"},"link":3733}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[2983,3710,3766],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":[3709],"slot_index":1},{"name":"mask","localized_name":"mask","type":"MASK","links":[],"slot_index":2},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[1398],"slot_index":3},{"name":"width","localized_name":"width","type":"INT","links":[],"slot_index":4},{"name":"height","localized_name":"height","type":"INT","links":[],"slot_index":5}],"properties":{"Node name for S&R":"VAEEncodeAdvanced","cnr_id":"RES4LYF","ver":"5ce9b5a77c227bf864e447a1e65305bf6cada5c2"},"widgets_values":["false",1344,768,"red",false,"16_channels"]},{"id":1408,"type":"VAEDecode","pos":[15377.6826171875,-315.0729064941406],"size":[210,46],"flags":{"collapsed":true},"order":30,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":3766},{"name":"vae","localized_name":"vae","type":"VAE","link":3767}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[3768],"slot_index":0}],"properties":{"Node name for 
S&R":"VAEDecode"}}],"links":[[18,14,0,7,4,"VAE"],[1398,7,3,431,1,"LATENT"],[2696,14,0,908,1,"VAE"],[2697,908,0,909,0,"IMAGE"],[2881,490,0,970,0,"CLIP"],[2882,970,0,907,2,"CONDITIONING"],[2983,7,0,907,3,"LATENT"],[3469,981,0,908,0,"LATENT"],[3533,1317,0,980,6,"OPTIONS"],[3578,907,0,980,4,"LATENT"],[3581,490,0,1333,0,"CLIP"],[3602,1333,0,907,1,"CONDITIONING"],[3626,1333,0,980,1,"CONDITIONING"],[3627,970,0,980,2,"CONDITIONING"],[3698,980,0,981,4,"LATENT"],[3707,1328,0,980,7,"OPTIONS"],[3708,1318,0,907,5,"GUIDES"],[3709,7,1,1308,0,"LATENT"],[3710,7,0,1318,0,"LATENT"],[3720,908,0,1377,0,"IMAGE"],[3726,1379,0,1371,0,"IMAGE"],[3727,1371,0,7,1,"IMAGE"],[3730,1380,0,1371,1,"INT"],[3731,1380,1,1371,2,"INT"],[3732,1380,0,7,5,"INT"],[3733,1380,1,7,6,"INT"],[3734,1386,0,1387,0,"MODEL"],[3735,1387,0,431,0,"MODEL"],[3736,1386,1,490,0,"*"],[3737,1386,2,14,0,"*"],[3740,1318,0,1308,3,"GUIDES"],[3742,1378,0,7,0,"IMAGE"],[3747,1373,0,1379,0,"*"],[3751,1390,0,1378,0,"*"],[3762,1308,0,1389,0,"GUIDES"],[3763,1389,0,980,5,"GUIDES"],[3764,431,0,13,0,"*"],[3765,13,0,907,0,"MODEL"],[3766,7,0,1408,0,"LATENT"],[3767,14,0,1408,1,"VAE"],[3768,1408,0,1377,1,"IMAGE"]],"groups":[{"id":1,"title":"Model Loaders","bounding":[12796.72265625,-401.9004211425781,822.762451171875,436.0693359375],"color":"#3f789e","font_size":24,"flags":{}},{"id":2,"title":"Sampling","bounding":[13652.6533203125,-402.70721435546875,1470.8076171875,1409.0289306640625],"color":"#3f789e","font_size":24,"flags":{}},{"id":3,"title":"Input Prep","bounding":[12797.1396484375,77.69412231445312,817.4218139648438,820.6239624023438],"color":"#3f789e","font_size":24,"flags":{}},{"id":4,"title":"Save and 
Compare","bounding":[15180.705078125,-399.09112548828125,1050.6468505859375,615.8845825195312],"color":"#3f789e","font_size":24,"flags":{}}],"config":{},"extra":{"ds":{"scale":1.188365497732567,"offset":[-11346.93636409885,735.4056846100609]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0,"ue_links":[],"VHS_MetadataImage":true,"VHS_KeepIntermediate":true},"version":0.4} ================================================ FILE: example_workflows/ultracascade txt2img style transfer.json ================================================ {"last_node_id":43,"last_link_id":52,"nodes":[{"id":1,"type":"VAEDecode","pos":[2240,3610],"size":[210,46],"flags":{"collapsed":false},"order":37,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":1},{"name":"vae","localized_name":"vae","type":"VAE","link":2,"slot_index":1}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","shape":3,"links":[5],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode"},"widgets_values":[]},{"id":2,"type":"LoraLoader","pos":[-24.50164031982422,3718.225341796875],"size":[359.7619323730469,126],"flags":{},"order":18,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":3},{"name":"clip","localized_name":"clip","type":"CLIP","link":4}],"outputs":[{"name":"MODEL","localized_name":"MODEL","type":"MODEL","links":[7],"slot_index":0},{"name":"CLIP","localized_name":"CLIP","type":"CLIP","links":[6,8],"slot_index":1}],"properties":{"Node name for S&R":"LoraLoader"},"widgets_values":["csbw_cascade_dark_ema.safetensors",1,1]},{"id":4,"type":"SharkOptions_UltraCascade_Latent_Beta","pos":[1890,4480],"size":[310.79998779296875,82],"flags":{},"order":0,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[22],"slot_index":0}],"properties":{"Node name for 
S&R":"SharkOptions_UltraCascade_Latent_Beta"},"widgets_values":[1536,1536]},{"id":5,"type":"SharkOptions_UltraCascade_Latent_Beta","pos":[797.6149291992188,4484.87158203125],"size":[310.79998779296875,82],"flags":{},"order":1,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[12],"slot_index":0}],"properties":{"Node name for S&R":"SharkOptions_UltraCascade_Latent_Beta"},"widgets_values":[24,24]},{"id":6,"type":"SharkOptions_UltraCascade_Latent_Beta","pos":[1157.109375,4484.87158203125],"size":[310.79998779296875,82],"flags":{},"order":2,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[17],"slot_index":0}],"properties":{"Node name for S&R":"SharkOptions_UltraCascade_Latent_Beta"},"widgets_values":[36,36]},{"id":8,"type":"VAELoader","pos":[1900,3600],"size":[294.6280212402344,58],"flags":{},"order":3,"mode":0,"inputs":[],"outputs":[{"name":"VAE","localized_name":"VAE","type":"VAE","links":[2,51],"slot_index":0}],"properties":{"Node name for S&R":"VAELoader"},"widgets_values":["stage_a_ft_hq.safetensors"]},{"id":10,"type":"UltraCascade_Loader","pos":[-394.08612060546875,3670.32373046875],"size":[345.5117492675781,82.95540618896484],"flags":{},"order":4,"mode":0,"inputs":[],"outputs":[{"name":"MODEL","localized_name":"MODEL","type":"MODEL","shape":3,"links":[3],"slot_index":0}],"properties":{"Node name for 
S&R":"UltraCascade_Loader"},"widgets_values":["stage_c_bf16.safetensors","ultrapixel_t2i.safetensors"]},{"id":13,"type":"CLIPTextEncode","pos":[355.95135498046875,3972.858154296875],"size":[356.2470703125,110.6326904296875],"flags":{},"order":25,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":8}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[11],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["low quality, bad quality, low detail, blurry, unsharp"]},{"id":9,"type":"CLIPLoader","pos":[-394.50164794921875,3810.115478515625],"size":[344.635498046875,98],"flags":{},"order":5,"mode":0,"inputs":[],"outputs":[{"name":"CLIP","localized_name":"CLIP","type":"CLIP","links":[4],"slot_index":0}],"properties":{"Node name for S&R":"CLIPLoader"},"widgets_values":["cascade_text_encoder.safetensors","stable_cascade","default"]},{"id":20,"type":"VAELoader","pos":[-376.8145751953125,3973.57080078125],"size":[315,58],"flags":{},"order":6,"mode":0,"inputs":[],"outputs":[{"name":"VAE","localized_name":"VAE","type":"VAE","links":[24,25]}],"properties":{"Node name for S&R":"VAELoader"},"widgets_values":["effnet_encoder.safetensors"]},{"id":22,"type":"UltraCascade_StageC_VAEEncode_Exact","pos":[-140,4520],"size":[302.3999938964844,102],"flags":{},"order":21,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","link":34},{"name":"vae","localized_name":"vae","type":"VAE","link":25}],"outputs":[{"name":"stage_c","localized_name":"stage_c","type":"LATENT","links":[31,32],"slot_index":0}],"properties":{"Node name for 
S&R":"UltraCascade_StageC_VAEEncode_Exact"},"widgets_values":[36,36]},{"id":19,"type":"UltraCascade_StageC_VAEEncode_Exact","pos":[-140,4160],"size":[302.3999938964844,102],"flags":{},"order":20,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","link":33},{"name":"vae","localized_name":"vae","type":"VAE","link":24}],"outputs":[{"name":"stage_c","localized_name":"stage_c","type":"LATENT","links":[27,28],"slot_index":0}],"properties":{"Node name for S&R":"UltraCascade_StageC_VAEEncode_Exact"},"widgets_values":[24,24]},{"id":17,"type":"ClownGuide_Style_Beta","pos":[190,4160],"size":[244.26441955566406,286],"flags":{},"order":26,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":27},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[23],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuide_Style_Beta"},"widgets_values":["positive","WCT",1,1,"constant",0,-1,false]},{"id":18,"type":"ClownGuide_Style_Beta","pos":[470,4160],"size":[244.26441955566406,286],"flags":{},"order":29,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":28},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":23}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[29],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownGuide_Style_Beta"},"widgets_values":["negative","WCT",1,1,"constant",0,-1,false]},{"id":12,"type":"UltraCascade_PerturbedAttentionGuidance","pos":[361.78070068359375,3621.58740234375],"size":[344.3999938964844,58],"flags":{},"order":23,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":7}],"outputs":[{"name":"MODEL","localized_name":"MODEL","type":"MODEL","links":[9],"slot_index":0}],"properties":{"Node name for S&R":"UltraCascade_PerturbedAttentionGuidance"},"widgets_values":[3]},{"id":3,"type":"SaveImage","pos":[2240,3720],"size":[753.4503784179688,734.7869262695312],"flags":{},"order":38,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":5}],"outputs":[],"properties":{},"widgets_values":["ComfyUI"]},{"id":27,"type":"ClownOptions_Cycles_Beta","pos":[1158.6995849609375,3539.621337890625],"size":[315,130],"flags":{},"order":7,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[]}],"properties":{"Node name for S&R":"ClownOptions_Cycles_Beta"},"widgets_values":[10,1,0.5,5.5]},{"id":21,"type":"ClownGuide_Style_Beta","pos":[190,4520],"size":[244.26441955566406,286],"flags":{},"order":27,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":32},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[26],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownGuide_Style_Beta"},"widgets_values":["positive","WCT",1,1,"constant",0,20,false]},{"id":11,"type":"CLIPTextEncode","pos":[359.33685302734375,3742.75537109375],"size":[351.592529296875,173.00360107421875],"flags":{},"order":24,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":6}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[10],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["impasto oil painting by Yayoi Kusama and Lisa Frank, thick paint textures, tunning contrasts at night with stylish roughly drawn thick black lines, a nuclear explosion destroying a city, its towering wide glowing nuclear mushroom cloud enveloping the entire skyline, the nuclear fireball lighting up the dark sky"]},{"id":7,"type":"UNETLoader","pos":[1520,3580],"size":[356.544677734375,82],"flags":{},"order":8,"mode":0,"inputs":[],"outputs":[{"name":"MODEL","localized_name":"MODEL","type":"MODEL","links":[40],"slot_index":0}],"properties":{"Node name for S&R":"UNETLoader"},"widgets_values":["stage_b_lite_CSBW_v1.1.safetensors","default"]},{"id":31,"type":"UltraCascade_StageB_Patcher","pos":[1901.8192138671875,3508.625244140625],"size":[235.1999969482422,26],"flags":{},"order":19,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":40}],"outputs":[{"name":"MODEL","localized_name":"MODEL","type":"MODEL","links":[41],"slot_index":0}],"properties":{"Node name for 
S&R":"UltraCascade_StageB_Patcher"},"widgets_values":[]},{"id":15,"type":"ClownsharKSampler_Beta","pos":[1155.5926513671875,3724.48974609375],"size":[314.421142578125,693.9824829101562],"flags":{},"order":34,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":16},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":30},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":17},{"name":"options 2","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[35],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"exponential/res_3s","beta57",30,1,1,5.5,100,"fixed","standard",true]},{"id":26,"type":"ClownsharkChainsampler_Beta","pos":[1520.32470703125,3723.215087890625],"size":[315,510],"flags":{},"order":36,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":35},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":37},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"exponential/res_3s",-1,5.5,"resample",true]},{"id":14,"type":"ClownsharKSampler_Beta","pos":[796.9224243164062,3725.34375],"size":[311.41375732421875,693.9824829101562],"flags":{},"order":32,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":9},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":10},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":11},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":29},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":12},{"name":"options 2","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[16,43],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"exponential/res_3s","beta57",30,-1,1,5.5,1,"fixed","standard",true]},{"id":23,"type":"ClownGuide_Style_Beta","pos":[470,4520],"size":[244.26441955566406,286],"flags":{},"order":30,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":31},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":26}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[30,37],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownGuide_Style_Beta"},"widgets_values":["negative","WCT",1,1,"constant",0,20,false]},{"id":24,"type":"LoadImage","pos":[-497.6204833984375,4160.34375],"size":[315,314],"flags":{},"order":9,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[33,34,49],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["ChatGPT Image May 13, 2025, 09_38_14 AM.png","image"]},{"id":16,"type":"ClownsharKSampler_Beta","pos":[1890,3720],"size":[309.2452087402344,691.814208984375],"flags":{},"order":35,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":41},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":43},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":52},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":22},{"name":"options 2","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[1],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"exponential/res_3s","beta57",30,-1,1,1,-1,"fixed","standard",true]},{"id":38,"type":"Note","pos":[-398.6913757324219,3401.711669921875],"size":[336.9422302246094,88],"flags":{},"order":10,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Check out the \"ultracascade txt2img\" workflow for non-style related explanations of this 
workflow."],"color":"#432","bgcolor":"#653"},{"id":39,"type":"Note","pos":[-515.8250732421875,4543.421875],"size":[342.7132263183594,118.7740249633789],"flags":{},"order":11,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["This image serves as a style/color palette reference.\n\nInclude something about the style in the prompt (painting, illustration, pen drawing, etc.) or use ClipVision (which is very good with Cascade) if you wish to ensure that more than just the color palette is transferred."],"color":"#432","bgcolor":"#653"},{"id":32,"type":"ClownGuide_Style_Beta","pos":[2040,4710],"size":[236.5709686279297,286],"flags":{},"order":33,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":47},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":45}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[52],"slot_index":0}],"properties":{"Node name for S&R":"ClownGuide_Style_Beta"},"widgets_values":["negative","WCT",1,1,"constant",0,-1,false]},{"id":33,"type":"ClownGuide_Style_Beta","pos":[1775.3868408203125,4709.03857421875],"size":[238.49423217773438,286],"flags":{},"order":31,"mode":0,"inputs":[{"name":"guide","localized_name":"guide","type":"LATENT","shape":7,"link":48},{"name":"mask","localized_name":"mask","type":"MASK","shape":7,"link":null},{"name":"weights","localized_name":"weights","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null}],"outputs":[{"name":"guides","localized_name":"guides","type":"GUIDES","links":[45],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownGuide_Style_Beta"},"widgets_values":["positive","WCT",1,1,"constant",0,-1,false]},{"id":34,"type":"VAEEncode","pos":[1598.26904296875,4709.12841796875],"size":[140,46],"flags":{},"order":28,"mode":0,"inputs":[{"name":"pixels","localized_name":"pixels","type":"IMAGE","link":50},{"name":"vae","localized_name":"vae","type":"VAE","link":51}],"outputs":[{"name":"LATENT","localized_name":"LATENT","type":"LATENT","links":[47,48],"slot_index":0}],"properties":{"Node name for S&R":"VAEEncode"},"widgets_values":[]},{"id":35,"type":"ImageResize+","pos":[1359.3343505859375,4709.12890625],"size":[210,218],"flags":{},"order":22,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","link":49}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[50],"slot_index":0},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"ImageResize+"},"widgets_values":[1536,1536,"lanczos","stretch","always",0]},{"id":40,"type":"Note","pos":[778.7919921875,4684.98095703125],"size":[342.7132263183594,118.7740249633789],"flags":{},"order":12,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Set end_step to -1 (which means \"infinity\", \"run until the end\") or 10000, etc. if you wish to use the style guide for all steps. Sometimes this can cause a bit of a CFG burned look, so mileage may vary. "],"color":"#432","bgcolor":"#653"},{"id":36,"type":"Note","pos":[1889.730712890625,3353.24462890625],"size":[314.823486328125,88],"flags":{},"order":13,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["This patcher is only needed if you wish to use the style guide with stage B. 
It'll improve adherence to the colors in the style guide."],"color":"#432","bgcolor":"#653"},{"id":37,"type":"Note","pos":[1153.2738037109375,3351.5126953125],"size":[410.0306701660156,88],"flags":{},"order":14,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Connect ClownOptions Cycles to the node below to increase the effect even more. It will cause it to rerun the single step this node is set to run (steps_to_run == 1), by unsampling, sampling, unsampling, sampling, etc. in a loop."],"color":"#432","bgcolor":"#653"},{"id":42,"type":"SharkOptions_Beta","pos":[478.9419860839844,3353.24462890625],"size":[230.37158203125,130],"flags":{},"order":15,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"SharkOptions_Beta"},"widgets_values":["perlin",1,1,false]},{"id":43,"type":"Note","pos":[97.72860717773438,3353.8193359375],"size":[336.9422302246094,88],"flags":{},"order":17,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["TIP: Try connecting the options nodes to the right to some of the samplers. 
It'll replace the default noise types with perlin, which can be quite good with Cascade."],"color":"#432","bgcolor":"#653"},{"id":41,"type":"ClownOptions_SDE_Beta","pos":[801.105712890625,3352.283203125],"size":[301.5363464355469,266],"flags":{},"order":16,"mode":0,"inputs":[{"name":"etas","localized_name":"etas","type":"SIGMAS","shape":7,"link":null},{"name":"etas_substep","localized_name":"etas_substep","type":"SIGMAS","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownOptions_SDE_Beta"},"widgets_values":["perlin","perlin","hard","hard",0.5,0.5,-1,"fixed"]}],"links":[[1,16,0,1,0,"LATENT"],[2,8,0,1,1,"VAE"],[3,10,0,2,0,"MODEL"],[4,9,0,2,1,"CLIP"],[5,1,0,3,0,"IMAGE"],[6,2,1,11,0,"CLIP"],[7,2,0,12,0,"MODEL"],[8,2,1,13,0,"CLIP"],[9,12,0,14,0,"MODEL"],[10,11,0,14,1,"CONDITIONING"],[11,13,0,14,2,"CONDITIONING"],[12,5,0,14,6,"OPTIONS"],[16,14,0,15,3,"LATENT"],[17,6,0,15,6,"OPTIONS"],[22,4,0,16,6,"OPTIONS"],[23,17,0,18,3,"GUIDES"],[24,20,0,19,1,"VAE"],[25,20,0,22,1,"VAE"],[26,21,0,23,3,"GUIDES"],[27,19,0,17,0,"LATENT"],[28,19,0,18,0,"LATENT"],[29,18,0,14,5,"GUIDES"],[30,23,0,15,5,"GUIDES"],[31,22,0,23,0,"LATENT"],[32,22,0,21,0,"LATENT"],[33,24,0,19,0,"IMAGE"],[34,24,0,22,0,"IMAGE"],[35,15,0,26,4,"LATENT"],[37,23,0,26,5,"GUIDES"],[40,7,0,31,0,"MODEL"],[41,31,0,16,0,"MODEL"],[43,14,0,16,3,"LATENT"],[45,33,0,32,3,"GUIDES"],[47,34,0,32,0,"LATENT"],[48,34,0,33,0,"LATENT"],[49,24,0,35,0,"IMAGE"],[50,35,0,34,0,"IMAGE"],[51,8,0,34,1,"VAE"],[52,32,0,16,5,"GUIDES"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.2100000000000006,"offset":[2416.6858398230765,-3132.1930084977703]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0},"version":0.4} ================================================ FILE: example_workflows/ultracascade txt2img.json ================================================ 
{"last_node_id":33,"last_link_id":23,"nodes":[{"id":1,"type":"VAEDecode","pos":[1867.32421875,3610.962158203125],"size":[210,46],"flags":{"collapsed":false},"order":29,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":1},{"name":"vae","localized_name":"vae","type":"VAE","link":2,"slot_index":1}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","shape":3,"links":[5],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode"},"widgets_values":[]},{"id":2,"type":"LoraLoader","pos":[-24.50164031982422,3718.225341796875],"size":[359.7619323730469,126],"flags":{},"order":22,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":3},{"name":"clip","localized_name":"clip","type":"CLIP","link":4}],"outputs":[{"name":"MODEL","localized_name":"MODEL","type":"MODEL","links":[7],"slot_index":0},{"name":"CLIP","localized_name":"CLIP","type":"CLIP","links":[6,8],"slot_index":1}],"properties":{"Node name for S&R":"LoraLoader"},"widgets_values":["csbw_cascade_dark_ema.safetensors",1,1]},{"id":4,"type":"SharkOptions_UltraCascade_Latent_Beta","pos":[1522.302734375,4481.47900390625],"size":[310.79998779296875,82],"flags":{},"order":0,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[22],"slot_index":0}],"properties":{"Node name for S&R":"SharkOptions_UltraCascade_Latent_Beta"},"widgets_values":[1536,1536]},{"id":5,"type":"SharkOptions_UltraCascade_Latent_Beta","pos":[797.6149291992188,4484.87158203125],"size":[310.79998779296875,82],"flags":{},"order":1,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[12],"slot_index":0}],"properties":{"Node name for 
S&R":"SharkOptions_UltraCascade_Latent_Beta"},"widgets_values":[24,24]},{"id":6,"type":"SharkOptions_UltraCascade_Latent_Beta","pos":[1157.109375,4484.87158203125],"size":[310.79998779296875,82],"flags":{},"order":2,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[17],"slot_index":0}],"properties":{"Node name for S&R":"SharkOptions_UltraCascade_Latent_Beta"},"widgets_values":[36,36]},{"id":7,"type":"UNETLoader","pos":[1149.8580322265625,3582.3779296875],"size":[356.544677734375,82],"flags":{},"order":3,"mode":0,"inputs":[],"outputs":[{"name":"MODEL","localized_name":"MODEL","type":"MODEL","links":[18],"slot_index":0}],"properties":{"Node name for S&R":"UNETLoader"},"widgets_values":["stage_b_lite_CSBW_v1.1.safetensors","default"]},{"id":8,"type":"VAELoader","pos":[1533.0584716796875,3605.814697265625],"size":[294.6280212402344,58],"flags":{},"order":4,"mode":0,"inputs":[],"outputs":[{"name":"VAE","localized_name":"VAE","type":"VAE","links":[2],"slot_index":0}],"properties":{"Node name for S&R":"VAELoader"},"widgets_values":["stage_a_ft_hq.safetensors"]},{"id":10,"type":"UltraCascade_Loader","pos":[-394.08612060546875,3670.32373046875],"size":[345.5117492675781,82.95540618896484],"flags":{},"order":5,"mode":0,"inputs":[],"outputs":[{"name":"MODEL","localized_name":"MODEL","type":"MODEL","shape":3,"links":[3],"slot_index":0}],"properties":{"Node name for S&R":"UltraCascade_Loader"},"widgets_values":["stage_c_bf16.safetensors","ultrapixel_t2i.safetensors"]},{"id":11,"type":"CLIPTextEncode","pos":[359.33685302734375,3742.75537109375],"size":[351.592529296875,173.00360107421875],"flags":{},"order":24,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":6}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[10,14,19],"slot_index":0}],"properties":{"Node name for 
S&R":"CLIPTextEncode"},"widgets_values":["impasto oil painting by Yayoi Kusama and Lisa Frank, thick paint textures, tunning contrasts at night with stylish roughly drawn thick black lines, a nuclear explosion destroying a city, its towering wide glowing nuclear mushroom cloud enveloping the entire skyline, the nuclear fireball lighting up the dark sky"]},{"id":12,"type":"UltraCascade_PerturbedAttentionGuidance","pos":[361.78070068359375,3621.58740234375],"size":[344.3999938964844,58],"flags":{},"order":23,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":7}],"outputs":[{"name":"MODEL","localized_name":"MODEL","type":"MODEL","links":[9,13],"slot_index":0}],"properties":{"Node name for S&R":"UltraCascade_PerturbedAttentionGuidance"},"widgets_values":[3]},{"id":13,"type":"CLIPTextEncode","pos":[355.95135498046875,3972.858154296875],"size":[356.2470703125,110.6326904296875],"flags":{},"order":25,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":8}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[11,15,20],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["low quality, bad quality, low detail, blurry, 
unsharp"]},{"id":14,"type":"ClownsharKSampler_Beta","pos":[796.9224243164062,3725.34375],"size":[311.41375732421875,693.9824829101562],"flags":{},"order":26,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":9},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":10},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":11},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":12},{"name":"options 2","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[16],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"exponential/res_3s","beta57",30,-1,1,5.5,1,"fixed","standard",true]},{"id":16,"type":"ClownsharKSampler_Beta","pos":[1522.29052734375,3722.670654296875],"size":[309.2452087402344,691.814208984375],"flags":{},"order":28,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":18},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":19},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":20},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":21},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":22},{"name":"options 2","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[1],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"exponential/res_3s","beta57",30,-1,1,1,-1,"fixed","standard",true]},{"id":9,"type":"CLIPLoader","pos":[-394.50164794921875,3810.115478515625],"size":[344.635498046875,98],"flags":{},"order":6,"mode":0,"inputs":[],"outputs":[{"name":"CLIP","localized_name":"CLIP","type":"CLIP","links":[4],"slot_index":0}],"properties":{"Node name for 
S&R":"CLIPLoader"},"widgets_values":["cascade_text_encoder.safetensors","stable_cascade","default"]},{"id":15,"type":"ClownsharKSampler_Beta","pos":[1155.5926513671875,3724.48974609375],"size":[314.421142578125,693.9824829101562],"flags":{},"order":27,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":13},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":14},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":15},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":16},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":17},{"name":"options 2","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[21],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"exponential/res_3s","beta57",30,-1,1,5.5,100,"fixed","standard",true]},{"id":20,"type":"Note","pos":[1150,4640],"size":[331.63720703125,415.29815673828125],"flags":{},"order":7,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Stage UP: a patched version of Stable Cascade stage C (\"UltraPixel\"). \n\nThe key with these dimensions is to keep the aspect ratio the same as the stage C latent. Typically, best results are with a 1.5x upscale. 2.0x works, but will result in somewhat more issues with doubling, and can be a lot slower. However, the detail level will also be very high.\n\nSome viable resolutions are listed below. 
Asterisks signify ones that have been verified to work particularly well.\n\n32x32\n36x36 **\n40x40\n42x42\n48x48 *\n\n40x24\n50x30\n60x36 **\n70x42\n80x48 *\n\n72x36 \n80x40 *\n96x48 (very slow!)\n\n\n\n"],"color":"#432","bgcolor":"#653"},{"id":21,"type":"Note","pos":[1520,4640],"size":[331.63720703125,415.29815673828125],"flags":{},"order":8,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Stage B: the Stable Cascade superresolution model.\n\nAs with stage UP, the key with these dimensions is to keep the aspect ratio the same as the prior latents. Theoretically, any resolution may be used, though some odd distortions can occur when the ideal upscale ratio is not used. It's not entirely clear what those ratios are, so some experimentation may be necessary. \n\nSome resolutions that work particularly well are:\n\n1536x1536 *\n2048x2048 *\n\n1600x960\n2560x1536 **\n2880x1792 *\n3200x1920\n\nIf you use stage B lite, you can hit 4k resolutions without even using more than 12GB of VRAM.\n\nIt's highly recommended to use the CSBW finetune of stage B, as it fixes many of the severe artifact problems the original release had.\n\nNote: CFG is not needed for this stage!"],"color":"#432","bgcolor":"#653"},{"id":19,"type":"Note","pos":[780,4640],"size":[331.63720703125,415.29815673828125],"flags":{},"order":9,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Stage C: the original Stable Cascade version. \n\nStable Cascade latents are actually quite small: typically, a 1024x1024 image will be generated from a stage C latent that is only 24x24 (for comparison, with SDXL or SD1.5, the dimensions are 128x128). \n\n\"Compression\" is just a shorthand method of determining these dimensions, such as 24x24 (1024 / 42 = 24.38, which means a \"compression\" of 42).\n\nThis poses a problem though: Cascade was only trained on a handful of resolutions. The difference between 24x24 and 25x25 is a significant drop in quality and coherence. 
Therefore, it is best to just set these dimensions directly.\n\nThe best trained resolutions are:\n\n24x24 > 32x32\n30x16 > 40x24 \n\n48x24 also works, but seems to result in more doubling problems than the others.\n\n\n"],"color":"#432","bgcolor":"#653"},{"id":23,"type":"Note","pos":[-1140,3810],"size":[715.61083984375,89.37511444091797],"flags":{},"order":10,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Any clip G will do. The Cascade version is available at:\n\nhttps://huggingface.co/stabilityai/stable-cascade/blob/main/text_encoder/model.bf16.safetensors\n\n"],"color":"#432","bgcolor":"#653"},{"id":22,"type":"Note","pos":[-1140,3590],"size":[717.709228515625,165.61032104492188],"flags":{},"order":11,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["I recommend the BF16 version of stage C. There is no visible difference vs. the full precision weights, and it halves the disk space requirements.\n\nhttps://huggingface.co/stabilityai/stable-cascade/blob/main/stage_c_bf16.safetensors\n\nIMPORTANT: The original UltraPixel \"safetensors\" is not a safetensors at all - it is a PICKLE, where they lazily (at best) changed the file extension to \".safetensors\"!\n\nI converted it to a real safetensors file, and it's available below:\n\nhttps://huggingface.co/ClownsharkBatwing/ultrapixel_convert/blob/main/ultrapixel_t2i.safetensors"],"color":"#432","bgcolor":"#653"},{"id":26,"type":"Note","pos":[570,3250],"size":[457.5304870605469,94.27093505859375],"flags":{},"order":12,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["This is a checkpoint that, for convenience, includes the stage B lite CSBW finetune, clip G, and stage A (the FT_HQ 
finetune).\n\nhttps://huggingface.co/ClownsharkBatwing/CSBW_Style/blob/main/cascade_B-lite_refined_CSBW_v1.1.safetensors"],"color":"#432","bgcolor":"#653"},{"id":27,"type":"Note","pos":[1050,3420],"size":[457.5304870605469,94.27093505859375],"flags":{},"order":13,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["This is the stage B lite CSBW finetune (model only).\n\nhttps://huggingface.co/ClownsharkBatwing/Cascade_Stage_B_CSBW_Refined/blob/main/stage_b_lite_CSBW_v1.1.safetensors"],"color":"#432","bgcolor":"#653"},{"id":25,"type":"Note","pos":[305.43292236328125,3455.5634765625],"size":[457.5304870605469,94.27093505859375],"flags":{},"order":14,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Perturbed attention guidance (PAG) makes an enormous difference with Stable Cascade stages C and UP. Like CFG, it will double the runtime."],"color":"#432","bgcolor":"#653"},{"id":29,"type":"Note","pos":[1534.365478515625,3422.38427734375],"size":[547.0546875,91.47331237792969],"flags":{},"order":15,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["This is a finetune of stage A. 
You will get a sharper image, but in images with large white areas, small circular grey halos are sometimes visible.\n\nhttps://huggingface.co/madebyollin/stage-a-ft-hq/blob/main/stage_a_ft_hq.safetensors"],"color":"#432","bgcolor":"#653"},{"id":28,"type":"CheckpointLoaderSimple","pos":[1054.370849609375,3250],"size":[452.7829895019531,102.89583587646484],"flags":{},"order":16,"mode":0,"inputs":[],"outputs":[{"name":"MODEL","localized_name":"MODEL","type":"MODEL","links":null},{"name":"CLIP","localized_name":"CLIP","type":"CLIP","links":null},{"name":"VAE","localized_name":"VAE","type":"VAE","links":null}],"properties":{"Node name for S&R":"CheckpointLoaderSimple"},"widgets_values":["cascade_B-lite_refined_CSBW_v1.1.safetensors"]},{"id":24,"type":"Note","pos":[-1140,3960],"size":[715.61083984375,113.57872772216797],"flags":{},"order":17,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["The LORA was trained with OneTrainer (https://github.com/Nerogar/OneTrainer) on some of my own SDXL generations. It has deep colors and is strong with wacky paint, illustration, and vector art styles. \n\nCascade learns extremely quickly and is very adept with artistic styles (it knows many artist names).\n\nhttps://huggingface.co/ClownsharkBatwing/CSBW_Style/blob/main/csbw_cascade_dark_ema.safetensors\n"],"color":"#432","bgcolor":"#653"},{"id":30,"type":"Note","pos":[796.0823364257812,3575.965576171875],"size":[315.20135498046875,88],"flags":{},"order":18,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["res_3s can be replaced with res_2s or even res_2m or res_3m (in the multistep folder in the sampler_name dropdown) if more speed is desired."],"color":"#432","bgcolor":"#653"},{"id":33,"type":"Note","pos":[-220,4190],"size":[336.9422302246094,88],"flags":{},"order":19,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["TIP: Try connecting the options nodes to the right to some of the samplers. 
It'll replace the default noise types with perlin, which can be quite good with Cascade."],"color":"#432","bgcolor":"#653"},{"id":31,"type":"SharkOptions_Beta","pos":[150,4190],"size":[234.2189178466797,130],"flags":{},"order":20,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"SharkOptions_Beta"},"widgets_values":["perlin",1,1,false]},{"id":32,"type":"ClownOptions_SDE_Beta","pos":[420,4190],"size":[281.34088134765625,266],"flags":{},"order":21,"mode":0,"inputs":[{"name":"etas","localized_name":"etas","type":"SIGMAS","shape":7,"link":null},{"name":"etas_substep","localized_name":"etas_substep","type":"SIGMAS","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownOptions_SDE_Beta"},"widgets_values":["perlin","perlin","hard","hard",0.5,0.5,-1,"fixed"]},{"id":3,"type":"SaveImage","pos":[1871.823974609375,3716.926025390625],"size":[670.7464599609375,700.1661987304688],"flags":{},"order":30,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":5}],"outputs":[],"properties":{},"widgets_values":["ComfyUI"]}],"links":[[1,16,0,1,0,"LATENT"],[2,8,0,1,1,"VAE"],[3,10,0,2,0,"MODEL"],[4,9,0,2,1,"CLIP"],[5,1,0,3,0,"IMAGE"],[6,2,1,11,0,"CLIP"],[7,2,0,12,0,"MODEL"],[8,2,1,13,0,"CLIP"],[9,12,0,14,0,"MODEL"],[10,11,0,14,1,"CONDITIONING"],[11,13,0,14,2,"CONDITIONING"],[12,5,0,14,6,"OPTIONS"],[13,12,0,15,0,"MODEL"],[14,11,0,15,1,"CONDITIONING"],[15,13,0,15,2,"CONDITIONING"],[16,14,0,15,3,"LATENT"],[17,6,0,15,6,"OPTIONS"],[18,7,0,16,0,"MODEL"],[19,11,0,16,1,"CONDITIONING"],[20,13,0,16,2,"CONDITIONING"],[21,15,0,16,3,"LATENT"],[22,4,0,16,6,"OPTIONS"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.2100000000000006,"offset":[2786.903339088035,-3170.107825364122]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0},"version":0.4} ================================================ FILE: example_workflows/wan img2vid 720p (fp8 fast).json ================================================ {"last_node_id":67,"last_link_id":138,"nodes":[{"id":56,"type":"PreviewImage","pos":[480,600],"size":[210,246],"flags":{},"order":8,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":118}],"outputs":[],"properties":{"Node name for S&R":"PreviewImage"},"widgets_values":[]},{"id":8,"type":"VAEDecode","pos":[1140,80],"size":[210,46],"flags":{},"order":12,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":121},{"name":"vae","localized_name":"vae","type":"VAE","link":137}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[56],"slot_index":0}],"properties":{"Node name for 
S&R":"VAEDecode"},"widgets_values":[]},{"id":6,"type":"CLIPTextEncode","pos":[30,20],"size":[422.84503173828125,164.31304931640625],"flags":{},"order":6,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":134}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[97],"slot_index":0}],"title":"CLIP Text Encode (Positive Prompt)","properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["trump and putin kissing, two men in love making out"],"color":"#232","bgcolor":"#353"},{"id":61,"type":"LoadImage","pos":[-169.0706024169922,588.6607666015625],"size":[315,314],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[128],"slot_index":0},{"name":"MASK","localized_name":"MASK","type":"MASK","links":null}],"properties":{"Node name for S&R":"LoadImage"},"widgets_values":["pasted/image (371).png","image"]},{"id":55,"type":"ImageResize+","pos":[190.57818603515625,590.173583984375],"size":[251.91366577148438,218],"flags":{},"order":4,"mode":0,"inputs":[{"name":"image","localized_name":"image","type":"IMAGE","link":128}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[118,119,120],"slot_index":0},{"name":"width","localized_name":"width","type":"INT","links":null},{"name":"height","localized_name":"height","type":"INT","links":null}],"properties":{"Node name for S&R":"ImageResize+"},"widgets_values":[1280,720,"nearest","fill / crop","always",0]},{"id":51,"type":"CLIPVisionEncode","pos":[191.15573120117188,457.861572265625],"size":[253.60000610351562,78],"flags":{},"order":9,"mode":0,"inputs":[{"name":"clip_vision","localized_name":"clip_vision","type":"CLIP_VISION","link":94},{"name":"image","localized_name":"image","type":"IMAGE","link":120}],"outputs":[{"name":"CLIP_VISION_OUTPUT","localized_name":"CLIP_VISION_OUTPUT","type":"CLIP_VISION_OUTPUT","links":[107],"slot_index":0}],"properties":{"Node name for 
S&R":"CLIPVisionEncode"},"widgets_values":["none"]},{"id":7,"type":"CLIPTextEncode","pos":[29.393102645874023,230.72264099121094],"size":[425.27801513671875,180.6060791015625],"flags":{},"order":7,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":135}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[98],"slot_index":0}],"title":"CLIP Text Encode (Negative Prompt)","properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["Overexposure, static, blurred details, subtitles, paintings, pictures, still, overall gray, worst quality, low quality, JPEG compression residue, ugly, mutilated, redundant fingers, poorly painted hands, poorly painted faces, deformed, disfigured, deformed limbs, fused fingers, cluttered background, three legs, a lot of people in the background, upside down"],"color":"#322","bgcolor":"#533"},{"id":49,"type":"CLIPVisionLoader","pos":[-169.1327362060547,459.3064880371094],"size":[315,58],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[{"name":"CLIP_VISION","localized_name":"CLIP_VISION","type":"CLIP_VISION","links":[94],"slot_index":0}],"properties":{"Node name for S&R":"CLIPVisionLoader"},"widgets_values":["clip_vision_vit_h.safetensors"]},{"id":66,"type":"ClownModelLoader","pos":[-330.852294921875,28.57785415649414],"size":[315,266],"flags":{},"order":2,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[138],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[134,135],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[136,137],"slot_index":2}],"properties":{"Node name for 
S&R":"ClownModelLoader"},"widgets_values":["wan2.1_i2v_720p_14B_fp8_e4m3fn.safetensors","fp8_e4m3fn_fast","umt5_xxl_fp8_e4m3fn_scaled.safetensors",".none",".none",".none","wan","wan_2.1_vae.safetensors"]},{"id":54,"type":"ClownsharKSampler_Beta","pos":[780,190],"size":[337.16485595703125,661.9249267578125],"flags":{},"order":11,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":114},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":115},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":113},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[121],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"multistep/res_2m","beta57",30,-1,1,5.5,0,"fixed","standard",true]},{"id":65,"type":"TorchCompileModels","pos":[479.10052490234375,-32.837005615234375],"size":[273.09326171875,178],"flags":{},"order":5,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":138}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[],"slot_index":0}],"properties":{"Node name for 
S&R":"TorchCompileModels"},"widgets_values":["inductor",false,"default",false,64,0]},{"id":50,"type":"WanImageToVideo","pos":[478.8801574707031,204.63995361328125],"size":[269.6244201660156,210],"flags":{},"order":10,"mode":0,"inputs":[{"name":"positive","localized_name":"positive","type":"CONDITIONING","link":97},{"name":"negative","localized_name":"negative","type":"CONDITIONING","link":98},{"name":"vae","localized_name":"vae","type":"VAE","link":136},{"name":"clip_vision_output","localized_name":"clip_vision_output","type":"CLIP_VISION_OUTPUT","shape":7,"link":107},{"name":"start_image","localized_name":"start_image","type":"IMAGE","shape":7,"link":119}],"outputs":[{"name":"positive","localized_name":"positive","type":"CONDITIONING","links":[114],"slot_index":0},{"name":"negative","localized_name":"negative","type":"CONDITIONING","links":[115],"slot_index":1},{"name":"latent","localized_name":"latent","type":"LATENT","links":[113],"slot_index":2}],"properties":{"Node name for S&R":"WanImageToVideo"},"widgets_values":[1280,720,33,1]},{"id":28,"type":"SaveAnimatedWEBP","pos":[1140,190],"size":[595.4246215820312,665.2847290039062],"flags":{},"order":13,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":56}],"outputs":[],"properties":{},"widgets_values":["ComfyUI",16,false,100,"default"]},{"id":67,"type":"Note","pos":[208.15240478515625,-120.98509979248047],"size":[244.7659149169922,88],"flags":{},"order":3,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["TorchCompileModels may not work on older GPUs. 
After the first run, should lead to significant time savings with GPUs such as the 4090."],"color":"#432","bgcolor":"#653"}],"links":[[56,8,0,28,0,"IMAGE"],[94,49,0,51,0,"CLIP_VISION"],[97,6,0,50,0,"CONDITIONING"],[98,7,0,50,1,"CONDITIONING"],[107,51,0,50,3,"CLIP_VISION_OUTPUT"],[113,50,2,54,3,"LATENT"],[114,50,0,54,1,"CONDITIONING"],[115,50,1,54,2,"CONDITIONING"],[118,55,0,56,0,"IMAGE"],[119,55,0,50,4,"IMAGE"],[120,55,0,51,1,"IMAGE"],[121,54,0,8,0,"LATENT"],[128,61,0,55,0,"IMAGE"],[134,66,1,6,0,"CLIP"],[135,66,1,7,0,"CLIP"],[136,66,2,50,2,"VAE"],[137,66,2,8,1,"VAE"],[138,66,0,65,0,"MODEL"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.6105100000000012,"offset":[2635.71214060565,417.84191139269006]},"VHS_latentpreview":false,"VHS_latentpreviewrate":0},"version":0.4} ================================================ FILE: example_workflows/wan txt2img (fp8 fast).json ================================================ {"last_node_id":698,"last_link_id":1748,"nodes":[{"id":676,"type":"CLIPTextEncode","pos":[2651.457763671875,139.2773895263672],"size":[311.1542663574219,134.35691833496094],"flags":{},"order":4,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":1745}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[1743],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["a woman picks up a coffee cup and smiles, then suddenly throws it out the window in her dirty apartment"]},{"id":7,"type":"CLIPTextEncode","pos":[2650.5888671875,336.779296875],"size":[310.6131286621094,150.69346618652344],"flags":{},"order":5,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":1746}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[1630],"slot_index":0}],"properties":{"Node name for 
S&R":"CLIPTextEncode"},"widgets_values":["色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走"]},{"id":666,"type":"EmptyHunyuanLatentVideo","pos":[2751.183349609375,552.1126708984375],"size":[210,130],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"LATENT","localized_name":"LATENT","type":"LATENT","links":[1631,1741],"slot_index":0}],"properties":{"Node name for S&R":"EmptyHunyuanLatentVideo"},"widgets_values":[480,480,65,1]},{"id":696,"type":"ClownModelLoader","pos":[2220,340],"size":[382.9175109863281,266],"flags":{},"order":1,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1744],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[1745,1746],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[1747],"slot_index":2}],"properties":{"Node name for S&R":"ClownModelLoader"},"widgets_values":["wan2.1_t2v_14B_fp8_e4m3fn.safetensors","fp8_e4m3fn_fast","umt5_xxl_fp8_e4m3fn_scaled.safetensors",".none",".none",".none","wan","wan_2.1_vae.safetensors"]},{"id":698,"type":"Note","pos":[2347.1943359375,-37.566280364990234],"size":[244.7659149169922,88],"flags":{},"order":2,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["TorchCompileModels may not work on older GPUs. 
After the first run, should lead to significant time savings with GPUs such as the 4090."],"color":"#432","bgcolor":"#653"},{"id":346,"type":"ModelSamplingAdvancedResolution","pos":[2340,140],"size":[260.3999938964844,126],"flags":{},"order":3,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1744},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":1741}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1684,1748],"slot_index":0}],"properties":{"Node name for S&R":"ModelSamplingAdvancedResolution"},"widgets_values":["exponential",1.35,0.85]},{"id":665,"type":"ClownsharKSampler_Beta","pos":[3010,140],"size":[310.3046875,656.2719116210938],"flags":{},"order":7,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":1684},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":1743},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":1630},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1631},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[1643],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharKSampler_Beta"},"widgets_values":[0.5,"multistep/res_3m","beta57",20,-1,1,5.5,896816,"fixed","standard",true]},{"id":667,"type":"SaveAnimatedWEBP","pos":[3360,140],"size":[315,366],"flags":{},"order":9,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":1632}],"outputs":[],"properties":{},"widgets_values":["ComfyUI",16,false,100,"default"]},{"id":697,"type":"TorchCompileModels","pos":[2673.776611328125,-98.98099517822266],"size":[260.8105163574219,178],"flags":{},"order":6,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1748}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":null}],"properties":{"Node name for S&R":"TorchCompileModels"},"widgets_values":["inductor",false,"default",false,64,0]},{"id":668,"type":"VAEDecode","pos":[3359.884521484375,32.89006805419922],"size":[210,46],"flags":{},"order":8,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":1643},{"name":"vae","localized_name":"vae","type":"VAE","link":1747}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[1632],"slot_index":0}],"properties":{"Node name for 
S&R":"VAEDecode"},"widgets_values":[]}],"links":[[1630,7,0,665,2,"CONDITIONING"],[1631,666,0,665,3,"LATENT"],[1632,668,0,667,0,"IMAGE"],[1643,665,0,668,0,"LATENT"],[1684,346,0,665,0,"MODEL"],[1741,666,0,346,1,"LATENT"],[1743,676,0,665,1,"CONDITIONING"],[1744,696,0,346,0,"MODEL"],[1745,696,1,676,0,"CLIP"],[1746,696,1,7,0,"CLIP"],[1747,696,2,668,1,"VAE"],[1748,346,0,697,0,"MODEL"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.6105100000000008,"offset":[-558.9420074905141,402.3405679733133]},"node_versions":{"comfy-core":"0.3.26","comfyui_controlnet_aux":"1e9eac6377c882da8bb360c7544607036904362c","ComfyUI-VideoHelperSuite":"c36626c6028faca912eafcedbc71f1d342fb4d2a"},"VHS_latentpreview":false,"VHS_latentpreviewrate":0,"VHS_MetadataImage":true,"VHS_KeepIntermediate":true},"version":0.4} ================================================ FILE: example_workflows/wan vid2vid.json ================================================ {"last_node_id":406,"last_link_id":1039,"nodes":[{"id":7,"type":"CLIPTextEncode","pos":[971.2105712890625,537.63671875],"size":[436.48480224609375,118.3749771118164],"flags":{},"order":11,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":1017}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[832],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走"]},{"id":346,"type":"ModelSamplingAdvancedResolution","pos":[1152.6932373046875,133.92713928222656],"size":[260.3999938964844,126],"flags":{},"order":13,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1018},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","link":1027}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1010,1011],"slot_index":0}],"properties":{"Node name for 
S&R":"ModelSamplingAdvancedResolution"},"widgets_values":["exponential",1.35,0.85]},{"id":391,"type":"TorchCompileModels","pos":[1438.64501953125,80.51760864257812],"size":[258.1737365722656,178],"flags":{},"order":14,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","link":1010}],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":null}],"properties":{"Node name for S&R":"TorchCompileModels"},"widgets_values":["inductor",false,"default",false,64,0]},{"id":365,"type":"SaveAnimatedWEBP","pos":[2500,310],"size":[315,366],"flags":{},"order":19,"mode":0,"inputs":[{"name":"images","localized_name":"images","type":"IMAGE","link":945}],"outputs":[],"properties":{},"widgets_values":["ComfyUI",16,false,100,"default",""]},{"id":393,"type":"ClownModelLoader","pos":[626.4608154296875,313.0701904296875],"size":[315,266],"flags":{},"order":0,"mode":0,"inputs":[],"outputs":[{"name":"model","localized_name":"model","type":"MODEL","links":[1018],"slot_index":0},{"name":"clip","localized_name":"clip","type":"CLIP","links":[1016,1017],"slot_index":1},{"name":"vae","localized_name":"vae","type":"VAE","links":[1012,1013],"slot_index":2}],"properties":{"Node name for 
S&R":"ClownModelLoader"},"widgets_values":["wan2.1_t2v_14B_fp8_e4m3fn.safetensors","fp8_e4m3fn","umt5_xxl_fp8_e4m3fn_scaled.safetensors",".none",".none",".none","wan","wan_2.1_vae.safetensors"]},{"id":394,"type":"ClownsharkChainsampler_Beta","pos":[1799.4302978515625,313.5021667480469],"size":[315,530],"flags":{},"order":16,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1028},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":1029},{"name":"options 2","type":"OPTIONS","link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[1033],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for 
S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"exponential/res_2s",1,5.5,"resample",true]},{"id":324,"type":"ClownsharKSampler_Beta","pos":[1433.78466796875,314.1369934082031],"size":[337.03857421875,670],"flags":{},"order":15,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":1011},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":997},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":832},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1026},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[1028],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharKSampler_Beta"},"widgets_values":[0,"multistep/res_2m","beta57",20,12,1,1,0,"fixed","unsample",true]},{"id":395,"type":"ClownOptions_Cycles_Beta","pos":[1843.936767578125,125.13945007324219],"size":[210,130],"flags":{},"order":1,"mode":0,"inputs":[{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"options","localized_name":"options","type":"OPTIONS","links":[1029],"slot_index":0}],"properties":{"Node name for 
S&R":"ClownOptions_Cycles_Beta"},"widgets_values":[10,1,0.5,5.5]},{"id":6,"type":"CLIPTextEncode","pos":[966.9983520507812,314.1016540527344],"size":[447.32421875,169.55857849121094],"flags":{},"order":10,"mode":0,"inputs":[{"name":"clip","localized_name":"clip","type":"CLIP","link":1016}],"outputs":[{"name":"CONDITIONING","localized_name":"CONDITIONING","type":"CONDITIONING","links":[997],"slot_index":0}],"properties":{"Node name for S&R":"CLIPTextEncode"},"widgets_values":["A pretty black woman with thick gorgeous hair walks slowly through a tall, modern colonnade of concrete and glass, cradling a sleek silver laptop under her arm. She wears a sand-colored coat with a high collar and sharp tailoring, the buttons neatly fastened, exuding a quiet, focused confidence. Her complexion is porcelain-smooth, lightly touched by the soft overcast light that filters down through the glass canopy. Dark, straight hair is neatly parted and tucked behind one ear, moving ever so slightly as she walks. Her expression is thoughtful, eyes cast downward in introspection, lips gently pressed into a faint, unreadable line.\n\nThe camera begins off-center, panning slowly to align with the corridor’s clean architectural symmetry. Repeating vertical columns frame her movement, creating a visual rhythm that guides the viewer’s eye toward the vanishing point ahead. As she walks, she shifts just slightly to the side, a natural adjustment that causes the fabric of her coat to pull gently at the seams, adding a subtle sense of motion.\n\nReflections drift along the windows beside her — faint, soft, and ghostlike. The ambient light is cool and diffused, lending the scene a contemplative, almost suspended feeling. 
Her presence is calm, deliberate, as though she’s carrying not just the laptop, but something unspoken — a sense of purpose shaped quietly in her mind."]},{"id":397,"type":"Note","pos":[639.0505981445312,128.31825256347656],"size":[301.3404235839844,112.45540618896484],"flags":{},"order":2,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Sometimes the first frame looks noisy with WAN. You can either throw it away, use more steps, use a more accurate sampler (2s > 2m, 3s > 2s), or ensure you aren't using a \"fast\" mode for the weights, such as fp8_e4m3fn_fast, which results in a significant hit to quality."],"color":"#432","bgcolor":"#653"},{"id":398,"type":"Note","pos":[1451.546142578125,1060.8258056640625],"size":[295.7769470214844,88],"flags":{},"order":3,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["More \"steps_to_run\" will increase the amount of denoise. Values between 12 and 15 are a good place to start.\n"],"color":"#432","bgcolor":"#653"},{"id":403,"type":"Frames Slice Latent","pos":[555.5986938476562,1154.378173828125],"size":[210,82],"flags":{},"order":4,"mode":0,"inputs":[{"name":"frames","localized_name":"frames","type":"LATENT","link":null}],"outputs":[{"name":"latent","localized_name":"latent","type":"LATENT","links":null}],"properties":{"Node name for S&R":"Frames Slice Latent"},"widgets_values":[0,1]},{"id":402,"type":"Frames Slice","pos":[555.5987548828125,990.6537475585938],"size":[210,82],"flags":{},"order":5,"mode":0,"inputs":[{"name":"frames","localized_name":"frames","type":"IMAGE","link":null}],"outputs":[{"name":"image","localized_name":"image","type":"IMAGE","links":null}],"properties":{"Node name for S&R":"Frames Slice"},"widgets_values":[0,1]},{"id":401,"type":"Note","pos":[550.497802734375,712.9439086914062],"size":[239.34762573242188,200.06405639648438],"flags":{},"order":6,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Can use anything that will load a video as a 
sequence of frames. The core node \"Load Image\" will work in place of this one, if you are loading an animated .webp.\n\nThis node allows you to set the number of frames loaded.\n\nThe nodes below will also allow you to pick and choose ranges of frames. Be sure to use Image Preview to verify you're picking the ones you want!"],"color":"#432","bgcolor":"#653"},{"id":316,"type":"VHS_LoadVideo","pos":[808.8834228515625,711.6345825195312],"size":[319.19403076171875,808.9393920898438],"flags":{},"order":7,"mode":0,"inputs":[{"name":"meta_batch","localized_name":"meta_batch","type":"VHS_BatchManager","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":null}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[1014],"slot_index":0},{"name":"frame_count","localized_name":"frame_count","type":"INT","links":null},{"name":"audio","localized_name":"audio","type":"AUDIO","links":null},{"name":"video_info","localized_name":"video_info","type":"VHS_VIDEOINFO","links":null}],"properties":{"Node name for S&R":"VHS_LoadVideo"},"widgets_values":{"video":"3206567-hd_1080_1920_25fps.mp4","force_rate":0,"force_size":"Disabled","custom_width":512,"custom_height":512,"frame_load_cap":35,"skip_first_frames":0,"select_every_nth":1,"choose video to 
upload":"image","videopreview":{"hidden":false,"paused":false,"params":{"force_rate":0,"frame_load_cap":35,"skip_first_frames":0,"select_every_nth":1,"filename":"3206567-hd_1080_1920_25fps.mp4","type":"input","format":"video/mp4"},"muted":false}}},{"id":392,"type":"VAEEncodeAdvanced","pos":[1157.1488037109375,712.4218139648438],"size":[244.18490600585938,278],"flags":{},"order":12,"mode":0,"inputs":[{"name":"image_1","localized_name":"image_1","type":"IMAGE","shape":7,"link":1014},{"name":"image_2","localized_name":"image_2","type":"IMAGE","shape":7,"link":null},{"name":"mask","localized_name":"mask","type":"IMAGE","shape":7,"link":null},{"name":"latent","localized_name":"latent","type":"LATENT","shape":7,"link":null},{"name":"vae","localized_name":"vae","type":"VAE","shape":7,"link":1013}],"outputs":[{"name":"latent_1","localized_name":"latent_1","type":"LATENT","links":[1026,1027],"slot_index":0},{"name":"latent_2","localized_name":"latent_2","type":"LATENT","links":null},{"name":"mask","localized_name":"mask","type":"MASK","links":null},{"name":"empty_latent","localized_name":"empty_latent","type":"LATENT","links":[],"slot_index":3},{"name":"width","localized_name":"width","type":"INT","links":[],"slot_index":4},{"name":"height","localized_name":"height","type":"INT","links":[],"slot_index":5}],"properties":{"Node name for 
S&R":"VAEEncodeAdvanced"},"widgets_values":["false",368,640,"red",false,"16_channels"]},{"id":396,"type":"ClownsharkChainsampler_Beta","pos":[2146.74755859375,313.50225830078125],"size":[315,510],"flags":{},"order":17,"mode":0,"inputs":[{"name":"model","localized_name":"model","type":"MODEL","shape":7,"link":null},{"name":"positive","localized_name":"positive","type":"CONDITIONING","shape":7,"link":null},{"name":"negative","localized_name":"negative","type":"CONDITIONING","shape":7,"link":null},{"name":"sigmas","localized_name":"sigmas","type":"SIGMAS","shape":7,"link":null},{"name":"latent_image","localized_name":"latent_image","type":"LATENT","shape":7,"link":1033},{"name":"guides","localized_name":"guides","type":"GUIDES","shape":7,"link":null},{"name":"options","localized_name":"options","type":"OPTIONS","shape":7,"link":null}],"outputs":[{"name":"output","localized_name":"output","type":"LATENT","links":[1035],"slot_index":0},{"name":"denoised","localized_name":"denoised","type":"LATENT","links":null},{"name":"options","localized_name":"options","type":"OPTIONS","links":null}],"properties":{"Node name for S&R":"ClownsharkChainsampler_Beta"},"widgets_values":[0.5,"exponential/res_2s",-1,5.5,"resample",true]},{"id":400,"type":"Note","pos":[2090.21728515625,70.1985855102539],"size":[324.38916015625,177.81007385253906],"flags":{},"order":8,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Each full cycle reruns the node twice:\n\nresample -> unsample -> resample -> ... \n\nHigher values will change the video more.\n\nres_2m and 3m will preserve more of the initial structure. 
Res_2s and especially 3s will result in more dramatic change.\n\nIf you use more steps_to_run in ClownsharKSampler, you'll need fewer cycles here."],"color":"#432","bgcolor":"#653"},{"id":399,"type":"Note","pos":[2153.567626953125,887.000244140625],"size":[303.7249755859375,88],"flags":{},"order":9,"mode":0,"inputs":[],"outputs":[],"properties":{},"widgets_values":["Using a sampler such as res_2s instead of res_2m in this node can reduce or eliminate first frame noise. It's not always necessary, mileage may vary."],"color":"#432","bgcolor":"#653"},{"id":325,"type":"VAEDecode","pos":[2496.82080078125,203.3095703125],"size":[210,46],"flags":{},"order":18,"mode":0,"inputs":[{"name":"samples","localized_name":"samples","type":"LATENT","link":1035},{"name":"vae","localized_name":"vae","type":"VAE","link":1012}],"outputs":[{"name":"IMAGE","localized_name":"IMAGE","type":"IMAGE","links":[945],"slot_index":0}],"properties":{"Node name for S&R":"VAEDecode"},"widgets_values":[]}],"links":[[832,7,0,324,2,"CONDITIONING"],[945,325,0,365,0,"IMAGE"],[997,6,0,324,1,"CONDITIONING"],[1010,346,0,391,0,"MODEL"],[1011,346,0,324,0,"MODEL"],[1012,393,2,325,1,"VAE"],[1013,393,2,392,4,"VAE"],[1014,316,0,392,0,"IMAGE"],[1016,393,1,6,0,"CLIP"],[1017,393,1,7,0,"CLIP"],[1018,393,0,346,0,"MODEL"],[1026,392,0,324,3,"LATENT"],[1027,392,0,346,1,"LATENT"],[1028,324,0,394,4,"LATENT"],[1029,395,0,394,6,"OPTIONS"],[1033,394,0,396,4,"LATENT"],[1035,396,0,325,0,"LATENT"]],"groups":[],"config":{},"extra":{"ds":{"scale":1.464100000000001,"offset":[1126.1541105871463,24.96236469373386]},"node_versions":{"comfy-core":"0.3.26","comfyui_controlnet_aux":"1e9eac6377c882da8bb360c7544607036904362c","ComfyUI-VideoHelperSuite":"c36626c6028faca912eafcedbc71f1d342fb4d2a"},"VHS_latentpreview":false,"VHS_latentpreviewrate":0,"VHS_MetadataImage":true,"VHS_KeepIntermediate":true},"version":0.4} ================================================ FILE: flux/controlnet.py ================================================ 
#Original code can be found on: https://github.com/XLabs-AI/x-flux/blob/main/src/flux/controlnet.py
#modified to support different types of flux controlnets


class MistolineCondDownsamplBlock(nn.Module):
    """Convolutional encoder applied to the RGB hint image for Mistoline ControlNets.

    Takes a 3-channel image and produces a 16-channel feature map. Three of
    the convolutions use ``stride=2``, so the spatial size is halved three
    times. The layer order must not change: the ``nn.Sequential`` indices are
    part of the checkpoint's parameter names.
    """

    def __init__(self, dtype=None, device=None, operations=None):
        """Build the encoder.

        :param dtype: dtype forwarded to every convolution.
        :param device: device forwarded to every convolution.
        :param operations: namespace providing ``Conv2d`` (required despite the
            ``None`` default; ``None`` raises ``AttributeError``).
        """
        super().__init__()
        self.encoder = nn.Sequential(
            operations.Conv2d(3, 16, 3, padding=1, dtype=dtype, device=device),
            nn.SiLU(),
            operations.Conv2d(16, 16, 1, dtype=dtype, device=device),
            nn.SiLU(),
            operations.Conv2d(16, 16, 3, padding=1, dtype=dtype, device=device),
            nn.SiLU(),
            operations.Conv2d(16, 16, 3, padding=1, stride=2, dtype=dtype, device=device),
            nn.SiLU(),
            operations.Conv2d(16, 16, 3, padding=1, dtype=dtype, device=device),
            nn.SiLU(),
            operations.Conv2d(16, 16, 3, padding=1, stride=2, dtype=dtype, device=device),
            nn.SiLU(),
            operations.Conv2d(16, 16, 3, padding=1, dtype=dtype, device=device),
            nn.SiLU(),
            operations.Conv2d(16, 16, 3, padding=1, stride=2, dtype=dtype, device=device),
            nn.SiLU(),
            operations.Conv2d(16, 16, 1, dtype=dtype, device=device),
            nn.SiLU(),
            operations.Conv2d(16, 16, 3, padding=1, dtype=dtype, device=device)
        )

    def forward(self, x):
        """Return the encoded hint feature map for image batch ``x``."""
        return self.encoder(x)


class MistolineControlnetBlock(nn.Module):
    """Per-block ControlNet head for Mistoline: a square linear layer followed by SiLU."""

    def __init__(self, hidden_size, dtype=None, device=None, operations=None):
        """Build the head.

        :param hidden_size: input and output feature width of the linear layer.
        :param dtype: dtype forwarded to the linear layer.
        :param device: device forwarded to the linear layer.
        :param operations: namespace providing ``Linear`` (required).
        """
        super().__init__()
        self.linear = operations.Linear(hidden_size, hidden_size, dtype=dtype, device=device)
        self.act = nn.SiLU()

    def forward(self, x):
        """Return ``SiLU(linear(x))``."""
        return self.act(self.linear(x))


class ControlNetFlux(Flux):
    """Flux-based ControlNet that emits one residual per transformer block.

    The network reuses the ``Flux`` backbone (built without its final layer)
    and adds one small head per double and per single block. ``forward``
    returns a dict with the head outputs stretched to the number of blocks
    recorded in ``main_model_double`` / ``main_model_single``.
    """

    def __init__(self, latent_input=False, num_union_modes=0, mistoline=False, control_latent_channels=None, image_model=None, dtype=None, device=None, operations=None, **kwargs):
        """Build the backbone and the ControlNet-specific layers.

        :param latent_input: when true the hint is already a latent and is only
            padded; when false it is an image that is first run through a
            convolutional hint encoder.
        :param num_union_modes: size of the control-mode embedding table;
            ``0`` disables the mode embedder.
        :param mistoline: select the Mistoline variants of the per-block heads
            and of the hint encoder.
        :param control_latent_channels: channel count of the control latent
            before patchification; ``None`` falls back to ``self.in_channels``.
        :param image_model: accepted for interface compatibility; not used here.
        :param dtype: dtype forwarded to all created layers.
        :param device: device forwarded to all created layers.
        :param operations: namespace providing ``Linear``, ``Conv2d`` and
            ``Embedding`` (required).
        :param kwargs: forwarded unchanged to ``Flux.__init__``.
        """
        super().__init__(final_layer=False, dtype=dtype, device=device, operations=operations, **kwargs)

        # Target lengths of the "input" / "output" residual tuples.
        # NOTE(review): presumably the double/single block counts of the base
        # Flux model being controlled - confirm.
        self.main_model_double = 19
        self.main_model_single = 38

        self.mistoline = mistoline

        # add ControlNet blocks
        def make_control_block():
            if self.mistoline:
                return MistolineControlnetBlock(self.hidden_size, dtype=dtype, device=device, operations=operations)
            return operations.Linear(self.hidden_size, self.hidden_size, dtype=dtype, device=device)

        self.controlnet_blocks = nn.ModuleList([])
        for _ in range(self.params.depth):
            self.controlnet_blocks.append(make_control_block())

        self.controlnet_single_blocks = nn.ModuleList([])
        for _ in range(self.params.depth_single_blocks):
            self.controlnet_single_blocks.append(make_control_block())

        self.num_union_modes = num_union_modes
        self.controlnet_mode_embedder = None
        if self.num_union_modes > 0:
            self.controlnet_mode_embedder = operations.Embedding(self.num_union_modes, self.hidden_size, dtype=dtype, device=device)

        self.gradient_checkpointing = False
        self.latent_input = latent_input
        if control_latent_channels is None:
            control_latent_channels = self.in_channels
        else:
            control_latent_channels *= 2 * 2  # patch size

        self.pos_embed_input = operations.Linear(control_latent_channels, self.hidden_size, bias=True, dtype=dtype, device=device)
        if not self.latent_input:
            if self.mistoline:
                self.input_cond_block = MistolineCondDownsamplBlock(dtype=dtype, device=device, operations=operations)
            else:
                self.input_hint_block = nn.Sequential(
                    operations.Conv2d(3, 16, 3, padding=1, dtype=dtype, device=device),
                    nn.SiLU(),
                    operations.Conv2d(16, 16, 3, padding=1, dtype=dtype, device=device),
                    nn.SiLU(),
                    operations.Conv2d(16, 16, 3, padding=1, stride=2, dtype=dtype, device=device),
                    nn.SiLU(),
                    operations.Conv2d(16, 16, 3, padding=1, dtype=dtype, device=device),
                    nn.SiLU(),
                    operations.Conv2d(16, 16, 3, padding=1, stride=2, dtype=dtype, device=device),
                    nn.SiLU(),
                    operations.Conv2d(16, 16, 3, padding=1, dtype=dtype, device=device),
                    nn.SiLU(),
                    operations.Conv2d(16, 16, 3, padding=1, stride=2, dtype=dtype, device=device),
                    nn.SiLU(),
                    operations.Conv2d(16, 16, 3, padding=1, dtype=dtype, device=device)
                )

    def _expand_outputs(self, outputs, target_len):
        """Stretch ``outputs`` to ``target_len`` entries and return them as a tuple.

        With ``latent_input`` every entry is repeated consecutively
        ``(a, a, b, b)``; otherwise the whole sequence is tiled ``(a, b, a, b)``.
        The result is truncated to ``target_len``. ``outputs`` must be
        non-empty (an empty tuple raises ``ZeroDivisionError``, as before).
        """
        n_repeat = math.ceil(target_len / len(outputs))
        if self.latent_input:
            expanded = tuple(x for x in outputs for _ in range(n_repeat))
        else:
            expanded = tuple(outputs) * n_repeat
        return expanded[:target_len]

    def forward_orig(
        self,
        img: Tensor,
        img_ids: Tensor,
        controlnet_cond: Tensor,
        txt: Tensor,
        txt_ids: Tensor,
        timesteps: Tensor,
        y: Tensor,
        guidance: Tensor = None,
        control_type: Tensor = None,
    ) -> Tensor:
        """Run the backbone on token sequences and collect the per-block residuals.

        :param img: patchified image tokens; must be 3-D.
        :param img_ids: position ids for the image tokens.
        :param controlnet_cond: patchified hint tokens, projected by
            ``pos_embed_input`` and added to the image tokens.
        :param txt: text tokens; must be 3-D.
        :param txt_ids: position ids for the text tokens.
        :param timesteps: timesteps fed to ``timestep_embedding``.
        :param y: pooled conditioning vector fed to ``vector_in``.
        :param guidance: guidance value, only read when
            ``self.params.guidance_embed`` is set.
        :param control_type: control-mode indices for union ControlNets;
            ``None`` or empty means "no mode token".
        :return: dict with key ``"input"`` (double-block residuals) and, when
            there are single blocks, ``"output"`` (single-block residuals).
        :raises ValueError: if ``img`` or ``txt`` is not 3-D.
        """
        if img.ndim != 3 or txt.ndim != 3:
            raise ValueError("Input img and txt tensors must have 3 dimensions.")

        # running on sequences img
        img = self.img_in(img)

        controlnet_cond = self.pos_embed_input(controlnet_cond)
        img = img + controlnet_cond
        vec = self.time_in(timestep_embedding(timesteps, 256))
        if self.params.guidance_embed:
            vec = vec + self.guidance_in(timestep_embedding(guidance, 256))
        vec = vec + self.vector_in(y)
        txt = self.txt_in(txt)

        # The declared default for control_type is None; guard it so a union
        # model called without a control type does not fail on len(None).
        has_control_type = control_type is not None and len(control_type) > 0
        if self.controlnet_mode_embedder is not None and has_control_type:
            control_cond = self.controlnet_mode_embedder(torch.tensor(control_type, device=img.device), out_dtype=img.dtype).unsqueeze(0).repeat((txt.shape[0], 1, 1))
            txt = torch.cat([control_cond, txt], dim=1)
            # The prepended mode token(s) reuse the first text position id.
            txt_ids = torch.cat([txt_ids[:, :1], txt_ids], dim=1)

        ids = torch.cat((txt_ids, img_ids), dim=1)
        pe = self.pe_embedder(ids)

        # NOTE(review): DoubleStreamBlock.forward in flux/layers.py dereferences
        # its ``style_block`` argument unconditionally, and none is passed here.
        # Confirm which block class ``self.double_blocks`` actually holds.
        double_outputs = []
        for i, block in enumerate(self.double_blocks):
            img, txt = block(img=img, txt=txt, vec=vec, pe=pe)
            double_outputs.append(self.controlnet_blocks[i](img))
        controlnet_double = tuple(double_outputs)

        img = torch.cat((txt, img), 1)

        txt_len = txt.shape[1]
        single_outputs = []
        for i, block in enumerate(self.single_blocks):
            img = block(img, vec=vec, pe=pe)
            # Only the image part of the joint sequence feeds the head.
            single_outputs.append(self.controlnet_single_blocks[i](img[:, txt_len:, ...]))
        controlnet_single = tuple(single_outputs)

        out = {"input": self._expand_outputs(controlnet_double, self.main_model_double)}
        if len(controlnet_single) > 0:
            out["output"] = self._expand_outputs(controlnet_single, self.main_model_single)
        return out

    def forward(self, x, timesteps, context, y, guidance=None, hint=None, **kwargs):
        """Prepare hint and latent tokens, then delegate to ``forward_orig``.

        :param x: 4-D latent batch ``(b, c, h, w)``.
        :param timesteps: timesteps forwarded to ``forward_orig``.
        :param context: text tokens; ``context.shape[1]`` sets the number of
            text position ids.
        :param y: pooled conditioning vector.
        :param guidance: optional guidance value.
        :param hint: control hint. A latent when ``latent_input`` is set,
            otherwise an image that is rescaled with ``hint * 2 - 1`` before
            encoding (NOTE(review): presumably from [0, 1] to [-1, 1] - confirm).
        :param kwargs: only ``control_type`` is read (defaults to ``[]``).
        :return: the dict produced by ``forward_orig``.
        """
        patch_size = 2
        if self.latent_input:
            hint = comfy.ldm.common_dit.pad_to_patch_size(hint, (patch_size, patch_size))
        else:
            hint = hint * 2.0 - 1.0
            hint_encoder = self.input_cond_block if self.mistoline else self.input_hint_block
            hint = hint_encoder(hint)

        hint = rearrange(hint, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=patch_size, pw=patch_size)

        bs, _, h, w = x.shape
        x = comfy.ldm.common_dit.pad_to_patch_size(x, (patch_size, patch_size))

        img = rearrange(x, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=patch_size, pw=patch_size)

        # Patch-grid size computed from the unpadded h/w, rounding up for odd sizes.
        h_len = ((h + (patch_size // 2)) // patch_size)
        w_len = ((w + (patch_size // 2)) // patch_size)
        # Channel 0 stays zero; channels 1 and 2 hold the row and column index.
        img_ids = torch.zeros((h_len, w_len, 3), device=x.device, dtype=x.dtype)
        img_ids[..., 1] = img_ids[..., 1] + torch.linspace(0, h_len - 1, steps=h_len, device=x.device, dtype=x.dtype)[:, None]
        img_ids[..., 2] = img_ids[..., 2] + torch.linspace(0, w_len - 1, steps=w_len, device=x.device, dtype=x.dtype)[None, :]
        img_ids = repeat(img_ids, "h w c -> b (h w) c", b=bs)

        txt_ids = torch.zeros((bs, context.shape[1], 3), device=x.device, dtype=x.dtype)
        return self.forward_orig(img, img_ids, hint, context, txt_ids, timesteps, y, guidance, control_type=kwargs.get("control_type", []))
class EmbedND(nn.Module):
    """Positional embedder that concatenates one ``rope`` table per id axis."""

    def __init__(self, dim: int, theta: int, axes_dim: list):
        """Store the configuration.

        :param dim: stored as ``self.dim``; not read by ``forward``.
        :param theta: passed to ``rope`` for every axis.
        :param axes_dim: per-axis dimension passed to ``rope``; indexed by the
            position of the axis in the last dimension of ``ids``.
        """
        super().__init__()
        self.dim = dim
        self.theta = theta
        self.axes_dim = axes_dim

    def forward(self, ids: Tensor) -> Tensor:
        """Return the per-axis ``rope`` outputs joined on dim ``-3``, with a new dim 1 inserted."""
        per_axis = []
        for axis in range(ids.shape[-1]):
            per_axis.append(rope(ids[..., axis], self.axes_dim[axis], self.theta))
        joined = torch.cat(per_axis, dim=-3)
        return joined.unsqueeze(1)


def timestep_embedding(t: Tensor, dim, max_period=10000, time_factor: float = 1000.0):
    """
    Create sinusoidal timestep embeddings.

    :param t: a 1-D Tensor of N indices, one per batch element.
              These may be fractional.
    :param dim: the dimension of the output.
    :param max_period: controls the minimum frequency of the embeddings.
    :param time_factor: multiplier applied to ``t`` before embedding.
    :return: an (N, D) Tensor of positional embeddings (cosines first, then
             sines, then one zero column when ``dim`` is odd).
    """
    t = time_factor * t
    half = dim // 2

    # Geometric frequency ladder from 1 down towards 1 / max_period.
    positions = torch.arange(start=0, end=half, dtype=torch.float32, device=t.device)
    neg_log_period = -math.log(max_period)
    freqs = torch.exp(neg_log_period * positions / half)

    args = t.unsqueeze(1).float() * freqs.unsqueeze(0)
    embedding = torch.cat((args.cos(), args.sin()), dim=-1)

    if dim % 2 != 0:
        # Odd width: pad with a single zero column to reach ``dim``.
        zero_column = torch.zeros_like(embedding[:, :1])
        embedding = torch.cat((embedding, zero_column), dim=-1)

    if t.is_floating_point():
        # Match the dtype/device of the (scaled) input timesteps.
        embedding = embedding.to(t)
    return embedding


class MLPEmbedder(nn.Module):
    """Two-layer MLP (Linear -> SiLU -> Linear) used to embed conditioning vectors."""

    def __init__(self, in_dim: int, hidden_dim: int, dtype=None, device=None, operations=None):
        """Build the layers.

        :param in_dim: input feature width.
        :param hidden_dim: width of the hidden layer and of the output.
        :param dtype: dtype forwarded to both linear layers.
        :param device: device forwarded to both linear layers.
        :param operations: namespace providing ``Linear`` (required).
        """
        super().__init__()
        linear_kwargs = {"bias": True, "dtype": dtype, "device": device}
        self.in_layer = operations.Linear(in_dim, hidden_dim, **linear_kwargs)
        self.silu = nn.SiLU()
        self.out_layer = operations.Linear(hidden_dim, hidden_dim, **linear_kwargs)

    def forward(self, x: Tensor) -> Tensor:
        """Return ``out_layer(silu(in_layer(x)))``."""
        hidden = self.in_layer(x)
        hidden = self.silu(hidden)
        return self.out_layer(hidden)


class RMSNorm(torch.nn.Module):
    """RMS normalisation with a learned per-feature scale."""

    def __init__(self, dim: int, dtype=None, device=None, operations=None):
        """Allocate the scale parameter.

        :param dim: length of the scale vector.
        :param dtype: dtype of the scale parameter.
        :param device: device of the scale parameter.
        :param operations: accepted for signature parity; not used.
        """
        super().__init__()
        # Left uninitialised (torch.empty).
        # NOTE(review): presumably always overwritten by a checkpoint load - confirm.
        uninitialised = torch.empty(dim, dtype=dtype, device=device)
        self.scale = nn.Parameter(uninitialised)  # original note: shape is 128

    def forward(self, x: Tensor):
        """Apply ``comfy.ldm.common_dit.rms_norm`` with ``self.scale`` and eps 1e-6."""
        eps = 1e-6
        return comfy.ldm.common_dit.rms_norm(x, self.scale, eps)
class QKNorm(torch.nn.Module):
    """Applies separate RMS norms to queries and keys, then casts both to match ``v``."""

    def __init__(self, dim: int, dtype=None, device=None, operations=None):
        """Create the query and key norms.

        :param dim: feature width handed to both ``RMSNorm`` instances.
        :param dtype: dtype forwarded to both norms.
        :param device: device forwarded to both norms.
        :param operations: forwarded to both norms.
        """
        super().__init__()
        norm_kwargs = {"dtype": dtype, "device": device, "operations": operations}
        self.query_norm = RMSNorm(dim, **norm_kwargs)
        self.key_norm = RMSNorm(dim, **norm_kwargs)

    def forward(self, q: Tensor, k: Tensor, v: Tensor) -> tuple:
        """Return ``(q, k)`` normalised and converted with ``.to(v)``; ``v`` itself is not returned."""
        normed_q = self.query_norm(q)
        normed_k = self.key_norm(k)
        return normed_q.to(v), normed_k.to(v)


class SelfAttention(nn.Module):
    """Container for the attention layers (fused qkv projection, q/k norm, output projection).

    Defines no ``forward`` of its own; callers use the sub-layers directly.
    """

    def __init__(self, dim: int, num_heads: int = 8, qkv_bias: bool = False, dtype=None, device=None, operations=None):
        """Build the sub-layers.

        :param dim: model width (original note: usually 3072).
        :param num_heads: number of attention heads (original note: 24).
        :param qkv_bias: whether the fused qkv projection has a bias.
        :param dtype: dtype forwarded to the created layers.
        :param device: device forwarded to the created layers.
        :param operations: namespace providing ``Linear`` (required).
        """
        super().__init__()
        self.num_heads = num_heads
        per_head = dim // num_heads  # original note: 128 = 3072 / 24
        self.qkv = operations.Linear(dim, 3 * dim, bias=qkv_bias, dtype=dtype, device=device)
        self.norm = QKNorm(per_head, dtype=dtype, device=device, operations=operations)
        self.proj = operations.Linear(dim, dim, dtype=dtype, device=device)


@dataclass
class ModulationOut:
    # One (shift, scale, gate) triple produced by Modulation.forward.
    shift: Tensor
    scale: Tensor
    gate: Tensor


class Modulation(nn.Module):
    """Projects a conditioning vector into one or two (shift, scale, gate) triples."""

    def __init__(self, dim: int, double: bool, dtype=None, device=None, operations=None):
        """Build the projection.

        :param dim: width of the conditioning vector and of each output chunk.
        :param double: produce two triples (6 chunks) instead of one (3 chunks).
        :param dtype: dtype forwarded to the linear layer.
        :param device: device forwarded to the linear layer.
        :param operations: namespace providing ``Linear`` (required).
        """
        super().__init__()
        self.is_double = double
        if double:
            self.multiplier = 6
        else:
            self.multiplier = 3
        self.lin = operations.Linear(dim, self.multiplier * dim, bias=True, dtype=dtype, device=device)

    def forward(self, vec: Tensor) -> tuple:
        """Return ``(first, second)``; ``second`` is ``None`` unless ``double`` was set.

        ``vec`` is 2-D: the projection is indexed ``[:, None, :]``, so every
        chunk carries an extra singleton middle dimension.
        """
        activated = nn.functional.silu(vec)
        projected = self.lin(activated)[:, None, :]
        chunks = projected.chunk(self.multiplier, dim=-1)

        first = ModulationOut(*chunks[:3])
        second = ModulationOut(*chunks[3:]) if self.is_double else None
        return (first, second)
SelfAttention(dim=hidden_size, num_heads=num_heads, qkv_bias=qkv_bias, dtype=dtype, device=device, operations=operations) # .qkv: in_features=3072, out_features=9216 .proj: 3072,3072 self.txt_attn = SelfAttention(dim=hidden_size, num_heads=num_heads, qkv_bias=qkv_bias, dtype=dtype, device=device, operations=operations) # .qkv: in_features=3072, out_features=9216 .proj: 3072,3072 self.img_norm1 = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device) self.txt_norm1 = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device) self.img_norm2 = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device) self.txt_norm2 = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device) self.img_mlp = nn.Sequential( operations.Linear(hidden_size, mlp_hidden_dim, bias=True, dtype=dtype, device=device), nn.GELU(approximate="tanh"), operations.Linear(mlp_hidden_dim, hidden_size, bias=True, dtype=dtype, device=device), ) # 3072->12288, 12288->3072 (3072*4) self.txt_mlp = nn.Sequential( operations.Linear(hidden_size, mlp_hidden_dim, bias=True, dtype=dtype, device=device), nn.GELU(approximate="tanh"), operations.Linear(mlp_hidden_dim, hidden_size, bias=True, dtype=dtype, device=device), ) # 3072->12288, 12288->3072 (3072*4) def forward(self, img: Tensor, txt: Tensor, vec: Tensor, pe: Tensor, mask=None, idx=0, update_cross_attn=None, style_block=None) -> Tuple[Tensor, Tensor]: # vec 1,3072 # vec 1,3072 #mask.shape 4608,4608 #img_attn.shape 1,4096,3072 txt_attn.shape 1,512,3072 img_len = img.shape[-2] txt_len = txt.shape[-2] img_mod1, img_mod2 = self.img_mod(vec) # -> 3072, 3072 txt_mod1, txt_mod2 = self.txt_mod(vec) img_norm = self.img_norm1(img) txt_norm = self.txt_norm1(txt) img_norm = style_block.img(img_norm, "attn_norm") txt_norm = style_block.txt(txt_norm, "attn_norm") img_norm = img_norm * (1+img_mod1.scale) + 
img_mod1.shift txt_norm = txt_norm * (1+txt_mod1.scale) + txt_mod1.shift img_norm = style_block.img(img_norm, "attn_norm_mod") txt_norm = style_block.txt(txt_norm, "attn_norm_mod") ### ATTN ### img_qkv = self.img_attn.qkv(img_norm) img_q, img_k, img_v = img_qkv.view(img_qkv.shape[0], img_qkv.shape[1], 3, self.num_heads, -1).permute(2, 0, 3, 1, 4) img_q = style_block.img.ATTN(img_q, "q_proj") img_k = style_block.img.ATTN(img_k, "k_proj") img_v = style_block.img.ATTN(img_v, "v_proj") img_q, img_k = self.img_attn.norm(img_q, img_k, img_v) img_q = style_block.img.ATTN(img_q, "q_norm") img_k = style_block.img.ATTN(img_k, "k_norm") txt_qkv = self.txt_attn.qkv(txt_norm) txt_q, txt_k, txt_v = txt_qkv.view(txt_qkv.shape[0], txt_qkv.shape[1], 3, self.num_heads, -1).permute(2, 0, 3, 1, 4) txt_q = style_block.txt.ATTN(txt_q, "q_proj") txt_k = style_block.txt.ATTN(txt_k, "k_proj") txt_v = style_block.txt.ATTN(txt_v, "v_proj") txt_q, txt_k = self.txt_attn.norm(txt_q, txt_k, txt_v) txt_q = style_block.txt.ATTN(txt_q, "q_norm") txt_k = style_block.txt.ATTN(txt_k, "k_norm") q, k, v = torch.cat((txt_q, img_q), dim=2), torch.cat((txt_k, img_k), dim=2), torch.cat((txt_v, img_v), dim=2) attn = attention(q, k, v, pe=pe, mask=mask) txt_attn = attn[:,:txt_len] # 1, 768,3072 img_attn = attn[:,txt_len:] img_attn = style_block.img.ATTN(img_attn, "out") txt_attn = style_block.txt.ATTN(txt_attn, "out") img_attn = self.img_attn.proj(img_attn) #to_out txt_attn = self.txt_attn.proj(txt_attn) ### ATTN ### img_attn = style_block.img(img_attn, "attn") txt_attn = style_block.txt(txt_attn, "attn") img_attn *= img_mod1.gate txt_attn *= txt_mod1.gate img_attn = style_block.img(img_attn, "attn_gated") txt_attn = style_block.txt(txt_attn, "attn_gated") img += img_attn txt += txt_attn img = style_block.img(img, "attn_res") txt = style_block.txt(txt, "attn_res") img_norm = self.img_norm2(img) txt_norm = self.txt_norm2(txt) img_norm = style_block.img(img_norm, "ff_norm") txt_norm = style_block.txt(txt_norm, 
"ff_norm") img_norm = img_norm * (1+img_mod2.scale) + img_mod2.shift txt_norm = txt_norm * (1+txt_mod2.scale) + txt_mod2.shift img_norm = style_block.img(img_norm, "ff_norm_mod") txt_norm = style_block.txt(txt_norm, "ff_norm_mod") img_mlp = self.img_mlp(img_norm) txt_mlp = self.txt_mlp(txt_norm) img_mlp = style_block.img(img_mlp, "ff") txt_mlp = style_block.txt(txt_mlp, "ff") img_mlp *= img_mod2.gate txt_mlp *= txt_mod2.gate img_mlp = style_block.img(img_mlp, "ff_gated") txt_mlp = style_block.txt(txt_mlp, "ff_gated") img += img_mlp txt += txt_mlp img = style_block.img(img, "ff_res") txt = style_block.txt(txt, "ff_res") if update_cross_attn is not None: if not update_cross_attn['skip_cross_attn']: UNCOND = update_cross_attn['UNCOND'] txt_update = self.txt_norm1(txt.cpu()).float() txt_update = (1 + txt_mod1.scale.to(txt_update)) * txt_update + txt_mod1.shift.to(txt_update) if UNCOND: t5_start = update_cross_attn['src_t5_start'] t5_end = update_cross_attn['src_t5_end'] txt_src = txt_update[:,t5_start:t5_end,:].cpu() #.float() self.c_src = txt_src.transpose(-2,-1).squeeze(0) # shape [C,1] else: t5_start = update_cross_attn['tgt_t5_start'] t5_end = update_cross_attn['tgt_t5_end'] lamb = update_cross_attn['lamb'] erase = update_cross_attn['erase'] c_guide = txt_update[:,t5_start:t5_end,:].transpose(-2,-1).squeeze(0) # [C,1] Wv_old = self.txt_attn.qkv.weight.data.to(c_guide) # [C,C] v_star = Wv_old @ c_guide # [C,1] c_src = self.c_src #.cpu() # [C,1] lamb = lamb erase_scale = erase d = c_src.shape[0] C = c_src @ c_src.T # [C,C] I = torch.eye(d, device=C.device, dtype=C.dtype) mat1_v = lamb*Wv_old + erase_scale*(v_star @ c_src.T) # [C,C] mat2_v = lamb*I + erase_scale*(C) # [C,C] I = I.to("cpu") C = C.to("cpu") c_src = c_src.to("cpu") self.c_src = self.c_src.to("cpu") v_star = v_star.to("cpu") Wv_old = Wv_old.to("cpu") c_guide = c_guide.to("cpu") del I, C, c_src, self.c_src, v_star, Wv_old, c_guide #Wv_new = mat1_v @ torch.inverse(mat2_v.float()).to(mat1_v) # [C,C] Wv_new = 
torch.linalg.solve(mat2_v.T, mat1_v.T).T mat1_v = mat1_v.to("cpu") mat2_v = mat2_v.to("cpu") del mat1_v, mat2_v update_q = update_cross_attn['update_q'] update_k = update_cross_attn['update_k'] update_v = update_cross_attn['update_v'] if not update_q: Wv_new[:3072, :] = self.txt_attn.qkv.weight.data[:3072, :].to(Wv_new) if not update_k: Wv_new[3072:6144,:] = self.txt_attn.qkv.weight.data[3072:6144,:].to(Wv_new) if not update_v: Wv_new[6144: ,:] = self.txt_attn.qkv.weight.data[6144: ,:].to(Wv_new) self.txt_attn.qkv.weight.data.copy_(Wv_new.to(self.txt_attn.qkv.weight.data.dtype)) Wv_new = Wv_new.to("cpu") del Wv_new #torch.cuda.empty_cache() return img, txt class SingleStreamBlock(nn.Module): #attn.shape = 1,4608,3072 mlp.shape = 1,4608,12288 4096*3 = 12288 """ A DiT block with parallel linear layers as described in https://arxiv.org/abs/2302.05442 and adapted modulation interface. """ def __init__(self, hidden_size: int, num_heads: int, mlp_ratio: float = 4.0, qk_scale: float = None, dtype=None, device=None, operations=None, idx=-1): super().__init__() self.idx = idx self.hidden_dim = hidden_size #3072 self.num_heads = num_heads #24 head_dim = hidden_size // num_heads self.scale = qk_scale or head_dim**-0.5 #0.08838834764831845 self.mlp_hidden_dim = int(hidden_size * mlp_ratio) #12288== 3072 * 4 # qkv and mlp_in self.linear1 = operations.Linear(hidden_size, 3*hidden_size + self.mlp_hidden_dim, dtype=dtype, device=device) # proj and mlp_out self.linear2 = operations.Linear(hidden_size + self.mlp_hidden_dim, hidden_size, dtype=dtype, device=device) self.norm = QKNorm(head_dim, dtype=dtype, device=device, operations=operations) self.hidden_size = hidden_size #3072 self.pre_norm = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device) self.mlp_act = nn.GELU(approximate="tanh") self.modulation = Modulation(hidden_size, double=False, dtype=dtype, device=device, operations=operations) # vec 1,3072 x 1,9984,3072 def forward(self, 
img: Tensor, vec: Tensor, pe: Tensor, mask=None, idx=0, style_block=None) -> Tensor: # x 1,9984,3072 if 2 reg embeds, 1,9472,3072 if none # 9216x4096 = 16x1536x1536 mod, _ = self.modulation(vec) img_norm = self.pre_norm(img) img_norm = style_block.img(img_norm, "attn_norm") img_norm = (1 + mod.scale) * img_norm + mod.shift # mod => vec img_norm = style_block.img(img_norm, "attn_norm_mod") ### ATTN ### qkv, mlp = torch.split(self.linear1(img_norm), [3*self.hidden_size, self.mlp_hidden_dim], dim=-1) q, k, v = qkv.view(qkv.shape[0], qkv.shape[1], 3, self.num_heads, -1).permute(2, 0, 3, 1, 4) #q, k, v = rearrange(qkv, "B L (K H D) -> K B H L D", K=3, H=self.num_heads) q = style_block.img.ATTN(q, "q_proj") k = style_block.img.ATTN(k, "k_proj") v = style_block.img.ATTN(v, "v_proj") q, k = self.norm(q, k, v) q = style_block.img.ATTN(q, "q_norm") k = style_block.img.ATTN(k, "k_norm") attn = attention(q, k, v, pe=pe, mask=mask) attn = style_block.img.ATTN(attn, "out") ### ATTN ### mlp = style_block.img(mlp, "ff_norm") mlp_act = self.mlp_act(mlp) mlp_act = style_block.img(mlp_act, "ff_norm_mod") img_ff_i = self.linear2(torch.cat((attn, mlp_act), 2)) # effectively FF smooshed into one line img_ff_i = style_block.img(img_ff_i, "ff") img_ff_i *= mod.gate img_ff_i = style_block.img(img_ff_i, "ff_gated") img += img_ff_i img = style_block.img(img, "ff_res") return img class LastLayer(nn.Module): def __init__(self, hidden_size: int, patch_size: int, out_channels: int, dtype=None, device=None, operations=None): super().__init__() self.norm_final = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device) self.linear = operations.Linear(hidden_size, patch_size * patch_size * out_channels, bias=True, dtype=dtype, device=device) self.adaLN_modulation = nn.Sequential(nn.SiLU(), operations.Linear(hidden_size, 2 * hidden_size, bias=True, dtype=dtype, device=device)) def forward(self, x: Tensor, vec: Tensor) -> Tensor: shift, scale = 
self.adaLN_modulation(vec).chunk(2, dim=1) x = (1 + scale[:, None, :]) * self.norm_final(x) + shift[:, None, :] x = self.linear(x) return x def forward_scale_shift(self, x: Tensor, vec: Tensor) -> Tensor: shift, scale = self.adaLN_modulation(vec).chunk(2, dim=1) x = (1 + scale[:, None, :]) * self.norm_final(x) + shift[:, None, :] return x def forward_linear(self, x: Tensor, vec: Tensor) -> Tensor: x = self.linear(x) return x ================================================ FILE: flux/math.py ================================================ import torch from einops import rearrange from torch import Tensor from comfy.ldm.modules.attention import attention_pytorch import comfy.model_management import math def attention(q: Tensor, k: Tensor, v: Tensor, pe: Tensor, mask=None) -> Tensor: q, k = apply_rope(q, k, pe) heads = q.shape[1] x = attention_pytorch(q, k, v, heads, skip_reshape=True, mask=mask) return x def rope(pos: Tensor, dim: int, theta: int) -> Tensor: assert dim % 2 == 0 if comfy.model_management.is_device_mps(pos.device) or comfy.model_management.is_intel_xpu() or comfy.model_management.is_directml_enabled(): device = torch.device("cpu") else: device = pos.device scale = torch.linspace(0, (dim - 2) / dim, steps=dim//2, dtype=torch.float64, device=device) omega = 1.0 / (theta**scale) out = torch.einsum("...n,d->...nd", pos.to(dtype=torch.float32, device=device), omega) out = torch.stack([torch.cos(out), -torch.sin(out), torch.sin(out), torch.cos(out)], dim=-1) out = rearrange(out, "b n d (i j) -> b n d i j", i=2, j=2) return out.to(dtype=torch.float32, device=pos.device) def apply_rope(xq: Tensor, xk: Tensor, freqs_cis: Tensor): xq_ = xq.float().reshape(*xq.shape[:-1], -1, 1, 2) xk_ = xk.float().reshape(*xk.shape[:-1], -1, 1, 2) xq_out = freqs_cis[..., 0] * xq_[..., 0] + freqs_cis[..., 1] * xq_[..., 1] xk_out = freqs_cis[..., 0] * xk_[..., 0] + freqs_cis[..., 1] * xk_[..., 1] return xq_out.reshape(*xq.shape).type_as(xq), xk_out.reshape(*xk.shape).type_as(xk) 
================================================ FILE: flux/model.py ================================================ # Adapted from: https://github.com/black-forest-labs/flux import torch import torch.nn.functional as F from torch import Tensor, nn from typing import Optional, Callable, Tuple, Dict, List, Any, Union from ..helper import ExtraOptions from dataclasses import dataclass import copy from .layers import ( DoubleStreamBlock, EmbedND, LastLayer, MLPEmbedder, SingleStreamBlock, timestep_embedding, ) from . import layers #from comfy.ldm.flux.layers import timestep_embedding from comfy.ldm.flux.model import Flux as Flux import math import einops from einops import rearrange, repeat import comfy.ldm.common_dit from ..latents import tile_latent, untile_latent, gaussian_blur_2d, median_blur_2d from ..style_transfer import apply_scattersort_masked, apply_scattersort_tiled, adain_seq_inplace, adain_patchwise_row_batch_med, adain_patchwise_row_batch, StyleMMDiT_Model #from ..latents import interpolate_spd @dataclass class FluxParams: in_channels : int out_channels : int vec_in_dim : int context_in_dim : int hidden_size : int mlp_ratio : float num_heads : int depth : int depth_single_blocks: int axes_dim : list theta : int patch_size : int qkv_bias : bool guidance_embed : bool class ReFlux(Flux): def __init__(self, image_model=None, final_layer=True, dtype=None, device=None, operations=None, **kwargs): super().__init__() self.dtype = dtype self.timestep = -1.0 self.threshold_inv = False params = FluxParams(**kwargs) self.params = params #self.params FluxParams(in_channels=16, out_channels=16, vec_in_dim=768, context_in_dim=4096, hidden_size=3072, mlp_ratio=4.0, num_heads=24, depth=19, depth_single_blocks=38, axes_dim=[16, 56, 56], theta=10000, patch_size=2, qkv_bias=True, guidance_embed=False) self.patch_size = params.patch_size self.in_channels = params.in_channels * params.patch_size * params.patch_size # in_channels 64 self.out_channels = params.out_channels * 
params.patch_size * params.patch_size # out_channels 64 if params.hidden_size % params.num_heads != 0: raise ValueError(f"Hidden size {params.hidden_size} must be divisible by num_heads {params.num_heads}") pe_dim = params.hidden_size // params.num_heads if sum(params.axes_dim) != pe_dim: raise ValueError(f"Got {params.axes_dim} but expected positional dim {pe_dim}") self.hidden_size = params.hidden_size # 3072 self.num_heads = params.num_heads # 24 self.pe_embedder = EmbedND(dim=pe_dim, theta=params.theta, axes_dim=params.axes_dim) self.img_in = operations.Linear( self.in_channels, self.hidden_size, bias=True, dtype=dtype, device=device) # in_features= 64, out_features=3072 self.txt_in = operations.Linear(params.context_in_dim, self.hidden_size, dtype=dtype, device=device) # in_features=4096, out_features=3072, bias=True self.time_in = MLPEmbedder( in_dim=256, hidden_dim=self.hidden_size, dtype=dtype, device=device, operations=operations) self.vector_in = MLPEmbedder(params.vec_in_dim, self.hidden_size, dtype=dtype, device=device, operations=operations) # in_features=768, out_features=3072 (first layer) second layer 3072,3072 self.guidance_in =(MLPEmbedder( in_dim=256, hidden_dim=self.hidden_size, dtype=dtype, device=device, operations=operations) if params.guidance_embed else nn.Identity()) self.double_blocks = nn.ModuleList([DoubleStreamBlock(self.hidden_size, self.num_heads, mlp_ratio=params.mlp_ratio, qkv_bias=params.qkv_bias, dtype=dtype, device=device, operations=operations, idx=_) for _ in range(params.depth)]) self.single_blocks = nn.ModuleList([SingleStreamBlock(self.hidden_size, self.num_heads, mlp_ratio=params.mlp_ratio, dtype=dtype, device=device, operations=operations, idx=_) for _ in range(params.depth_single_blocks)]) if final_layer: self.final_layer = layers.LastLayer(self.hidden_size, 1, self.out_channels, dtype=dtype, device=device, operations=operations) def forward_blocks(self, img : Tensor, img_ids : Tensor, txt : Tensor, txt_ids : Tensor, 
timesteps: Tensor, y : Tensor, guidance : Tensor = None, control = None, update_cross_attn = None, transformer_options = {}, UNCOND : bool = False, SIGMA = None, StyleMMDiT_Model = None, RECON_MODE=False, ) -> Tensor: if img.ndim != 3 or txt.ndim != 3: raise ValueError("Input img and txt tensors must have 3 dimensions.") # running on sequences img img -> 1,4096,3072 img = self.img_in(img) # 1,9216,64 == 768x192 # 1,9216,64 == 1,16,128,256 + 1,16,64,64 # 1,8192,64 with uncond/cond #:,:,64 -> :,:,3072 vec = self.time_in(timestep_embedding(timesteps, 256).to(img.dtype)) # 1 -> 1,3072 if self.params.guidance_embed: if guidance is None: print("Guidance strength is none, not using distilled guidance.") else: vec = vec + self.guidance_in(timestep_embedding(guidance, 256).to(img.dtype)) vec = vec + self.vector_in(y) #y.shape=1,768 y==all 0s txt = self.txt_in(txt) ids = torch.cat((txt_ids, img_ids), dim=1) # img_ids.shape=1,8192,3 txt_ids.shape=1,512,3 #ids.shape=1,8704,3 pe = self.pe_embedder(ids) # pe.shape 1,1,8704,64,2,2 weight = -1 * transformer_options.get("regional_conditioning_weight", 0.0) floor = -1 * transformer_options.get("regional_conditioning_floor", 0.0) mask_zero = None mask = None text_len = txt.shape[1] if not UNCOND and 'AttnMask' in transformer_options: AttnMask = transformer_options['AttnMask'] mask = transformer_options['AttnMask'].attn_mask.mask.to('cuda') if mask_zero is None: mask_zero = torch.ones_like(mask) img_len = transformer_options['AttnMask'].img_len mask_zero[:text_len, :] = mask[:text_len, :] mask_zero[:, :text_len] = mask[:, :text_len] if weight == 0: mask = None if UNCOND and 'AttnMask_neg' in transformer_options: AttnMask = transformer_options['AttnMask_neg'] mask = transformer_options['AttnMask_neg'].attn_mask.mask.to('cuda') if mask_zero is None: mask_zero = torch.ones_like(mask) img_len = transformer_options['AttnMask_neg'].img_len mask_zero[:text_len, :] = mask[:text_len, :] mask_zero[:, :text_len] = mask[:, :text_len] if weight == 
0: mask = None elif UNCOND and 'AttnMask' in transformer_options: AttnMask = transformer_options['AttnMask'] mask = transformer_options['AttnMask'].attn_mask.mask.to('cuda') if mask_zero is None: mask_zero = torch.ones_like(mask) img_len = transformer_options['AttnMask'].img_len mask_zero[:text_len, :] = mask[:text_len, :] mask_zero[:, :text_len] = mask[:, :text_len] if weight == 0: mask = None if mask is not None and not type(mask[0][0].item()) == bool: mask = mask.to(img.dtype) if mask_zero is not None and not type(mask_zero[0][0].item()) == bool: mask_zero = mask_zero.to(img.dtype) total_layers = len(self.double_blocks) + len(self.single_blocks) ca_idx = 0 for i, block in enumerate(self.double_blocks): if weight > 0 and mask is not None and weight <= i/total_layers: img, txt = block(img=img, txt=txt, vec=vec, pe=pe, mask=mask_zero, idx=i, update_cross_attn=update_cross_attn) elif (weight < 0 and mask is not None and abs(weight) <= (1 - i/total_layers)): img_tmpZ, txt_tmpZ = img.clone(), txt.clone() img_tmpZ, txt = block(img=img_tmpZ, txt=txt_tmpZ, vec=vec, pe=pe, mask=mask, idx=i, update_cross_attn=update_cross_attn) img, txt_tmpZ = block(img=img , txt=txt , vec=vec, pe=pe, mask=mask_zero, idx=i, update_cross_attn=update_cross_attn) elif floor > 0 and mask is not None and floor >= i/total_layers: mask_tmp = mask.clone() mask_tmp[text_len:, text_len:] = 1.0 img, txt = block(img=img, txt=txt, vec=vec, pe=pe, mask=mask_tmp, idx=i, update_cross_attn=update_cross_attn) elif floor < 0 and mask is not None and abs(floor) >= (1 - i/total_layers): mask_tmp = mask.clone() mask_tmp[text_len:, text_len:] = 1.0 img, txt = block(img=img, txt=txt, vec=vec, pe=pe, mask=mask_tmp, idx=i, update_cross_attn=update_cross_attn) else: img, txt = block(img=img, txt=txt, vec=vec, pe=pe, mask=mask, idx=i, update_cross_attn=update_cross_attn) if control is not None: control_i = control.get("input") if i < len(control_i): add = control_i[i] if add is not None: img[:1] += add if 
hasattr(self, "pulid_data"): if self.pulid_data: if i % self.pulid_double_interval == 0: for _, node_data in self.pulid_data.items(): if torch.any((node_data['sigma_start'] >= timesteps) & (timesteps >= node_data['sigma_end'])): img = img + node_data['weight'] * self.pulid_ca[ca_idx](node_data['embedding'], img) ca_idx += 1 img = torch.cat((txt, img), 1) #first 256 is txt embed for i, block in enumerate(self.single_blocks): if weight > 0 and mask is not None and weight <= (i+len(self.double_blocks))/total_layers: img = block(img, vec=vec, pe=pe, mask=mask_zero) elif weight < 0 and mask is not None and abs(weight) <= (1 - (i+len(self.double_blocks))/total_layers): img = block(img, vec=vec, pe=pe, mask=mask_zero) elif floor > 0 and mask is not None and floor >= (i+len(self.double_blocks))/total_layers: mask_tmp = mask.clone() mask_tmp[text_len:, text_len:] = 1.0 img = block(img, vec=vec, pe=pe, mask=mask_tmp) elif floor < 0 and mask is not None and abs(floor) >= (1 - (i+len(self.double_blocks))/total_layers): mask_tmp = mask.clone() mask_tmp[text_len:, text_len:] = 1.0 img = block(img, vec=vec, pe=pe, mask=mask_tmp) else: img = block(img, vec=vec, pe=pe, mask=mask) if control is not None: # Controlnet control_o = control.get("output") if i < len(control_o): add = control_o[i] if add is not None: img[:1, txt.shape[1] :, ...] += add if hasattr(self, "pulid_data"): # PuLID attention if self.pulid_data: real_img, txt = img[:, txt.shape[1]:, ...], img[:, :txt.shape[1], ...] if i % self.pulid_single_interval == 0: # Will calculate influence of all nodes at once for _, node_data in self.pulid_data.items(): if torch.any((node_data['sigma_start'] >= timesteps) & (timesteps >= node_data['sigma_end'])): real_img = real_img + node_data['weight'] * self.pulid_ca[ca_idx](node_data['embedding'], real_img) ca_idx += 1 img = torch.cat((txt, real_img), 1) img = img[:, txt.shape[1] :, ...] 
img = self.final_layer(img, vec) # (N, T, patch_size ** 2 * out_channels) 1,8192,3072 -> 1,8192,64 return img def process_img(self, x, index=0, h_offset=0, w_offset=0): bs, c, h, w = x.shape patch_size = self.patch_size x = comfy.ldm.common_dit.pad_to_patch_size(x, (patch_size, patch_size)) img = rearrange(x, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=patch_size, pw=patch_size) h_len = ((h + (patch_size // 2)) // patch_size) w_len = ((w + (patch_size // 2)) // patch_size) h_offset = ((h_offset + (patch_size // 2)) // patch_size) w_offset = ((w_offset + (patch_size // 2)) // patch_size) img_ids = torch.zeros((h_len, w_len, 3), device=x.device, dtype=x.dtype) img_ids[:, :, 0] = img_ids[:, :, 1] + index img_ids[:, :, 1] = img_ids[:, :, 1] + torch.linspace(h_offset, h_len - 1 + h_offset, steps=h_len, device=x.device, dtype=x.dtype).unsqueeze(1) img_ids[:, :, 2] = img_ids[:, :, 2] + torch.linspace(w_offset, w_len - 1 + w_offset, steps=w_len, device=x.device, dtype=x.dtype).unsqueeze(0) return img, repeat(img_ids, "h w c -> b (h w) c", b=bs) def _get_img_ids(self, x, bs, h_len, w_len, h_start, h_end, w_start, w_end): img_ids = torch.zeros( (h_len, w_len, 3), device=x.device, dtype=x.dtype) img_ids[..., 1] += torch.linspace(h_start, h_end - 1, steps=h_len, device=x.device, dtype=x.dtype)[:, None] img_ids[..., 2] += torch.linspace(w_start, w_end - 1, steps=w_len, device=x.device, dtype=x.dtype)[None, :] img_ids = repeat(img_ids, "h w c -> b (h w) c", b=bs) return img_ids def forward(self, x, timestep, context, y, guidance, ref_latents=None, control = None, transformer_options = {}, mask = None, **kwargs ): t = timestep self.max_seq = (128 * 128) // (2 * 2) x_orig = x.clone() b, c, h, w = x.shape h_len = ((h + (self.patch_size // 2)) // self.patch_size) # h_len 96 w_len = ((w + (self.patch_size // 2)) // self.patch_size) # w_len 96 img_len = h_len * w_len img_slice = slice(-img_len, None) #slice(None, img_len) txt_slice = slice(None, -img_len) SIGMA = t[0].clone() #/ 1000 
EO = transformer_options.get("ExtraOptions", ExtraOptions("")) if EO is not None: EO.mute = True if EO("zero_heads"): HEADS = 0 else: HEADS = 24 StyleMMDiT = transformer_options.get('StyleMMDiT', StyleMMDiT_Model()) StyleMMDiT.set_len(h_len, w_len, img_slice, txt_slice, HEADS=HEADS) StyleMMDiT.Retrojector = self.Retrojector if hasattr(self, "Retrojector") else None transformer_options['StyleMMDiT'] = None x_tmp = transformer_options.get("x_tmp") if x_tmp is not None: x_tmp = x_tmp.expand(x.shape[0], -1, -1, -1).clone() img = comfy.ldm.common_dit.pad_to_patch_size(x_tmp, (self.patch_size, self.patch_size)) else: img = comfy.ldm.common_dit.pad_to_patch_size(x, (self.patch_size, self.patch_size)) y0_style, img_y0_style = None, None img_orig, t_orig, y_orig, context_orig = clone_inputs(img, t, y, context) weight = -1 * transformer_options.get("regional_conditioning_weight", 0.0) floor = -1 * transformer_options.get("regional_conditioning_floor", 0.0) update_cross_attn = transformer_options.get("update_cross_attn") z_ = transformer_options.get("z_") # initial noise and/or image+noise from start of rk_sampler_beta() rk_row = transformer_options.get("row") # for "smart noise" if z_ is not None: x_init = z_[rk_row].to(x) elif 'x_init' in transformer_options: x_init = transformer_options.get('x_init').to(x) # recon loop to extract exact noise pred for scattersort guide assembly RECON_MODE = StyleMMDiT.noise_mode == "recon" recon_iterations = 2 if StyleMMDiT.noise_mode == "recon" else 1 for recon_iter in range(recon_iterations): y0_style = StyleMMDiT.guides y0_style_active = True if type(y0_style) == torch.Tensor else False RECON_MODE = True if StyleMMDiT.noise_mode == "recon" and recon_iter == 0 else False if StyleMMDiT.noise_mode == "recon" and recon_iter == 1: x_recon = x_tmp if x_tmp is not None else x_orig noise_prediction = x_recon + (1-SIGMA.to(x_recon)) * eps.to(x_recon) denoised = x_recon - SIGMA.to(x_recon) * eps.to(x_recon) denoised = 
StyleMMDiT.apply_recon_lure(denoised, y0_style) new_x = (1-SIGMA.to(denoised)) * denoised + SIGMA.to(denoised) * noise_prediction img_orig = img = comfy.ldm.common_dit.pad_to_patch_size(new_x, (self.patch_size, self.patch_size)) x_init = noise_prediction elif StyleMMDiT.noise_mode == "bonanza": x_init = torch.randn_like(x_init) if y0_style_active: if y0_style.sum() == 0.0 and y0_style.std() == 0.0: y0_style = img_orig.clone() else: SIGMA_ADAIN = (SIGMA * EO("eps_adain_sigma_factor", 1.0)).to(y0_style) y0_style_noised = (1-SIGMA_ADAIN) * y0_style + SIGMA_ADAIN * x_init[0:1].to(y0_style) #always only use first batch of noise to avoid broadcasting img_y0_style_orig = comfy.ldm.common_dit.pad_to_patch_size(y0_style_noised, (self.patch_size, self.patch_size)) mask_zero = None out_list = [] for cond_iter in range(len(transformer_options['cond_or_uncond'])): UNCOND = transformer_options['cond_or_uncond'][cond_iter] == 1 if update_cross_attn is not None: update_cross_attn['UNCOND'] = UNCOND bsz_style = y0_style.shape[0] if y0_style_active else 0 bsz = 1 if RECON_MODE else bsz_style + 1 img, t, y, context = clone_inputs(img_orig, t_orig, y_orig, context_orig, index=cond_iter) mask = None if not UNCOND and 'AttnMask' in transformer_options: # and weight != 0: AttnMask = transformer_options['AttnMask'] mask = transformer_options['AttnMask'].attn_mask.mask.to('cuda') if mask_zero is None: mask_zero = torch.ones_like(mask) mask_zero[txt_slice, txt_slice] = mask[txt_slice, txt_slice] if weight == 0: context = transformer_options['RegContext'].context.to(context.dtype).to(context.device) mask = None else: context = transformer_options['RegContext'].context.to(context.dtype).to(context.device) if UNCOND and 'AttnMask_neg' in transformer_options: # and weight != 0: AttnMask = transformer_options['AttnMask_neg'] mask = transformer_options['AttnMask_neg'].attn_mask.mask.to('cuda') if mask_zero is None: mask_zero = torch.ones_like(mask) mask_zero[txt_slice, txt_slice] = 
mask[txt_slice, txt_slice] if weight == 0: context = transformer_options['RegContext_neg'].context.to(context.dtype).to(context.device) mask = None else: context = transformer_options['RegContext_neg'].context.to(context.dtype).to(context.device) elif UNCOND and 'AttnMask' in transformer_options: AttnMask = transformer_options['AttnMask'] mask = transformer_options['AttnMask'].attn_mask.mask.to('cuda') if mask_zero is None: mask_zero = torch.ones_like(mask) mask_zero[txt_slice, txt_slice] = mask[txt_slice, txt_slice] if weight == 0: # ADDED 5/23/2025 context = transformer_options['RegContext'].context.to(context.dtype).to(context.device) # ADDED 5/26/2025 14:53 mask = None else: A = context B = transformer_options['RegContext'].context context = A.repeat(1, (B.shape[1] // A.shape[1]) + 1, 1)[:, :B.shape[1], :] if y0_style_active and not RECON_MODE: if mask is None: context, y, _ = StyleMMDiT.apply_style_conditioning( UNCOND = UNCOND, base_context = context, base_y = y, base_llama3 = None, ) else: context = context.repeat(bsz_style + 1, 1, 1) y = y.repeat(bsz_style + 1, 1) if y is not None else None img_y0_style = img_y0_style_orig.clone() if mask is not None and not type(mask[0][0].item()) == bool: mask = mask.to(x.dtype) if mask_zero is not None and not type(mask_zero[0][0].item()) == bool: mask_zero = mask_zero.to(x.dtype) clip = self.time_in(timestep_embedding(t, 256).to(x.dtype)) # 1 -> 1,3072 if self.params.guidance_embed: if guidance is None: print("Guidance strength is none, not using distilled guidance.") else: clip = clip + self.guidance_in(timestep_embedding(guidance, 256).to(x.dtype)) clip = clip + self.vector_in(y[:,:self.params.vec_in_dim]) #y.shape=1,768 y==all 0s clip = clip.to(x) img_in_dtype = self.img_in.weight.data.dtype if img_in_dtype not in {torch.bfloat16, torch.float16, torch.float32, torch.float64}: img_in_dtype = x.dtype if ref_latents is not None: h, w = 0, 0 for ref in ref_latents: h_offset = 0 w_offset = 0 if ref.shape[-2] + h > 
ref.shape[-1] + w: w_offset = w else: h_offset = h kontext, kontext_ids = self.process_img(ref, index=1, h_offset=h_offset, w_offset=w_offset) #kontext = self.img_in(kontext.to(img_in_dtype)) img, img_ids = self.process_img(x) img = torch.cat([img, kontext], dim=1) img_ids = torch.cat([img_ids, kontext_ids], dim=1) h = max(h, ref.shape[-2] + h_offset) w = max(w, ref.shape[-1] + w_offset) img = self.img_in(img.to(img_in_dtype)) img_slice = slice(-2*img_len, None) StyleMMDiT.KONTEXT = 1 for style_block in StyleMMDiT.double_blocks + StyleMMDiT.single_blocks: style_block.KONTEXT = 1 for style_block_imgtxt in [style_block.img, getattr(style_block, "txt")]: style_block_imgtxt.KONTEXT = 1 style_block_imgtxt.ATTN.KONTEXT = 1 StyleMMDiT.datashock_ref = ref_latents[0] else: img = rearrange(x, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=self.patch_size, pw=self.patch_size) img = self.img_in(img.to(img_in_dtype)) img_ids = self._get_img_ids(img, bsz, h_len, w_len, 0, h_len, 0, w_len) if y0_style_active and not RECON_MODE: img_y0_style = rearrange(img_y0_style_orig, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=self.patch_size, pw=self.patch_size) img_y0_style = self.img_in(img_y0_style.to(img_in_dtype)) # hidden_states 1,4032,2560 for 1024x1024: -> 1,4096,2560 ,64 -> ,2560 (x40) if ref_latents is not None: img_kontext = self.img_in(kontext.to(img_in_dtype)) #img_base = rearrange(x, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=self.patch_size, pw=self.patch_size) #img_base = self.img_in(img_base.to(img_in_dtype)) #img_ids = self._get_img_ids(img, bsz, h_len, w_len, 0, h_len, 0, w_len) img_ids = img_ids .repeat(bsz,1,1) #img_y0_style = img_y0_style.repeat(1,bsz,1) # torch.cat([img, img_y0_style], dim=0) img_y0_style = torch.cat([img_y0_style, img_kontext.repeat(bsz-1,1,1)], dim=1) StyleMMDiT.KONTEXT = 2 for style_block in StyleMMDiT.double_blocks + StyleMMDiT.single_blocks: style_block.KONTEXT = 2 for style_block_imgtxt in [style_block.img, getattr(style_block, "txt")]: 
style_block_imgtxt.KONTEXT = 2 style_block_imgtxt.ATTN.KONTEXT = 2 StyleMMDiT.datashock_ref = None img = torch.cat([img, img_y0_style], dim=0) # txt_ids -> 1,414,3 txt_ids = torch.zeros((bsz, context.shape[-2], 3), device=img.device, dtype=x.dtype) ids = torch.cat((txt_ids, img_ids), dim=-2) # ids -> 1,4446,3 # flipped from hidream rope = self.pe_embedder(ids) # rope -> 1, 4446, 1, 64, 2, 2 txt_init = self.txt_in(context) txt_init_len = txt_init.shape[-2] # 271 img = StyleMMDiT(img, "proj_in") img = img.to(x) if img is not None else None total_layers = len(self.double_blocks) + len(self.single_blocks) # DOUBLE STREAM ca_idx = 0 for bid, (block, style_block) in enumerate(zip(self.double_blocks, StyleMMDiT.double_blocks)): txt = txt_init if weight > 0 and mask is not None and weight < bid/total_layers: img, txt_init = block(img, txt, clip, rope, mask_zero, style_block=style_block) elif (weight < 0 and mask is not None and abs(weight) < (1 - bid/total_layers)): img_tmpZ, txt_tmpZ = img.clone(), txt.clone() # more efficient than the commented lines below being used instead in the loop? 
img_tmpZ, txt_init = block(img_tmpZ, txt_tmpZ, clip, rope, mask, style_block=style_block) img , txt_tmpZ = block(img , txt , clip, rope, mask_zero, style_block=style_block) elif floor > 0 and mask is not None and floor > bid/total_layers: mask_tmp = mask.clone() mask_tmp[img_slice,img_slice] = 1.0 img, txt_init = block(img, txt, clip, rope, mask_tmp, style_block=style_block) elif floor < 0 and mask is not None and abs(floor) > (1 - bid/total_layers): mask_tmp = mask.clone() mask_tmp[img_slice,img_slice] = 1.0 img, txt_init = block(img, txt, clip, rope, mask_tmp, style_block=style_block) elif update_cross_attn is not None and update_cross_attn['skip_cross_attn']: img, txt_init = block(img, txt, clip, rope, mask, update_cross_attn=update_cross_attn) else: img, txt_init = block(img, txt, clip, rope, mask, update_cross_attn=update_cross_attn, style_block=style_block) if control is not None: control_i = control.get("input") if bid < len(control_i): add = control_i[bid] if add is not None: img[:1] += add if hasattr(self, "pulid_data"): if self.pulid_data: if bid % self.pulid_double_interval == 0: for _, node_data in self.pulid_data.items(): if torch.any((node_data['sigma_start'] >= timestep) & (timestep >= node_data['sigma_end'])): img = img + node_data['weight'] * self.pulid_ca[ca_idx](node_data['embedding'], img) ca_idx += 1 # END DOUBLE STREAM #img = img[0:1] #txt_init = txt_init[0:1] img = torch.cat([txt_init, img], dim=-2) # 4032 + 271 -> 4303 # txt embed from double stream block # flipped from hidream double_layers = len(self.double_blocks) # SINGLE STREAM for bid, (block, style_block) in enumerate(zip(self.single_blocks, StyleMMDiT.single_blocks)): if weight > 0 and mask is not None and weight < (bid+double_layers)/total_layers: img = block(img, clip, rope, mask_zero, style_block=style_block) elif weight < 0 and mask is not None and abs(weight) < (1 - (bid+double_layers)/total_layers): img = block(img, clip, rope, mask_zero, style_block=style_block) elif floor > 0 
and mask is not None and floor > (bid+double_layers)/total_layers: mask_tmp = mask.clone() mask_tmp[img_slice,img_slice] = 1.0 img = block(img, clip, rope, mask_tmp, style_block=style_block) elif floor < 0 and mask is not None and abs(floor) > (1 - (bid+double_layers)/total_layers): mask_tmp = mask.clone() mask_tmp[img_slice,img_slice] = 1.0 img = block(img, clip, rope, mask_tmp, style_block=style_block) else: img = block(img, clip, rope, mask, style_block=style_block) if control is not None: # Controlnet control_o = control.get("output") if bid < len(control_o): add = control_o[bid] if add is not None: img[:1, txt_slice, ...] += add if hasattr(self, "pulid_data"): # PuLID attention if self.pulid_data: real_img, txt = img[:, img_slice, ...], img[:, txt_slice, ...] if bid % self.pulid_single_interval == 0: # Will calculate influence of all nodes at once for _, node_data in self.pulid_data.items(): if torch.any((node_data['sigma_start'] >= timestep) & (timestep >= node_data['sigma_end'])): real_img = real_img + node_data['weight'] * self.pulid_ca[ca_idx](node_data['embedding'], real_img) ca_idx += 1 img = torch.cat((txt, real_img), 1) # END SINGLE STREAM img = img[..., img_slice, :] #img = self.final_layer(img, clip) # 4096,2560 -> 4096,64 shift, scale = self.final_layer.adaLN_modulation(clip).chunk(2,dim=1) img = (1 + scale[:, None, :]) * self.final_layer.norm_final(img) + shift[:, None, :] img = StyleMMDiT(img, "proj_out") if y0_style_active and not RECON_MODE: img = img[0:1] #img = img[1:2] #img = self.final_layer.linear(img.to(self.final_layer.linear.weight.data)) img = self.final_layer.linear(img) #img = self.unpatchify(img, img_sizes) img = img[:,:img_len] # accomodate kontext img = rearrange(img, "b (h w) (c ph pw) -> b c (h ph) (w pw)", h=h_len, w=w_len, ph=self.patch_size, pw=self.patch_size) out_list.append(img) output = torch.cat(out_list, dim=0) eps = output[:, :, :h, :w] if recon_iter == 1: denoised = new_x - SIGMA.to(new_x) * eps.to(new_x) if x_tmp is 
not None: eps = (x_tmp - denoised.to(x_tmp)) / SIGMA.to(x_tmp) else: eps = (x_orig - denoised.to(x_orig)) / SIGMA.to(x_orig) freqsep_lowpass_method = transformer_options.get("freqsep_lowpass_method") freqsep_sigma = transformer_options.get("freqsep_sigma") freqsep_kernel_size = transformer_options.get("freqsep_kernel_size") freqsep_inner_kernel_size = transformer_options.get("freqsep_inner_kernel_size") freqsep_stride = transformer_options.get("freqsep_stride") freqsep_lowpass_weight = transformer_options.get("freqsep_lowpass_weight") freqsep_highpass_weight= transformer_options.get("freqsep_highpass_weight") freqsep_mask = transformer_options.get("freqsep_mask") y0_style_pos = transformer_options.get("y0_style_pos") y0_style_neg = transformer_options.get("y0_style_neg") # end recon loop self.style_dtype = torch.float32 if self.style_dtype is None else self.style_dtype dtype = eps.dtype if self.style_dtype is None else self.style_dtype if y0_style_pos is not None: y0_style_pos_weight = transformer_options.get("y0_style_pos_weight") y0_style_pos_synweight = transformer_options.get("y0_style_pos_synweight") y0_style_pos_synweight *= y0_style_pos_weight y0_style_pos_mask = transformer_options.get("y0_style_pos_mask") y0_style_pos_mask_edge = transformer_options.get("y0_style_pos_mask_edge") y0_style_pos = y0_style_pos.to(dtype) x = x_orig.to(dtype) eps = eps.to(dtype) eps_orig = eps.clone() sigma = SIGMA #t_orig[0].to(torch.float32) / 1000 denoised = x - sigma * eps denoised_embed = self.Retrojector.embed(denoised) y0_adain_embed = self.Retrojector.embed(y0_style_pos) if transformer_options['y0_style_method'] == "scattersort": tile_h, tile_w = transformer_options.get('y0_style_tile_height'), transformer_options.get('y0_style_tile_width') pad = transformer_options.get('y0_style_tile_padding') if pad is not None and tile_h is not None and tile_w is not None: denoised_spatial = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) y0_adain_spatial = 
rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) if EO("scattersort_median_LP"): denoised_spatial_LP = median_blur_2d(denoised_spatial, kernel_size=EO("scattersort_median_LP",7)) y0_adain_spatial_LP = median_blur_2d(y0_adain_spatial, kernel_size=EO("scattersort_median_LP",7)) denoised_spatial_HP = denoised_spatial - denoised_spatial_LP y0_adain_spatial_HP = y0_adain_spatial - y0_adain_spatial_LP denoised_spatial_LP = apply_scattersort_tiled(denoised_spatial_LP, y0_adain_spatial_LP, tile_h, tile_w, pad) denoised_spatial = denoised_spatial_LP + denoised_spatial_HP denoised_embed = rearrange(denoised_spatial, "b c h w -> b (h w) c") else: denoised_spatial = apply_scattersort_tiled(denoised_spatial, y0_adain_spatial, tile_h, tile_w, pad) denoised_embed = rearrange(denoised_spatial, "b c h w -> b (h w) c") else: denoised_embed = apply_scattersort_masked(denoised_embed, y0_adain_embed, y0_style_pos_mask, y0_style_pos_mask_edge, h_len, w_len) elif transformer_options['y0_style_method'] == "AdaIN": if freqsep_mask is not None: freqsep_mask = freqsep_mask.view(1, 1, *freqsep_mask.shape[-2:]).float() freqsep_mask = F.interpolate(freqsep_mask.float(), size=(h_len, w_len), mode='nearest-exact') if hasattr(self, "adain_tile"): tile_h, tile_w = self.adain_tile denoised_pretile = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) y0_adain_pretile = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) if self.adain_flag: h_off = tile_h // 2 w_off = tile_w // 2 denoised_pretile = denoised_pretile[:,:,h_off:-h_off, w_off:-w_off] self.adain_flag = False else: h_off = 0 w_off = 0 self.adain_flag = True tiles, orig_shape, grid, strides = tile_latent(denoised_pretile, tile_size=(tile_h,tile_w)) y0_tiles, orig_shape, grid, strides = tile_latent(y0_adain_pretile, tile_size=(tile_h,tile_w)) tiles_out = [] for i in range(tiles.shape[0]): tile = tiles[i].unsqueeze(0) y0_tile = y0_tiles[i].unsqueeze(0) tile = rearrange(tile, "b c h w -> b (h 
w) c", h=tile_h, w=tile_w) y0_tile = rearrange(y0_tile, "b c h w -> b (h w) c", h=tile_h, w=tile_w) tile = adain_seq_inplace(tile, y0_tile) tiles_out.append(rearrange(tile, "b (h w) c -> b c h w", h=tile_h, w=tile_w)) tiles_out_tensor = torch.cat(tiles_out, dim=0) tiles_out_tensor = untile_latent(tiles_out_tensor, orig_shape, grid, strides) if h_off == 0: denoised_pretile = tiles_out_tensor else: denoised_pretile[:,:,h_off:-h_off, w_off:-w_off] = tiles_out_tensor denoised_embed = rearrange(denoised_pretile, "b c h w -> b (h w) c", h=h_len, w=w_len) elif freqsep_lowpass_method is not None and freqsep_lowpass_method.endswith("pw"): #EO("adain_pw"): denoised_spatial = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) y0_adain_spatial = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) if freqsep_lowpass_method == "median_pw": denoised_spatial_new = adain_patchwise_row_batch_med(denoised_spatial.clone(), y0_adain_spatial.clone().repeat(denoised_spatial.shape[0],1,1,1), sigma=freqsep_sigma, kernel_size=freqsep_kernel_size, use_median_blur=True, lowpass_weight=freqsep_lowpass_weight, highpass_weight=freqsep_highpass_weight) elif freqsep_lowpass_method == "gaussian_pw": denoised_spatial_new = adain_patchwise_row_batch(denoised_spatial.clone(), y0_adain_spatial.clone().repeat(denoised_spatial.shape[0],1,1,1), sigma=freqsep_sigma, kernel_size=freqsep_kernel_size) denoised_embed = rearrange(denoised_spatial_new, "b c h w -> b (h w) c", h=h_len, w=w_len) elif freqsep_lowpass_method is not None: denoised_spatial = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) y0_adain_spatial = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) if freqsep_lowpass_method == "median": denoised_spatial_LP = median_blur_2d(denoised_spatial, kernel_size=freqsep_kernel_size) y0_adain_spatial_LP = median_blur_2d(y0_adain_spatial, kernel_size=freqsep_kernel_size) elif freqsep_lowpass_method == "gaussian": denoised_spatial_LP = 
gaussian_blur_2d(denoised_spatial, sigma=freqsep_sigma, kernel_size=freqsep_kernel_size) y0_adain_spatial_LP = gaussian_blur_2d(y0_adain_spatial, sigma=freqsep_sigma, kernel_size=freqsep_kernel_size) denoised_spatial_HP = denoised_spatial - denoised_spatial_LP if EO("adain_fs_uhp"): y0_adain_spatial_HP = y0_adain_spatial - y0_adain_spatial_LP denoised_spatial_ULP = gaussian_blur_2d(denoised_spatial, sigma=EO("adain_fs_uhp_sigma", 1.0), kernel_size=EO("adain_fs_uhp_kernel_size", 3)) y0_adain_spatial_ULP = gaussian_blur_2d(y0_adain_spatial, sigma=EO("adain_fs_uhp_sigma", 1.0), kernel_size=EO("adain_fs_uhp_kernel_size", 3)) denoised_spatial_UHP = denoised_spatial_HP - denoised_spatial_ULP y0_adain_spatial_UHP = y0_adain_spatial_HP - y0_adain_spatial_ULP #denoised_spatial_HP = y0_adain_spatial_ULP + denoised_spatial_UHP denoised_spatial_HP = denoised_spatial_ULP + y0_adain_spatial_UHP denoised_spatial_new = freqsep_lowpass_weight * y0_adain_spatial_LP + freqsep_highpass_weight * denoised_spatial_HP denoised_embed = rearrange(denoised_spatial_new, "b c h w -> b (h w) c", h=h_len, w=w_len) else: denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed) for adain_iter in range(EO("style_iter", 0)): denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed) denoised_embed = self.Retrojector.embed(self.Retrojector.unembed(denoised_embed)) denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed) elif transformer_options['y0_style_method'] == "WCT": self.StyleWCT.set(y0_adain_embed) denoised_embed = self.StyleWCT.get(denoised_embed) if transformer_options.get('y0_standard_guide') is not None: y0_standard_guide = transformer_options.get('y0_standard_guide') y0_standard_guide_embed = self.Retrojector.embed(y0_standard_guide) f_cs = self.StyleWCT.get(y0_standard_guide_embed) self.y0_standard_guide = self.Retrojector.unembed(f_cs) if transformer_options.get('y0_inv_standard_guide') is not None: y0_inv_standard_guide = 
transformer_options.get('y0_inv_standard_guide') y0_inv_standard_guide_embed = self.Retrojector.embed(y0_inv_standard_guide) f_cs = self.StyleWCT.get(y0_inv_standard_guide_embed) self.y0_inv_standard_guide = self.Retrojector.unembed(f_cs) elif transformer_options['y0_style_method'] == "WCT2": self.WaveletStyleWCT.set(y0_adain_embed, h_len, w_len) denoised_embed = self.WaveletStyleWCT.get(denoised_embed, h_len, w_len) if transformer_options.get('y0_standard_guide') is not None: y0_standard_guide = transformer_options.get('y0_standard_guide') y0_standard_guide_embed = self.Retrojector.embed(y0_standard_guide) f_cs = self.WaveletStyleWCT.get(y0_standard_guide_embed, h_len, w_len) self.y0_standard_guide = self.Retrojector.unembed(f_cs) if transformer_options.get('y0_inv_standard_guide') is not None: y0_inv_standard_guide = transformer_options.get('y0_inv_standard_guide') y0_inv_standard_guide_embed = self.Retrojector.embed(y0_inv_standard_guide) f_cs = self.WaveletStyleWCT.get(y0_inv_standard_guide_embed, h_len, w_len) self.y0_inv_standard_guide = self.Retrojector.unembed(f_cs) denoised_approx = self.Retrojector.unembed(denoised_embed) eps = (x - denoised_approx) / sigma if not UNCOND: if eps.shape[0] == 2: eps[1] = eps_orig[1] + y0_style_pos_weight * (eps[1] - eps_orig[1]) eps[0] = eps_orig[0] + y0_style_pos_synweight * (eps[0] - eps_orig[0]) else: eps[0] = eps_orig[0] + y0_style_pos_weight * (eps[0] - eps_orig[0]) elif eps.shape[0] == 1 and UNCOND: eps[0] = eps_orig[0] + y0_style_pos_synweight * (eps[0] - eps_orig[0]) #eps = eps.float() if y0_style_neg is not None: y0_style_neg_weight = transformer_options.get("y0_style_neg_weight") y0_style_neg_synweight = transformer_options.get("y0_style_neg_synweight") y0_style_neg_synweight *= y0_style_neg_weight y0_style_neg_mask = transformer_options.get("y0_style_neg_mask") y0_style_neg_mask_edge = transformer_options.get("y0_style_neg_mask_edge") y0_style_neg = y0_style_neg.to(dtype) x = x_orig.to(dtype) eps = eps.to(dtype) 
eps_orig = eps.clone() sigma = SIGMA #t_orig[0].to(torch.float32) / 1000 denoised = x - sigma * eps denoised_embed = self.Retrojector.embed(denoised) y0_adain_embed = self.Retrojector.embed(y0_style_neg) if transformer_options['y0_style_method'] == "scattersort": tile_h, tile_w = transformer_options.get('y0_style_tile_height'), transformer_options.get('y0_style_tile_width') pad = transformer_options.get('y0_style_tile_padding') if pad is not None and tile_h is not None and tile_w is not None: denoised_spatial = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) y0_adain_spatial = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) denoised_spatial = apply_scattersort_tiled(denoised_spatial, y0_adain_spatial, tile_h, tile_w, pad) denoised_embed = rearrange(denoised_spatial, "b c h w -> b (h w) c") else: denoised_embed = apply_scattersort_masked(denoised_embed, y0_adain_embed, y0_style_neg_mask, y0_style_neg_mask_edge, h_len, w_len) elif transformer_options['y0_style_method'] == "AdaIN": denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed) for adain_iter in range(EO("style_iter", 0)): denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed) denoised_embed = self.Retrojector.embed(self.Retrojector.unembed(denoised_embed)) denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed) elif transformer_options['y0_style_method'] == "WCT": self.StyleWCT.set(y0_adain_embed) denoised_embed = self.StyleWCT.get(denoised_embed) elif transformer_options['y0_style_method'] == "WCT2": self.WaveletStyleWCT.set(y0_adain_embed, h_len, w_len) denoised_embed = self.WaveletStyleWCT.get(denoised_embed, h_len, w_len) denoised_approx = self.Retrojector.unembed(denoised_embed) if UNCOND: eps = (x - denoised_approx) / sigma eps[0] = eps_orig[0] + y0_style_neg_weight * (eps[0] - eps_orig[0]) if eps.shape[0] == 2: eps[1] = eps_orig[1] + y0_style_neg_synweight * (eps[1] - eps_orig[1]) elif eps.shape[0] == 1 and not UNCOND: eps[0] = 
def expand_timesteps(self, t, batch_size, device):
    """Return the timestep ``t`` as a 1-D tensor with ``batch_size`` entries.

    NOTE(review): this takes ``self`` and appears to be a method of a class
    whose header is outside the visible chunk -- confirm its indentation
    when splicing it back.

    Args:
        t: A Python number, a 0-d tensor, or an already batched tensor.
        batch_size: Target length of the returned tensor.
        device: ``torch.device`` on which a newly created tensor is placed.

    Returns:
        ``t`` expanded (as a view, no copy) to ``batch_size`` elements.
    """
    if not torch.is_tensor(t):
        # 32-bit dtypes are selected when the device type is "mps",
        # 64-bit ones everywhere else.
        is_mps = device.type == "mps"
        if isinstance(t, float):
            dtype = torch.float32 if is_mps else torch.float64
        else:
            dtype = torch.int32 if is_mps else torch.int64
        # Use the torch.tensor factory: the torch.Tensor constructor does not
        # accept a ``dtype`` keyword and raised TypeError for scalar input.
        t = torch.tensor([t], dtype=dtype, device=device)
    elif len(t.shape) == 0:
        # Promote a 0-d tensor to shape (1,) so that it can be expanded.
        t = t[None].to(device)
    # broadcast to batch dimension in a way that's compatible with ONNX/Core ML
    t = t.expand(batch_size)
    return t


def clone_inputs(*args, index: int = None):
    """Clone every tensor in ``args`` and return the clones as a tuple.

    Args:
        *args: Tensors to copy.
        index: When given, only ``x[index]`` of each tensor is cloned, with a
            leading dimension of size 1 re-added via ``unsqueeze(0)``.

    Returns:
        A tuple with one cloned tensor per positional argument.
    """
    if index is None:
        return tuple(x.clone() for x in args)
    return tuple(x[index].unsqueeze(0).clone() for x in args)
def _to_linalg_device(tensor: torch.Tensor) -> torch.Tensor:
    """Move ``tensor`` to CUDA for a decomposition when CUDA is available.

    On machines without CUDA the tensor is returned unchanged, so the
    decomposition runs on whatever device the tensor already lives on.
    """
    return tensor.cuda() if torch.cuda.is_available() else tensor


class ReReduxImageEncoder(torch.nn.Module):
    """Two-layer projection (``redux_up`` -> SiLU -> ``redux_down``) with style matching.

    ``feature_match`` pulls a conditioning tensor back through the
    pseudo-inverse of ``redux_down``, matches its statistics to the
    activations produced from an image embedding, and projects it forward
    again.
    """

    def __init__(
        self,
        redux_dim: int = 1152,
        txt_in_features: int = 4096,
        device=None,
        dtype=None,
    ) -> None:
        super().__init__()
        self.redux_dim = redux_dim
        self.device = device
        self.dtype = dtype
        # Optional dtype override used by feature_match; None means "use the
        # dtype of the activations".
        self.style_dtype = None
        self.redux_up = ops.Linear(redux_dim, txt_in_features * 3, dtype=dtype)
        self.redux_down = ops.Linear(txt_in_features * 3, txt_in_features, dtype=dtype)

    def forward(self, sigclip_embeds) -> torch.Tensor:
        """Project ``sigclip_embeds`` through ``redux_up``, SiLU and ``redux_down``."""
        projected_x = self.redux_down(torch.nn.functional.silu(self.redux_up(sigclip_embeds)))
        return projected_x

    def feature_match(self, cond, clip_vision_output, mode="WCT"):
        """Match the statistics of ``cond[0][0]`` to the style activations.

        Args:
            cond: Conditioning list; ``cond[0][0]`` is read and then
                overwritten in place with the re-projected result.
            clip_vision_output: Object exposing ``last_hidden_state``.
            mode: ``"AdaIN"`` matches per-channel mean/std, ``"WCT"`` applies
                a whitening/colouring transform. Any other value skips the
                matching and only round-trips through the pseudo-inverse.

        Returns:
            A one-element tuple containing the (mutated) ``cond``.

        Notes:
            ``self.W_pinv`` is computed once and cached; it is not refreshed
            if the weights or ``style_dtype`` change afterwards.
            Both token counts are assumed to be perfect squares, since they
            are reshaped into square grids below.
        """
        sigclip_embeds = clip_vision_output.last_hidden_state
        dense_embed = torch.nn.functional.silu(self.redux_up(sigclip_embeds))

        # Resample the style activations to the token count of the conditioning.
        t_sqrt = int(dense_embed.shape[-2] ** 0.5)
        dense_embed_sq = dense_embed.view(dense_embed.shape[-3], t_sqrt, t_sqrt, dense_embed.shape[-1])

        t_cond_sqrt = int(cond[0][0].shape[-2] ** 0.5)
        dense_embed256 = F.interpolate(dense_embed_sq.transpose(-3, -1), size=(t_cond_sqrt, t_cond_sqrt), mode="bicubic")
        dense_embed256 = dense_embed256.flatten(-2, -1).transpose(-2, -1)

        dtype = self.style_dtype if hasattr(self, "style_dtype") and self.style_dtype is not None else dense_embed.dtype
        # The pseudo-inverse is computed in at least float32.
        pinv_dtype = torch.float32 if dtype != torch.float64 else dtype
        W = self.redux_down.weight.data.to(dtype)
        b = self.redux_down.bias.data.to(dtype)

        cond_256 = cond[0][0].clone()

        if not hasattr(self, "W_pinv"):
            self.W_pinv = torch.linalg.pinv(_to_linalg_device(W.to(pinv_dtype))).to(W)

        # Invert redux_down: map the conditioning back into redux_up's output space.
        cond_embed256 = (cond_256 - b.to(cond_256)) @ self.W_pinv.T.to(cond_256)

        if mode == "AdaIN":
            cond_embed256 = adain_seq_inplace(cond_embed256, dense_embed256)

        elif mode == "WCT":
            # Recompute the colouring matrix only when the style activations changed.
            if (
                not hasattr(self, "dense_embed256")
                or self.dense_embed256 is None
                or self.dense_embed256.shape != dense_embed256.shape
                or torch.norm(self.dense_embed256 - dense_embed256) > 0
            ):
                self.dense_embed256 = dense_embed256

                f_s = dense_embed256[0].clone()
                self.mu_s = f_s.mean(dim=0, keepdim=True)
                f_s_centered = f_s - self.mu_s

                cov = (f_s_centered.T.double() @ f_s_centered.double()) / (f_s_centered.size(0) - 1)
                regularised = cov + 1e-5 * torch.eye(cov.size(0), dtype=cov.dtype, device=cov.device)
                S_eig, U_eig = torch.linalg.eigh(_to_linalg_device(regularised))
                S_eig = S_eig.to(cov)
                U_eig = U_eig.to(cov)

                S_eig_sqrt = S_eig.clamp(min=0).sqrt()  # eigenvalues -> singular values
                whiten = U_eig @ torch.diag(S_eig_sqrt) @ U_eig.T
                self.y0_color = whiten.to(f_s_centered)

            for wct_i in range(cond_embed256.shape[-3]):
                f_c = cond_embed256[wct_i].clone()
                mu_c = f_c.mean(dim=0, keepdim=True)
                f_c_centered = f_c - mu_c

                cov = (f_c_centered.T.double() @ f_c_centered.double()) / (f_c_centered.size(0) - 1)
                regularised = cov + 1e-5 * torch.eye(cov.size(0), dtype=cov.dtype, device=cov.device)
                S_eig, U_eig = torch.linalg.eigh(_to_linalg_device(regularised))
                S_eig = S_eig.to(cov)
                U_eig = U_eig.to(cov)

                inv_sqrt_eig = S_eig.clamp(min=0).rsqrt()
                whiten = U_eig @ torch.diag(inv_sqrt_eig) @ U_eig.T
                whiten = whiten.to(f_c_centered)

                # Whiten the content, then colour it with the style covariance and mean.
                f_c_whitened = f_c_centered @ whiten.T
                f_cs = f_c_whitened @ self.y0_color.T + self.mu_s

                cond_embed256[wct_i] = f_cs

        cond[0][0] = self.redux_down(cond_embed256)
        return (cond,)


def adain_seq_inplace(content: torch.Tensor, style: torch.Tensor, eps: float = 1e-7) -> torch.Tensor:
    """Normalise ``content`` along dim 1 to the mean/std of ``style``, in place.

    ``content`` is modified and also returned. ``eps`` is added to both
    standard deviations.
    """
    mean_c = content.mean(1, keepdim=True)
    std_c = content.std(1, keepdim=True).add_(eps)
    mean_s = style.mean(1, keepdim=True)
    std_s = style.std(1, keepdim=True).add_(eps)

    content.sub_(mean_c).div_(std_c).mul_(std_s).add_(mean_s)
    return content


def adain_seq(content: torch.Tensor, style: torch.Tensor, eps: float = 1e-7) -> torch.Tensor:
    """Out-of-place variant of ``adain_seq_inplace``; ``content`` is left untouched."""
    return ((content - content.mean(1, keepdim=True)) / (content.std(1, keepdim=True) + eps)) * (style.std(1, keepdim=True) + eps) + style.mean(1, keepdim=True)
# Spellings that are interpreted as boolean True in ``key=value`` options.
_TRUTHY_WORDS = ("true", "1", "yes", "on")


class ExtraOptions():
    """Callable reader for a multi-line ``extra_options`` string.

    Each line of the string is either a bare flag (``name``) or an
    assignment (``name=value``). Calling the instance looks one option up.
    """

    def __init__(self, extra_options):
        self.extra_options = extra_options
        self.mute = False

    def _report(self, option, value, debugMode):
        """Log a parsed option via RESplain unless muted or ``debugMode == 2``."""
        if self.mute or debugMode == 2:
            return
        if debugMode == 1:
            RESplain("Set extra_option: ", option, "=", value, debug=True)
        else:
            RESplain("Set extra_option: ", option, "=", value)

    # debugMode 0: Follow self.mute only
    # debugMode 1: Print with debug flag if not muted
    # debugMode 2: Never print
    def __call__(self, option, default=None, ret_type=None, match_all_flags=False, debugMode=0):
        """Look up ``option`` in the options string.

        Args:
            option: Option name, or a tuple/list of names. For a collection
                the result is ``any`` (or ``all`` when ``match_all_flags``)
                of the individual lookups.
            default: ``None`` requests a flag test (returns ``bool``);
                otherwise the value returned when the option is absent, and
                the source of the conversion type when ``ret_type`` is None.
            ret_type: Optional callable used to convert the matched text.
            match_all_flags: Require every name of a collection to match.
            debugMode: Logging behaviour, see the comment above this method.

        Returns:
            ``bool`` for flag tests, otherwise the converted value or ``default``.
        """
        if isinstance(option, (tuple, list)):
            # NOTE: debugMode is not forwarded to the per-name lookups.
            if match_all_flags:
                return all(self(single_option, default, ret_type) for single_option in option)
            else:
                return any(self(single_option, default, ret_type) for single_option in option)

        if default is None:  # get flag
            pattern = rf"^(?:{re.escape(option)}\s*$|{re.escape(option)}=)"
            return bool(re.search(pattern, self.extra_options, flags=re.MULTILINE))
        elif ret_type is None:
            ret_type = type(default)

            if ret_type.__module__ != "builtins":
                # Non-builtin defaults: resolve the matched text as an
                # attribute name of the default's top-level module.
                mod = __import__(default.__module__)
                ret_type = lambda v: getattr(mod, v, None)

        if ret_type == list:
            pattern = rf"^{re.escape(option)}\s*=\s*([a-zA-Z0-9_.,+-]+)\s*$"
            match = re.search(pattern, self.extra_options, flags=re.MULTILINE)

            if match:
                value = match.group(1)
                self._report(option, value, debugMode)
            else:
                value = default

            if type(value) == str:
                value = value.split(',')

                # The element type comes from default[0], which may be a type
                # object itself or an example value.
                if type(default[0]) == type:
                    ret_type = default[0]
                else:
                    ret_type = type(default[0])

                value = [ret_type(item) for item in value]

        else:
            pattern = rf"^{re.escape(option)}\s*=\s*([a-zA-Z0-9_.+-]+)\s*$"
            match = re.search(pattern, self.extra_options, flags=re.MULTILINE)
            if match:
                if ret_type == bool:
                    value_str = match.group(1).lower()
                    value = value_str in _TRUTHY_WORDS
                else:
                    value = ret_type(match.group(1))
                self._report(option, value, debugMode)
            else:
                value = default

        return value


def extra_options_flag(flag, extra_options):
    """Return True when ``flag`` occurs in ``extra_options`` as a bare line or as ``flag=...``."""
    pattern = rf"^(?:{re.escape(flag)}\s*$|{re.escape(flag)}=)"
    return bool(re.search(pattern, extra_options, flags=re.MULTILINE))


def get_extra_options_kv(key, default, extra_options, ret_type=None):
    """Return the value assigned to ``key`` in ``extra_options``.

    Args:
        key: Option name to look for (``key=value`` on its own line).
        default: Value used when the key is absent; also supplies the
            conversion type when ``ret_type`` is None.
        extra_options: The multi-line options string.
        ret_type: Optional callable used to convert the value.

    Returns:
        ``ret_type(value)``. Booleans are parsed from their spelling
        ("true", "1", "yes", "on" are True, anything else False) instead of
        through ``bool(str)``, which is True for every non-empty string.
    """
    ret_type = type(default) if ret_type is None else ret_type

    pattern = rf"^{re.escape(key)}\s*=\s*([a-zA-Z0-9_.+-]+)\s*$"
    match = re.search(pattern, extra_options, flags=re.MULTILINE)

    if not match:
        return ret_type(default)

    raw_value = match.group(1)
    if ret_type is bool:
        return raw_value.lower() in _TRUTHY_WORDS
    return ret_type(raw_value)
def get_extra_options_list(key, default, extra_options, ret_type=None):
    """Return the comma-separated values assigned to ``key`` as a list.

    Args:
        key: Option name to look for (``key=a,b,c`` on its own line).
        default: Fallback value; a non-list default is wrapped in a list.
        extra_options: The multi-line options string.
        ret_type: Optional element converter. Defaults to the type of
            ``default[0]``, or ``str`` when the default list is empty.

    Returns:
        A new list with every element passed through ``ret_type``. Boolean
        elements are parsed from their spelling ("true", "1", "yes", "on").
    """
    default = [default] if type(default) != list else default
    if ret_type is None:
        # An empty default carries no element type; fall back to str.
        ret_type = type(default[0]) if default else str

    pattern = rf"^{re.escape(key)}\s*=\s*([a-zA-Z0-9_.,+-]+)\s*$"
    match = re.search(pattern, extra_options, flags=re.MULTILINE)

    items = match.group(1).split(',') if match else default

    if ret_type is bool:
        # bool("false") is True, so interpret the text explicitly.
        return [
            item.lower() in ("true", "1", "yes", "on") if isinstance(item, str) else bool(item)
            for item in items
        ]
    return [ret_type(item) for item in items]


class OptionsManager:
    """Collects option dictionaries and exposes them as one merged mapping.

    Later dictionaries override earlier ones; nested dictionaries are merged
    recursively; keys listed in ``APPEND_OPTIONS`` are string options whose
    values are concatenated with newlines instead of being overridden.
    """

    # Keys whose (string) values are joined rather than replaced.
    APPEND_OPTIONS = {"extra_options"}

    def __init__(self, options, **kwargs):
        """Store ``options`` plus every non-None keyword whose name starts with ``options``."""
        self.options_list = []
        if options is not None:
            self.options_list.append(options)

        for key, value in kwargs.items():
            if key.startswith('options') and value is not None:
                self.options_list.append(value)

        self._merged_dict = None

    def add_option(self, option):
        """Add a single options dictionary"""
        if option is not None:
            self.options_list.append(option)
            self._merged_dict = None  # invalidate cached merged options

    @property
    def merged(self):
        """Get merged options with proper priority handling"""
        if self._merged_dict is None:
            # Build into a local so a failure cannot leave a half-filled cache.
            merged = {}
            appended = {key: [] for key in self.APPEND_OPTIONS}

            for options_dict in self.options_list:
                if options_dict is None:
                    continue
                for key, value in options_dict.items():
                    if key in self.APPEND_OPTIONS and value:
                        appended[key].append(value)
                    elif isinstance(value, dict):
                        # Deep merge dictionaries; a non-dict value stored
                        # earlier under the same key is replaced.
                        if not isinstance(merged.get(key), dict):
                            merged[key] = {}
                        self._deep_update(merged[key], value)
                    # Special case for FrameWeightsManager
                    elif key == "frame_weights_mgr" and hasattr(value, "_weight_configs"):
                        if key not in merged:
                            merged[key] = copy.deepcopy(value)
                        else:
                            existing_mgr = merged[key]
                            # Only non-default device/dtype settings override.
                            if hasattr(value, "device") and value.device != torch.device('cpu'):
                                existing_mgr.device = value.device
                            if hasattr(value, "dtype") and value.dtype != torch.float64:
                                existing_mgr.dtype = value.dtype
                            # Merge all weight_configs
                            for name, config in value._weight_configs.items():
                                existing_mgr.add_weight_config(name, **config.copy())
                    else:
                        merged[key] = value

            # append special case string options (e.g. extra_options)
            for key, parts in appended.items():
                if parts:
                    merged[key] = "\n".join(parts)

            self._merged_dict = merged

        return self._merged_dict

    def update(self, key_or_dict, value=None, append=False):
        """Update options with a single key-value pair or a dictionary

        Args:
            key_or_dict: A dotted key path (``"a.b.c"``), a list of path
                components, or a (possibly nested) dictionary of updates.
            value: Value for the single-key form.
            append: Accepted for compatibility; not used by this method.

        Returns:
            ``self``, so calls can be chained.
        """
        if value is not None or isinstance(key_or_dict, (str, list)):
            # single key-value update
            key_path = key_or_dict
            if isinstance(key_path, str):
                key_path = key_path.split('.')

            # Build {"a": {"b": {"c": value}}} from the path components.
            update_dict = {}
            current = update_dict
            for key in key_path[:-1]:
                current[key] = {}
                current = current[key]
            current[key_path[-1]] = value

            self.add_option(update_dict)
        else:
            # dictionary update
            flat_updates = {}

            def _flatten_dict(d, prefix=""):
                for key, item in d.items():
                    full_key = f"{prefix}.{key}" if prefix else key
                    if isinstance(item, dict):
                        _flatten_dict(item, full_key)
                    else:
                        flat_updates[full_key] = item

            _flatten_dict(key_or_dict)

            for key_path, item in flat_updates.items():
                self.update(key_path, item)  # Recursive call

        return self

    def get(self, key, default=None):
        """Return the merged value for ``key`` or ``default``."""
        return self.merged.get(key, default)

    def _deep_update(self, target_dict, source_dict):
        """Recursively merge ``source_dict`` into ``target_dict``.

        Nested dictionaries from the source are copied structurally rather
        than stored by reference, so a later merge into the same key cannot
        modify the caller's original option dictionaries.
        """
        for key, value in source_dict.items():
            if isinstance(value, dict):
                existing = target_dict.get(key)
                if isinstance(existing, dict):
                    # recursive dict update
                    self._deep_update(existing, value)
                else:
                    fresh = {}
                    self._deep_update(fresh, value)
                    target_dict[key] = fresh
            else:
                target_dict[key] = value

    def __getitem__(self, key):
        """Allow dictionary-like access to options"""
        return self.merged[key]

    def __contains__(self, key):
        """Allow 'in' operator for options"""
        return key in self.merged

    def as_dict(self):
        """Return the merged options as a dictionary"""
        return self.merged.copy()

    def __bool__(self):
        """Return True if there are any options"""
        return any(opt is not None for opt in self.options_list)

    def debug_print_options(self):
        """Print every stored options dictionary through RESplain in debug mode."""
        for i, options_dict in enumerate(self.options_list):
            RESplain(f"Options {i}:", debug=True)
            if options_dict is not None:
                for key, value in options_dict.items():
                    RESplain(f" {key}: {value}", debug=True)
            else:
                RESplain(" None", "\n", debug=True)
def has_nested_attr(obj, attr_path):
    """Tell whether the dotted attribute chain ``attr_path`` resolves on ``obj``."""
    current = obj
    for name in attr_path.split('.'):
        if hasattr(current, name):
            current = getattr(current, name)
        else:
            return False
    return True


def safe_get_nested(d, keys, default=None):
    """Walk ``keys`` through nested dicts, yielding ``default`` when the walk hits a non-dict."""
    node = d
    for key in keys:
        if not isinstance(node, dict):
            return default
        node = node.get(key, default)
    return node


class AlwaysTrueList:
    """Stand-in container that claims to contain everything."""

    def __contains__(self, item):
        return True

    def __iter__(self):
        # Endless stream of True: the callable never returns the sentinel.
        yield from iter(lambda: True, False)


def parse_range_string(s):
    """Parse a comma-separated list of numbers.

    Entries containing a ``.`` become floats, the others ints; blank entries
    are dropped. A string containing ``"all"`` yields an ``AlwaysTrueList``.
    """
    if "all" in s:
        return AlwaysTrueList()

    tokens = (piece.strip() for piece in s.split(','))
    return [float(token) if '.' in token else int(token) for token in tokens if token]
def parse_range_string_int(s):
    """Parse a string such as ``"0-3, 7"`` into a list of ints.

    ``a-b`` entries expand to the inclusive range ``a..b``; blank entries are
    skipped. A string containing ``"all"`` yields an ``AlwaysTrueList``.
    """
    if "all" in s:
        return AlwaysTrueList()

    result = []
    for part in s.split(','):
        if '-' in part:
            start, end = part.split('-')
            result.extend(range(int(start), int(end) + 1))
        elif part.strip() != '':
            result.append(int(part))
    return result


def parse_tile_sizes(tile_sizes: str):
    """
    Converts multiline string like:
    "1024,1024\n768,1344\n1344,768"

    into:
    [(1024, 1024), (768, 1344), (1344, 768)]
    """
    return [tuple(map(int, line.strip().split(','))) for line in tile_sizes.strip().splitlines() if line.strip()]


# COMFY OPS

def is_video_model(model):
    """Best-effort check whether ``model`` wraps a video model.

    Looks at ``unet_config['image_model']`` of the doubly wrapped inner model
    and returns True when it contains 'video', 'cosmos', 'wan2' or 'ltxv'.
    Any ordinary error while reaching that entry results in False;
    KeyboardInterrupt and SystemExit are no longer swallowed.
    """
    try:
        image_model = model.inner_model.inner_model.model_config.unet_config['image_model']
        return any(tag in image_model for tag in ('video', 'cosmos', 'wan2', 'ltxv'))
    except Exception:
        # Deliberately best-effort: unknown model layouts count as "not video".
        return False


def is_RF_model(model):
    """Return True when the wrapped model's ``model_sampling`` is a ``comfy.model_sampling.CONST``."""
    from comfy import model_sampling
    modelsampling = model.inner_model.inner_model.model_sampling
    return isinstance(modelsampling, model_sampling.CONST)


def get_res4lyf_scheduler_list():
    """Return a copy of ``SCHEDULER_NAMES`` that is guaranteed to contain "beta57"."""
    scheduler_names = SCHEDULER_NAMES.copy()
    if "beta57" not in scheduler_names:
        scheduler_names.append("beta57")
    return scheduler_names


def move_to_same_device(*tensors):
    """Move every tensor to the device of the first one and return them as a tuple."""
    if not tensors:
        return tensors

    device = tensors[0].device
    return tuple(tensor.to(device) for tensor in tensors)


def conditioning_set_values(conditioning, values=None):
    """Return a copy of ``conditioning`` with ``values`` merged into each entry's dict.

    Each entry ``t`` becomes ``[t[0], copy_of_t[1]]``; the input entries and
    their dictionaries are not modified.
    """
    values = {} if values is None else values
    result = []
    for entry in conditioning:
        options = entry[1].copy()
        for key in values:
            options[key] = values[key]
        result.append([entry[0], options])
    return result


# MISC OPS

def initialize_or_scale(tensor, value, steps):
    """Return ``value * tensor``, or a length-``steps`` tensor filled with ``value`` when ``tensor`` is None."""
    if tensor is None:
        return torch.full((steps,), value)
    return value * tensor
def pad_tensor_list_to_max_len(tensors: List[torch.Tensor], dim: int = -2) -> List[torch.Tensor]:
    """Zero-pad each tensor in `tensors` along `dim` up to their common maximum length."""
    target_len = max(tensor.shape[dim] for tensor in tensors)

    def _padded(tensor):
        # Tensors already at the target length are passed through untouched.
        missing = target_len - tensor.shape[dim]
        if missing <= 0:
            return tensor
        filler_shape = list(tensor.shape)
        filler_shape[dim] = missing
        filler = torch.zeros(*filler_shape, dtype=tensor.dtype, device=tensor.device)
        return torch.cat((tensor, filler), dim=dim)

    return [_padded(tensor) for tensor in tensors]


class PrecisionTool:
    """Decorator factory that casts tensor arguments to a configurable float precision."""

    def __init__(self, cast_type='fp64'):
        self.cast_type = cast_type

    def cast_tensor(self, func):
        """Wrap ``func`` so tensor arguments are cast before the call.

        Tensors passed directly, or nested inside dict arguments, are
        converted to the dtype named by ``self.cast_type`` and moved to the
        device of the first tensor found among the positional, then keyword,
        arguments. With any other ``cast_type`` the call passes through.
        """
        dtype_by_name = {
            'fp64': torch.float64,
            'fp32': torch.float32,
            'fp16': torch.float16,
        }

        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            precision = self.cast_type
            if precision not in ['fp64', 'fp32', 'fp16']:
                return func(*args, **kwargs)
            dtype = dtype_by_name[precision]

            # First tensor among args, then kwargs, decides the device.
            first_tensor = next(
                (item for item in (*args, *kwargs.values()) if torch.is_tensor(item)),
                None,
            )
            target_device = None if first_tensor is None else first_tensor.device

            # recursively zs_recast tensors in nested dictionaries
            def recast(data):
                if torch.is_tensor(data):
                    return data.to(dtype).to(target_device)
                if isinstance(data, dict):
                    return {name: recast(item) for name, item in data.items()}
                return data

            cast_args = [recast(arg) for arg in args]
            cast_kwargs = {name: recast(item) for name, item in kwargs.items()}
            return func(*cast_args, **cast_kwargs)

        return wrapper

    def set_cast_type(self, new_value):
        """Select 'fp64', 'fp32' or 'fp16'; anything else resets to 'fp64'."""
        self.cast_type = new_value if new_value in ['fp64', 'fp32', 'fp16'] else 'fp64'


precision_tool = PrecisionTool(cast_type='fp64')
string } self.dtype = torch.float64 self.device = torch.device('cpu') def set_device_and_dtype(self, device=None, dtype=None): """Set the device and dtype for generated weights""" if device is not None: self.device = device if dtype is not None: self.dtype = dtype return self def set_custom_weights(self, config_name, weights): """Set custom weights for a specific configuration""" if config_name not in self._weight_configs: self._weight_configs[config_name] = self._default_config.copy() self._weight_configs[config_name]["frame_weights"] = weights return self def add_weight_config(self, name, **kwargs): if name not in self._weight_configs: self._weight_configs[name] = self._default_config.copy() for key, value in kwargs.items(): if key in self._default_config: self._weight_configs[name][key] = value # ignore unknown parameters return self def get_weight_config(self, name): if name not in self._weight_configs: return None return self._weight_configs[name].copy() def get_frame_weights_by_name(self, name, num_frames, step=None): config = self.get_weight_config(name) if config is None: return None weights_tensor = self._generate_frame_weights( num_frames, config["dynamics"], config["schedule"], config["scale"], config["is_reversed"], config["frame_weights"], step=step, custom_string=config["custom_string"] ) if config["custom_string"] is not None and config["custom_string"].strip() != "" and weights_tensor is not None: # ensure that the custom_string has more than just lines that begin with non-numeric characters custom_string = config["custom_string"].strip() custom_string = re.sub(r"^[^0-9].*", "", custom_string, flags=re.MULTILINE) custom_string = re.sub(r"^\s*$", "", custom_string, flags=re.MULTILINE) if custom_string.strip() != "": # If the custom_string is not empty, show the custom weights formatted_weights = [f"{w:.2f}" for w in weights_tensor.tolist()] RESplain(f"Custom '{name}' for step {step}: {formatted_weights}", debug=True) elif weights_tensor is None: 
def _generate_custom_weights(self, num_frames, custom_string, step=None):
    """
    Generate custom weights based on the provided frame weights from a string with one line per step.

    Args:
        num_frames: Number of frames to generate weights for
        custom_string: The custom weights string to parse
        step: Specific step to use (0-indexed). If None, uses the last line.

    Returns:
        A 1-D tensor (``self.dtype`` on ``self.device``) holding the weights for the
        selected line, or None when ``custom_string`` is None, contains no weight
        lines, the selected line is empty, or its numbers cannot be parsed.

    Features:
        - Each line represents weights for one step
        - Add *[multiplier] at the end of a line to scale those weights (e.g., "1.0, 0.8, 0.6*1.5")
        - Include "interpolate" on its own line to interpolate each line to match num_frames
        - Prefix line with the steps to apply it to (e.g. "0-5: 1.0, 0.8, 0.6")

    Example:
        0-5:1.0, 0.8, 0.6, 0.4, 0.2, 0.0
        6-10:0.0, 0.2, 0.4, 0.6, 0.8, 1.0*1.5
        11-30:0.0, 0.5, 1.0, 0.5, 0.0, 0.0*0.8
        interpolate
    """
    if custom_string is None:
        return None

    interpolate_frames = "interpolate" in custom_string

    # Keep only weight lines: drop blank lines and the "interpolate" directive.
    lines = [
        line for line in custom_string.strip().split('\n')
        if line.strip() and not line.strip().startswith("interp")
    ]
    if not lines:
        return None

    weights_str = None
    if step is not None:
        # A "start-end:" prefix selects the line for every step in that range.
        # The loop is not broken early, so the last matching line wins.
        for line in lines:
            step_range_match = re.match(r'^(\d+)-(\d+):(.*)', line.strip())
            if step_range_match:
                start_step = int(step_range_match.group(1))
                end_step = int(step_range_match.group(2))
                if start_step <= step <= end_step:
                    weights_str = step_range_match.group(3).strip()

    if weights_str is None:
        # No range matched (or no step was given): use the line at index `step`,
        # falling back to the last line when `step` is None or past the end.
        if step is None or step >= len(lines):
            line_index = len(lines) - 1
        else:
            line_index = step
        if line_index < 0:
            return None
        weights_str = lines[line_index].strip()
        if ":" in weights_str:
            weights_str = weights_str.split(":", 1)[1].strip()

    if not weights_str:
        return None

    # Optional trailing "*<multiplier>" scales every weight on the line.
    multiplier = 1.0
    if "*" in weights_str:
        parts = weights_str.rsplit("*", 1)
        if len(parts) == 2:
            weights_str = parts[0].strip()
            try:
                multiplier = float(parts[1].strip())
            except ValueError:
                RESplain(f"Invalid multiplier format: {parts[1]}")

    try:
        weights = [float(w.strip()) for w in weights_str.split(',')]
        weights_tensor = torch.tensor(weights, dtype=self.dtype, device=self.device)

        if multiplier != 1.0:
            weights_tensor = weights_tensor * multiplier

        if interpolate_frames and len(weights_tensor) != num_frames:
            if len(weights_tensor) > 1:
                # reshape(-1) rather than squeeze(): squeeze() would collapse the
                # result to a 0-dim tensor when num_frames == 1.
                weights_tensor = torch.nn.functional.interpolate(
                    weights_tensor.view(1, 1, -1),
                    size=num_frames,
                    mode='linear',
                    align_corners=True
                ).reshape(-1)
            else:
                # If only one weight, repeat it for all frames
                weights_tensor = weights_tensor.repeat(num_frames)
        else:
            if len(weights_tensor) < num_frames:
                # If fewer weights than frames, repeat the last weight
                missing = num_frames - len(weights_tensor)
                weights_tensor = torch.cat([weights_tensor, weights_tensor[-1:].expand(missing)])
            # Trim if too many weights
            if len(weights_tensor) > num_frames:
                weights_tensor = weights_tensor[:num_frames]

        return weights_tensor

    except (ValueError, IndexError) as e:
        RESplain(f"Error parsing custom frame weights: {e}")
        return None
multiplier = float(multiplier_match.group(1)) # Remove the multiplier= from the custom string custom_string = re.sub(r"multiplier\s*=\s*[0-9.]+", "", custom_string).strip() RESplain(f"Custom multiplier detected: {multiplier}", debug=True) if "rate_factor" in custom_string: rate_factor_match = re.search(r"rate_factor\s*=\s*([0-9.]+)", custom_string) if rate_factor_match: rate_factor = float(rate_factor_match.group(1)) # Remove the rate_factor= from the custom string custom_string = re.sub(r"rate_factor\s*=\s*[0-9.]+", "", custom_string).strip() RESplain(f"Custom rate factor detected: {rate_factor}", debug=True) if "start_change_factor" in custom_string: start_change_factor_match = re.search(r"start_change_factor\s*=\s*([0-9.]+)", custom_string) if start_change_factor_match: start_change_factor = float(start_change_factor_match.group(1)) # Remove the start_change_factor= from the custom string custom_string = re.sub(r"start_change_factor\s*=\s*[0-9.]+", "", custom_string).strip() RESplain(f"Custom start change factor detected: {start_change_factor}", debug=True) if custom_string is not None and custom_string.strip() != "" and step is not None: custom_weights = self._generate_custom_weights(num_frames, custom_string, step) if custom_weights is not None: weights = custom_weights weights = torch.flip(weights, [0]) if is_reversed else weights return weights else: RESplain("custom frame weights failed to parse, doing the normal thing...", debug=True) if rate_factor is None: if "fast" in schedule: rate_factor = 0.25 elif "slow" in schedule: rate_factor = 1.0 else: # moderate rate_factor = 0.5 if start_change_factor is None: if "early" in schedule: start_change_factor = 0.0 elif "late" in schedule: start_change_factor = 0.2 else: start_change_factor = 0.0 change_frames = max(round(num_frames * rate_factor), 2) change_start = round(num_frames * start_change_factor) low_value = 1.0 - scale if frame_weights is not None: weights = torch.cat([frame_weights, 
def _generate_constant_schedule(self, change_start, change_frames, low_value, num_frames):
    """Flat schedule: every one of the ``num_frames`` weights equals ``low_value``.

    ``change_start`` and ``change_frames`` are accepted for signature parity
    with the other schedule generators and are not used.
    """
    flat = torch.ones(num_frames)
    return flat * low_value


def _generate_linear_schedule(self, change_start, change_frames, low_value, num_frames):
    """Linear ramp from 1 down to ``low_value``.

    Layout: ``change_start`` leading ones, then ``change_frames`` evenly spaced
    values from 1 to ``low_value``, then the final value repeated. The result is
    cut to ``num_frames`` entries.
    """
    ramp = torch.linspace(1, low_value, change_frames)
    head = torch.cat((torch.ones(change_start), ramp))
    # Over-pad with the last value; the slice below trims to the exact length.
    tail = torch.full((num_frames,), head[-1])
    return torch.cat((head, tail))[:num_frames]
def _generate_easein_schedule(self, change_start, change_frames, low_value, num_frames):
    """Quadratic (t**2) ease-in schedule from 1 down to ``low_value``.

    Layout: ``change_start`` leading ones, then at least 4 frames following
    ``1 - (1 - low_value) * t**2``, then the final value repeated. The result is
    cut to ``num_frames`` entries.

    Padding tensors are created with the dtype and device of the curve, so the
    concatenations also work when ``self.device`` is not the CPU.
    """
    change_frames = max(change_frames, 4)
    t = torch.linspace(0, 1, change_frames, dtype=self.dtype, device=self.device)
    weights = 1 - (1 - low_value) * torch.pow(t, 2)

    # Prepend with change_start frames of 1.0
    lead = torch.full((change_start,), 1.0, dtype=weights.dtype, device=weights.device)
    weights = torch.cat([lead, weights])

    total_frames_to_pad = num_frames - len(weights)
    if total_frames_to_pad > 1:
        # When more than one padded frame follows, the last curve value is replaced
        # by the midpoint between the second-to-last value and low_value; that
        # midpoint is then what gets repeated below.
        weights[-1] = (weights[-2] + low_value) / 2.0

    # Fill remaining with final value (over-padded, then trimmed)
    weights = torch.cat([weights, weights[-1:].expand(num_frames)])
    return weights[:num_frames]


def _generate_middle_schedule(self, change_start, change_frames, low_value, num_frames):
    """Gaussian bump schedule: peaks at 1 in the middle and falls towards ``low_value``.

    The bump spans at least 4 frames and is centred by padding both sides with
    ``low_value``; ``change_start`` extra ``low_value`` frames are then prepended.
    The result never exceeds ``num_frames`` entries.

    Padding is clamped at zero, so asking for fewer frames than the bump length
    truncates the bump instead of passing a negative size to ``torch.full``.
    """
    change_frames = max(change_frames, 4)
    t = torch.linspace(0, 1, change_frames, dtype=self.dtype, device=self.device)
    weights = torch.exp(-0.5 * ((t - 0.5) / 0.2) ** 2)
    weights = weights / torch.max(weights)
    weights = low_value + (1 - low_value) * weights

    total_frames_to_pad = max(num_frames - len(weights), 0)
    pad_left = total_frames_to_pad // 2
    pad_right = total_frames_to_pad - pad_left

    # Build the padding on the same dtype/device as the curve so torch.cat works
    # for non-CPU devices as well.
    fill = {"dtype": weights.dtype, "device": weights.device}
    weights = torch.cat([
        torch.full((pad_left,), low_value, **fill),
        weights,
        torch.full((pad_right,), low_value, **fill),
    ])

    if change_start > 0:
        # Shift the bump later by prepending low_value frames
        weights = torch.cat([torch.full((change_start,), low_value, **fill), weights])

    # Truncate to num_frames (a no-op when the length already matches)
    return weights[:num_frames]
def check_projection_consistency(x, W, b):
    """Measure how well ``x`` survives a round trip through ``W`` and ``b``.

    ``x - b`` is multiplied by the pseudo-inverse of ``W.T`` and the result is
    mapped back with ``W.T`` and ``b``.

    Returns:
        ``(error, in_subspace)``: the norm of ``x`` minus its reconstruction, and
        the result of comparing that norm against the fixed tolerance ``1e-3``.
    """
    W_t = W.T
    coefficients = torch.matmul(x - b, torch.linalg.pinv(W_t))
    reconstruction = torch.matmul(coefficients, W_t) + b
    error = torch.norm(x - reconstruction)
    return error, error < 1e-3


def get_max_dtype(device='cpu'):
    """Return the widest float dtype considered usable on ``device``.

    ``torch.float32`` is returned whenever the MPS backend reports itself as
    available, regardless of ``device``. Otherwise the function tries to create
    a float64 tensor on ``device`` and returns ``torch.float64`` on success,
    ``torch.float32`` if that raises ``RuntimeError`` or ``TypeError``.
    """
    if torch.backends.mps.is_available():
        return torch.float32
    try:
        torch.tensor([0.0], dtype=torch.float64, device=device)
    except (RuntimeError, TypeError):
        return torch.float32
    return torch.float64
prevent crashes by not using tkinter (which requires running in the main thread) from comfy.cli_args import args import comfy.samplers import comfy.utils from nodes import MAX_RESOLUTION from .beta.rk_method_beta import RK_Method_Beta from .beta.rk_noise_sampler_beta import RK_NoiseSampler, NOISE_MODE_NAMES from .helper import get_res4lyf_scheduler_list from .sigmas import get_sigmas from .images import image_resize from .res4lyf import RESplain class SaveImage: def __init__(self): self.output_dir = folder_paths.get_output_directory() self.type = "output" self.prefix_append = "" self.compress_level = 4 @classmethod def INPUT_TYPES(cls): return { "required": { "images": ("IMAGE", { "tooltip": "The images to save."}), "filename_prefix": ("STRING", {"default": "ComfyUI", "tooltip": "The prefix for the file to save. This may include formatting information such as %date:yyyy-MM-dd% or %Empty Latent Image.width% to include values from nodes."}) }, "hidden": { "prompt": "PROMPT", "extra_pnginfo": "EXTRA_PNGINFO" }, } RETURN_TYPES = () FUNCTION = "save_images" OUTPUT_NODE = True CATEGORY = "image" DESCRIPTION = "Saves the input images to your ComfyUI output directory." def save_images(self, images, filename_prefix = "ComfyUI", prompt = None, extra_pnginfo = None ): filename_prefix += self.prefix_append full_output_folder, filename, counter, subfolder, filename_prefix = folder_paths.get_save_image_path(filename_prefix, self.output_dir, images[0].shape[1], images[0].shape[0]) results = list() for (batch_number, image) in enumerate(images): i = 255. 
# adapted from https://github.com/Extraltodeus/sigmas_tools_and_the_golden_scheduler
class SigmasPreview(SaveImage):
    """Node that plots a sigma schedule as a line graph and returns it as an image.

    The graph is also written through the inherited ``save_images`` into the
    temp directory. Optionally the sigma values are printed as a table via
    ``RESplain``.
    """

    def __init__(self):
        self.output_dir = folder_paths.get_temp_directory()
        self.type = "temp"
        # Random 5-character suffix appended to the filename prefix.
        self.prefix_append = "_temp_" + ''.join(random.choice("abcdefghijklmnopqrstupvxyz1234567890") for x in range(5))
        self.compress_level = 4

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "sigmas": ("SIGMAS",),
                "print_as_list" : ("BOOLEAN", {"default": False}),
                "line_color": ("STRING", {"default": "blue"}),
            },
        }

    RETURN_TYPES = ("IMAGE",)
    FUNCTION = "sigmas_preview"
    OUTPUT_NODE = True
    CATEGORY = 'RES4LYF/sigmas'

    @staticmethod
    def tensor_to_graph_image(tensor, color='blue'):
        """Plot ``tensor`` against its index and return the rendered PNG as a PIL image.

        ``tensor`` must support ``.numpy()``; the caller in this class passes a
        CPU tensor.
        """
        fig = plt.figure()
        try:
            plt.plot(tensor.numpy(), marker='o', linestyle='-', color=color)
            plt.title("Graph from Tensor")
            plt.xlabel("Step Number")
            plt.ylabel("Sigma Value")
            with BytesIO() as buf:
                plt.savefig(buf, format='png')
                buf.seek(0)
                # copy() detaches the image from the buffer before it is closed
                image = Image.open(buf).copy()
        finally:
            # Always release the figure, even if plotting or saving failed.
            plt.close(fig)
        return image

    @staticmethod
    def _print_sigmas_report(sigmas):
        """Print statistics, rows of five values and a per-step table for a non-empty ``sigmas``."""
        # Convert to list with 4 decimal places
        sigmas_list = [round(float(s), 4) for s in sigmas.tolist()]

        RESplain("\n" + "="*60)
        RESplain("SIGMAS PREVIEW - PRINT LIST")
        RESplain("="*60)

        # Basic stats
        RESplain(f"Total steps: {len(sigmas_list)}")
        RESplain(f"Min sigma: {min(sigmas_list):.4f}")
        RESplain(f"Max sigma: {max(sigmas_list):.4f}")

        RESplain(f"\nSigma values ({len(sigmas_list)} steps):")
        RESplain("-" * 40)

        # Rows of 5 for readability
        for i in range(0, len(sigmas_list), 5):
            row = sigmas_list[i:i+5]
            row_str = " ".join(f"{val:8.4f}" for val in row)
            RESplain(f"Step {i:2d}-{min(i+4, len(sigmas_list)-1):2d}: {row_str}")

        # Min-max normalisation to 0-1. A flat schedule has zero range; report 0.0
        # for every step instead of dividing by zero (which would print NaN).
        lowest = sigmas.min()
        span = sigmas.max() - lowest
        if span == 0:
            sigmas_percentages = [0.0] * len(sigmas_list)
        else:
            sigmas_percentages = [round(p, 4) for p in ((sigmas - lowest) / span).tolist()]

        RESplain(f"\nNormalized percentages (0.0-1.0):")
        RESplain("-" * 40)

        # Step-by-step breakdown; step size is the drop from the previous sigma
        RESplain("Step | Sigma | Normalized | Step Size")
        RESplain("-----|----------|------------|----------")
        for i, (sigma, pct) in enumerate(zip(sigmas_list, sigmas_percentages)):
            if i > 0:
                step_size = sigmas_list[i-1] - sigma
                RESplain(f"{i:4d} | {sigma:8.4f} | {pct:10.4f} | {step_size:8.4f}")
            else:
                RESplain(f"{i:4d} | {sigma:8.4f} | {pct:10.4f} | {'--':>8}")

        RESplain("="*60 + "\n")

    def sigmas_preview(self, sigmas, print_as_list, line_color):
        """Render ``sigmas`` as a graph, save it, and return it as the node result.

        Returns the dict produced by ``save_images`` with an added ``"result"``
        entry holding a one-element tuple with the image tensor.
        """
        # The report calls min()/max(), which raise on empty input, so it is
        # skipped when there are no sigmas.
        if print_as_list and sigmas.numel() > 0:
            self._print_sigmas_report(sigmas)

        sigmas_graph = self.tensor_to_graph_image(sigmas.cpu(), line_color)
        numpy_image = np.array(sigmas_graph)
        numpy_image = numpy_image / 255.0
        tensor_image = torch.from_numpy(numpy_image)
        tensor_image = tensor_image.unsqueeze(0)  # add a leading batch dimension
        images_tensor = torch.cat([tensor_image], 0)
        output = self.save_images(images_tensor, "SigmasPreview")
        output["result"] = (images_tensor,)
        return output
{"default": "16_channels",}), }, "optional": { "image_1": ("IMAGE",), "image_2": ("IMAGE",), "mask": ("IMAGE",), "latent": ("LATENT",), "vae": ("VAE", ), } } RETURN_TYPES = ("LATENT", "LATENT", "MASK", "LATENT", "INT", "INT", ) RETURN_NAMES = ("latent_1", "latent_2", "mask", "empty_latent", "width", "height", ) FUNCTION = "main" CATEGORY = "RES4LYF/vae" def main(self, width, height, resize_to_input = "false", image_1 = None, image_2 = None, mask = None, invert_mask = False, method = "stretch", interpolation = "lanczos", condition = "always", multiple_of = 0, keep_proportion = False, mask_channel = "red", latent = None, latent_type = "16_channels", vae = None ): ratio = 8 # latent compression factor # this is unfortunately required to avoid apparent non-deterministic outputs. # without setting the seed each time, the outputs of the VAE encode will change with every generation. torch .manual_seed (42) torch.cuda.manual_seed_all(42) image_1 = image_1.clone() if image_1 is not None else None image_2 = image_2.clone() if image_2 is not None else None if latent is not None and resize_to_input == "latent": height, width = latent['samples'].shape[-2:] #height, width = latent['samples'].shape[2:4] height, width = height * ratio, width * ratio elif image_1 is not None and resize_to_input == "image_1": height, width = image_1.shape[1:3] elif image_2 is not None and resize_to_input == "image_2": height, width = image_2.shape[1:3] elif mask is not None and resize_to_input == "mask": height, width = mask.shape[1:3] if latent is not None: c = latent['samples'].shape[1] else: if latent_type == "4_channels": c = 4 else: c = 16 if image_1 is not None: b = image_1.shape[0] elif image_2 is not None: b = image_2.shape[0] else: b = 1 latent = {"samples": torch.zeros((b, c, height // ratio, width // ratio))} latent_1, latent_2 = None, None if image_1 is not None: image_1 = image_resize(image_1, width, height, method, interpolation, condition, multiple_of, keep_proportion) latent_1 = 
class VAEStyleTransferLatent:
    """Node that restyles a latent with the statistics of a reference latent.

    Both latents are passed through a copy of one convolution taken from the
    VAE, the selected method (``"AdaIN"`` or ``"WCT"``) is applied via
    ``apply_style_to_latent``, and the result is mapped back with
    ``invert_conv2d`` (or, for the Stable Cascade stage C coder, in the opposite
    order).
    """

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "method": (["AdaIN", "WCT"], {"default": "AdaIN"}),
                "latent": ("LATENT",),
                "style_ref": ("LATENT",),
                "vae": ("VAE", ),
            },
            "optional": {
            }
        }

    RETURN_TYPES = ("LATENT",)
    RETURN_NAMES = ("latent",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/vae"

    def main(self,
             method = None,
             latent = None,
             style_ref = None,
             vae = False,
             ):
        """Apply the style transfer and return a one-element tuple with the new latent dict.

        Args:
            method: Style method name forwarded to ``apply_style_to_latent``.
            latent: Latent dict to restyle; ``state_info['raw_x']`` is used when
                present, otherwise ``'samples'``.
            style_ref: Latent dict supplying the style; read the same way.
            vae: VAE wrapper exposing ``first_stage_model``.

        Raises:
            ValueError: if ``vae.first_stage_model`` matches none of the supported layouts.
        """
        from comfy.ldm.cascade.stage_c_coder import StageC_coder

        # this is unfortunately required to avoid apparent non-deterministic outputs.
        # without setting the seed each time, the outputs of the VAE encode will change with every generation.
        torch.manual_seed(42)
        torch.cuda.manual_seed_all(42)

        denoised = latent.get('state_info', {}).get('raw_x')
        y0 = style_ref.get('state_info', {}).get('raw_x')

        denoised = latent['samples'] if denoised is None else denoised
        y0 = style_ref['samples'] if y0 is None else y0

        # Tensors with more than 4 dims have their leading dim squeezed
        if denoised.ndim > 4:
            denoised = denoised.squeeze(0)
        if y0.ndim > 4:
            y0 = y0.squeeze(0)

        first_stage = vae.first_stage_model

        if hasattr(first_stage, "up_blocks") or hasattr(first_stage, "decoder"):
            if hasattr(first_stage, "up_blocks"):
                # probably stable cascade stage A
                source_conv = first_stage.up_blocks[0][0]
            else:
                # probably sd15, sdxl, sd35, flux, wan, etc. vae
                source_conv = first_stage.decoder.conv_in

            # Work on a float64 copy so the VAE's own weights are left untouched
            x_embedder = copy.deepcopy(source_conv).to(torch.float64)

            denoised_embed = x_embedder(denoised.to(x_embedder.weight))
            y0_embed = x_embedder(y0.to(x_embedder.weight))

            denoised_embed = apply_style_to_latent(denoised_embed, y0_embed, method)

            denoised_styled = invert_conv2d(x_embedder, denoised_embed, denoised.shape).to(denoised)

        elif isinstance(first_stage, StageC_coder):
            x_embedder = copy.deepcopy(first_stage.encoder.mapper[0]).to(torch.float64)
            #x_embedder = copy.deepcopy(vae.first_stage_model.previewer.blocks[0]).to(torch.float64) # use with strategy for decoder above, but exploding latent problem, 1.E30 etc. quick to nan

            # Here the conv is inverted first and applied afterwards
            denoised_embed = invert_conv2d(x_embedder, denoised, denoised.shape)
            y0_embed = invert_conv2d(x_embedder, y0, y0.shape)

            denoised_embed = apply_style_to_latent(denoised_embed, y0_embed, method)

            denoised_styled = x_embedder(denoised_embed.to(x_embedder.weight))

        else:
            # Fail with a clear message instead of an UnboundLocalError further down
            raise ValueError(
                f"VAEStyleTransferLatent: unsupported VAE first stage model {type(first_stage).__name__}"
            )

        latent_out = latent.copy()
        state_info = latent_out.get('state_info')
        if state_info is not None and state_info.get('raw_x') is not None:
            # latent.copy() is shallow: copy the nested dict before writing so the
            # caller's latent is not modified in place.
            state_info = state_info.copy()
            state_info['raw_x'] = denoised_styled
            latent_out['state_info'] = state_info
        latent_out['samples'] = denoised_styled
        return (latent_out, )
def invert_conv2d(
    conv: torch.nn.Conv2d,
    z: torch.Tensor,
    original_shape: torch.Size,
) -> torch.Tensor:
    """Approximately invert ``conv`` using the pseudo-inverse of its weight matrix.

    For every output location the input patch is estimated as
    ``pinv(W) @ (z - bias)``. The patches are summed back into an image with
    ``F.fold`` and divided by the number of patches covering each pixel, i.e.
    overlapping estimates are averaged.

    Args:
        conv: Convolution to invert. Its weight, bias, stride, padding and
            dilation are read; ``groups`` is not taken into account.
        z: Output of ``conv``, shape ``(B, C_out, H', W')``.
        original_shape: 4-element shape of the tensor that was fed to ``conv``;
            only its last two entries (height and width) are used.

    Returns:
        Tensor of shape ``(B, C_in, H, W)`` with the reconstructed input.

    Raises:
        ValueError: if ``conv.padding`` is a string mode such as ``"same"``.
    """
    import torch.nn.functional as F

    _, _, H, W = original_shape
    C_out, _, kH, kW = conv.weight.shape

    if isinstance(conv.padding, str):
        raise ValueError(f"invert_conv2d does not support padding={conv.padding!r}")
    pad_h, pad_w = conv.padding

    if conv.bias is not None:
        z_nobias = z - conv.bias.view(1, C_out, 1, 1).to(z)
    else:
        z_nobias = z

    # (C_out, C_in*kH*kW) matrix form of the kernel and its pseudo-inverse
    W_flat = conv.weight.view(C_out, -1).to(z)
    W_pinv = torch.linalg.pinv(W_flat)

    batch, channels = z_nobias.shape[:2]
    z_flat = z_nobias.reshape(batch, channels, -1)   # (B, C_out, L)
    x_patches = W_pinv @ z_flat                      # (B, C_in*kH*kW, L)

    # Fold onto the padded canvas with the same geometry the convolution used, so
    # the number of patches L matches what fold expects (including for dilated convs).
    fold_kwargs = {
        "output_size": (H + 2*pad_h, W + 2*pad_w),
        "kernel_size": (kH, kW),
        "dilation": conv.dilation,
        "stride": conv.stride,
    }
    x_sum = F.fold(x_patches, **fold_kwargs)
    count = F.fold(torch.ones_like(x_patches), **fold_kwargs)

    # Average overlapping contributions; the clamp avoids dividing by zero for
    # pixels that no patch touched.
    x_recon = x_sum / count.clamp(min=1e-6)

    if pad_h > 0 or pad_w > 0:
        # Crop the padding border back off
        x_recon = x_recon[..., pad_h:pad_h+H, pad_w:pad_w+W]
    return x_recon
def adain_seq_inplace(content: torch.Tensor, style: torch.Tensor, eps: float = 1e-7) -> torch.Tensor:
    """Adaptive instance normalisation along dim 1, performed in place on ``content``.

    ``content`` is shifted and scaled so that its mean and standard deviation
    over dim 1 match those of ``style`` (``eps`` is added to both standard
    deviations). The same tensor object that was passed as ``content`` is
    modified and returned.
    """
    content_mean = content.mean(1, keepdim=True)
    content_std = content.std(1, keepdim=True) + eps
    style_mean = style.mean(1, keepdim=True)
    style_std = style.std(1, keepdim=True) + eps

    # Statistics were taken before any modification; now rewrite content in place.
    content.sub_(content_mean)
    content.div_(content_std)
    content.mul_(style_std)
    content.add_(style_mean)
    return content
# without setting the seed each time, the outputs of the VAE encode will change with every generation. torch .manual_seed (42) torch.cuda.manual_seed_all(42) images_prev_list, latent_prev_list = [], [] if 'state_info' in latent: #images = vae.decode(latent['state_info']['raw_x'] ) # .to(latent['samples']) ) images = vae.decode(latent['state_info']['denoised'] ) # .to(latent['samples']) ) data_prev_ = latent['state_info']['data_prev_'].squeeze(0) for i in range(data_prev_.shape[0]): images_prev_list.append( vae.decode(data_prev_[i]) ) # .to(latent['samples']) ) else: images = vae.decode(latent['samples']) if len(images.shape) == 5: #Combine batches images = images.reshape(-1, images.shape[-3], images.shape[-2], images.shape[-1]) images = image_resize(images, width, height, method, interpolation, condition, multiple_of, keep_proportion) latent_tensor = vae.encode(images[:,:,:,:3]) if images_prev_list: for i in range(data_prev_.shape[0]): image_data_p = image_resize(images_prev_list[i], width, height, method, interpolation, condition, multiple_of, keep_proportion) latent_prev_list.append( vae.encode(image_data_p[:,:,:,:3]) ) latent_prev = torch.stack(latent_prev_list).unsqueeze(0) #.view_as(latent['state_info']['data_prev_']) #images_prev = image_resize(images_prev, width, height, method, interpolation, condition, multiple_of, keep_proportion) #latent_tensor = vae.encode(image_1[:,:,:,:3]) if 'state_info' in latent: #latent['state_info']['raw_x'] = latent_tensor latent['state_info']['denoised'] = latent_tensor latent['state_info']['data_prev_'] = latent_prev latent['samples'] = latent_tensor.to(latent['samples']) return (latent,) class SigmasSchedulePreview(SaveImage): def __init__(self): self.output_dir = folder_paths.get_temp_directory() self.type = "temp" self.prefix_append = "_temp_" + ''.join(random.choice("abcdefghijklmnopqrstupvxyz1234567890") for x in range(5)) self.compress_level = 4 @classmethod def INPUT_TYPES(cls): return { "required": { "model": 
("MODEL",), "noise_mode": (NOISE_MODE_NAMES, {"default": 'hard', "tooltip": "How noise scales with the sigma schedule. Hard is the most aggressive, the others start strong and drop rapidly."}), "eta": ("FLOAT", {"default": 0.25, "step": 0.01, "min": -1000.0, "max": 1000.0}), "s_noise": ("FLOAT", {"default": 1.00, "step": 0.01, "min": -1000.0, "max": 1000.0}), "denoise": ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step":0.01}), "denoise_alt": ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step":0.01}), "scheduler": (get_res4lyf_scheduler_list(), {"default": "beta57"},), "steps": ("INT", {"default": 30, "min": 1, "max": 10000}), "plot_max": ("FLOAT", {"default": 2.1, "min": -10000, "max": 10000, "step":0.01, "tooltip": "Set to a negative value to have the plot scale automatically."}), "plot_min": ("FLOAT", {"default": 0.0, "min": -10000, "max": 10000, "step":0.01, "tooltip": "Set to a negative value to have the plot scale automatically."}), }, "optional": { "sigmas": ("SIGMAS",), }, } FUNCTION = "plot_schedule" CATEGORY = "RES4LYF/sigmas" OUTPUT_NODE = True @staticmethod def tensor_to_graph_image(tensors, labels, colors, plot_min, plot_max, input_params): plt.figure(figsize=(6.4, 6.4), dpi=320) ax = plt.gca() ax.set_facecolor("black") ax.patch.set_alpha(1.0) for _ in range(50): for tensor, color in zip(tensors, colors): plt.plot(tensor.numpy(), color=color, alpha=0.1) plt.axhline(y=1.0, color='gray', linestyle='dotted', linewidth=1.5) plt.xlabel("Step", color="white", weight="bold", antialiased=False) plt.ylabel("Value", color="white", weight="bold", antialiased=False) ax.tick_params(colors="white") if plot_max > 0: plt.ylim(plot_min, plot_max) input_text = ( f"noise_mode: {input_params['noise_mode']} | " f"eta: {input_params['eta']} | " f"s_noise: {input_params['s_noise']} | " f"denoise: {input_params['denoise']} | " f"denoise_alt: {input_params['denoise_alt']} | " f"scheduler: {input_params['scheduler']}" ) plt.text(0.5, 1.05, input_text, 
def plot_schedule(self, model, noise_mode, eta, s_noise, denoise, denoise_alt, scheduler, steps, plot_min, plot_max, sigmas=None):
    """Plot the per-step SDE quantities of a sigma schedule and save the plot as a preview image.

    Args:
        model: Model handed to ``get_sigmas``, ``RK_Method_Beta.create`` and ``RK_NoiseSampler``.
        noise_mode: Passed to ``get_sde_step`` as ``noise_mode_override``.
        eta: Passed to ``get_sde_step`` for every step.
        s_noise: Multiplier applied to the returned ``su`` (sigma_up) of every step.
        denoise: Forwarded to ``get_sigmas`` when no ``sigmas`` are supplied.
        denoise_alt: Factor the whole schedule is multiplied by before stepping.
        scheduler, steps: Forwarded to ``get_sigmas`` when no ``sigmas`` are supplied.
        plot_min, plot_max: Forwarded to ``tensor_to_graph_image`` (y-axis limits).
        sigmas: Optional pre-built schedule; it is cloned so the caller's tensor is not modified.

    Returns:
        Whatever ``self.save_images`` returns for the rendered plot.
    """
    rk_type      = "res_2s"
    noise_anchor = 1.0

    if sigmas is not None:
        sigmas = sigmas.clone()   # the in-place scaling below must not touch the caller's tensor
    else:
        sigmas = get_sigmas(model, scheduler, steps, denoise)
    sigmas *= denoise_alt

    RK = RK_Method_Beta.create(model, rk_type, noise_anchor, model_device=sigmas.device, work_device=sigmas.device, dtype=sigmas.dtype, extra_options="")
    NS = RK_NoiseSampler(RK, model, device=sigmas.device, dtype=sigmas.dtype, extra_options="")

    sigma_vals               = []
    sigma_next_vals          = []
    sigma_down_vals          = []
    sigma_up_vals            = []
    sigma_plus_up_vals       = []
    sigma_hat_vals           = []
    alpha_ratio_vals         = []
    sigma_step_size_vals     = []
    sigma_step_size_sde_vals = []

    # Walk consecutive (sigma, sigma_next) pairs of the schedule.
    for sigma, sigma_next in zip(sigmas[:-1], sigmas[1:]):
        su, sigma_hat, sd, alpha_ratio = NS.get_sde_step(sigma, sigma_next, eta, noise_mode_override=noise_mode)
        su = su * s_noise

        sigma_vals              .append(sigma)
        sigma_next_vals         .append(sigma_next)
        sigma_down_vals         .append(sd)
        sigma_up_vals           .append(su)
        sigma_plus_up_vals      .append(sigma + su)
        alpha_ratio_vals        .append(alpha_ratio)
        sigma_step_size_vals    .append(sigma - sigma_next)
        sigma_step_size_sde_vals.append(sigma + su - sd)

        # sigma_hat is only recorded for steps where it differs from sigma,
        # so this list may be shorter than the others.
        if sigma_hat != sigma:
            sigma_hat_vals.append(sigma_hat)

    sigma_tensor               = torch.tensor(sigma_vals)
    sigma_next_tensor          = torch.tensor(sigma_next_vals)
    sigma_down_tensor          = torch.tensor(sigma_down_vals)
    sigma_up_tensor            = torch.tensor(sigma_up_vals)
    sigma_plus_up_tensor       = torch.tensor(sigma_plus_up_vals)
    alpha_ratio_tensor         = torch.tensor(alpha_ratio_vals)
    sigma_step_size_tensor     = torch.tensor(sigma_step_size_vals)
    sigma_step_size_sde_tensor = torch.tensor(sigma_step_size_sde_vals)

    tensors = [sigma_tensor, sigma_next_tensor, sigma_down_tensor, sigma_up_tensor]
    labels  = ["$σ$", "$σ_{next}$", "$σ_{down}$", "$σ_{up}$"]
    colors  = ["white", "dodgerblue", "green", "red"]

    # Merge curves that (nearly) coincide so the legend is not cluttered.
    if torch.norm(sigma_next_tensor - sigma_down_tensor) < 1e-2:
        tensors = [sigma_tensor, sigma_next_tensor, sigma_up_tensor]
        labels  = ["$σ$", "$σ_{next,down}$", "$σ_{up}$"]
        colors  = ["white", "cyan", "red"]
    elif torch.norm(sigma_next_tensor - sigma_up_tensor) < 1e-2:
        tensors = [sigma_tensor, sigma_next_tensor, sigma_down_tensor]
        labels  = ["$σ$", "$σ_{next,up}$", "$σ_{down}$"]
        colors  = ["white", "violet", "green"]

    if torch.norm(sigma_tensor - sigma_plus_up_tensor) > 1e-2:
        tensors.append(sigma_plus_up_tensor)
        labels.append("$σ + σ_{up}$")
        colors.append("brown")

    if torch.norm(sigma_step_size_tensor - sigma_step_size_sde_tensor) > 1e-2:
        tensors.append(sigma_step_size_sde_tensor)
        # Raw string: "\h" is not a valid escape sequence in a normal literal.
        labels.append(r"$Δ \hat{t}$")
        colors.append("gold")

    if sigma_hat_vals:
        tensors.append(torch.tensor(sigma_hat_vals))
        labels.append("$σ̂$")
        colors.append("maroon")

        tensors.append(sigma_step_size_tensor)
        labels.append("$σ̂ - σ_{next}$")
        colors.append("darkorange")
    else:
        tensors.append(sigma_step_size_tensor)
        labels.append("$Δt$")
        colors.append("darkorange")

    tensors.append(alpha_ratio_tensor)
    labels.append("$α_{ratio}$")
    colors.append("grey")

    graph_image = self.tensor_to_graph_image(
        tensors,
        labels,
        colors,
        plot_min,
        plot_max,
        input_params={
            "noise_mode":  noise_mode,
            "eta":         eta,
            "s_noise":     s_noise,
            "denoise":     denoise,
            "denoise_alt": denoise_alt,
            "scheduler":   scheduler,
        }
    )

    # Image -> array scaled by 1/255 -> tensor with a leading batch axis.
    numpy_image   = np.array(graph_image)
    numpy_image   = numpy_image / 255.0
    tensor_image  = torch.from_numpy(numpy_image)
    tensor_image  = tensor_image.unsqueeze(0)
    images_tensor = torch.cat([tensor_image], 0)

    return self.save_images(images_tensor, "SigmasSchedulePreview")
# Copied from https://github.com/black-forest-labs/flux/blob/main/src/flux/modules/layers.py
class EmbedND(nn.Module):
    """Build a positional embedding by calling ``rope`` once per axis of ``ids``."""

    def __init__(self, theta: int, axes_dim: List[int]):
        super().__init__()
        self.axes_dim = axes_dim
        self.theta    = theta

    def forward(self, ids: Tensor) -> Tensor:
        """Concatenate the per-axis ``rope`` outputs along dim -3 and insert a new dim at index 2."""
        per_axis = []
        for axis in range(ids.shape[-1]):
            per_axis.append(rope(ids[..., axis], self.axes_dim[axis], self.theta))
        stacked = torch.cat(per_axis, dim=-3)
        return stacked.unsqueeze(2)


class PatchEmbed(nn.Module):
    """Single linear projection from flattened patches to ``out_channels`` features."""

    def __init__(
        self,
        patch_size   = 2,
        in_channels  = 4,
        out_channels = 1024,
        dtype=None, device=None, operations=None
    ):
        super().__init__()
        self.patch_size   = patch_size
        self.out_channels = out_channels

        # The projection consumes one flattened patch: channels * patch height * patch width.
        patch_features = in_channels * patch_size * patch_size
        self.proj = operations.Linear(patch_features, out_channels, bias=True, dtype=dtype, device=device)

    def forward(self, latent):
        """Return ``self.proj(latent)``."""
        return self.proj(latent)


class PooledEmbed(nn.Module):
    """Wrap a ``TimestepEmbedding`` that maps a pooled text embedding to ``hidden_size``."""

    def __init__(self, text_emb_dim, hidden_size, dtype=None, device=None, operations=None):
        super().__init__()
        embedder_kwargs = dict(
            in_channels    = text_emb_dim,
            time_embed_dim = hidden_size,
            dtype          = dtype,
            device         = device,
            operations     = operations,
        )
        self.pooled_embedder = TimestepEmbedding(**embedder_kwargs)

    def forward(self, pooled_embed):
        """Return the embedder's output for ``pooled_embed``."""
        projected = self.pooled_embedder(pooled_embed)
        return projected
class TextProjection(nn.Module):
    """Bias-free linear projection of caption features to ``hidden_size``."""

    def __init__(self, in_features, hidden_size, dtype=None, device=None, operations=None):
        super().__init__()
        self.linear = operations.Linear(
            in_features  = in_features,
            out_features = hidden_size,
            bias         = False,
            dtype        = dtype,
            device       = device,
        )

    def forward(self, caption):
        """Return ``self.linear(caption)``."""
        return self.linear(caption)


class HDFeedForwardSwiGLU(nn.Module):
    """SwiGLU feed-forward: ``w2(silu(w1(x)) * w3(x))`` with optional style hooks."""

    def __init__(
        self,
        dim                : int,
        hidden_dim         : int,
        multiple_of        : int = 256,
        ffn_dim_multiplier : Optional[float] = None,
        dtype=None, device=None, operations=None
    ):
        super().__init__()

        # Inner width: 2/3 of the requested size, optionally rescaled,
        # then rounded up to the next multiple of `multiple_of`.
        inner_dim = int(2 * hidden_dim / 3)
        if ffn_dim_multiplier is not None:
            inner_dim = int(ffn_dim_multiplier * inner_dim)
        n_chunks  = (inner_dim + multiple_of - 1) // multiple_of
        inner_dim = multiple_of * n_chunks

        linear_kwargs = dict(bias=False, dtype=dtype, device=device)
        self.w1 = operations.Linear(dim,       inner_dim, **linear_kwargs)
        self.w2 = operations.Linear(inner_dim, dim,       **linear_kwargs)
        self.w3 = operations.Linear(dim,       inner_dim, **linear_kwargs)

    def forward(self, x, style_block=None):
        """Apply the feed-forward.

        The hooked path runs only when ``style_block`` is given, ``x`` has more
        than one entry along dim 0 and ``x`` is 3-dimensional. In that path
        ``style_block(tensor, name)`` is called after each stage with the names
        "ff_1", "ff_1_silu", "ff_3", "ff_13" and "ff_2", in that order.
        """
        silu = torch.nn.functional.silu

        hooked = style_block is not None and x.shape[0] > 1 and x.ndim == 3
        if not hooked:
            return self.w2(silu(self.w1(x)) * self.w3(x))

        gate_branch = style_block(self.w1(x), "ff_1")
        gate_branch = style_block(silu(gate_branch), "ff_1_silu")

        up_branch = style_block(self.w3(x), "ff_3")

        fused = style_block(gate_branch * up_branch, "ff_13")

        return style_block(self.w2(fused), "ff_2")
class HDMOEFeedForwardSwiGLU(nn.Module):
    """Mixture-of-experts feed-forward: one shared expert plus top-k routed experts.

    The shared expert is built with ``hidden_dim // 2``; each routed expert with
    the full ``hidden_dim``. ``HDMoEGate`` supplies the per-token expert weights
    and indices.
    """
    def __init__(
        self,
        dim                   : int,
        hidden_dim            : int,
        num_routed_experts    : int,
        num_activated_experts : int,
        dtype=None, device=None, operations=None
    ):
        super().__init__()
        self.shared_experts = HDFeedForwardSwiGLU(dim, hidden_dim // 2, dtype=dtype, device=device, operations=operations)
        self.experts        = nn.ModuleList([HDFeedForwardSwiGLU(dim, hidden_dim     , dtype=dtype, device=device, operations=operations) for i in range(num_routed_experts)])
        self.gate           = HDMoEGate(dim, num_routed_experts, num_activated_experts, dtype=dtype, device=device)
        self.num_activated_experts = num_activated_experts

    def forward(self, x, style_block=None):
        """Run the shared and routed experts and return their weighted sum.

        NOTE(review): ``style_block`` defaults to None but is dereferenced
        unconditionally (``style_block.FF_SHARED`` on the first line), so callers
        must always pass one.

        ``style_block(tensor, name)`` is invoked with the names "shared",
        "topk_weight", "separate", "sum" and "out", in that order.
        """
        y_shared = self.shared_experts(x, style_block.FF_SHARED)    # 1,4096,2560 -> 1,4096,2560
        y_shared = style_block(y_shared, "shared")

        topk_weight, topk_idx = self.gate(x)   # -> 4096,2   4096,2
        topk_weight = style_block(topk_weight, "topk_weight")

        # With a batch > 1, the style gate on, and not in recon mode, entry 0 is
        # forced (in place) to use the same expert routing as entry 1.
        if y_shared.shape[0] > 1 and style_block.gate[0] and not HDModel.RECON_MODE:
            topk_idx[0] = topk_idx[1]

        # Flatten everything after dim 0 so the indices line up with the repeated tokens below.
        tk_idx_flat = topk_idx.view(topk_idx.shape[0], -1)

        # Each token is repeated once per activated expert along the token axis.
        x = x.repeat_interleave(self.num_activated_experts, dim=-2)
        y = torch.empty_like(x)

        if style_block.gate[0] and not HDModel.RECON_MODE and y_shared.shape[0] > 1:
            # Batched path: tokens are gathered per batch entry so the expert sees a
            # batch dimension and its style hooks (FF_SEPARATE) can act across entries.
            for i, expert in enumerate(self.experts):  # TODO: check for empty expert lists and continue if found to avoid CUBLAS errors
                x_list = []
                for b in range(x.shape[0]):
                    x_sel = x[b][tk_idx_flat[b]==i]
                    x_list.append(x_sel)
                # torch.stack needs every batch entry to have selected the same number of tokens.
                # NOTE(review): only entries 0 and 1 are forced to agree above - confirm for batch > 2.
                x_list = torch.stack(x_list, dim=0)
                x_out = expert(x_list, style_block.FF_SEPARATE).to(x.dtype)
                for b in range(y.shape[0]):
                    y[b][tk_idx_flat[b]==i] = x_out[b]
        else:
            # Plain path: all tokens routed to expert i are processed together, without style hooks.
            for i, expert in enumerate(self.experts):
                x_sel = x[tk_idx_flat == i, :]
                if x_sel.shape[0] == 0:   # nothing routed to this expert
                    continue
                y[tk_idx_flat == i, :] = expert(x_sel).to(x.dtype)

        y = style_block(y, "separate")

        # Weighted sum over the k activated experts of every token.
        y_sum = torch.einsum('abk,abkd->abd', topk_weight, y.view(*topk_weight.shape, -1))
        y_sum = style_block(y_sum, "sum")

        y_sum = y_sum.view_as(y_shared) + y_shared
        y_sum = style_block(y_sum, "out")

        return y_sum


def apply_passthrough(denoised_embed, *args, **kwargs):
    """Return ``denoised_embed`` unchanged; any extra arguments are ignored."""
    return denoised_embed


class AttentionBuffer:
    # Starts as an empty dict; attention() below overwrites it with its most recent result.
    buffer = {}


def attention(q: Tensor, k: Tensor, v: Tensor, rope: Tensor, mask: Optional[Tensor] = None):
    """Apply ``apply_rope`` to q/k, run attention, and return the result.

    q, k and v are each viewed with their last two dimensions merged before the
    call. ``attention_pytorch`` is used when a mask is given, otherwise
    ``optimized_attention``. The result is also stored on
    ``AttentionBuffer.buffer``.
    """
    q, k = apply_rope(q, k, rope)
    if mask is not None:
        AttentionBuffer.buffer = attention_pytorch(
            q.view(q.shape[0], -1, q.shape[-1] * q.shape[-2]),
            k.view(k.shape[0], -1, k.shape[-1] * k.shape[-2]),
            v.view(v.shape[0], -1, v.shape[-1] * v.shape[-2]),
            q.shape[2],   # presumably the head count - TODO confirm against the callee's signature
            mask=mask,
            )
    else:
        AttentionBuffer.buffer = optimized_attention(
            q.view(q.shape[0], -1, q.shape[-1] * q.shape[-2]),
            k.view(k.shape[0], -1, k.shape[-1] * k.shape[-2]),
            v.view(v.shape[0], -1, v.shape[-1] * v.shape[-2]),
            q.shape[2],   # presumably the head count - TODO confirm against the callee's signature
            mask=mask,
            )
    return AttentionBuffer.buffer
operations.Linear (self.inner_dim, self.inner_dim, dtype=dtype, device=device) self.to_v = operations.Linear (self.inner_dim, self.inner_dim, dtype=dtype, device=device) self.to_out = operations.Linear (self.inner_dim, self.out_dim, dtype=dtype, device=device) self.q_rms_norm = operations.RMSNorm(self.inner_dim, eps, dtype=dtype, device=device) self.k_rms_norm = operations.RMSNorm(self.inner_dim, eps, dtype=dtype, device=device) if not single: self.to_q_t = operations.Linear (self.query_dim, self.inner_dim, dtype=dtype, device=device) self.to_k_t = operations.Linear (self.inner_dim, self.inner_dim, dtype=dtype, device=device) self.to_v_t = operations.Linear (self.inner_dim, self.inner_dim, dtype=dtype, device=device) self.to_out_t = operations.Linear (self.inner_dim, self.out_dim, dtype=dtype, device=device) self.q_rms_norm_t = operations.RMSNorm(self.inner_dim, eps, dtype=dtype, device=device) self.k_rms_norm_t = operations.RMSNorm(self.inner_dim, eps, dtype=dtype, device=device) def forward( self, img : FloatTensor, img_masks : Optional[FloatTensor] = None, txt : Optional[FloatTensor] = None, rope : FloatTensor = None, mask : Optional[FloatTensor] = None, update_cross_attn : Optional[Dict]= None, style_block = None, ) -> Tensor: bsz = img.shape[0] img_q = self.to_q(img) img_k = self.to_k(img) img_v = self.to_v(img) img_q = style_block.img.ATTN(img_q, "q_proj") img_k = style_block.img.ATTN(img_k, "k_proj") img_v = style_block.img.ATTN(img_v, "v_proj") img_q = self.q_rms_norm(img_q) img_k = self.k_rms_norm(img_k) img_q = style_block.img.ATTN(img_q, "q_norm") img_k = style_block.img.ATTN(img_k, "k_norm") inner_dim = img_k.shape[-1] head_dim = inner_dim // self.heads img_q = img_q.view(bsz, -1, self.heads, head_dim) img_k = img_k.view(bsz, -1, self.heads, head_dim) img_v = img_v.view(bsz, -1, self.heads, head_dim) if img_masks is not None: img_k = img_k * img_masks.view(bsz, -1, 1, 1) if self.single: attn = attention(img_q, img_k, img_v, rope=rope, mask=mask) attn = 
style_block.img.ATTN(attn, "out") return self.to_out(attn) else: txt_q = self.to_q_t(txt) txt_k = self.to_k_t(txt) txt_v = self.to_v_t(txt) txt_q = style_block.txt.ATTN(txt_q, "q_proj") txt_k = style_block.txt.ATTN(txt_k, "k_proj") txt_v = style_block.txt.ATTN(txt_v, "v_proj") txt_q = self.q_rms_norm_t(txt_q) txt_k = self.k_rms_norm_t(txt_k) txt_q = style_block.txt.ATTN(txt_q, "q_norm") txt_k = style_block.txt.ATTN(txt_k, "k_norm") txt_q = txt_q.view(bsz, -1, self.heads, head_dim) txt_k = txt_k.view(bsz, -1, self.heads, head_dim) txt_v = txt_v.view(bsz, -1, self.heads, head_dim) img_len = img_q.shape[1] txt_len = txt_q.shape[1] attn = attention(torch.cat([img_q, txt_q], dim=1), torch.cat([img_k, txt_k], dim=1), torch.cat([img_v, txt_v], dim=1), rope=rope, mask=mask) img_attn, txt_attn = torch.split(attn, [img_len, txt_len], dim=1) #1, 4480, 2560 img_attn = style_block.img.ATTN(img_attn, "out") txt_attn = style_block.txt.ATTN(txt_attn, "out") if update_cross_attn is not None: if not update_cross_attn['skip_cross_attn']: UNCOND = update_cross_attn['UNCOND'] if UNCOND: llama_start = update_cross_attn['src_llama_start'] llama_end = update_cross_attn['src_llama_end'] t5_start = update_cross_attn['src_t5_start'] t5_end = update_cross_attn['src_t5_end'] txt_src = torch.cat([txt[:,t5_start:t5_end,:], txt[:,128+llama_start:128+llama_end,:], txt[:,256+llama_start:256+llama_end],], dim=-2).float() self.c_src = txt_src.transpose(-2,-1).squeeze(0) # shape [C,1] else: llama_start = update_cross_attn['tgt_llama_start'] llama_end = update_cross_attn['tgt_llama_end'] t5_start = update_cross_attn['tgt_t5_start'] t5_end = update_cross_attn['tgt_t5_end'] lamb = update_cross_attn['lamb'] erase = update_cross_attn['erase'] txt_guide = torch.cat([txt[:,t5_start:t5_end,:], txt[:,128+llama_start:128+llama_end,:], txt[:,256+llama_start:256+llama_end],], dim=-2).float() c_guide = txt_guide.transpose(-2,-1).squeeze(0) # [C,1] Wv_old = self.to_v_t.weight.data.float() # [C,C] Wk_old = 
#########################################################################################################################################################################
class HDBlockDouble(nn.Module):
    """Double-stream block: joint attention over image and text tokens, then a separate feed-forward per stream.

    ``adaLN_modulation`` turns ``clip`` into 12 modulation tensors (shift, scale
    and gate for the attention and feed-forward stages of each stream). The image
    stream uses the mixture-of-experts feed-forward, the text stream a plain
    SwiGLU one.
    """
    # Class-level dict; not referenced anywhere inside this class.
    buffer = {}

    def __init__(
        self,
        dim                   : int,
        heads                 : int,
        head_dim              : int,
        num_routed_experts    : int = 4,
        num_activated_experts : int = 2,
        dtype=None, device=None, operations=None
    ):
        super().__init__()
        self.adaLN_modulation = nn.Sequential(
            nn.SiLU(),
            operations.Linear(dim, 12*dim, bias=True, dtype=dtype, device=device)
        )

        self.norm1_i = operations.LayerNorm(dim, eps = 1e-06, elementwise_affine = False, dtype=dtype, device=device)
        self.norm1_t = operations.LayerNorm(dim, eps = 1e-06, elementwise_affine = False, dtype=dtype, device=device)

        self.attn1   = HDAttention         (dim, heads, head_dim, single=False, dtype=dtype, device=device, operations=operations)

        self.norm3_i = operations.LayerNorm(dim, eps = 1e-06, elementwise_affine = False, dtype=dtype, device=device)
        self.ff_i    = HDMOEFeedForwardSwiGLU(dim, 4*dim, num_routed_experts, num_activated_experts, dtype=dtype, device=device, operations=operations)

        self.norm3_t = operations.LayerNorm(dim, eps = 1e-06, elementwise_affine = False, dtype=dtype, device=device)
        self.ff_t    = HDFeedForwardSwiGLU(dim, 4*dim, dtype=dtype, device=device, operations=operations)

    def forward(
        self,
        img               : FloatTensor,
        img_masks         : Optional[FloatTensor] = None,
        txt               : Optional[FloatTensor] = None,
        clip              : Optional[FloatTensor] = None,   # clip = t + p_embedder (from pooled)
        rope              : FloatTensor = None,
        mask              : Optional[FloatTensor] = None,
        update_cross_attn : Optional[Dict]= None,
        style_block = None,
    ) -> FloatTensor:
        """Run one attention stage and one feed-forward stage on both streams.

        Returns the tuple ``(img, txt)`` (the annotation names a single tensor).
        ``img`` and ``txt`` are updated in place (``+=``), so the tensors passed
        in by the caller are modified.

        ``style_block.img`` / ``style_block.txt`` are called after every stage
        with the names "attn_norm", "attn_norm_mod", "attn", "attn_gated",
        "attn_res", "ff_norm", "ff_norm_mod", "ff", "ff_gated" and "ff_res";
        ``style_block`` itself is also handed to the attention module and its
        ``.img.FF`` / ``.txt.FF`` members to the feed-forwards.
        """
        # 12 chunks along the last dim: (shift, scale, gate) x (attention, mlp) x (img, txt).
        img_msa_shift, img_msa_scale, img_msa_gate, img_mlp_shift, img_mlp_scale, img_mlp_gate, \
        txt_msa_shift, txt_msa_scale, txt_msa_gate, txt_mlp_shift, txt_mlp_scale, txt_mlp_gate = self.adaLN_modulation(clip)[:,None].chunk(12, dim=-1)   # 1,1,2560

        # ATTENTION: normalize, modulate, attend jointly, gate, add residual.
        img_norm = self.norm1_i(img)
        txt_norm = self.norm1_t(txt)

        img_norm = style_block.img(img_norm, "attn_norm")
        txt_norm = style_block.txt(txt_norm, "attn_norm")

        img_norm = img_norm * (1+img_msa_scale) + img_msa_shift
        txt_norm = txt_norm * (1+txt_msa_scale) + txt_msa_shift

        img_norm = style_block.img(img_norm, "attn_norm_mod")
        txt_norm = style_block.txt(txt_norm, "attn_norm_mod")

        img_attn, txt_attn = self.attn1(img_norm, img_masks, txt_norm, rope=rope, mask=mask, update_cross_attn=update_cross_attn, style_block=style_block)

        img_attn = style_block.img(img_attn, "attn")
        txt_attn = style_block.txt(txt_attn, "attn")

        img_attn *= img_msa_gate
        txt_attn *= txt_msa_gate

        img_attn = style_block.img(img_attn, "attn_gated")
        txt_attn = style_block.txt(txt_attn, "attn_gated")

        img += img_attn   # in-place residual add
        txt += txt_attn   # in-place residual add

        img = style_block.img(img, "attn_res")
        txt = style_block.txt(txt, "attn_res")

        # FEED FORWARD
        img_norm = self.norm3_i(img)
        txt_norm = self.norm3_t(txt)

        img_norm = style_block.img(img_norm, "ff_norm")
        txt_norm = style_block.txt(txt_norm, "ff_norm")

        img_norm = img_norm * (1+img_mlp_scale) + img_mlp_shift
        txt_norm = txt_norm * (1+txt_mlp_scale) + txt_mlp_shift

        img_norm = style_block.img(img_norm, "ff_norm_mod")
        txt_norm = style_block.txt(txt_norm, "ff_norm_mod")

        img_ff_i = self.ff_i(img_norm, style_block.img.FF)
        txt_ff_t = self.ff_t(txt_norm, style_block.txt.FF)

        img_ff_i = style_block.img(img_ff_i, "ff")
        txt_ff_t = style_block.txt(txt_ff_t, "ff")

        img_ff_i *= img_mlp_gate
        txt_ff_t *= txt_mlp_gate

        img_ff_i = style_block.img(img_ff_i, "ff_gated")
        txt_ff_t = style_block.txt(txt_ff_t, "ff_gated")

        img += img_ff_i   # in-place residual add
        txt += txt_ff_t   # in-place residual add

        img = style_block.img(img, "ff_res")
        txt = style_block.txt(txt, "ff_res")

        return img, txt
style_block.img(img_norm, "attn_norm_mod") # img_attn = self.attn1(img_norm, img_masks, rope=rope, mask=mask, style_block=style_block) img_attn = style_block.img(img_attn, "attn") img_attn *= img_msa_gate img_attn = style_block.img(img_attn, "attn_gated") img += img_attn img = style_block.img(img, "attn_res") img_norm = self.norm3_i(img) img_norm = style_block.img(img_norm, "ff_norm") img_norm = img_norm * (1+img_mlp_scale) + img_mlp_shift img_norm = style_block.img(img_norm, "ff_norm_mod") img_ff_i = self.ff_i(img_norm, style_block.img.FF) img_ff_i = style_block.img(img_ff_i, "ff") # fused... "ff" + "attn" img_ff_i *= img_mlp_gate img_ff_i = style_block.img(img_ff_i, "ff_gated") # img += img_ff_i img = style_block.img(img, "ff_res") # return img ######################################################################################################################################################################### class HDModel(nn.Module): CHANNELS = 2560 RECON_MODE = False def __init__( self, patch_size : Optional[int] = None, in_channels : int = 64, out_channels : Optional[int] = None, num_layers : int = 16, num_single_layers : int = 32, attention_head_dim : int = 128, num_attention_heads : int = 20, caption_channels : List[int] = None, text_emb_dim : int = 2048, num_routed_experts : int = 4, num_activated_experts : int = 2, axes_dims_rope : Tuple[int, int] = ( 32, 32), max_resolution : Tuple[int, int] = (128, 128), llama_layers : List[int] = None, image_model = None, # unused, what was this supposed to be?? 
dtype=None, device=None, operations=None ): self.patch_size = patch_size self.num_attention_heads = num_attention_heads self.attention_head_dim = attention_head_dim self.num_layers = num_layers self.num_single_layers = num_single_layers self.gradient_checkpointing = False super().__init__() self.dtype = dtype self.out_channels = out_channels or in_channels self.inner_dim = self.num_attention_heads * self.attention_head_dim self.llama_layers = llama_layers self.t_embedder = TimestepEmbed( self.inner_dim, dtype=dtype, device=device, operations=operations) self.p_embedder = PooledEmbed(text_emb_dim, self.inner_dim, dtype=dtype, device=device, operations=operations) self.x_embedder = PatchEmbed( patch_size = patch_size, in_channels = in_channels, out_channels = self.inner_dim, dtype=dtype, device=device, operations=operations ) self.pe_embedder = EmbedND(theta=10000, axes_dim=axes_dims_rope) self.double_stream_blocks = nn.ModuleList( [ HDBlock( dim = self.inner_dim, heads = self.num_attention_heads, head_dim = self.attention_head_dim, num_routed_experts = num_routed_experts, num_activated_experts = num_activated_experts, block_type = BlockType.Double, dtype=dtype, device=device, operations=operations ) for i in range(self.num_layers) ] ) self.single_stream_blocks = nn.ModuleList( [ HDBlock( dim = self.inner_dim, heads = self.num_attention_heads, head_dim = self.attention_head_dim, num_routed_experts = num_routed_experts, num_activated_experts = num_activated_experts, block_type = BlockType.Single, dtype=dtype, device=device, operations=operations ) for i in range(self.num_single_layers) ] ) self.final_layer = HDLastLayer(self.inner_dim, patch_size, self.out_channels, dtype=dtype, device=device, operations=operations) caption_channels = [caption_channels[1], ] * (num_layers + num_single_layers) + [caption_channels[0], ] caption_projection = [] for caption_channel in caption_channels: caption_projection.append(TextProjection(in_features=caption_channel, 
hidden_size=self.inner_dim, dtype=dtype, device=device, operations=operations)) self.caption_projection = nn.ModuleList(caption_projection) self.max_seq = max_resolution[0] * max_resolution[1] // (patch_size * patch_size) def prepare_contexts(self, llama3, context, bsz, img_num_fea): contexts = llama3.movedim(1, 0) contexts = [contexts[k] for k in self.llama_layers] # len == 48..... of tensors that are 1,143,4096 if self.caption_projection is not None: contexts_list = [] for i, cxt in enumerate(contexts): cxt = self.caption_projection[i](cxt) # linear in_features=4096, out_features=2560 len(self.caption_projection) == 49 cxt = cxt.view(bsz, -1, img_num_fea) contexts_list.append(cxt) contexts = contexts_list context = self.caption_projection[-1](context) context = context.view(bsz, -1, img_num_fea) contexts.append(context) # len == 49...... of tensors that are 1,143,2560. last chunk is T5 return contexts ### FORWARD ... FORWARD ... FORWARD ... FORWARD ... FORWARD ... FORWARD ... FORWARD ... FORWARD ... FORWARD ... FORWARD ... FORWARD ... FORWARD ... 
FORWARD ### def forward( self, x : Tensor, t : Tensor, y : Optional[Tensor] = None, context : Optional[Tensor] = None, encoder_hidden_states_llama3 = None, # 1,32,143,4096 image_cond = None, # HiDream E1 control = None, transformer_options = {}, mask : Optional[Tensor] = None, ) -> Tensor: x_orig = x.clone() b, c, h, w = x.shape if image_cond is not None: # HiDream E1 x = torch.cat([x, image_cond], dim=-1) h_len = ((h + (self.patch_size // 2)) // self.patch_size) # h_len 96 w_len = ((w + (self.patch_size // 2)) // self.patch_size) # w_len 96 img_len = h_len * w_len txt_slice = slice(img_len, None) img_slice = slice(None, img_len) SIGMA = t[0].clone() / 1000 EO = transformer_options.get("ExtraOptions", ExtraOptions("")) if EO is not None: EO.mute = True if EO("zero_heads"): HEADS = 0 else: HEADS = 20 StyleMMDiT = transformer_options.get('StyleMMDiT', StyleMMDiT_Model()) StyleMMDiT.set_len(h_len, w_len, img_slice, txt_slice, HEADS=HEADS) StyleMMDiT.Retrojector = self.Retrojector if hasattr(self, "Retrojector") else None transformer_options['StyleMMDiT'] = None x_tmp = transformer_options.get("x_tmp") if x_tmp is not None: x_tmp = x_tmp.expand(x.shape[0], -1, -1, -1).clone() img = comfy.ldm.common_dit.pad_to_patch_size(x_tmp, (self.patch_size, self.patch_size)) else: img = comfy.ldm.common_dit.pad_to_patch_size(x, (self.patch_size, self.patch_size)) y0_style, img_y0_style = None, None img_orig, t_orig, y_orig, context_orig, llama3_orig = clone_inputs(img, t, y, context, encoder_hidden_states_llama3) weight = -1 * transformer_options.get("regional_conditioning_weight", 0.0) floor = -1 * transformer_options.get("regional_conditioning_floor", 0.0) update_cross_attn = transformer_options.get("update_cross_attn") z_ = transformer_options.get("z_") # initial noise and/or image+noise from start of rk_sampler_beta() rk_row = transformer_options.get("row") # for "smart noise" if z_ is not None: x_init = z_[rk_row].to(x) elif 'x_init' in transformer_options: x_init = 
transformer_options.get('x_init').to(x) # recon loop to extract exact noise pred for scattersort guide assembly HDModel.RECON_MODE = StyleMMDiT.noise_mode == "recon" recon_iterations = 2 if StyleMMDiT.noise_mode == "recon" else 1 for recon_iter in range(recon_iterations): y0_style = StyleMMDiT.guides y0_style_active = True if type(y0_style) == torch.Tensor else False HDModel.RECON_MODE = True if StyleMMDiT.noise_mode == "recon" and recon_iter == 0 else False if StyleMMDiT.noise_mode == "recon" and recon_iter == 1: x_recon = x_tmp if x_tmp is not None else x_orig noise_prediction = x_recon + (1-SIGMA.to(x_recon)) * eps.to(x_recon) denoised = x_recon - SIGMA.to(x_recon) * eps.to(x_recon) denoised = StyleMMDiT.apply_recon_lure(denoised, y0_style) new_x = (1-SIGMA.to(denoised)) * denoised + SIGMA.to(denoised) * noise_prediction img_orig = img = comfy.ldm.common_dit.pad_to_patch_size(new_x, (self.patch_size, self.patch_size)) x_init = noise_prediction elif StyleMMDiT.noise_mode == "bonanza": x_init = torch.randn_like(x_init) if y0_style_active: SIGMA_ADAIN = (SIGMA * EO("eps_adain_sigma_factor", 1.0)).to(y0_style) y0_style_noised = (1-SIGMA_ADAIN) * y0_style + SIGMA_ADAIN * x_init[0:1].to(y0_style) #always only use first batch of noise to avoid broadcasting img_y0_style_orig = comfy.ldm.common_dit.pad_to_patch_size(y0_style_noised, (self.patch_size, self.patch_size)) mask_zero = None out_list = [] for cond_iter in range(len(transformer_options['cond_or_uncond'])): UNCOND = transformer_options['cond_or_uncond'][cond_iter] == 1 if update_cross_attn is not None: update_cross_attn['UNCOND'] = UNCOND bsz_style = y0_style.shape[0] if y0_style_active else 0 bsz = 1 if HDModel.RECON_MODE else bsz_style + 1 img, t, y, context, llama3 = clone_inputs(img_orig, t_orig, y_orig, context_orig, llama3_orig, index=cond_iter) mask = None if not UNCOND and 'AttnMask' in transformer_options: # and weight != 0: AttnMask = transformer_options['AttnMask'] mask = 
transformer_options['AttnMask'].attn_mask.mask.to('cuda') if mask_zero is None: mask_zero = torch.ones_like(mask) #img_len = transformer_options['AttnMask'].img_len mask_zero[img_len:, img_len:] = mask[img_len:, img_len:] if weight == 0: context = transformer_options['RegContext'].context.to(context.dtype).to(context.device) context = context.view(128, -1, context.shape[-1]).sum(dim=-2) # 128 !!! llama3 = transformer_options['RegContext'].llama3 .to(llama3 .dtype).to(llama3 .device) mask = None else: context = transformer_options['RegContext'].context.to(context.dtype).to(context.device) llama3 = transformer_options['RegContext'].llama3 .to(llama3 .dtype).to(llama3 .device) if UNCOND and 'AttnMask_neg' in transformer_options: # and weight != 0: AttnMask = transformer_options['AttnMask_neg'] mask = transformer_options['AttnMask_neg'].attn_mask.mask.to('cuda') if mask_zero is None: mask_zero = torch.ones_like(mask) img_len = transformer_options['AttnMask_neg'].img_len mask_zero[img_len:, img_len:] = mask[img_len:, img_len:] if weight == 0: context = transformer_options['RegContext_neg'].context.to(context.dtype).to(context.device) context = context.view(128, -1, context.shape[-1]).sum(dim=-2) # 128 !!! 
llama3 = transformer_options['RegContext_neg'].llama3 .to(llama3 .dtype).to(llama3 .device) mask = None else: context = transformer_options['RegContext_neg'].context.to(context.dtype).to(context.device) llama3 = transformer_options['RegContext_neg'].llama3 .to(llama3 .dtype).to(llama3 .device) elif UNCOND and 'AttnMask' in transformer_options: AttnMask = transformer_options['AttnMask'] mask = transformer_options['AttnMask'].attn_mask.mask.to('cuda') if mask_zero is None: mask_zero = torch.ones_like(mask) #img_len = transformer_options['AttnMask'].img_len mask_zero[img_len:, img_len:] = mask[img_len:, img_len:] if weight == 0: # ADDED 5/23/2025 context = transformer_options['RegContext'].context.to(context.dtype).to(context.device) # ADDED 5/26/2025 14:53 context = context.view(128, -1, context.shape[-1]).sum(dim=-2) # 128 !!! llama3 = transformer_options['RegContext'].llama3 .to(llama3 .dtype).to(llama3 .device) mask = None else: A = context B = transformer_options['RegContext'].context context = A.repeat(1, (B.shape[1] // A.shape[1]) + 1, 1)[:, :B.shape[1], :] A = llama3 B = transformer_options['RegContext'].llama3 llama3 = A.repeat(1, 1, (B.shape[2] // A.shape[2]) + 1, 1)[:,:, :B.shape[2], :] if y0_style_active and not HDModel.RECON_MODE: if mask is None: context, y, llama3 = StyleMMDiT.apply_style_conditioning( UNCOND = UNCOND, base_context = context, base_y = y, base_llama3 = llama3, ) else: context = context.repeat(bsz_style + 1, 1, 1) y = y.repeat(bsz_style + 1, 1) if y is not None else None llama3 = llama3.repeat(bsz_style + 1, 1, 1, 1) if llama3 is not None else None img_y0_style = img_y0_style_orig.clone() if mask is not None and not type(mask[0][0].item()) == bool: mask = mask.to(x.dtype) if mask_zero is not None and not type(mask_zero[0][0].item()) == bool: mask_zero = mask_zero.to(x.dtype) # prep embeds t = self.expand_timesteps(t, bsz, x.device) t = self.t_embedder (t, x.dtype) clip = t + self.p_embedder(y) x_embedder_dtype = 
self.x_embedder.proj.weight.data.dtype if x_embedder_dtype not in {torch.bfloat16, torch.float16, torch.float32, torch.float64}: x_embedder_dtype = x.dtype img_sizes = None img, img_masks, img_sizes = self.patchify(img, self.max_seq, img_sizes) # for 1024x1024: output is 1,4096,64 None [[64,64]] hidden_states rearranged not shrunk, patch_size 1x1??? if img_masks is None: pH, pW = img_sizes[0] img_ids = torch.zeros(pH, pW, 3, device=img.device) img_ids[..., 1] = img_ids[..., 1] + torch.arange(pH, device=img.device)[:, None] img_ids[..., 2] = img_ids[..., 2] + torch.arange(pW, device=img.device)[None, :] img_ids = repeat(img_ids, "h w c -> b (h w) c", b=bsz) img = self.x_embedder(img.to(x_embedder_dtype)) #img_len = img.shape[-2] if y0_style_active and not HDModel.RECON_MODE: img_y0_style, _, _ = self.patchify(img_y0_style_orig.clone(), self.max_seq, None) # for 1024x1024: output is 1,4096,64 None [[64,64]] hidden_states rearranged not shrunk, patch_size 1x1??? img_y0_style = self.x_embedder(img_y0_style.to(x_embedder_dtype)) # hidden_states 1,4032,2560 for 1024x1024: -> 1,4096,2560 ,64 -> ,2560 (x40) img = torch.cat([img, img_y0_style], dim=0) contexts = self.prepare_contexts(llama3, context, bsz, img.shape[-1]) # txt_ids -> 1,414,3 txt_ids = torch.zeros(bsz, contexts[-1].shape[1] + contexts[-2].shape[1] + contexts[0].shape[1], 3, device=img_ids.device, dtype=img_ids.dtype) ids = torch.cat((img_ids, txt_ids), dim=-2) # ids -> 1,4446,3 rope = self.pe_embedder(ids) # rope -> 1, 4446, 1, 64, 2, 2 txt_init = torch.cat([contexts[-1], contexts[-2]], dim=-2) # shape[1] == 128, 143 then on another step/call it's 128, 128...??? 
cuz the contexts is now 1,128,2560 txt_init_len = txt_init.shape[-2] # 271 if mask is not None: txt_init_list = [] offset_t5_start = 0 for i in range(transformer_options['AttnMask'].num_regions): offset_t5_end = offset_t5_start + transformer_options['AttnMask'].context_lens_list[i][0] txt_init_list.append(contexts[-1][:,offset_t5_start:offset_t5_end,:]) offset_t5_start = offset_t5_end offset_llama_start = 0 for i in range(transformer_options['AttnMask'].num_regions): offset_llama_end = offset_llama_start + transformer_options['AttnMask'].context_lens_list[i][1] txt_init_list.append(contexts[-2][:,offset_llama_start:offset_llama_end,:]) offset_llama_start = offset_llama_end txt_init = torch.cat(txt_init_list, dim=-2) #T5,LLAMA3 (last block) txt_init_len = txt_init.shape[-2] img = StyleMMDiT(img, "proj_in") img = img.to(x) if img is not None else None # DOUBLE STREAM for bid, (block, style_block) in enumerate(zip(self.double_stream_blocks, StyleMMDiT.double_blocks)): txt_llama = contexts[bid] txt = torch.cat([txt_init, txt_llama], dim=-2) # 1,384,2560 # cur_contexts = T5, LLAMA3 (last block), LLAMA3 (current block) if weight > 0 and mask is not None and weight < bid/48: img, txt_init = block(img, img_masks, txt, clip, rope, mask_zero, style_block=style_block) elif (weight < 0 and mask is not None and abs(weight) < (1 - bid/48)): img_tmpZ, txt_tmpZ = img.clone(), txt.clone() # more efficient than the commented lines below being used instead in the loop? 
img_tmpZ, txt_init = block(img_tmpZ, img_masks, txt_tmpZ, clip, rope, mask, style_block=style_block) img , txt_tmpZ = block(img , img_masks, txt , clip, rope, mask_zero, style_block=style_block) elif floor > 0 and mask is not None and floor > bid/48: mask_tmp = mask.clone() mask_tmp[:img_len,:img_len] = 1.0 img, txt_init = block(img, img_masks, txt, clip, rope, mask_tmp, style_block=style_block) elif floor < 0 and mask is not None and abs(floor) > (1 - bid/48): mask_tmp = mask.clone() mask_tmp[:img_len,:img_len] = 1.0 img, txt_init = block(img, img_masks, txt, clip, rope, mask_tmp, style_block=style_block) elif update_cross_attn is not None and update_cross_attn['skip_cross_attn']: img, txt_init = block(img, img_masks, txt, clip, rope, mask, update_cross_attn=update_cross_attn) else: img, txt_init = block(img, img_masks, txt, clip, rope, mask, update_cross_attn=update_cross_attn, style_block=style_block) txt_init = txt_init[..., :txt_init_len, :] # END DOUBLE STREAM img = torch.cat([img, txt_init], dim=-2) # 4032 + 271 -> 4303 # txt embed from double stream block joint_len = img.shape[-2] if img_masks is not None: img_masks_ones = torch.ones( (bsz, txt_init.shape[-2] + txt_llama.shape[-2]), device=img_masks.device, dtype=img_masks.dtype) # encoder_attention_mask_ones= padding for txt embed concatted onto end of img img_masks = torch.cat([img_masks, img_masks_ones], dim=-2) # SINGLE STREAM for bid, (block, style_block) in enumerate(zip(self.single_stream_blocks, StyleMMDiT.single_blocks)): txt_llama = contexts[bid+16] # T5 pre-embedded for single stream blocks img = torch.cat([img, txt_llama], dim=-2) # cat img,txt opposite of flux which is txt,img 4303 + 143 -> 4446 if weight > 0 and mask is not None and weight < (bid+16)/48: img = block(img, img_masks, None, clip, rope, mask_zero, style_block=style_block) elif weight < 0 and mask is not None and abs(weight) < (1 - (bid+16)/48): img = block(img, img_masks, None, clip, rope, mask_zero, style_block=style_block) elif 
floor > 0 and mask is not None and floor > (bid+16)/48: mask_tmp = mask.clone() mask_tmp[:img_len,:img_len] = 1.0 img = block(img, img_masks, None, clip, rope, mask_tmp, style_block=style_block) elif floor < 0 and mask is not None and abs(floor) > (1 - (bid+16)/48): mask_tmp = mask.clone() mask_tmp[:img_len,:img_len] = 1.0 img = block(img, img_masks, None, clip, rope, mask_tmp, style_block=style_block) else: img = block(img, img_masks, None, clip, rope, mask, style_block=style_block) img = img[..., :joint_len, :] # slice off txt_llama # END SINGLE STREAM img = img[..., :img_len, :] #img = self.final_layer(img, clip) # 4096,2560 -> 4096,64 shift, scale = self.final_layer.adaLN_modulation(clip).chunk(2,dim=1) img = (1 + scale[:, None, :]) * self.final_layer.norm_final(img) + shift[:, None, :] if not EO("endojector"): img = StyleMMDiT(img, "proj_out") if y0_style_active and not HDModel.RECON_MODE: img = img[0:1] if EO("endojector"): if EO("dumb"): eps_style = x_init[0:1].to(y0_style) - y0_style else: eps_style = (x_tmp[0:1].to(y0_style) - y0_style) / SIGMA.to(y0_style) eps_embed = self.Endojector.embed(eps_style) img = StyleMMDiT.scattersort_(img.to(eps_embed), eps_embed) img = self.final_layer.linear(img.to(self.final_layer.linear.weight.data)) img = self.unpatchify(img, img_sizes) out_list.append(img) output = torch.cat(out_list, dim=0) eps = -output[:, :, :h, :w] if recon_iter == 1: denoised = new_x - SIGMA.to(new_x) * eps.to(new_x) if x_tmp is not None: eps = (x_tmp - denoised.to(x_tmp)) / SIGMA.to(x_tmp) else: eps = (x_orig - denoised.to(x_orig)) / SIGMA.to(x_orig) freqsep_lowpass_method = transformer_options.get("freqsep_lowpass_method") freqsep_sigma = transformer_options.get("freqsep_sigma") freqsep_kernel_size = transformer_options.get("freqsep_kernel_size") freqsep_inner_kernel_size = transformer_options.get("freqsep_inner_kernel_size") freqsep_stride = transformer_options.get("freqsep_stride") freqsep_lowpass_weight = 
transformer_options.get("freqsep_lowpass_weight") freqsep_highpass_weight= transformer_options.get("freqsep_highpass_weight") freqsep_mask = transformer_options.get("freqsep_mask") y0_style_pos = transformer_options.get("y0_style_pos") y0_style_neg = transformer_options.get("y0_style_neg") # end recon loop self.style_dtype = torch.float32 if self.style_dtype is None else self.style_dtype dtype = eps.dtype if self.style_dtype is None else self.style_dtype if y0_style_pos is not None: y0_style_pos_weight = transformer_options.get("y0_style_pos_weight") y0_style_pos_synweight = transformer_options.get("y0_style_pos_synweight") y0_style_pos_synweight *= y0_style_pos_weight y0_style_pos_mask = transformer_options.get("y0_style_pos_mask") y0_style_pos_mask_edge = transformer_options.get("y0_style_pos_mask_edge") y0_style_pos = y0_style_pos.to(dtype) x = x_orig.to(dtype) eps = eps.to(dtype) eps_orig = eps.clone() sigma = SIGMA #t_orig[0].to(torch.float32) / 1000 denoised = x - sigma * eps denoised_embed = self.Retrojector.embed(denoised) y0_adain_embed = self.Retrojector.embed(y0_style_pos) if transformer_options['y0_style_method'] == "scattersort": tile_h, tile_w = transformer_options.get('y0_style_tile_height'), transformer_options.get('y0_style_tile_width') pad = transformer_options.get('y0_style_tile_padding') if pad is not None and tile_h is not None and tile_w is not None: denoised_spatial = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) y0_adain_spatial = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) if EO("scattersort_median_LP"): denoised_spatial_LP = median_blur_2d(denoised_spatial, kernel_size=EO("scattersort_median_LP",7)) y0_adain_spatial_LP = median_blur_2d(y0_adain_spatial, kernel_size=EO("scattersort_median_LP",7)) denoised_spatial_HP = denoised_spatial - denoised_spatial_LP y0_adain_spatial_HP = y0_adain_spatial - y0_adain_spatial_LP denoised_spatial_LP = apply_scattersort_tiled(denoised_spatial_LP, 
y0_adain_spatial_LP, tile_h, tile_w, pad) denoised_spatial = denoised_spatial_LP + denoised_spatial_HP denoised_embed = rearrange(denoised_spatial, "b c h w -> b (h w) c") else: denoised_spatial = apply_scattersort_tiled(denoised_spatial, y0_adain_spatial, tile_h, tile_w, pad) denoised_embed = rearrange(denoised_spatial, "b c h w -> b (h w) c") else: denoised_embed = apply_scattersort_masked(denoised_embed, y0_adain_embed, y0_style_pos_mask, y0_style_pos_mask_edge, h_len, w_len) elif transformer_options['y0_style_method'] == "AdaIN": if freqsep_mask is not None: freqsep_mask = freqsep_mask.view(1, 1, *freqsep_mask.shape[-2:]).float() freqsep_mask = F.interpolate(freqsep_mask.float(), size=(h_len, w_len), mode='nearest-exact') if hasattr(self, "adain_tile"): tile_h, tile_w = self.adain_tile denoised_pretile = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) y0_adain_pretile = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) if self.adain_flag: h_off = tile_h // 2 w_off = tile_w // 2 denoised_pretile = denoised_pretile[:,:,h_off:-h_off, w_off:-w_off] self.adain_flag = False else: h_off = 0 w_off = 0 self.adain_flag = True tiles, orig_shape, grid, strides = tile_latent(denoised_pretile, tile_size=(tile_h,tile_w)) y0_tiles, orig_shape, grid, strides = tile_latent(y0_adain_pretile, tile_size=(tile_h,tile_w)) tiles_out = [] for i in range(tiles.shape[0]): tile = tiles[i].unsqueeze(0) y0_tile = y0_tiles[i].unsqueeze(0) tile = rearrange(tile, "b c h w -> b (h w) c", h=tile_h, w=tile_w) y0_tile = rearrange(y0_tile, "b c h w -> b (h w) c", h=tile_h, w=tile_w) tile = adain_seq_inplace(tile, y0_tile) tiles_out.append(rearrange(tile, "b (h w) c -> b c h w", h=tile_h, w=tile_w)) tiles_out_tensor = torch.cat(tiles_out, dim=0) tiles_out_tensor = untile_latent(tiles_out_tensor, orig_shape, grid, strides) if h_off == 0: denoised_pretile = tiles_out_tensor else: denoised_pretile[:,:,h_off:-h_off, w_off:-w_off] = tiles_out_tensor denoised_embed = 
rearrange(denoised_pretile, "b c h w -> b (h w) c", h=h_len, w=w_len) elif freqsep_lowpass_method is not None and freqsep_lowpass_method.endswith("pw"): #EO("adain_pw"): denoised_spatial = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) y0_adain_spatial = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) if freqsep_lowpass_method == "median_pw": denoised_spatial_new = adain_patchwise_row_batch_med(denoised_spatial.clone(), y0_adain_spatial.clone().repeat(denoised_spatial.shape[0],1,1,1), sigma=freqsep_sigma, kernel_size=freqsep_kernel_size, use_median_blur=True, lowpass_weight=freqsep_lowpass_weight, highpass_weight=freqsep_highpass_weight) elif freqsep_lowpass_method == "gaussian_pw": denoised_spatial_new = adain_patchwise_row_batch(denoised_spatial.clone(), y0_adain_spatial.clone().repeat(denoised_spatial.shape[0],1,1,1), sigma=freqsep_sigma, kernel_size=freqsep_kernel_size) denoised_embed = rearrange(denoised_spatial_new, "b c h w -> b (h w) c", h=h_len, w=w_len) elif freqsep_lowpass_method is not None: denoised_spatial = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) y0_adain_spatial = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) if freqsep_lowpass_method == "median": denoised_spatial_LP = median_blur_2d(denoised_spatial, kernel_size=freqsep_kernel_size) y0_adain_spatial_LP = median_blur_2d(y0_adain_spatial, kernel_size=freqsep_kernel_size) elif freqsep_lowpass_method == "gaussian": denoised_spatial_LP = gaussian_blur_2d(denoised_spatial, sigma=freqsep_sigma, kernel_size=freqsep_kernel_size) y0_adain_spatial_LP = gaussian_blur_2d(y0_adain_spatial, sigma=freqsep_sigma, kernel_size=freqsep_kernel_size) denoised_spatial_HP = denoised_spatial - denoised_spatial_LP if EO("adain_fs_uhp"): y0_adain_spatial_HP = y0_adain_spatial - y0_adain_spatial_LP denoised_spatial_ULP = gaussian_blur_2d(denoised_spatial, sigma=EO("adain_fs_uhp_sigma", 1.0), kernel_size=EO("adain_fs_uhp_kernel_size", 
3)) y0_adain_spatial_ULP = gaussian_blur_2d(y0_adain_spatial, sigma=EO("adain_fs_uhp_sigma", 1.0), kernel_size=EO("adain_fs_uhp_kernel_size", 3)) denoised_spatial_UHP = denoised_spatial_HP - denoised_spatial_ULP y0_adain_spatial_UHP = y0_adain_spatial_HP - y0_adain_spatial_ULP #denoised_spatial_HP = y0_adain_spatial_ULP + denoised_spatial_UHP denoised_spatial_HP = denoised_spatial_ULP + y0_adain_spatial_UHP denoised_spatial_new = freqsep_lowpass_weight * y0_adain_spatial_LP + freqsep_highpass_weight * denoised_spatial_HP denoised_embed = rearrange(denoised_spatial_new, "b c h w -> b (h w) c", h=h_len, w=w_len) else: denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed) for adain_iter in range(EO("style_iter", 0)): denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed) denoised_embed = self.Retrojector.embed(self.Retrojector.unembed(denoised_embed)) denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed) elif transformer_options['y0_style_method'] == "WCT": self.StyleWCT.set(y0_adain_embed) denoised_embed = self.StyleWCT.get(denoised_embed) if transformer_options.get('y0_standard_guide') is not None: y0_standard_guide = transformer_options.get('y0_standard_guide') y0_standard_guide_embed = self.Retrojector.embed(y0_standard_guide) f_cs = self.StyleWCT.get(y0_standard_guide_embed) self.y0_standard_guide = self.Retrojector.unembed(f_cs) if transformer_options.get('y0_inv_standard_guide') is not None: y0_inv_standard_guide = transformer_options.get('y0_inv_standard_guide') y0_inv_standard_guide_embed = self.Retrojector.embed(y0_inv_standard_guide) f_cs = self.StyleWCT.get(y0_inv_standard_guide_embed) self.y0_inv_standard_guide = self.Retrojector.unembed(f_cs) elif transformer_options['y0_style_method'] == "WCT2": self.WaveletStyleWCT.set(y0_adain_embed, h_len, w_len) denoised_embed = self.WaveletStyleWCT.get(denoised_embed, h_len, w_len) if transformer_options.get('y0_standard_guide') is not None: y0_standard_guide = 
transformer_options.get('y0_standard_guide') y0_standard_guide_embed = self.Retrojector.embed(y0_standard_guide) f_cs = self.WaveletStyleWCT.get(y0_standard_guide_embed, h_len, w_len) self.y0_standard_guide = self.Retrojector.unembed(f_cs) if transformer_options.get('y0_inv_standard_guide') is not None: y0_inv_standard_guide = transformer_options.get('y0_inv_standard_guide') y0_inv_standard_guide_embed = self.Retrojector.embed(y0_inv_standard_guide) f_cs = self.WaveletStyleWCT.get(y0_inv_standard_guide_embed, h_len, w_len) self.y0_inv_standard_guide = self.Retrojector.unembed(f_cs) denoised_approx = self.Retrojector.unembed(denoised_embed) eps = (x - denoised_approx) / sigma if not UNCOND: if eps.shape[0] == 2: eps[1] = eps_orig[1] + y0_style_pos_weight * (eps[1] - eps_orig[1]) eps[0] = eps_orig[0] + y0_style_pos_synweight * (eps[0] - eps_orig[0]) else: eps[0] = eps_orig[0] + y0_style_pos_weight * (eps[0] - eps_orig[0]) elif eps.shape[0] == 1 and UNCOND: eps[0] = eps_orig[0] + y0_style_pos_synweight * (eps[0] - eps_orig[0]) #eps = eps.float() if y0_style_neg is not None: y0_style_neg_weight = transformer_options.get("y0_style_neg_weight") y0_style_neg_synweight = transformer_options.get("y0_style_neg_synweight") y0_style_neg_synweight *= y0_style_neg_weight y0_style_neg_mask = transformer_options.get("y0_style_neg_mask") y0_style_neg_mask_edge = transformer_options.get("y0_style_neg_mask_edge") y0_style_neg = y0_style_neg.to(dtype) x = x_orig.to(dtype) eps = eps.to(dtype) eps_orig = eps.clone() sigma = SIGMA #t_orig[0].to(torch.float32) / 1000 denoised = x - sigma * eps denoised_embed = self.Retrojector.embed(denoised) y0_adain_embed = self.Retrojector.embed(y0_style_neg) if transformer_options['y0_style_method'] == "scattersort": tile_h, tile_w = transformer_options.get('y0_style_tile_height'), transformer_options.get('y0_style_tile_width') pad = transformer_options.get('y0_style_tile_padding') if pad is not None and tile_h is not None and tile_w is not None: 
# expand_timesteps, unpatchify and patchify take `self`: the forward pass in
# this file invokes them as self.expand_timesteps / self.patchify /
# self.unpatchify.
def expand_timesteps(self, t, batch_size, device):
    """Return the timestep ``t`` as a 1-D tensor with ``batch_size`` entries.

    Args:
        t: A Python number, a 0-d tensor, or a tensor that ``expand`` can
            broadcast to ``(batch_size,)``.
        batch_size: Number of entries in the returned tensor.
        device: ``torch.device`` used for tensors created or moved here.

    Returns:
        A tensor of shape ``(batch_size,)``.
    """
    if not torch.is_tensor(t):
        # On MPS the 32-bit dtypes are chosen; other devices get 64-bit.
        is_mps = device.type == "mps"
        if isinstance(t, float):
            dtype = torch.float32 if is_mps else torch.float64
        else:
            dtype = torch.int32 if is_mps else torch.int64
        # The torch.tensor factory is required here: the torch.Tensor class
        # constructor does not accept a ``dtype`` keyword.
        t = torch.tensor([t], dtype=dtype, device=device)
    elif t.ndim == 0:
        t = t[None].to(device)
    # broadcast to batch dimension in a way that's compatible with ONNX/Core ML
    return t.expand(batch_size)


def unpatchify(self, x: Tensor, img_sizes: List[Tuple[int, int]]) -> Tensor:
    """Fold per-image token sequences back into image-shaped tensors.

    For image ``i`` the first ``pH * pW`` tokens of ``x[i]`` are reshaped to
    ``(1, pH, pW, -1)`` and rearranged to ``B C (H p1) (W p2)`` with
    ``p1 = p2 = self.patch_size``.

    Args:
        x: Token tensor indexed as ``x[i, token]``.
        img_sizes: One ``(pH, pW)`` pair per image.

    Returns:
        The per-image results concatenated along dim 0 (a single tensor).
    """
    patch = self.patch_size
    images = [
        einops.rearrange(
            x[i, :pH * pW].reshape(1, pH, pW, -1),
            'B H W (p1 p2 C) -> B C (H p1) (W p2)',
            p1=patch,
            p2=patch,
        )
        for i, (pH, pW) in enumerate(img_sizes)
    ]
    return torch.cat(images, dim=0)


def patchify(self, x, max_seq, img_sizes=None):
    """Rearrange ``x`` into a token sequence.

    Two input forms are handled:

    * ``img_sizes`` given: ``x`` is rearranged with ``'B C S p -> B S (p C)'``
      and a ``(B, max_seq)`` mask is built whose first ``h * w`` entries per
      image are 1.
    * ``img_sizes`` omitted and ``x`` a tensor: ``x`` is cut into
      ``patch_size`` x ``patch_size`` patches, no mask is produced and
      ``img_sizes`` is derived from the last two dimensions.

    Returns:
        ``(tokens, mask_or_None, img_sizes)``.

    Raises:
        NotImplementedError: ``img_sizes`` is omitted and ``x`` is not a tensor.
    """
    patch = self.patch_size

    if img_sizes is not None:
        if isinstance(x, Tensor):
            batch, device, dtype = x.shape[0], x.device, x.dtype
        else:
            batch, device, dtype = len(x), x[0].device, x[0].dtype
        # The mask is only needed on this path, so it is only allocated here.
        x_masks = torch.zeros((batch, max_seq), dtype=dtype, device=device)
        for i, img_size in enumerate(img_sizes):  # [[64,64]]
            x_masks[i, 0:img_size[0] * img_size[1]] = 1
        x = einops.rearrange(x, 'B C S p -> B S (p C)', p=patch * patch)
        return x, x_masks, img_sizes

    if isinstance(x, Tensor):
        pH, pW = x.shape[-2] // patch, x.shape[-1] // patch
        x = einops.rearrange(
            x,
            'B C (H p1) (W p2) -> B (H W) (p1 p2 C)',
            p1=patch,
            p2=patch,
        )
        # One independent [pH, pW] list per image (no shared inner list).
        img_sizes = [[pH, pW] for _ in range(x.shape[0])]
        return x, None, img_sizes

    raise NotImplementedError


def clone_inputs(*args, index: int = None):
    """Clone every positional tensor, optionally selecting one entry first.

    Args:
        *args: Tensors to clone.
        index: When given, each result is ``x[index]`` with a leading
            dimension of size 1 added back.

    Returns:
        A tuple of clones in argument order.
    """
    if index is None:
        return tuple(x.clone() for x in args)
    return tuple(x[index].unsqueeze(0).clone() for x in args)


def attention_rescale(
    query,
    key,
    value,
    attn_mask=None
) -> torch.Tensor:
    """Scaled dot-product attention with a multiplicative mask.

    The scores ``query @ key^T / sqrt(d)`` are multiplied elementwise by
    ``attn_mask`` (when given) *before* the softmax, so the mask rescales
    the logits rather than being added to them.

    Returns:
        ``softmax(scores) @ value``.
    """
    scale_factor = 1 / math.sqrt(query.size(-1))
    attn_weight = query @ key.transpose(-2, -1) * scale_factor

    if attn_mask is not None:
        # In-place on the freshly computed scores; keeps their dtype.
        attn_weight *= attn_mask

    attn_weight = torch.softmax(attn_weight, dim=-1)
    return attn_weight @ value
def apply_mod(tensor, m_mult, m_add=None, modulation_dims=None):
    """Apply a multiplicative (and optional additive) modulation to ``tensor``.

    Args:
        tensor: Tensor to modulate.
        m_mult: Multiplier.
        m_add: Optional offset added after the multiplication.
        modulation_dims: Optional iterable of ``(start, stop, index)``
            entries. When given, ``tensor[:, start:stop]`` is modified
            IN PLACE using ``m_mult[:, index]`` (and ``m_add[:, index]``).

    Returns:
        A new tensor when ``modulation_dims`` is None; otherwise the same
        ``tensor`` object after in-place modification.
    """
    if modulation_dims is not None:
        for span in modulation_dims:
            start, stop, slot = span[0], span[1], span[2]
            tensor[:, start:stop] *= m_mult[:, slot]
            if m_add is not None:
                tensor[:, start:stop] += m_add[:, slot]
        return tensor

    scaled = tensor * m_mult
    return scaled if m_add is None else scaled + m_add
# PIL -> tensor
def pil2tensor(image):
    """Convert an image (anything ``np.array`` accepts) to a float32 tensor.

    Values are divided by 255 and a leading dimension of size 1 is added.
    """
    scaled = np.array(image).astype(np.float32) / 255.0
    return torch.from_numpy(scaled).unsqueeze(0)


def freq_sep_fft(img, cutoff=5, sigma=10):
    """Split a 4-D image tensor into low- and high-frequency parts via FFT.

    The spectrum of the last two dimensions is centred, multiplied by a
    Gaussian of width ``sigma`` (low-pass) and by its complement
    (high-pass), then transformed back. Because the two filters sum to one,
    the two outputs sum to ``img`` up to floating-point error.

    Args:
        img: Tensor with exactly four dimensions; the last two are transformed.
        cutoff: Not used by the computation; kept so existing callers that
            pass it keep working.
        sigma: Width of the Gaussian low-pass filter, in frequency bins.

    Returns:
        ``(low_pass_img, high_pass_img)``, the real parts of the inverse FFTs.
    """
    spatial = (-2, -1)
    _, _, h, w = img.shape

    # Only the two transformed axes need shifting.
    spectrum = torch.fft.fftshift(torch.fft.fft2(img, dim=spatial), dim=spatial)

    # freq domain -> meshgrid ("ij" spelled out: rows first, then columns)
    rows, cols = torch.meshgrid(
        torch.arange(h, device=img.device),
        torch.arange(w, device=img.device),
        indexing="ij",
    )
    dist_sq = (cols - w // 2) ** 2 + (rows - h // 2) ** 2

    # smoother low-pass filter via gaussian filter
    low_pass_filter = torch.exp(-dist_sq / (2 * sigma**2))[None, None]

    low_pass_fft = spectrum * low_pass_filter
    high_pass_fft = spectrum * (1 - low_pass_filter)

    # inverse FFT -> return to spatial domain
    low_pass_img = torch.fft.ifft2(torch.fft.ifftshift(low_pass_fft, dim=spatial), dim=spatial).real
    high_pass_img = torch.fft.ifft2(torch.fft.ifftshift(high_pass_fft, dim=spatial), dim=spatial).real

    return low_pass_img, high_pass_img


def color_dodge_blend(base, blend):
    """Return ``base / (1 - blend)`` (epsilon-guarded), clamped to [0, 1]."""
    return torch.clamp(base / (1 - blend + 1e-8), 0, 1)


def color_scorch_blend(base, blend):
    """Return ``1 - (1 - base) / (1 - blend)`` (epsilon-guarded), clamped to [0, 1]."""
    return torch.clamp(1 - (1 - base) / (1 - blend + 1e-8), 0, 1)


def divide_blend(base, blend):
    """Return ``base / blend`` (epsilon-guarded), clamped to [0, 1]."""
    return torch.clamp(base / (blend + 1e-8), 0, 1)


def color_burn_blend(base, blend):
    """Return ``1 - (1 - base) / blend`` (epsilon-guarded), clamped to [0, 1]."""
    return torch.clamp(1 - (1 - base) / (blend + 1e-8), 0, 1)


def hard_light_blend(base, blend):
    """Hard-light blend: multiply where ``blend <= 0.5``, screen elsewhere."""
    multiplied = 2 * base * blend
    screened = 1 - 2 * (1 - base) * (1 - blend)
    return torch.where(blend <= 0.5, multiplied, screened)


def hard_light_freq_sep(original, low_pass):
    """Return the mean of a color-burn (against ``1 - low_pass``) and a divide blend."""
    high_pass = (color_burn_blend(original, (1 - low_pass)) + divide_blend(original, low_pass)) / 2
    return high_pass


def linear_light_blend(base, blend):
    """Linear-light blend of ``blend`` onto ``base``.

    Both branches are algebraically ``base + 2 * blend - 1``; the two forms
    are kept as written so results stay bit-identical.
    """
    return torch.where(blend <= 0.5,
                       base + 2 * blend - 1,
                       base + 2 * (blend - 0.5))


def linear_light_freq_sep(base, blend):
    """Return ``(base + (1 - blend)) / 2``."""
    return (base + (1 - blend)) / 2
def normalize_lab(lab_image):
    """Map a Lab tensor (channels on dim 1) to roughly unit range.

    Channel 0 (L) is divided by 100; channels 1 and 2 (a, b) are mapped from
    [-128, 127] to [0, 1] with ``scale_to_range``. Only the first three
    channels are used.
    """
    lightness = lab_image[:, 0:1, :, :] / 100.0
    chroma_a = scale_to_range(lab_image[:, 1:2, :, :], -128, 127, 0, 1)
    chroma_b = scale_to_range(lab_image[:, 2:3, :, :], -128, 127, 0, 1)
    return torch.cat([lightness, chroma_a, chroma_b], dim=1)


def denormalize_lab(lab_normalized):
    """Invert ``normalize_lab``: L is scaled by 100, a/b mapped back to [-128, 127].

    The input must have exactly three channels on dim 1 (it is split into
    three single-channel tensors).
    """
    lightness, chroma_a, chroma_b = torch.split(lab_normalized, 1, dim=1)
    return torch.cat(
        [
            lightness * 100.0,
            scale_to_range(chroma_a, 0, 1, -128, 127),
            scale_to_range(chroma_b, 0, 1, -128, 127),
        ],
        dim=1,
    )


def rgb_to_lab(image):
    """Thin wrapper around ``kornia.color.rgb_to_lab``."""
    return kornia.color.rgb_to_lab(image)


def lab_to_rgb(image):
    """Thin wrapper around ``kornia.color.lab_to_rgb``."""
    return kornia.color.lab_to_rgb(image)


# cv2_layer() and ImageMedianBlur adapted from: https://github.com/Nourepide/ComfyUI-Allor/
def cv2_layer(tensor, function):
    """Apply a NumPy-array function to an image tensor.

    The first three channels are passed to ``function`` together. A fourth
    (alpha) channel, if present, is passed separately as an ``(H, W, 1)``
    array; a trailing axis is appended to that result before it is
    concatenated back, i.e. ``function`` is expected to return a 2-D
    ``(H, W)`` array for single-channel input.

    :param tensor: A CPU tensor of shape (H, W, C) or (N, H, W, C).
    :param function: Callable taking and returning a numpy array.
    :return: A tensor with the processed image(s); 4-D input is processed
        image by image and re-stacked.
    :raises ValueError: If ``tensor`` is not 3-D or 4-D, or an image has more
        than four channels.
    """

    def produce(image):
        channels = image[0, 0, :].shape[0]

        result_rgb = function(image[:, :, 0:3].numpy())
        if channels <= 3:
            return torch.from_numpy(result_rgb)

        if channels == 4:
            alpha = image[:, :, 3:4].numpy()
            result_alpha = function(alpha)[..., np.newaxis]
            result_rgba = np.concatenate((result_rgb, result_alpha), axis=2)
            return torch.from_numpy(result_rgba)

        # Previously fell through and returned None, failing later with an
        # unrelated error.
        raise ValueError("Incompatible channel count.")

    shape_size = len(tensor.shape)

    if shape_size == 3:
        # produce() already returns a tensor; wrapping it in
        # torch.from_numpy() would raise TypeError.
        return produce(tensor)
    if shape_size == 4:
        return torch.stack([produce(image) for image in tensor])
    raise ValueError("Incompatible tensor dimension.")
class ImageRepeatTileToSize:
    """Node that repeats an image as tiles until it covers a target size.

    The tiled result is either cropped to ``height`` x ``width`` or handed to
    ``image_resize`` with the remaining arguments.
    """

    RETURN_TYPES = ("IMAGE",)
    RETURN_NAMES = ("image",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/images"

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls):
        """Describe the node inputs: image, target width/height and crop flag."""
        size_options = {"default": 1024, "min": 1, "max": 1048576, "step": 1}
        required = {
            "image": ("IMAGE",),
            "width": ("INT", dict(size_options)),
            "height": ("INT", dict(size_options)),
            "crop": ("BOOLEAN", {"default": True}),
        }
        return {"required": required}

    def main(self,
             image,
             width,
             height,
             crop,
             method="stretch",
             interpolation="lanczos",
             condition="always",
             multiple_of=0,
             keep_proportion=False,
             ):
        """Tile ``image`` (indexed batch, height, width, channel) to cover the target.

        Returns a 1-tuple holding the tiled image: cropped to the target
        when ``crop`` is true, otherwise passed through ``image_resize``.
        """
        source = image.clone().detach()
        _, src_h, src_w, _ = source.shape

        # Number of repeats per axis, rounded up so the target is covered.
        reps_h = int(torch.div(height, src_h).ceil())
        reps_w = int(torch.div(width, src_w).ceil())

        tiled = torch.tile(source, (reps_h, reps_w, 1))

        if not crop:
            resized = image_resize(tiled, width, height, method, interpolation,
                                   condition, multiple_of, keep_proportion)
            return (resized,)

        return (tiled[:, :height, :width, :],)
img_gray_flat = img_gray_tensor.view(-1) num_pixels = int(density * img_gray_flat.numel()) indices = torch.randint(0, img_gray_flat.numel(), (num_pixels,), device=img_gray_flat.device) values = torch.randint(0, 256, (num_pixels,), device=img_gray_flat.device, dtype=torch.float32) / 255.0 img_gray_flat[indices] = values img_gray = img_gray_flat.view(img_gray_tensor.shape) img_gray_np = (img_gray.cpu().numpy() * 255).astype(np.uint8) img_gray = Image.fromarray(img_gray_np) img_noise = img_gray.convert('RGB') img_noise = img_noise.filter(ImageFilter.GaussianBlur(radius=0.125)) img_noise = img_noise.resize(original_size, Image.Resampling(1)) img_noise = img_noise.filter(ImageFilter.EDGE_ENHANCE_MORE) img_final = Image.blend(img, img_noise, intensity) enhancer = ImageEnhance.Brightness(img_final) img_highlights = enhancer.enhance(highlights) img_list.append(pil2tensor(img_highlights).squeeze(dim=0)) img_highlights = torch.stack(img_list, dim=0) return img_highlights class Image_Grain_Add: def __init__(self): pass @classmethod def INPUT_TYPES(cls): return { "required": { "image": ("IMAGE",), "weight": ("FLOAT", {"default": 0.5, "min": -10000.0, "max": 10000.0, "step": 0.01}), #"density": ("FLOAT", {"default": 1.0, "min": 0.01, "max": 1.0, "step": 0.01}), #"intensity": ("FLOAT", {"default": 1.0, "min": 0.01, "max": 1.0, "step": 0.01}), #"highlights": ("FLOAT", {"default": 1.0, "min": 0.01, "max": 255.0, "step": 0.01}), #"supersample_factor": ("INT", {"default": 4, "min": 1, "max": 8, "step": 1}), #"repeats": ("INT", {"default": 1, "min": 1, "max": 1000, "step": 1}) } } RETURN_TYPES = ("IMAGE",) FUNCTION = "main" CATEGORY = "RES4LYF/images" def main(self, image, weight=0.5, density=1.0, intensity=1.0, highlights=1.0, supersample_factor=1.0, repeats=1): image = image.repeat(repeats, 1, 1, 1) image_grain = self.apply_film_grain(image, density, intensity, highlights, supersample_factor) return (image + weight * (hard_light_blend(image_grain, image) - image), ) def 
class Frequency_Separation_Hard_Light:
    """Frequency separation node built on the module's hard-light helpers.

    Given any two of (high_pass, original, low_pass) that allow it, the
    missing one of high_pass / original is computed with
    ``hard_light_freq_sep`` / ``hard_light_blend``. low_pass is never
    derived; it is passed through unchanged.
    """

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "optional": {
                "high_pass": ("IMAGE",),
                "original":  ("IMAGE",),
                "low_pass":  ("IMAGE",),
            },
            "required": {
            },
        }

    RETURN_TYPES = ("IMAGE","IMAGE","IMAGE",)
    RETURN_NAMES = ("high_pass", "original", "low_pass",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/images"

    def main(self, high_pass=None, original=None, low_pass=None):
        """Return (high_pass, original, low_pass), filling in what is missing.

        Raises:
            ValueError: if the inputs needed to derive a missing layer are
                not connected.
        """
        # Use the GPU when present (as before) but fall back to CPU instead of
        # crashing on machines without CUDA.
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        if high_pass is None:
            if original is None or low_pass is None:
                raise ValueError("high_pass can only be computed when both original and low_pass are provided.")
            high_pass = hard_light_freq_sep(original.to(torch.float64).to(device), low_pass.to(torch.float64).to(device))

        if original is None:
            if low_pass is None:
                raise ValueError("original can only be computed when both low_pass and high_pass are provided.")
            original = hard_light_blend(low_pass.to(torch.float64).to(device), high_pass.to(torch.float64).to(device))

        return (high_pass, original, low_pass,)
class Frames_Concat:
    """Concatenate two frame batches along the batch (first) axis."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "frames_0": ("IMAGE",),
                "frames_1": ("IMAGE",),
            },
            "optional": {
            },
        }

    RETURN_TYPES = ("IMAGE",)
    RETURN_NAMES = ("image",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/images"

    def main(self, frames_0, frames_1):
        """Return a 1-tuple holding ``frames_0`` followed by ``frames_1``.

        The result always keeps its leading batch axis. The former
        ``.squeeze(0)`` was a no-op for two or more frames but silently
        dropped the batch axis when the result held exactly one frame.
        """
        # torch.cat allocates a new tensor, so no extra clone is needed to
        # keep the inputs isolated from the output.
        frames_concat = torch.cat((frames_0, frames_1), dim=0)
        return (frames_concat,)
class Frequency_Separation_Linear_Light:
    """Frequency separation node built on the module's linear-light helpers.

    The missing one of high_pass / original is computed with
    ``linear_light_freq_sep`` / ``linear_light_blend``. low_pass is never
    derived; it is passed through unchanged.
    """

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "optional": {
                "high_pass": ("IMAGE",),
                "original":  ("IMAGE",),
                "low_pass":  ("IMAGE",),
            },
            "required": {
            },
        }

    RETURN_TYPES = ("IMAGE","IMAGE","IMAGE",)
    RETURN_NAMES = ("high_pass", "original", "low_pass",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/images"

    def main(self, high_pass=None, original=None, low_pass=None):
        """Return (high_pass, original, low_pass), filling in what is missing.

        Raises:
            ValueError: if the inputs needed to derive a missing layer are
                not connected.
        """
        # Prefer CUDA (previous behaviour) but do not crash on CPU-only hosts.
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        if high_pass is None:
            if original is None or low_pass is None:
                raise ValueError("high_pass can only be computed when both original and low_pass are provided.")
            high_pass = linear_light_freq_sep(original.to(torch.float64).to(device), low_pass.to(torch.float64).to(device))

        if original is None:
            if low_pass is None:
                raise ValueError("original can only be computed when both low_pass and high_pass are provided.")
            original = linear_light_blend(low_pass.to(torch.float64).to(device), high_pass.to(torch.float64).to(device))

        return (high_pass, original, low_pass,)
class ImageMedianBlur:
    """Median-blur a batch of images with OpenCV."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "images": ("IMAGE",),
                "size":   ("INT", {"default": 6, "min": 1, "step": 1,}),
            },
        }

    RETURN_TYPES = ("IMAGE",)
    RETURN_NAMES = ("image",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/images"

    def main(self, images, size):
        """Return a 1-tuple with the blurred images.

        Args:
            images: image tensor; scaled by 255 and cast to uint8 before
                being handed to ``cv2_layer``.
            size: UI size; the kernel aperture is derived from ``size - 1``.
        """
        # cv2.medianBlur only accepts an odd aperture. `size - 1` was even for
        # odd sizes and 0 for size == 1, both of which made OpenCV raise.
        # Values that already worked (even sizes such as the default 6 -> 5)
        # are unchanged; the rest are rounded up to the next odd number.
        ksize = max(size - 1, 1) | 1

        img = images.clone().detach()
        img = (img * 255).to(torch.uint8)

        return ((cv2_layer(img, lambda x: cv2.medianBlur(x, ksize)) / 255),)
class FastSmudgeBlur:
    """Separable box blur implemented with two depthwise convolutions."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "images":      ("IMAGE",),
                "kernel_size": ("INT", {"default": 51, "min": 1, "step": 1,}),
            },
        }

    RETURN_TYPES = ("IMAGE",)
    RETURN_NAMES = ("image",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/images"

    def main(self, images, kernel_size):
        """Box-blur ``images`` (b, h, w, c) and return them as a 1-tuple.

        Odd kernel sizes keep the spatial size; an even ``kernel_size``
        yields an output one pixel larger in height and width (unchanged
        from the previous implementation). Borders are zero-padded.
        """
        # Prefer CUDA (previous behaviour) but fall back to CPU when absent.
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        img = images.clone().detach().to(device).float()

        # (b, h, w, c) to (b, c, h, w)
        img = img.permute(0, 3, 1, 2)
        num_channels = img.shape[1]

        # box blur kernel (separable convolution), one filter per channel
        box_kernel_1d = torch.ones(num_channels, 1, kernel_size, device=img.device, dtype=img.dtype) / kernel_size
        padding_size  = kernel_size // 2

        # conv2d padding is (pad_h, pad_w). The padding must follow the axis
        # the kernel extends along; it was previously swapped, which padded
        # the wrong axis in each pass and left the left/right borders black.
        horizontal_kernel = box_kernel_1d.unsqueeze(2)   # (c, 1, 1, k)
        vertical_kernel   = box_kernel_1d.unsqueeze(3)   # (c, 1, k, 1)

        blurred_img = F.conv2d(img,         horizontal_kernel, padding=(0, padding_size), groups=num_channels)
        blurred_img = F.conv2d(blurred_img, vertical_kernel,   padding=(padding_size, 0), groups=num_channels)

        # (b, c, h, w) to (b, h, w, c)
        blurred_img = blurred_img.permute(0, 2, 3, 1)

        return (blurred_img,)
class Image_Crop_Location_Exact:
    """Crop a rectangle out of an image batch and report the crop geometry."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "image":  ("IMAGE",),
                "x":      ("INT", {"default": 0,   "max": 10000000, "min": 0, "step": 1}),
                "y":      ("INT", {"default": 0,   "max": 10000000, "min": 0, "step": 1}),
                "width":  ("INT", {"default": 256, "max": 10000000, "min": 1, "step": 1}),
                "height": ("INT", {"default": 256, "max": 10000000, "min": 1, "step": 1}),
                "edge":   (["original", "short", "long"],),
            }
        }

    RETURN_TYPES = ("IMAGE", "CROP_DATA",)
    RETURN_NAMES = ("image", "crop_data",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/images"

    def main(self, image, x=0, y=0, width=256, height=256, edge="original"):
        """Crop ``image`` and return ``(cropped_image, crop_data)``.

        Args:
            image: 4D tensor unpacked as (batch, height, width, channels).
            x, y: top-left corner of the requested rectangle.
            width, height: requested rectangle size.
            edge: "short" makes the rectangle square using the smaller of
                width/height, "long" uses the larger; anything else keeps
                the requested size.

        Returns:
            The cropped tensor and
            ``((crop_width, crop_height), (left, top, right, bottom))``.
            The rectangle is clipped to the image bounds.

        Raises:
            ValueError: if ``image`` is not 4D or the clipped rectangle is
                empty.
        """
        if image.dim() != 4:
            # The message used to claim a channels-first layout, which
            # contradicts how the shape is unpacked below.
            raise ValueError("Expected a 4D tensor (batch, height, width, channels).")

        if edge == "short":
            width = height = min(width, height)
        elif edge == "long":
            width = height = max(width, height)

        batch_size, img_height, img_width, channels = image.size()

        crop_left   = max(x, 0)
        crop_top    = max(y, 0)
        crop_right  = min(x + width,  img_width)
        crop_bottom = min(y + height, img_height)

        crop_width  = crop_right  - crop_left
        crop_height = crop_bottom - crop_top
        if crop_width <= 0 or crop_height <= 0:
            raise ValueError("Invalid crop dimensions. Please check the values for x, y, width, and height.")

        cropped_image = image[:, crop_top:crop_bottom, crop_left:crop_right, :]
        crop_data     = ((crop_width, crop_height), (crop_left, crop_top, crop_right, crop_bottom))

        return cropped_image, crop_data
class Masks_Unpack8:
    """Node exposing eight MASK outputs filled from one iterable of masks."""

    _OUTPUT_COUNT = 8

    @classmethod
    def INPUT_TYPES(s):
        required_inputs = {"masks": ("MASK",)}
        return {"required": required_inputs}

    RETURN_TYPES = ("MASK",)  * _OUTPUT_COUNT
    RETURN_NAMES = ("masks",) * _OUTPUT_COUNT
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/masks"
    DESCRIPTION  = "Unpack a list of masks into separate outputs."

    def main(self, masks,):
        # Iterating `masks` yields one element per output slot; the tuple has
        # as many entries as the iterable provides.
        return tuple(masks)
class Masks_From_Color_Swatches:
    """Turn a multicolor region image into one mask per swatch color."""

    @classmethod
    def INPUT_TYPES(s):
        required_inputs = {
            "image_color_mask": ("IMAGE",),
            "color_swatches":   ("COLOR_SWATCHES",),
        }
        return {"required": required_inputs}

    RETURN_TYPES = ("MASK",)
    RETURN_NAMES = ("masks",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/images"
    DESCRIPTION  = "Create masks from a multicolor image using color swatches to identify regions. Returns them as a list."

    def main(self, image_color_mask, color_swatches):
        # Quantize the image to 8-bit values before color matching.
        pixels_u8 = (image_color_mask * 255).round().clamp(0, 255).to(torch.uint8)

        # squeeze() removes every size-1 axis before the array is handed to
        # the mask builder (presumably leaving H x W x 3 for a single image -
        # verify against callers).
        region_masks = cleanup_and_fill_masks(
            build_masks_from_swatch(pixels_u8.squeeze().numpy(), color_swatches, tol=8)
        )

        # One mask per swatch, stacked on a new first axis, plus a singleton
        # axis inserted at position 1.
        stacked = torch.stack(region_masks, dim=0).unsqueeze(1)
        return (stacked,)
def read_swatch_colors(
    img,
    ignore:       Tuple[int,int,int] = (-1,-1,-1),
    min_fraction: float              = 0.2
) -> List[Tuple[int,int,int]]:
    """
    Extract the dominant colors of a swatch image, ordered top to bottom.

    1. Count every unique color in `img` (an H x W x 3 array), skipping `ignore`.
    2. Discard any color whose count < (min_fraction * largest_count).
    3. Sort the remaining colors by the first row (y) in which they appear.

    Returns a list of (r, g, b) tuples of Python ints; empty if no color
    other than `ignore` is present.
    """
    H, W, _ = img.shape
    flat    = img.reshape(-1, 3)

    # One pass gives the colors, their pixel counts and the flat index of each
    # color's first occurrence. In row-major order that index // W is the
    # topmost row containing the color, so no per-color image scan is needed.
    colors, first_idx, counts = np.unique(flat, axis=0, return_index=True, return_counts=True)

    candidates = []
    for color, idx, cnt in zip(colors, first_idx, counts):
        rgb = tuple(color.tolist())
        if rgb != ignore:
            candidates.append((rgb, int(cnt), int(idx) // W))
    if not candidates:
        return []

    # keep only colors that are large relative to the biggest band
    max_cnt = max(cnt for _, cnt, _ in candidates)
    kept    = [(rgb, y) for rgb, cnt, y in candidates if cnt >= max_cnt * min_fraction]

    # sort top -> bottom (stable, so ties keep np.unique's color order)
    kept.sort(key=lambda item: item[1])
    return [rgb for rgb, _ in kept]
Normalize mask_img → uint8 H×W×3 (handles float [0,1] or [0,255], channel-first too). 2. Bin every pixel into buckets of size `tol`. 3. Detect user-painted region (non-black). 4. In swatch order, claim all exact matches (first-wins). 5. Fill in any *painted but unclaimed* pixel by nearest‐swatch in RGB distance. Returns a list of BoolTensors [H,W], one per swatch color. """ # --- 1) ensure H×W×3 uint8 --- img = mask_img # channel-first → channel-last if img.ndim == 3 and img.shape[0] == 3: img = np.transpose(img, (1,2,0)) # float → uint8 if np.issubdtype(img.dtype, np.floating): m = img.max() if m <= 1.01: img = (img * 255.0).round() else: img = img.round() img = img.clip(0,255).astype(np.uint8) H, W, _ = img.shape # --- 2) bin into tol-sized buckets --- binned = (img // tol) * tol # still uint8 # --- 3) painted region mask (non-black) --- painted = np.any(img != 0, axis=2) # H×W bool # --- snap swatch colors into same buckets --- snapped = np.array([ ((np.array(c)//tol)*tol).astype(np.uint8) for c in swatch_colors ]) # C×3 claimed = np.zeros((H, W), dtype=bool) masks = [] # --- 4) first-pass exact matches --- for s in snapped: m = ( (binned[:,:,0] == s[0]) & (binned[:,:,1] == s[1]) & (binned[:,:,2] == s[2]) ) m &= ~claimed masks.append(torch.from_numpy(m)) claimed |= m # --- 5) fill-in only within painted & unclaimed pixels --- miss = painted & (~claimed) if miss.any(): flat = binned.reshape(-1,3).astype(int) # (H*W)×3 flat_miss = miss.reshape(-1) # (H*W,) # squared RGB distances to each swatch: → (H*W)×C d2 = np.sum((flat[:,None,:] - snapped[None,:,:])**2, axis=2) nearest = np.argmin(d2, axis=1) # (H*W,) for i in range(len(masks)): assign = (flat_miss & (nearest == i)).reshape(H, W) masks[i] = masks[i] | torch.from_numpy(assign) return masks import numpy as np import torch from typing import List from collections import deque def _remove_small_components( mask: np.ndarray, rel_thresh: float = 0.01 ) -> np.ndarray: """ Remove connected components smaller than 
def cleanup_and_fill_masks(
    masks:      List[torch.Tensor],
    rel_thresh: float = 0.01
) -> List[torch.Tensor]:
    """
    Prune small islands from each mask, then hand out the pixels that no
    mask claims any more.

    1) Every mask is passed through `_remove_small_components` with `rel_thresh`.
    2) Each pixel left unclaimed by all masks is given to the mask with the
       highest count among its 8 neighbours (np.argmax, so ties and all-zero
       counts go to the lowest mask index). Neighbour lookups use np.roll and
       therefore wrap around the image borders.
    """
    # C x H x W stack of the per-swatch masks
    stack = np.stack([m.cpu().numpy() for m in masks], axis=0)
    n_masks = stack.shape[0]
    _, H, W = stack.shape

    # component pruning, one mask at a time
    for idx in range(n_masks):
        stack[idx] = _remove_small_components(stack[idx], rel_thresh)

    # pixels not owned by any mask after pruning
    unclaimed = ~stack.any(axis=0)                       # H x W

    # per-mask count of set pixels among the 8 surrounding positions
    offsets = [(dy, dx) for dy in (1, -1, 0) for dx in (0, 1, -1) if (dy, dx) != (0, 0)]
    neighbor_counts = np.zeros_like(stack, int)
    for dy, dx in offsets:
        neighbor_counts += np.roll(stack, shift=(dy, dx), axis=(1, 2))

    if unclaimed.any():
        best = np.argmax(neighbor_counts, axis=0)        # H x W
        for idx in range(n_masks):
            stack[idx][unclaimed & (best == idx)] = True

    # back to torch
    return [torch.from_numpy(stack[idx]) for idx in range(n_masks)]
class MaskBoundingBoxAspectRatio:
    """Crop an image and mask to the mask's bounding box, grown to an aspect ratio.

    The bounding box of the first mask in the batch is padded, then widened
    or heightened (never shrunk) toward ``aspect_ratio`` while staying inside
    the mask's frame.
    """

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "padding":      ("INT",     {"default": 0,   "min": 0,    "max": 4096, "step": 1}),
                "blur":         ("INT",     {"default": 0,   "min": 0,    "max": 256,  "step": 1}),
                "aspect_ratio": ("FLOAT",   {"default": 1.0, "min": 0.01, "max": 10.0, "step": 0.01}),
                "transpose":    ("BOOLEAN", {"default": False}),
            },
            "optional": {
                "image": ("IMAGE",),
                "mask":  ("MASK",),
            },
        }

    RETURN_TYPES = ("IMAGE","MASK","MASK","INT","INT","INT","INT")
    RETURN_NAMES = ("image","mask","mask_blurred","x","y","width","height")
    FUNCTION     = "execute"
    CATEGORY     = "essentials/mask"

    @staticmethod
    def _grow_span(lo, hi, target, limit):
        """Grow the interval [lo, hi) to length ``target`` inside [0, limit)."""
        if hi == limit:                 # flush with the far edge: grow toward 0
            return limit - target, limit
        if lo == 0:                     # flush with the near edge: grow away from 0
            return 0, target
        off    = (target - (hi - lo)) // 2
        new_lo = max(0, lo - off)
        new_hi = new_lo + target
        if new_hi > limit:              # ran past the far edge: shift back inside
            new_hi = limit
            new_lo = limit - target
        return new_lo, new_hi

    def execute(self, mask=None, padding=0, blur=0, aspect_ratio=1.0, transpose=False, image=None):
        """Return (image, mask, mask_blurred, x, y, width, height) for the crop.

        Args:
            mask: (H, W) or (B, H, W) mask; only ``mask[0]`` defines the box.
            padding: pixels added around the tight bounding box.
            blur: if > 0, ``mask_blurred`` gets a linear falloff of this many
                pixels outside the mask (distance transform); else it is a
                copy of the mask.
            aspect_ratio: desired width / height of the crop.
            transpose: if false, widening is tried before heightening; if
                true, the order is reversed. Only one axis is ever grown.
            image: optional image; resized to the mask's size if it differs.
                When omitted, the mask repeated over 3 channels is used.

        Raises:
            ValueError: if no mask is given or the mask has no pixel > 0.
        """
        # "mask" is listed as an optional input, so it may not be supplied.
        if mask is None:
            raise ValueError("MaskBoundingBoxAspectRatio requires a mask input.")

        if mask.dim() == 2:
            mask = mask.unsqueeze(0)
        B, H, W = mask.shape
        hard = mask.clone()

        # build outward-only "blurred" mask via distance transform
        if blur > 0:
            m_bool = hard[0].cpu().numpy().astype(bool)
            d_out  = distance_transform_edt(~m_bool)
            d_in   = distance_transform_edt( m_bool)
            alpha  = np.zeros_like(d_out, np.float32)
            alpha[d_in>0] = 1.0
            ramp   = np.clip(1.0 - (d_out / blur), 0.0, 1.0)
            alpha[d_out>0] = ramp[d_out>0]
            mask_blur_full = torch.from_numpy(alpha)[None,...].to(hard.device)
        else:
            mask_blur_full = hard.clone()

        # calc tight bbox + padding on the "hard" mask
        ys, xs = torch.where(hard[0] > 0)
        if xs.numel() == 0:
            # min()/max() on an empty tensor would otherwise fail with an
            # unrelated-looking reduction error.
            raise ValueError("Mask is empty: cannot compute a bounding box.")
        x1 = max(0, int(xs.min()) - padding)
        x2 = min(W, int(xs.max()) + 1 + padding)
        y1 = max(0, int(ys.min()) - padding)
        y2 = min(H, int(ys.max()) + 1 + padding)
        w0 = x2 - x1
        h0 = y2 - y1

        if image is None:
            img_full = hard.unsqueeze(-1).repeat(1,1,1,3).to(torch.float32)
        else:
            img_full = image
            if img_full.shape[1:3] != (H, W):
                img_full = comfy.utils.common_upscale(
                    img_full.permute(0,3,1,2),
                    W, H, upscale_method="bicubic", crop="center"
                ).permute(0,2,3,1)

        ar    = aspect_ratio
        req_w = math.ceil(h0 * ar)     # how wide we'd need to be to hit AR at h0
        req_h = math.floor(w0 / ar)    # how tall we'd need to be to hit AR at w0

        new_x1, new_x2 = x1, x2
        new_y1, new_y2 = y1, y2

        widen   = req_w > w0
        heighten = req_h > h0
        # Exactly one axis is grown; `transpose` picks which one gets priority.
        grow_width = widen if not transpose else (widen and not heighten)
        if grow_width:
            new_x1, new_x2 = self._grow_span(x1, x2, min(W, req_w), W)
        elif heighten:
            new_y1, new_y2 = self._grow_span(y1, y2, min(H, req_h), H)

        final_w = new_x2 - new_x1
        final_h = new_y2 - new_y1

        # done... crop image & masks
        img_crop     = img_full[:, new_y1:new_y2, new_x1:new_x2, :]
        mask_crop    = hard[:, new_y1:new_y2, new_x1:new_x2 ]
        mask_blurred = mask_blur_full[:, new_y1:new_y2, new_x1:new_x2]

        return (
            img_crop,
            mask_crop,
            mask_blurred,
            new_x1,
            new_y1,
            final_w,
            final_h,
        )
return { "required": { "model": ("MODEL",), "latent_target": ("LATENT", ), "latent_source": ("LATENT", ), }, "optional": { "mask_target": ("MASK", ), "mask_source": ("MASK", ), "extra_options": ("STRING", {"default": "", "multiline": True}), } } RETURN_TYPES = ("LATENT",) RETURN_NAMES = ("latent_matched",) CATEGORY = "RES4LYF/latents" FUNCTION = "main" def main(self, model, latent_target, mask_target, latent_source, mask_source, extra_options): dtype = latent_target['samples'].dtype exclude_channels_match = re.search(r"exclude_channels=([\d,]+)", extra_options) exclude_channels = [] if exclude_channels_match: exclude_channels = [int(ch.strip()) for ch in exclude_channels_match.group(1).split(",")] if re.search(r"\bdisable_process_latent\b", extra_options): x_target = latent_target['samples'].clone() x_source = latent_source['samples'].clone() else: #x_target = model.inner_model.inner_model.process_latent_in(latent_target['samples']).clone() #x_source = model.inner_model.inner_model.process_latent_in(latent_source['samples']).clone() x_target = model.model.process_latent_in(latent_target['samples']).clone().to(torch.float64) x_source = model.model.process_latent_in(latent_source['samples']).clone().to(torch.float64) if mask_target is None: mask_target = torch.ones_like(x_target) else: mask_target = mask_target.unsqueeze(1) mask_target = mask_target.repeat(1, x_target.shape[1], 1, 1) mask_target = F.interpolate(mask_target, size=(x_target.shape[2], x_target.shape[3]), mode='bilinear', align_corners=False) mask_target = mask_target.to(x_target.dtype).to(x_target.device) if mask_source is None: mask_source = torch.ones_like(x_target) else: mask_source = mask_source.unsqueeze(1) mask_source = mask_source.repeat(1, x_target.shape[1], 1, 1) mask_source = F.interpolate(mask_source, size=(x_target.shape[2], x_target.shape[3]), mode='bilinear', align_corners=False) mask_source = mask_source.to(x_target.dtype).to(x_target.device) x_target_masked = x_target * 
((mask_target==1)*mask_target) x_target_masked_inv = x_target - x_target_masked #x_source_masked = x_source * ((mask_source==1)*mask_source) x_matched = torch.zeros_like(x_target) for n in range(x_matched.shape[1]): if n in exclude_channels: x_matched[0][n] = x_target[0][n] continue x_target_masked_values = x_target[0][n][mask_target[0][n] == 1] x_source_masked_values = x_source[0][n][mask_source[0][n] == 1] x_target_masked_values_mean = x_target_masked_values.mean() x_target_masked_values_std = x_target_masked_values.std() x_target_masked_source_mean = x_source_masked_values.mean() x_target_masked_source_std = x_source_masked_values.std() x_target_mean = x_target.mean() x_target_std = x_target.std() x_source_mean = x_source.mean() x_source_std = x_source.std() if re.search(r"\benable_std\b", extra_options) == None: x_target_std = x_target_masked_values_std = x_target_masked_source_std = 1 if re.search(r"\bdisable_mean\b", extra_options): x_target_mean = x_target_masked_values_mean = x_target_masked_source_mean = 1 if re.search(r"\bdisable_masks\b", extra_options): x_matched[0][n] = (x_target[0][n] - x_target_mean) / x_target_std x_matched[0][n] = (x_matched[0][n] * x_source_std) + x_source_mean else: x_matched[0][n] = (x_target_masked[0][n] - x_target_masked_values_mean) / x_target_masked_values_std x_matched[0][n] = (x_matched[0][n] * x_target_masked_source_std) + x_target_masked_source_mean x_matched[0][n] = x_target_masked_inv[0][n] + x_matched[0][n] * ((mask_target[0][n]==1)*mask_target[0][n]) if re.search(r"\bdisable_process_latent\b", extra_options) == None: x_matched = model.model.process_latent_out(x_matched).clone() return ({"samples": x_matched.to(dtype)}, ) ================================================ FILE: latents.py ================================================ import torch import torch.nn.functional as F from typing import Tuple, List, Union import math # TENSOR PROJECTION OPS def get_cosine_similarity_manual(a, b): return (a * b).sum() / 
def get_pearson_similarity(a, b, mask=None, dim=0, norm_dim=None):
    """Cosine similarity of ``a`` and ``b`` after subtracting their means.

    When both inputs are 5D and ``b`` has size 1 on axis 2, ``b`` is expanded
    along that axis to match ``a``. If ``norm_dim`` is not given, the mean is
    taken over ``(-2, -1)`` for 4D input and ``(-4, -2, -1)`` for 5D input.
    An optional ``mask`` multiplies both centred tensors before they are
    flattened and compared.
    """
    single_slice_b = a.ndim == 5 and b.ndim == 5 and b.shape[2] == 1
    if single_slice_b:
        b = b.expand(-1, -1, a.shape[2], -1, -1)

    if norm_dim is None:
        # Other ranks leave norm_dim as None, exactly as before.
        norm_dim = {4: (-2, -1), 5: (-4, -2, -1)}.get(a.ndim)

    a_centered = a - a.mean(dim=norm_dim, keepdim=True)
    b_centered = b - b.mean(dim=norm_dim, keepdim=True)

    if mask is None:
        lhs, rhs = a_centered, b_centered
    else:
        lhs, rhs = mask * a_centered, mask * b_centered

    return F.cosine_similarity(lhs.flatten(), rhs.flatten(), dim=dim)
def attention_weights_orig(q, k):
    """Softmax attention weights ``softmax(q @ k^T / sqrt(d))`` computed in place.

    The score matrix is normalised with in-place ops so that no second
    full-size buffer is allocated. The row maximum is subtracted before
    exponentiation; this leaves the softmax mathematically unchanged but stops
    ``exp`` overflowing to ``inf`` (which previously turned whole rows into
    NaN and then into zeros).

    Args:
        q: query tensor, last dimension is the feature dimension.
        k: key tensor with the same feature dimension.

    Returns:
        Attention weights over the second-to-last axis of ``k``; any remaining
        NaN/inf is replaced by 0.0 / 65504. / -65504.
    """
    scores = torch.matmul(q, k.transpose(-2, -1))
    scores.div_(math.sqrt(q.size(-1)))
    if scores.size(-1) > 0:
        # amax cannot reduce over an empty axis; nothing to stabilise in that case.
        scores.sub_(scores.amax(dim=-1, keepdim=True))
    torch.exp(scores, out=scores)
    scores /= torch.sum(scores, dim=-1, keepdim=True)
    return scores.nan_to_num_(0.0, 65504., -65504.)
def get_slerp_ratio(cos_sim_A, cos_sim_B, target_cos):
    """Return the interpolation ratio in [0, 1] whose angle matches ``target_cos``.

    Each cosine is converted to an angle and the target angle is located
    linearly between the angles of A and B. Cosines are clamped to [-1, 1]
    first, because values such as 1.0000001 produced by floating-point
    rounding would otherwise make ``math.acos`` raise ``ValueError``.

    Returns:
        0.5 when the two angles are (nearly) equal, otherwise the clamped ratio.
    """
    import math

    def _angle(cos_value):
        return math.acos(max(-1.0, min(1.0, float(cos_value))))

    alpha = _angle(cos_sim_A)
    beta = _angle(cos_sim_B)
    delta = _angle(target_cos)

    if abs(beta - alpha) < 1e-6:
        return 0.5

    t = (delta - alpha) / (beta - alpha)
    return max(0.0, min(1.0, t))
def normalize_zscore(x, channelwise=False, inplace=False):
    """Standardise ``x`` to zero mean and unit standard deviation.

    Args:
        x: tensor to normalise.
        channelwise: if True, statistics are taken over the last two axes
            separately for every leading index; otherwise over the whole tensor.
        inplace: if True, ``x`` itself is modified and returned.

    Returns:
        The normalised tensor (``x`` itself when ``inplace`` is True).
    """
    if channelwise:
        mean = x.mean(dim=(-2, -1), keepdim=True)
        std = x.std(dim=(-2, -1), keepdim=True)
    else:
        mean = x.mean()
        std = x.std()

    if inplace:
        return x.sub_(mean).div_(std)
    # The previous channelwise branch evaluated x - (mean / std) because of a
    # misplaced parenthesis; the subtraction must happen before the division.
    return (x - mean) / std
def line_intersection(a: torch.Tensor, d1: torch.Tensor, b: torch.Tensor, d2: torch.Tensor, eps=1e-8) -> torch.Tensor:
    """
    Intersection (or midpoint of closest approach) of two lines in R^D.

    Line 1 is ``x = a + t * d1`` and line 2 is ``x = b + s * d2``. The
    parameters ``t`` and ``s`` of the mutually closest points are obtained by
    solving the 2x2 system

        t * (d1.d1) - s * (d1.d2) = r.d1
        t * (d1.d2) - s * (d2.d2) = r.d2        with r = b - a

    by determinants. A determinant smaller in magnitude than ``eps`` (parallel
    lines) is replaced by ``eps`` to avoid dividing by zero.

    All inputs have shape (D,) or (B, D); the result has the same shape. If
    the lines truly intersect the two closest points coincide, so their
    average is the intersection point.
    """
    def _dot(u, v):
        return (u * v).sum(dim=-1, keepdim=True)

    offset = b - a

    d1d1 = _dot(d1, d1)
    d2d2 = _dot(d2, d2)
    d1d2 = _dot(d1, d2)
    r_d1 = _dot(offset, d1)
    r_d2 = _dot(offset, d2)

    denom = d1d1 * d2d2 - d1d2 * d1d2
    too_small = denom.abs() < eps
    denom = torch.where(too_small, torch.full_like(denom, eps), denom)

    t = (r_d1 * d2d2 - r_d2 * d1d2) / denom
    s = (r_d1 * d1d2 - r_d2 * d1d1) / denom

    closest_on_first = a + t * d1
    closest_on_second = b + s * d2
    return (closest_on_first + closest_on_second) / 2
def slerp_tensor(val: torch.Tensor, low: torch.Tensor, high: torch.Tensor, dim=-3) -> torch.Tensor:
    """Spherical interpolation between ``low`` and ``high`` along ``dim``.

    Args:
        val: interpolation weight; a Python number (int or float) or a tensor
            expandable to ``low``'s shape. 0 yields ``low``, 1 yields ``high``.
        low, high: tensors of identical shape.
        dim: axis (or axes) treated as the vector dimension. It is overridden
            to -3 for 4D input and -4 for 5D input when ``low.shape[-3] > 1``,
            and to ``(-2, -1)`` for 2D input.

    Returns:
        The interpolated tensor. Where the normalised vectors are nearly
        parallel or anti-parallel (|dot| > 0.9995) plain linear interpolation
        is used instead, which also masks the division by ``sin(omega) ~ 0``.
    """
    if low.ndim == 4 and low.shape[-3] > 1:
        dim = -3
    elif low.ndim == 5 and low.shape[-3] > 1:
        dim = -4
    elif low.ndim == 2:
        dim = (-2, -1)

    # Accept any Python scalar: an exact-type check for float let ints such as
    # 0 or 1 fall through and fail on `.shape` below.
    if isinstance(val, (int, float)):
        val = torch.Tensor([val]).expand_as(low).to(low.dtype).to(low.device)
    if val.shape != low.shape:
        val = val.expand_as(low)

    low_norm = low / torch.norm(low, dim=dim, keepdim=True)
    high_norm = high / torch.norm(high, dim=dim, keepdim=True)

    dot = (low_norm * high_norm).sum(dim=dim, keepdim=True).clamp(-1.0, 1.0)
    use_lerp = torch.logical_or(dot > 0.9995, dot < -0.9995)

    omega = torch.acos(dot)
    so = torch.sin(omega)

    factor_low = torch.sin((1 - val) * omega) / so
    factor_high = torch.sin(val * omega) / so

    res = factor_low * low + factor_high * high
    return torch.where(use_lerp, low * (1 - val) + high * val, res)
https://gist.github.com/dvschultz/3af50c40df002da3b751efab1daddf2c # most of the extra complexity is to support: # - many-dimensional vectors # - v0 or v1 with last dim all zeroes, or v0 ~colinear with v1 # - falls back to lerp() # - conditional logic implemented with parallelism rather than Python loops # - many-dimensional tensor for t # - you can ask for batches of slerp outputs by making t more-dimensional than the vectors # - slerp( # v0: torch.Size([2,3]), # v1: torch.Size([2,3]), # t: torch.Size([4,1,1]), # ) # - this makes it interface-compatible with lerp() def slerp(v0: FloatTensor, v1: FloatTensor, t: float|FloatTensor, DOT_THRESHOLD=0.9995): ''' Spherical linear interpolation Args: v0: Starting vector v1: Final vector t: Float value between 0.0 and 1.0 DOT_THRESHOLD: Threshold for considering the two vectors as colinear. Not recommended to alter this. Returns: Interpolation vector between v0 and v1 ''' assert v0.shape == v1.shape, "shapes of v0 and v1 must match" # Normalize the vectors to get the directions and angles v0_norm: FloatTensor = norm(v0, dim=-1) v1_norm: FloatTensor = norm(v1, dim=-1) v0_normed: FloatTensor = v0 / v0_norm.unsqueeze(-1) v1_normed: FloatTensor = v1 / v1_norm.unsqueeze(-1) # Dot product with the normalized vectors dot: FloatTensor = (v0_normed * v1_normed).sum(-1) dot_mag: FloatTensor = dot.abs() # if dp is NaN, it's because the v0 or v1 row was filled with 0s # If absolute value of dot product is almost 1, vectors are ~colinear, so use lerp gotta_lerp: LongTensor = dot_mag.isnan() | (dot_mag > DOT_THRESHOLD) can_slerp: LongTensor = ~gotta_lerp t_batch_dim_count: int = max(0, t.ndim-v0.ndim) if isinstance(t, Tensor) else 0 t_batch_dims: Size = t.shape[:t_batch_dim_count] if isinstance(t, Tensor) else Size([]) out: FloatTensor = zeros_like(v0.expand(*t_batch_dims, *[-1]*v0.ndim)) # if no elements are lerpable, our vectors become 0-dimensional, preventing broadcasting if gotta_lerp.any(): lerped: FloatTensor = lerp(v0, v1, t) 
def normalize_latent(target, source=None, mean=True, std=True, set_mean=None, set_std=None, channelwise=True):
    """Normalise ``target`` and optionally re-scale it to the statistics of ``source``.

    Args:
        target: tensor with a leading batch axis (and a channel axis when
            ``channelwise``), or a list/tuple of such tensors.
        source: optional tensor (or list/tuple matching ``target``) whose
            mean/std are applied after normalisation.
        mean: centre the target (and add the source mean when a source is given).
        std: divide by the target std (and multiply by the source std when a
            source is given).
        set_mean, set_std: values used in place of the source statistics. As
            before, they only take effect when ``source`` is provided.
        channelwise: statistics per (batch, channel) pair if True, per batch
            item otherwise.

    Returns:
        A new tensor, or a list of tensors when ``target`` is a list/tuple.
        Inputs are never modified.

    Raises:
        ValueError: if ``target`` is a list/tuple and ``source`` is given but is
            not a list/tuple of the same length.

    Fixes relative to the earlier version: ``source=None`` no longer fails on
    indexing ``None``; list/tuple targets no longer fail on ``.clone()``; and
    ``mean=False, std=False`` returns the target unchanged instead of zeros.
    """
    def normalize_single_latent(single_target, single_source=None):
        if not (mean or std):
            return single_target.clone()

        lead = 2 if channelwise else 1  # leading axes that keep their own statistics
        lead_shape = single_target.shape[:lead]
        flat = single_target.reshape(*lead_shape, -1)

        out = flat
        if mean:
            out = out - flat.mean(dim=-1, keepdim=True)
        if std:
            out = out / flat.std(dim=-1, keepdim=True)

        if single_source is not None:
            src = single_source.reshape(*single_source.shape[:lead], -1)
            # Use only the source entries that line up with the target's leading axes.
            src = src[tuple(slice(0, n) for n in lead_shape)]
            if std:
                scale = src.std(dim=-1, keepdim=True) if set_std is None else set_std
                out = out * scale
            if mean:
                shift = src.mean(dim=-1, keepdim=True) if set_mean is None else set_mean
                out = out + shift

        return out.reshape(single_target.shape).to(single_target.dtype)

    if isinstance(target, (list, tuple)):
        if source is None:
            return [normalize_single_latent(t) for t in target]
        if not isinstance(source, (list, tuple)) or len(source) != len(target):
            raise ValueError("If target is a list/tuple, source must be a list/tuple of the same length.")
        return [normalize_single_latent(t, s) for t, s in zip(target, source)]

    return normalize_single_latent(target, source)
def get_edge_mask(mask: torch.Tensor, dilation: int = 3) -> torch.Tensor:
    """Return a float mask marking the (dilated) inner edge of ``mask``.

    The mask is eroded with a 3x3 min-pool, the removed pixels form the edge,
    and the edge is then grown with a ``dilation`` x ``dilation`` max-pool.

    Args:
        mask: mask tensor that squeezes to 2D (e.g. ``[1, H, W]`` or ``[H, W]``).
        dilation: dilation kernel size; 0 returns ``mask`` unchanged.

    Returns:
        Float tensor with the shape and device of ``mask``.

    The computation now runs on the mask's own device. It used to move the
    mask to ``'cuda'`` unconditionally, which raised on machines without CUDA.
    """
    if dilation == 0:  # safeguard for zero kernel size...
        return mask

    pooled_in = mask.squeeze().float().unsqueeze(0).unsqueeze(0)

    # Min-pool expressed as a negated max-pool.
    eroded = -F.max_pool2d(-pooled_in, kernel_size=3, stride=1, padding=1)
    edge = ((pooled_in - eroded) > 0).float()

    dilated_edge = F.max_pool2d(edge, kernel_size=dilation, stride=1, padding=dilation // 2)
    dilated_edge = dilated_edge.squeeze(0).squeeze(0)

    # An even kernel yields one extra row/column; crop back to the input size.
    cropped = dilated_edge[..., :mask.shape[-2], :mask.shape[-1]]
    return cropped.reshape(mask.shape).to(mask.device)
def untile_latent(tiles: torch.Tensor,
                  orig_shape: Tuple[int,...],
                  tile_hw: Tuple[int,int],
                  positions: Tuple[List[int],List[int]]
                  ) -> torch.Tensor:
    """
    Reconstruct a latent from tiles and their start positions.

    Works for a 4D ``(B, C, H, W)`` or 5D ``(B, C, T, H, W)`` original.
    Overlapping regions are averaged.

    Args:
        tiles:      ``[rows*cols*B, C, (T,), t_h, t_w]`` -- tiles concatenated
                    along axis 0, one full batch per tile, rows outermost
                    (the order in which ``tile_latent`` concatenates them).
        orig_shape: shape of the original latent.
        tile_hw:    ``(t_h, t_w)``.
        positions:  ``(pos_h, pos_w)`` start offsets of the tile rows / columns.

    Returns:
        Reconstructed latent of shape ``orig_shape``.

    The tiles used to be viewed as ``(B, rows, cols, ...)``, which only matches
    the tile-major layout when ``B == 1``; larger batches were scrambled.
    """
    batch = orig_shape[0]
    t_h, t_w = tile_hw
    pos_h, pos_w = positions
    rows, cols = len(pos_h), len(pos_w)

    out = torch.zeros(*orig_shape, device=tiles.device, dtype=tiles.dtype)
    count = torch.zeros_like(out)

    # Tile-major layout: axis 0 enumerates (row, col, batch) in that order.
    tiles = tiles.reshape(rows, cols, batch, *tiles.shape[1:])

    for i, y in enumerate(pos_h):
        for j, x in enumerate(pos_w):
            out[..., y:y+t_h, x:x+t_w] += tiles[i, j]
            count[..., y:y+t_h, x:x+t_w] += 1

    valid = count > 0
    out[valid] = out[valid] / count[valid]
    return out
def gaussian_blur_2d(img: torch.Tensor, sigma: float, kernel_size: int = None) -> torch.Tensor:
    """Blur a ``[B, C, H, W]`` image with a Gaussian kernel applied per channel.

    If ``kernel_size`` is omitted it is derived as ``2 * ceil(3 * sigma) + 1``;
    an even size is bumped to the next odd number. The image is reflect-padded
    so the output has the same spatial size as the input.
    """
    _, channels, _, _ = img.shape  # also enforces a 4D input

    if kernel_size is None:
        kernel_size = int(2 * math.ceil(3 * sigma) + 1)
    if kernel_size % 2 == 0:
        kernel_size += 1
    half = kernel_size // 2

    # 1D Gaussian weights built in float64, normalised to sum to 1.
    offsets = torch.arange(kernel_size, dtype=torch.float64) - half
    weights = torch.exp(-0.5 * (offsets / sigma) ** 2)
    weights = weights / weights.sum()

    # Separable kernel as an outer product, cast to the image's dtype/device.
    kernel = (weights[:, None] * weights[None, :]).to(dtype=img.dtype, device=img.device)
    kernel = kernel.expand(channels, 1, kernel_size, kernel_size)

    padded = F.pad(img, (half, half, half, half), mode='reflect')
    return F.conv2d(padded, kernel, groups=channels)
Args: obj: The object to traverse (dict, list, tuple, tensor, etc.) ref_shape: Reference tensor shape to match against modify_func: Function to apply to matching tensors. Should accept (tensor, *args, **kwargs) *args, **kwargs: Additional arguments passed to modify_func Returns: Modified structure with applicable tensors transformed """ import torch if isinstance(obj, torch.Tensor): if obj.ndim >= 5: # Check if last 5 dims match reference obj_last5 = obj.shape[-5:] ref_last5 = ref_shape[-5:] if len(ref_shape) >= 5 else ref_shape if obj_last5 == ref_last5: return modify_func(obj, *args, **kwargs) return obj if isinstance(obj, dict): changed = False out = {} for k, v in obj.items(): nv = apply_to_state_info_tensors(v, ref_shape, modify_func, *args, **kwargs) changed |= (nv is not v) out[k] = nv return out if changed else obj if isinstance(obj, list): changed = False out = [] for v in obj: nv = apply_to_state_info_tensors(v, ref_shape, modify_func, *args, **kwargs) changed |= (nv is not v) out.append(nv) return out if changed else obj if isinstance(obj, tuple): new_t = tuple(apply_to_state_info_tensors(v, ref_shape, modify_func, *args, **kwargs) for v in obj) if all(ov is nv for ov, nv in zip(obj, new_t)): return obj return new_t return obj ================================================ FILE: legacy/__init__.py ================================================ from . import legacy_samplers from . import legacy_sampler_rk from . import rk_sampler from . import samplers from . import samplers_extensions from . 
def add_legacy(NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS, extra_samplers):
    """Register the legacy nodes and samplers in the given mappings.

    All three dictionaries are updated in place and also returned.
    Display-name keys must be the same node ids used in
    ``NODE_CLASS_MAPPINGS``. Two of them previously did not match
    ("Legacy_SamplerRK" and "UltraSharkSampler_Tiled"), so those display
    names were never attached to a node.
    """
    NODE_CLASS_MAPPINGS.update({
        "Legacy_ClownSampler"                 : legacy_samplers.Legacy_SamplerRK,
        "Legacy_SharkSampler"                 : legacy_samplers.Legacy_SharkSampler,
        "Legacy_ClownsharKSampler"            : legacy_samplers.Legacy_ClownsharKSampler,
        "Legacy_ClownsharKSamplerGuides"      : legacy_samplers.Legacy_ClownsharKSamplerGuides,

        "ClownSampler"                        : samplers.ClownSampler,
        "ClownSamplerAdvanced"                : samplers.ClownSamplerAdvanced,
        "ClownsharKSampler"                   : samplers.ClownsharKSampler,

        "ClownsharKSamplerGuides"             : samplers_extensions.ClownsharKSamplerGuides,
        "ClownsharKSamplerGuide"              : samplers_extensions.ClownsharKSamplerGuide,

        "ClownOptions_SDE_Noise"              : samplers_extensions.ClownOptions_SDE_Noise,
        "ClownOptions_FrameWeights"           : samplers_extensions.ClownOptions_FrameWeights,

        "ClownInpaint"                        : samplers_extensions.ClownInpaint,
        "ClownInpaintSimple"                  : samplers_extensions.ClownInpaintSimple,

        "ClownsharKSamplerOptions"            : samplers_extensions.ClownsharKSamplerOptions,

        "ClownsharKSamplerAutomation"         : samplers_extensions.ClownsharKSamplerAutomation,
        "ClownsharKSamplerAutomation_Advanced": samplers_extensions.ClownsharKSamplerAutomation_Advanced,
        "SamplerOptions_TimestepScaling"      : samplers_extensions.SamplerOptions_TimestepScaling,
        "SamplerOptions_GarbageCollection"    : samplers_extensions.SamplerOptions_GarbageCollection,

        "UltraSharkSampler"                   : samplers.UltraSharkSampler,
        "UltraSharkSampler Tiled"             : samplers_tiled.UltraSharkSampler_Tiled,
    })

    NODE_DISPLAY_NAME_MAPPINGS.update({
        "Legacy_ClownSampler"                  : "Legacy_ClownSampler",
        "Legacy_SharkSampler"                  : "Legacy_SharkSampler",
        "Legacy_ClownsharKSampler"             : "Legacy_ClownsharKSampler",
        "Legacy_ClownsharKSamplerGuides"       : "Legacy_ClownsharKSamplerGuides",

        "ClownSampler"                         : "Legacy2_ClownSampler",
        "ClownSamplerAdvanced"                 : "Legacy2_ClownSamplerAdvanced",
        "ClownsharKSampler"                    : "Legacy2_ClownsharKSampler",

        "ClownsharKSamplerGuides"              : "Legacy2_ClownsharKSamplerGuides",
        "ClownsharKSamplerGuide"               : "Legacy2_ClownsharKSamplerGuide",

        "ClownOptions_SDE_Noise"               : "Legacy2_ClownOptions_SDE_Noise",
        "ClownOptions_FrameWeights"            : "Legacy2_ClownOptions_FrameWeights",

        "ClownInpaint"                         : "Legacy2_ClownInpaint",
        "ClownInpaintSimple"                   : "Legacy2_ClownInpaintSimple",

        "ClownsharKSamplerOptions"             : "Legacy2_ClownsharKSamplerOptions",

        "ClownsharKSamplerAutomation"          : "Legacy2_ClownsharKSamplerAutomation",
        "ClownsharKSamplerAutomation_Advanced" : "Legacy2_ClownsharKSamplerAutomation_Advanced",
        "SamplerOptions_TimestepScaling"       : "Legacy2_SamplerOptions_TimestepScaling",
        "SamplerOptions_GarbageCollection"     : "Legacy2_SamplerOptions_GarbageCollection",

        "UltraSharkSampler"                    : "Legacy2_UltraSharkSampler",
        "UltraSharkSampler Tiled"              : "Legacy2_UltraSharkSampler Tiled",
    })

    extra_samplers.update({
        #"res_2m"     : rk_sampler.sample_res_2m,
        #"res_2s"     : rk_sampler.sample_res_2s,
        #"res_3s"     : rk_sampler.sample_res_3s,
        #"res_5s"     : rk_sampler.sample_res_5s,
        #"res_6s"     : rk_sampler.sample_res_6s,
        #"res_2m_sde" : rk_sampler.sample_res_2m_sde,
        #"res_2s_sde" : rk_sampler.sample_res_2s_sde,
        #"res_3s_sde" : rk_sampler.sample_res_3s_sde,
        #"res_5s_sde" : rk_sampler.sample_res_5s_sde,
        #"res_6s_sde" : rk_sampler.sample_res_6s_sde,
        #"deis_2m"    : rk_sampler.sample_deis_2m,
        #"deis_3m"    : rk_sampler.sample_deis_3m,
        #"deis_4m"    : rk_sampler.sample_deis_4m,
        #"deis_2m_sde": rk_sampler.sample_deis_2m_sde,
        #"deis_3m_sde": rk_sampler.sample_deis_3m_sde,
        #"deis_4m_sde": rk_sampler.sample_deis_4m_sde,
        "rk"          : rk_sampler.sample_rk,
        "legacy_rk"   : legacy_sampler_rk.legacy_sample_rk,
    })

    return NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS, extra_samplers
def multiply_nested_tensors(structure, scalar):
    """Return a copy of ``structure`` with every tensor multiplied by ``scalar``.

    Lists and dicts are rebuilt recursively; tensors are scaled out of place;
    any other value (strings, numbers, None, ...) is passed through unchanged.
    """
    if isinstance(structure, torch.Tensor):
        return structure * scalar
    if isinstance(structure, list):
        return [multiply_nested_tensors(item, scalar) for item in structure]
    if isinstance(structure, dict):
        return {key: multiply_nested_tensors(value, scalar) for key, value in structure.items()}
    return structure


class ConditioningOrthoCollin:
    """Node that blends two conditionings using collinear/orthogonal projections.

    Only the first entry of each conditioning is read. The projections are
    produced by ``get_collinear`` / ``get_orthogonal`` from the helper module.
    """

    @classmethod
    def INPUT_TYPES(s):
        return {"required": {
            "conditioning_0": ("CONDITIONING", ),
            "conditioning_1": ("CONDITIONING", ),
            "t5_strength":   ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step":0.01}),
            "clip_strength": ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step":0.01}),
            }}
    RETURN_TYPES = ("CONDITIONING",)
    FUNCTION     = "combine"
    CATEGORY     = "RES4LYF/conditioning"

    def combine(self, conditioning_0, conditioning_1, t5_strength, clip_strength):
        """Blend the first entries of the two conditionings.

        The embedding becomes
        ``t5_strength * collinear(c0, c1) + (1 - t5_strength) * collinear(c1, c0)``.
        The pooled output moves from c0's pooled vector towards
        ``collinear(p0, p1) + orthogonal(p1, p0)`` by ``clip_strength``.

        The result is built on copies: the caller's conditioning list and
        metadata dict are left untouched (they used to be overwritten in
        place, which also altered every other consumer of the same object).

        Raises:
            KeyError: if either first entry has no ``'pooled_output'``.
        """
        emb_0, extras_0 = conditioning_0[0][0], conditioning_0[0][1]
        emb_1, extras_1 = conditioning_1[0][0], conditioning_1[0][1]

        # Only the two collinear projections feed the active formula. The
        # orthogonal t5 projections belonged to alternative formulas that were
        # already disabled, so they are no longer computed.
        t5_0_1_collin = get_collinear(emb_0, emb_1)
        t5_1_0_collin = get_collinear(emb_1, emb_0)

        pooled_0 = extras_0['pooled_output']
        pooled_1 = extras_1['pooled_output']
        pooled_0_1_collin = get_collinear (pooled_0.unsqueeze(0), pooled_1.unsqueeze(0)).squeeze(0)
        pooled_1_0_ortho  = get_orthogonal(pooled_1.unsqueeze(0), pooled_0.unsqueeze(0)).squeeze(0)
        pooled_combined   = pooled_0_1_collin + pooled_1_0_ortho

        new_extras = extras_0.copy()
        new_extras['pooled_output'] = pooled_0 + clip_strength * (pooled_combined - pooled_0)

        head = list(conditioning_0[0])
        head[0] = t5_strength * t5_0_1_collin + (1 - t5_strength) * t5_1_0_collin
        head[1] = new_extras

        return ([head, *conditioning_0[1:]], )
class CLIPTextEncodeFluxUnguided:
    """Encode separate clip_l / t5xxl prompts and report where each prompt ends."""

    @classmethod
    def INPUT_TYPES(s):
        return {"required": {
            "clip": ("CLIP", ),
            "clip_l": ("STRING", {"multiline": True, "dynamicPrompts": True}),
            "t5xxl": ("STRING", {"multiline": True, "dynamicPrompts": True}),
            }}
    RETURN_NAMES = ("conditioning", "clip_l_end", "t5xxl_end",)
    RETURN_TYPES = ("CONDITIONING","INT","INT",)
    FUNCTION = "encode"

    CATEGORY = "RES4LYF/conditioning"

    @staticmethod
    def _first_index_of(token_row, token_id):
        """Return the index of the first pair in ``token_row`` whose first item equals ``token_id``, or 0 if none does."""
        for index, pair in enumerate(token_row):
            if pair[0] == token_id:
                return index
        return 0

    def encode(self, clip, clip_l, t5xxl):
        """Tokenize both prompts, encode them and locate their end tokens.

        Returns:
            ``(conditioning, clip_l_end, t5xxl_end)`` where the two indices
            are the position of the first token with id 49407 in the ``'l'``
            row and of the first token with id 1 in the ``'t5xxl'`` row
            (0 when not found). Both indices are also stored in the
            conditioning metadata dict.
        """
        tokens = clip.tokenize(clip_l)
        tokens["t5xxl"] = clip.tokenize(t5xxl)["t5xxl"]

        clip_l_end = self._first_index_of(tokens['l'][0], 49407)
        # The t5xxl row is scanned over its own length. It used to be scanned
        # over the length of the clip_l row, which missed the end token when
        # the t5xxl prompt was longer and raised IndexError when it was shorter.
        t5xxl_end = self._first_index_of(tokens['t5xxl'][0], 1)

        output = clip.encode_from_tokens(tokens, return_pooled=True, return_dict=True)
        cond = output.pop("cond")
        conditioning = [[cond, output]]
        conditioning[0][1]['clip_l_end'] = clip_l_end
        conditioning[0][1]['t5xxl_end'] = t5xxl_end
        return (conditioning, clip_l_end, t5xxl_end,)


class StyleModelApplyAdvanced:
    """Append a strength-scaled style-model embedding to every conditioning entry."""

    @classmethod
    def INPUT_TYPES(s):
        return {"required": {"conditioning": ("CONDITIONING", ),
                             "style_model": ("STYLE_MODEL", ),
                             "clip_vision_output": ("CLIP_VISION_OUTPUT", ),
                             "strength": ("FLOAT", {"default": 1.0, "min": -10.0, "max": 10.0, "step": 0.001}),
                             }}
    RETURN_TYPES = ("CONDITIONING",)
    FUNCTION = "main"

    CATEGORY = "RES4LYF/conditioning"
    DESCRIPTION = "Use with Flux Redux."

    def main(self, clip_vision_output, style_model, conditioning, strength=1.0):
        """Concatenate the style embedding onto each entry along dim 1.

        The style embedding is ``style_model.get_cond(clip_vision_output)``
        with its first two dims flattened, a leading dim of 1 added, and then
        multiplied by ``strength``. Each entry's metadata dict is shallow-copied.
        """
        cond = style_model.get_cond(clip_vision_output).flatten(start_dim=0, end_dim=1).unsqueeze(dim=0)
        cond = strength * cond
        c = [[torch.cat((t[0], cond), dim=1), t[1].copy()] for t in conditioning]
        return (c, )
class ConditioningZeroAndTruncate:
    # needs updating to ensure dims are correct for arbitrary models without hardcoding.
    # vanilla ConditioningZeroOut node doesn't truncate and SD3.5M degrades badly with large embeddings, even if zeroed out, as the negative conditioning
    """Replace every conditioning entry with fixed-size all-zero tensors."""

    @classmethod
    def INPUT_TYPES(s):
        return { "required": {"conditioning": ("CONDITIONING", )}}
    RETURN_TYPES = ("CONDITIONING",)
    FUNCTION = "zero_out"

    CATEGORY = "RES4LYF/conditioning"
    DESCRIPTION = "Use for negative conditioning with SD3.5. ConditioningZeroOut does not truncate the embedding, \
                    which results in severe degradation of image quality with SD3.5 when the token limit is exceeded."

    def zero_out(self, conditioning):
        """Return entries whose embedding is zeros of shape (1, 154, 4096).

        An existing, non-None ``pooled_output`` is replaced by zeros of shape
        (1, 2048). Dtype and device follow the original embedding; the
        metadata dict of each entry is shallow-copied first.
        """
        zeroed = []
        for entry in conditioning:
            embedding = entry[0]
            extras = entry[1].copy()
            if extras.get("pooled_output", None) is not None:
                extras["pooled_output"] = torch.zeros((1,2048), dtype=embedding.dtype, device=embedding.device)
            blank = torch.zeros((1,154,4096), dtype=embedding.dtype, device=embedding.device)
            zeroed.append([blank, extras])
        return (zeroed, )


class ConditioningTruncate:
    # needs updating to ensure dims are correct for arbitrary models without hardcoding.
    """Crop every conditioning entry to fixed maximum sizes."""

    @classmethod
    def INPUT_TYPES(s):
        return { "required": {"conditioning": ("CONDITIONING", )}}
    RETURN_TYPES = ("CONDITIONING",)
    FUNCTION = "zero_out"

    CATEGORY = "RES4LYF/conditioning"
    DESCRIPTION = "Use for positive conditioning with SD3.5. Tokens beyond 77 result in degradation of image quality."

    def zero_out(self, conditioning):
        """Return entries sliced to ``[:, :154, :4096]``.

        An existing, non-None ``pooled_output`` is sliced to ``[:, :2048]``.
        Despite the method name nothing is zeroed; the name is the node's
        registered FUNCTION and is kept as is.
        """
        cropped = []
        for entry in conditioning:
            extras = entry[1].copy()
            if extras.get("pooled_output", None) is not None:
                extras["pooled_output"] = extras["pooled_output"][:, :2048]
            cropped.append([entry[0][:, :154, :4096], extras])
        return (cropped, )
class ConditioningMultiply:
    """Multiply every tensor found anywhere in a conditioning by a scalar."""

    @classmethod
    def INPUT_TYPES(s):
        return {"required": {"conditioning": ("CONDITIONING", ),
                              "multiplier": ("FLOAT", {"default": 1.0, "min": -1000000000.0, "max": 1000000000.0, "step": 0.01})
                             }}
    RETURN_TYPES = ("CONDITIONING",)
    FUNCTION = "main"

    CATEGORY = "RES4LYF/conditioning"

    def main(self, conditioning, multiplier):
        """Delegate to ``multiply_nested_tensors`` and wrap the result in a tuple."""
        c = multiply_nested_tensors(conditioning, multiplier)
        return (c,)


class ConditioningAdd:
    """Add a scaled second conditioning onto the first entry of the first one."""

    @classmethod
    def INPUT_TYPES(s):
        return {"required": {"conditioning_1": ("CONDITIONING", ),
                             "conditioning_2": ("CONDITIONING", ),
                              "multiplier": ("FLOAT", {"default": 1.0, "min": -1000000000.0, "max": 1000000000.0, "step": 0.01})
                             }}
    RETURN_TYPES = ("CONDITIONING",)
    FUNCTION = "main"

    CATEGORY = "RES4LYF/conditioning"

    def main(self, conditioning_1, conditioning_2, multiplier):
        """Return ``conditioning_1`` with ``multiplier * conditioning_2`` added to its first entry.

        Only the first entry of each conditioning takes part; any further
        entries of ``conditioning_1`` are passed through. The sum is computed
        out of place, so the caller's tensors and dict are not modified (the
        previous in-place ``+=`` changed the upstream tensors themselves, so
        repeated evaluation kept accumulating). ``pooled_output`` is summed
        only when both sides provide it.
        """
        head = list(conditioning_1[0])
        extras = head[1].copy()
        other_extras = conditioning_2[0][1]

        head[0] = head[0] + multiplier * conditioning_2[0][0]

        pooled = extras.get('pooled_output')
        other_pooled = other_extras.get('pooled_output')
        if pooled is not None and other_pooled is not None:
            extras['pooled_output'] = pooled + multiplier * other_pooled
        head[1] = extras

        return ([head, *conditioning_1[1:]],)


class ConditioningCombine:
    """Concatenate two conditioning lists."""

    @classmethod
    def INPUT_TYPES(s):
        return {"required": {"conditioning_1": ("CONDITIONING", ), "conditioning_2": ("CONDITIONING", )}}
    RETURN_TYPES = ("CONDITIONING",)
    FUNCTION = "combine"

    CATEGORY = "RES4LYF/conditioning"

    def combine(self, conditioning_1, conditioning_2):
        """Return a new list holding the entries of both inputs, in order."""
        return (conditioning_1 + conditioning_2, )


class ConditioningAverage :
    """Linearly interpolate each entry of one conditioning towards another."""

    @classmethod
    def INPUT_TYPES(s):
        return {"required": {"conditioning_to": ("CONDITIONING", ), "conditioning_from": ("CONDITIONING", ),
                              "conditioning_to_strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01})
                             }}
    RETURN_TYPES = ("CONDITIONING",)
    FUNCTION = "addWeighted"

    CATEGORY = "RES4LYF/conditioning"

    def addWeighted(self, conditioning_to, conditioning_from, conditioning_to_strength):
        """Blend every entry of ``conditioning_to`` with the first entry of ``conditioning_from``.

        Each output embedding is ``to * s + from * (1 - s)`` with
        ``s = conditioning_to_strength``. The "from" embedding is cropped to
        the "to" sequence length, or zero-padded when it is shorter. Pooled
        outputs are blended the same way; when the "to" entry has none, the
        "from" pooled output stands in for it.
        """
        out = []

        if len(conditioning_from) > 1:
            RESplain("Warning: ConditioningAverage conditioning_from contains more than 1 cond, only the first one will actually be applied to conditioning_to.")

        cond_from = conditioning_from[0][0]
        pooled_output_from = conditioning_from[0][1].get("pooled_output", None)

        for entry in conditioning_to:
            t1 = entry[0]
            pooled_output_to = entry[1].get("pooled_output", pooled_output_from)
            t0 = cond_from[:,:t1.shape[1]]
            missing = t1.shape[1] - t0.shape[1]
            if missing > 0:
                # Pad with zeros that share t0's dtype, device and batch size;
                # default-dtype CPU zeros with batch 1 can make torch.cat fail.
                t0 = torch.cat([t0, t0.new_zeros((t0.shape[0], missing, t1.shape[2]))], dim=1)

            tw = torch.mul(t1, conditioning_to_strength) + torch.mul(t0, (1.0 - conditioning_to_strength))
            t_to = entry[1].copy()
            if pooled_output_from is not None and pooled_output_to is not None:
                t_to["pooled_output"] = torch.mul(pooled_output_to, conditioning_to_strength) + torch.mul(pooled_output_from, (1.0 - conditioning_to_strength))
            elif pooled_output_from is not None:
                t_to["pooled_output"] = pooled_output_from

            out.append([tw, t_to])
        return (out, )
class ConditioningSetTimestepRange:
    """Tag a conditioning with the start/end percent window in which it applies."""

    @classmethod
    def INPUT_TYPES(s):
        return {"required": {"conditioning": ("CONDITIONING", ),
                             "start": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.001}),
                             "end": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.001})
                             }}
    RETURN_TYPES = ("CONDITIONING",)
    FUNCTION = "set_range"

    CATEGORY = "RES4LYF/conditioning"

    def set_range(self, conditioning, start, end):
        """Set ``start_percent`` / ``end_percent`` via ``node_helpers.conditioning_set_values``."""
        c = node_helpers.conditioning_set_values(conditioning, {"start_percent": start,
                                                                "end_percent": end})
        return (c, )


class ConditioningAverageScheduler: # don't think this is implemented correctly. needs to be reworked
    """Build one averaged conditioning per schedule value, each limited to its own slice of the run."""

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                    "conditioning_0": ("CONDITIONING", ),
                    "conditioning_1": ("CONDITIONING", ),
                    "ratio": ("SIGMAS", ),
                    }
            }

    RETURN_TYPES = ("CONDITIONING",)
    FUNCTION = "main"

    CATEGORY = "RES4LYF/conditioning"

    @staticmethod
    def addWeighted(conditioning_to, conditioning_from, conditioning_to_strength): #this function borrowed from comfyui
        """Blend every entry of ``conditioning_to`` with the first entry of ``conditioning_from``.

        Each output embedding is ``to * s + from * (1 - s)`` with
        ``s = conditioning_to_strength``; the "from" embedding is cropped or
        zero-padded to the "to" sequence length. Returns the plain list of
        blended entries (not wrapped in a tuple).
        """
        out = []

        if len(conditioning_from) > 1:
            RESplain("Warning: ConditioningAverage conditioning_from contains more than 1 cond, only the first one will actually be applied to conditioning_to.")

        cond_from = conditioning_from[0][0]
        pooled_output_from = conditioning_from[0][1].get("pooled_output", None)

        for entry in conditioning_to:
            t1 = entry[0]
            pooled_output_to = entry[1].get("pooled_output", pooled_output_from)
            t0 = cond_from[:,:t1.shape[1]]
            missing = t1.shape[1] - t0.shape[1]
            if missing > 0:
                # Pad with zeros that share t0's dtype, device and batch size;
                # default-dtype CPU zeros with batch 1 can make torch.cat fail.
                t0 = torch.cat([t0, t0.new_zeros((t0.shape[0], missing, t1.shape[2]))], dim=1)

            tw = torch.mul(t1, conditioning_to_strength) + torch.mul(t0, (1.0 - conditioning_to_strength))
            t_to = entry[1].copy()
            if pooled_output_from is not None and pooled_output_to is not None:
                t_to["pooled_output"] = torch.mul(pooled_output_to, conditioning_to_strength) + torch.mul(pooled_output_from, (1.0 - conditioning_to_strength))
            elif pooled_output_from is not None:
                t_to["pooled_output"] = pooled_output_from

            out.append([tw, t_to])
        return out

    @staticmethod
    def create_percent_array(steps):
        """Split [0, 1] into ``steps`` equal windows.

        Returns a list of ``{"start_percent", "end_percent"}`` dicts, or an
        empty list when ``steps`` is not positive (previously a division by
        zero for ``steps == 0``).
        """
        if steps <= 0:
            return []
        step_size = 1.0 / steps
        return [{"start_percent": i * step_size, "end_percent": (i + 1) * step_size} for i in range(steps)]

    def main(self, conditioning_0, conditioning_1, ratio):
        """Emit one blended conditioning per value in ``ratio``.

        Value ``i`` is used as the strength of ``conditioning_0`` and the
        result is restricted to the ``i``-th equal window of the run. An
        empty ``ratio`` yields an empty conditioning list.
        """
        steps = len(ratio)
        percents = self.create_percent_array(steps)

        cond = []
        for window, strength in zip(percents, ratio):
            average = self.addWeighted(conditioning_0, conditioning_1, strength.item())
            cond += node_helpers.conditioning_set_values(average, {"start_percent": window["start_percent"], "end_percent": window["end_percent"]})

        return (cond,)
class StableCascade_StageB_Conditioning64:
    """Attach a stage C latent to a conditioning as ``stable_cascade_prior``."""

    @classmethod
    def INPUT_TYPES(s):
        return {"required": { "conditioning": ("CONDITIONING",),
                              "stage_c": ("LATENT",),
                             }}
    RETURN_TYPES = ("CONDITIONING",)

    FUNCTION = "set_prior"

    CATEGORY = "RES4LYF/conditioning"

    @precision_tool.cast_tensor
    def set_prior(self, conditioning, stage_c):
        """Return entries whose (shallow-copied) metadata carries ``stage_c['samples']``."""
        c = []
        for t in conditioning:
            d = t[1].copy()
            d['stable_cascade_prior'] = stage_c['samples']
            c.append([t[0], d])
        return (c, )


class Conditioning_Recast64:
    """Recast the first entry of one or two conditionings to float64."""

    @classmethod
    def INPUT_TYPES(s):
        return {"required": { "cond_0": ("CONDITIONING",),
                             },
                "optional": { "cond_1": ("CONDITIONING",),}
                }
    RETURN_TYPES = ("CONDITIONING","CONDITIONING",)
    RETURN_NAMES = ("cond_0_recast","cond_1_recast",)

    FUNCTION = "main"

    CATEGORY = "RES4LYF/precision"

    @staticmethod
    def _recast_first_entry(cond):
        """Return a copy of ``cond`` whose first entry holds float64 tensors.

        Only the embedding and, when present, ``pooled_output`` of entry 0
        are converted; remaining entries are passed through unchanged.
        """
        head = list(cond[0])
        extras = head[1].copy()
        head[0] = head[0].to(torch.float64)
        if extras.get("pooled_output") is not None:
            extras["pooled_output"] = extras["pooled_output"].to(torch.float64)
        head[1] = extras
        return [head, *cond[1:]]

    @precision_tool.cast_tensor
    def main(self, cond_0, cond_1 = None):
        """Return ``(cond_0, cond_1)`` with their first entries recast to float64.

        The inputs are no longer overwritten in place, so other consumers of
        the same conditioning keep their original precision. A missing
        ``pooled_output`` is skipped instead of raising KeyError. ``cond_1``
        is returned as None when it was not supplied.
        """
        cond_0 = self._recast_first_entry(cond_0)
        if cond_1 is not None:
            cond_1 = self._recast_first_entry(cond_1)

        return (cond_0, cond_1,)


class ConditioningToBase64:
    """Serialize a conditioning to a base64 string (pickle payload) for API use."""

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "conditioning": ("CONDITIONING",),
            },
            "hidden": {
                "unique_id": "UNIQUE_ID",
                "extra_pnginfo": "EXTRA_PNGINFO",
            },
        }

    RETURN_TYPES = ("STRING",)
    FUNCTION = "notify"
    OUTPUT_NODE = True
    OUTPUT_IS_LIST = (True,)

    CATEGORY = "RES4LYF/utilities"

    def notify(self, unique_id=None, extra_pnginfo=None, conditioning=None):
        """Encode ``conditioning`` and, when possible, store the text in the workflow.

        Returns a dict with the one-element text list under ``ui.text`` and
        as the node result. When both hidden inputs are available, the text
        is also written to ``widgets_values`` of this node's entry in the
        workflow.

        NOTE(review): the hidden inputs are accepted both bare and wrapped in
        a list. Indexing a bare string id with ``[0]`` compared only its
        first character, and a bare dict for ``extra_pnginfo`` was rejected
        outright. Confirm which form the host actually passes.
        """
        conditioning_pickle = pickle.dumps(conditioning)
        conditioning_base64 = base64.b64encode(conditioning_pickle).decode('utf-8')
        text = [conditioning_base64]

        if unique_id is not None and extra_pnginfo is not None:
            node_id = unique_id[0] if isinstance(unique_id, (list, tuple)) else unique_id
            pnginfo = extra_pnginfo
            if isinstance(extra_pnginfo, (list, tuple)) and extra_pnginfo:
                pnginfo = extra_pnginfo[0]

            if not isinstance(pnginfo, dict) or "workflow" not in pnginfo:
                RESplain("Error: extra_pnginfo[0] is not a dict or missing 'workflow' key")
            else:
                workflow = pnginfo["workflow"]
                node = next(
                    (x for x in workflow["nodes"] if str(x["id"]) == str(node_id)),
                    None,
                )
                if node:
                    node["widgets_values"] = [text]

        return {"ui": {"text": text}, "result": (text,)}
class Base64ToConditioning:
    """Rebuild a conditioning from the base64 text produced by ConditioningToBase64."""

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "data": ("STRING", {"default": ""}),
            }
        }
    RETURN_TYPES = ("CONDITIONING",)
    RETURN_NAMES = ("conditioning",)
    FUNCTION = "main"

    CATEGORY = "RES4LYF/utilities"

    def main(self, data):
        """Decode ``data`` from base64 and unpickle it.

        SECURITY: ``pickle.loads`` executes arbitrary code embedded in the
        payload. Only feed this node strings that come from a trusted
        ConditioningToBase64 node.
        """
        conditioning_pickle = base64.b64decode(data)
        conditioning = pickle.loads(conditioning_pickle)
        return (conditioning,)


class RegionalMask(torch.nn.Module):
    """Callable holder for a precomputed attention mask that is active only inside a sigma window."""

    def __init__(self, mask: torch.Tensor, conditioning: torch.Tensor, conditioning_regional: torch.Tensor, latent:torch.Tensor, start_percent: float, end_percent: float, mask_type: str, img_len: int, text_len: int) -> None:
        """Store a private copy of the mask and of the data it was built from.

        The mask copy is kept on CUDA when CUDA is available and otherwise
        stays on its current device (the unconditional ``.to('cuda')`` made
        construction fail on hosts without CUDA). ``conditioning`` and
        ``conditioning_regional`` are deep-copied, ``latent`` is cloned.
        """
        super().__init__()
        #self.register_buffer('mask', mask)
        storage_device = 'cuda' if torch.cuda.is_available() else mask.device
        self.mask                  = mask.clone().to(storage_device)
        self.conditioning          = copy.deepcopy(conditioning)
        self.conditioning_regional = copy.deepcopy(conditioning_regional)
        self.latent                = latent.clone()
        self.start_percent         = start_percent
        self.end_percent           = end_percent
        self.mask_type             = mask_type
        self.img_len               = img_len
        self.text_len              = text_len

    def __call__(self, transformer_options, weight=0, dtype=torch.bfloat16, *args, **kwargs):
        """Return ``mask * weight`` on the sigma's device, or None when inactive.

        The mask is active when ``start_percent <= 1 - sigma < end_percent``
        (``sigma`` is ``transformer_options['sigmas'][0]``) and the mask type
        is ``"gradient"``. ``dtype`` is accepted but not used.
        """
        sigma = transformer_options['sigmas'][0]
        if self.start_percent <= 1 - sigma < self.end_percent:
            if self.mask_type == "gradient":
                return self.mask.clone().to(sigma.device) * weight
        return None
class RegionalConditioning(torch.nn.Module):
    """Callable holder for the concatenated regional embeddings, active only inside a sigma window."""

    def __init__(self, conditioning: torch.Tensor, region_cond: torch.Tensor, start_percent: float, end_percent: float) -> None:
        """Store the main conditioning reference and a private copy of ``region_cond``.

        The copy is kept on CUDA when CUDA is available and otherwise stays
        on its current device (the unconditional ``.to('cuda')`` made
        construction fail on hosts without CUDA).
        """
        super().__init__()
        #self.register_buffer('region_cond', region_cond)
        storage_device = 'cuda' if torch.cuda.is_available() else region_cond.device
        self.conditioning = conditioning
        self.region_cond = region_cond.clone().to(storage_device)
        self.start_percent = start_percent
        self.end_percent   = end_percent

    def _is_active(self, sigma):
        """Tell whether ``1 - sigma`` lies in ``[start_percent, end_percent)``."""
        return self.start_percent <= 1 - sigma < self.end_percent

    def __call__(self, transformer_options, dtype=torch.bfloat16, *args,  **kwargs):
        """Return the regional embeddings on the sigma's device as ``dtype``, or None when inactive."""
        sigma = transformer_options['sigmas'][0]
        if self._is_active(sigma):
            return self.region_cond.clone().to(sigma.device).to(dtype)
        return None

    def concat_cond(self, context, transformer_options, dtype=torch.bfloat16, *args,  **kwargs):
        """Return ``context`` extended by the regional embeddings, or None when inactive.

        Without a main conditioning the regional embeddings are returned on
        their own, cast to ``dtype``. Otherwise they are appended to
        ``context`` along dim 1 after a cast to bfloat16.

        NOTE(review): the concatenation branch always casts to bfloat16 and
        ignores ``dtype``; kept as is, confirm whether that is intended.
        """
        sigma = transformer_options['sigmas'][0]
        if self._is_active(sigma):
            region_cond = self.region_cond.clone().to(sigma.device).to(dtype)
            if self.conditioning is None:
                return region_cond
            return torch.cat([context, region_cond.clone().to(torch.bfloat16)], dim=1)
        return None


class FluxRegionalPrompt:
    """Append one (mask, prompt embedding) pair to a list of regional conditionings."""

    @classmethod
    def INPUT_TYPES(s):
        return {"required": {
            "cond": ("CONDITIONING",),
        }, "optional": {
            "cond_regional": ("CONDITIONING_REGIONAL",),
            "mask": ("MASK",),
        }}

    RETURN_TYPES = ("CONDITIONING_REGIONAL","MASK",)
    RETURN_NAMES = ("cond_regional","mask_inv")
    FUNCTION = "main"

    CATEGORY = "RES4LYF/conditioning"

    def main(self, cond, mask, cond_regional=None):
        """Return ``(regions, 1 - mask)``.

        ``regions`` is a new list: the entries of ``cond_regional`` (if any)
        followed by ``{'mask': mask, 'cond': cond[0][0]}``. The input list is
        never modified. The default is None rather than a shared list literal.
        """
        regions = [] if cond_regional is None else [*cond_regional]
        regions.append({'mask': mask, 'cond': cond[0][0]})
        mask_inv = 1-mask
        return (regions,mask_inv,)


def fp_not(tensor):
    """Fuzzy NOT: ``1 - tensor``."""
    return 1 - tensor


def fp_or(tensor1, tensor2):
    """Fuzzy OR: element-wise maximum."""
    return torch.maximum(tensor1, tensor2)


def fp_and(tensor1, tensor2):
    """Fuzzy AND: element-wise minimum."""
    return torch.minimum(tensor1, tensor2)
class RegionalGenerateConditioningsAndMasks:
    """Deferred builder of the regional attention mask and regional embeddings for one latent."""

    def __init__(self, conditioning, conditioning_regional, weight, start_percent, end_percent, mask_type):
        """Remember the prompts, regions and activity window; nothing is computed here."""
        self.conditioning          = conditioning
        self.conditioning_regional = conditioning_regional
        self.weight                = weight
        self.start_percent         = start_percent
        self.end_percent           = end_percent
        self.mask_type             = mask_type

    def __call__(self, latent):
        """Build ``(RegionalConditioning, RegionalMask)`` for ``latent``.

        ``latent`` must unpack into four dims; its last two are halved to get
        the image token grid. The mask is a square bfloat16 matrix over
        ``text_len + img_len`` tokens: each region's text tokens attend to
        themselves and to the image tokens inside that region's mask, and
        image tokens attend to each other when they share a region or share
        the outside of a region.
        """
        _, _, latent_h, latent_w = latent.shape
        h = latent_h // 2    # 16x16 PE
        w = latent_w // 2
        img_len = h * w

        cond_r = torch.cat([region['cond'] for region in self.conditioning_regional], dim=1)

        has_main_prompt = self.conditioning is not None
        if has_main_prompt:
            # 256 = main prompt tokens... half of t5, comfy issue
            text_len = 256 + cond_r.shape[1]
            regions = [
                {
                    'mask': torch.ones((1, h, w),      dtype=torch.bfloat16),
                    'cond': torch.ones((1, 256, 4096), dtype=torch.bfloat16),
                },
                *self.conditioning_regional,
            ]
        else:
            text_len = cond_r.shape[1]
            regions = self.conditioning_regional

        total_len = text_len + img_len
        all_attn_mask      = torch.zeros((total_len, total_len), dtype=torch.bfloat16)
        self_attn_mask     = torch.zeros((img_len, img_len),     dtype=torch.bfloat16)
        self_attn_mask_bkg = torch.zeros((img_len, img_len),     dtype=torch.bfloat16)

        row_start = 0
        for region in regions:
            n_tokens = region['cond'].shape[1]
            row_stop = row_start + n_tokens

            # One column per image token holding the region mask value at that token.
            column = torch.nn.functional.interpolate(
                region['mask'][0][None, None, :, :], (h, w), mode='nearest-exact'
            ).flatten().unsqueeze(1)

            img2txt_mask    = column.repeat(1, n_tokens)
            txt2img_mask    = img2txt_mask.transpose(-1, -2)
            img2txt_mask_sq = column.repeat(1, img_len)
            txt2img_mask_sq = img2txt_mask_sq.transpose(-1, -2)

            all_attn_mask[row_start:row_stop, row_start:row_stop] = 1.0           # self             TXT 2 TXT
            all_attn_mask[row_start:row_stop, text_len:         ] = txt2img_mask  # cross            TXT 2 regional IMG
            all_attn_mask[text_len:         , row_start:row_stop] = img2txt_mask  # cross   regional IMG 2 TXT

            inside  = fp_and(img2txt_mask_sq, txt2img_mask_sq)
            outside = fp_and(img2txt_mask_sq.max() - img2txt_mask_sq, txt2img_mask_sq.max() - txt2img_mask_sq)
            self_attn_mask     = fp_or(self_attn_mask,     inside)
            self_attn_mask_bkg = fp_or(self_attn_mask_bkg, outside)

            row_start = row_stop

        # combine foreground/background self-attn
        all_attn_mask[text_len:, text_len:] = fp_or(self_attn_mask, self_attn_mask_bkg)

        all_attn_mask         = RegionalMask(all_attn_mask, self.conditioning, self.conditioning_regional, latent, self.start_percent, self.end_percent, self.mask_type, img_len, text_len)
        regional_conditioning = RegionalConditioning(self.conditioning, cond_r, self.start_percent, self.end_percent)

        return regional_conditioning, all_attn_mask
class FluxRegionalConditioning:
    """Attach the regional mask/embedding builder and its per-step schedules to a conditioning."""

    @classmethod
    def INPUT_TYPES(s):
        return {"required": {
            "mask_weight": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.01}),
            "self_attn_floor": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.01}),
            "start_percent": ("FLOAT", {"default": 0, "min": 0.0, "max": 1.0, "step": 0.01}),
            "end_percent": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01}),
            "mask_type": (["gradient"], {"default": "gradient"}),
        },
            "optional": {
                "conditioning": ("CONDITIONING",),
                "conditioning_regional": ("CONDITIONING_REGIONAL",),
                "mask_weights": ("SIGMAS", ),
                "self_attn_floors": ("SIGMAS", ),
        }}

    RETURN_TYPES = ("CONDITIONING",)
    RETURN_NAMES = ("conditioning",)
    FUNCTION = "main"

    CATEGORY = "RES4LYF/conditioning"

    def main(self, conditioning_regional, mask_weight=1.0, start_percent=0.0, end_percent=1.0, start_step=0, end_step=10000, conditioning=None, mask_weights=None, self_attn_floors=None, self_attn_floor=0.0, mask_type="gradient", latent=None):
        """Return a deep copy of the conditioning with the regional data attached.

        The first entry's metadata receives:
          * ``regional_generate_conditionings_and_masks_fn`` - the deferred builder,
          * ``regional_conditioning_weights`` - float64 schedule from
            ``mask_weights`` / ``mask_weight``,
          * ``regional_conditioning_floors`` - float64 schedule from
            ``self_attn_floors`` / ``self_attn_floor``.

        Both schedules come from ``initialize_or_scale`` and are right-padded
        with 10000 zeros. When no main conditioning is given, a zero
        conditioning shaped like the first regional embedding is used, with a
        (1, 768) zero pooled output.

        The keys are written onto a private copy of the first entry; the
        caller's conditioning used to be modified before the deep copy was
        taken. ``start_step``, ``end_step`` and ``latent`` are accepted but
        not used.
        """
        default_dtype = torch.float64
        max_steps = 10000

        weights = initialize_or_scale(mask_weights, mask_weight, max_steps).to(default_dtype)
        weights = F.pad(weights, (0, max_steps), value=0.0)

        floors = initialize_or_scale(self_attn_floors, self_attn_floor, max_steps).to(default_dtype)
        floors = F.pad(floors, (0, max_steps), value=0.0)

        if conditioning is None:
            reference = conditioning_regional[0]['cond']
            builder_conditioning = None   # the builder must still see "no main prompt"
            conditioning = [
                                [
                                    torch.zeros_like(reference),
                                    {'pooled_output':
                                        torch.zeros((1,768), dtype=reference.dtype, device=reference.device),
                                    }
                                ],
            ]
        else:
            # Detach the first entry (list and metadata dict) from the caller's
            # object before adding keys to it.
            head = list(conditioning[0])
            head[1] = head[1].copy()
            conditioning = [head, *conditioning[1:]]
            builder_conditioning = conditioning

        regional_generate_conditionings_and_masks_fn = RegionalGenerateConditioningsAndMasks(builder_conditioning, conditioning_regional, mask_weight, start_percent, end_percent, mask_type)

        conditioning[0][1]['regional_generate_conditionings_and_masks_fn'] = regional_generate_conditionings_and_masks_fn
        conditioning[0][1]['regional_conditioning_weights'] = weights
        conditioning[0][1]['regional_conditioning_floors'] = floors
        return (copy.deepcopy(conditioning),)
# Step-count constant shared by the legacy package.
# NOTE(review): legacy/conditioning.py uses a local max_steps with the same value - presumably they are meant to agree; confirm.
MAX_STEPS = 10000


# Names of the available implicit types, as plain strings.
# NOTE(review): the consumers of this list are not visible here - verify before renaming or reordering entries.
IMPLICIT_TYPE_NAMES = [
    "predictor-corrector",
    "rebound",
    "retro-eta",
    "bongmath",
]
# Adapted from: https://github.com/zju-pi/diff-sampler/blob/main/gits-main/solver_utils.py
# fixed the calcs for "rhoab" which suffered from an off-by-one error and made some other minor corrections

import torch
import numpy as np

# A pytorch reimplementation of DEIS (https://github.com/qsh-zh/deis).
#############################
### Utils for DEIS solver ###
#############################

#----------------------------------------------------------------------------
# Transfer from the input time (sigma) used in EDM to that (t) used in DEIS.

def edm2t(edm_steps, epsilon_s=1e-3, sigma_min=0.002, sigma_max=80):
    """Convert EDM sigmas to the time variable ``t`` used by DEIS.

    The two schedule parameters are chosen so that ``sigma_min`` maps to
    ``t = epsilon_s`` and ``sigma_max`` maps to ``t = 1``.

    Args:
        edm_steps: tensor of sigmas; it is cloned, detached and moved to CPU.
        epsilon_s: the ``t`` value assigned to ``sigma_min``.
        sigma_min: smallest sigma of the schedule.
        sigma_max: largest sigma of the schedule.

    Returns:
        Tuple ``(t_steps, vp_beta_min, vp_beta_d + vp_beta_min)``.
    """
    def sigma_to_t(beta_d, beta_min, sigma):
        # Closed-form inverse of sigma(t) = sqrt(exp(0.5*beta_d*t^2 + beta_min*t) - 1).
        return ((beta_min ** 2 + 2 * beta_d * (sigma ** 2 + 1).log()).sqrt() - beta_min) / beta_d

    log_term_min = np.log(torch.tensor(sigma_min).cpu() ** 2 + 1)
    log_term_max = np.log(torch.tensor(sigma_max).cpu() ** 2 + 1)

    vp_beta_d   = 2 * (log_term_min / epsilon_s - log_term_max) / (epsilon_s - 1)
    vp_beta_min = log_term_max - 0.5 * vp_beta_d

    t_steps = sigma_to_t(
        vp_beta_d.clone().detach().cpu(),
        vp_beta_min.clone().detach().cpu(),
        edm_steps.clone().detach().cpu(),
    )
    return t_steps, vp_beta_min, vp_beta_d + vp_beta_min

#----------------------------------------------------------------------------

def cal_poly(prev_t, j, taus):
    """Evaluate the ``j``-th Lagrange basis polynomial over the nodes ``prev_t`` at ``taus``.

    Returns the product over all ``k != j`` of
    ``(taus - prev_t[k]) / (prev_t[j] - prev_t[k])``. With a single node the
    product is empty and the plain integer 1 is returned.
    """
    poly = 1
    anchor = prev_t[j]
    other_indices = [k for k in range(prev_t.shape[0]) if k != j]
    for k in other_indices:
        poly *= (taus - prev_t[k]) / (anchor - prev_t[k])
    return poly

#----------------------------------------------------------------------------
# Transfer from t to alpha_t.
def t2alpha_fn(beta_0, beta_1, t):
    """Transfer from t to alpha_t: exp(-0.5 * t^2 * (beta_1 - beta_0) - t * beta_0)."""
    return torch.exp(-0.5 * t ** 2 * (beta_1 - beta_0) - t * beta_0)

#----------------------------------------------------------------------------

def cal_integrand(beta_0, beta_1, taus):
    """Evaluate the DEIS integrand at the points ``taus``.

    The derivative of log(alpha) with respect to tau is obtained with
    autograd, so the inputs are cloned and gradient tracking is forced on even
    when the caller runs under inference/no-grad mode.

    Args:
        beta_0: Tensor, first schedule constant passed to ``t2alpha_fn``.
        beta_1: Tensor, second schedule constant passed to ``t2alpha_fn``.
        taus: Tensor of integration points.

    Returns:
        ``-0.5 * d(log alpha)/d(tau) / sqrt(alpha * (1 - alpha))`` per point.
    """
    with torch.inference_mode(mode=False):
        # Clones are taken so that in-place requires_grad_ never touches the
        # caller's tensors.
        taus = taus.clone()
        beta_0 = beta_0.clone()
        beta_1 = beta_1.clone()
        with torch.enable_grad():
            taus.requires_grad_(True)
            alpha = t2alpha_fn(beta_0, beta_1, taus)
            log_alpha = alpha.log()
            log_alpha.sum().backward()
            d_log_alpha_dtau = taus.grad
    integrand = -0.5 * d_log_alpha_dtau / torch.sqrt(alpha * (1 - alpha))
    return integrand

#----------------------------------------------------------------------------

def get_deis_coeff_list(t_steps, max_order, N=10000, deis_mode='tab'):
    """
    Get the coefficient list for DEIS sampling.

    Args:
        t_steps: A pytorch tensor. The time steps for sampling.
        max_order: A `int`. Maximum order of the solver. 1 <= max_order <= 4
            (enforced for deis_mode=='rhoab', which only has closed forms up
            to fourth order).
        N: A `int`. Use how many points to perform the numerical integration when deis_mode=='tab'.
        deis_mode: A `str`. Select between 'tab' and 'rhoab'. Type of DEIS.
    Returns:
        A list with one entry per step (``len(t_steps) - 1`` entries). Each
        entry is the list of multistep coefficients for that step, ordered
        from the current step backwards; steps solved at first order get an
        empty list.
    Raises:
        ValueError: If ``deis_mode`` is unknown, or if ``max_order`` is outside
            1..4 for deis_mode=='rhoab'.
    """
    if deis_mode not in ('tab', 'rhoab'):
        # Previously fell through to `return C` with C undefined.
        raise ValueError(f"Unknown deis_mode {deis_mode!r}; expected 'tab' or 'rhoab'.")
    if deis_mode == 'rhoab' and not 1 <= max_order <= 4:
        # Without this check an order above 4 silently re-used the previous
        # step's coefficients.
        raise ValueError(f"deis_mode='rhoab' supports 1 <= max_order <= 4, got {max_order}.")

    C = []
    if deis_mode == 'tab':
        t_steps, beta_0, beta_1 = edm2t(t_steps)
        for i, (t_cur, t_next) in enumerate(zip(t_steps[:-1], t_steps[1:])):
            order = min(i+1, max_order)
            if order == 1:
                C.append([])
            else:
                taus = torch.linspace(t_cur, t_next, N)   # split the interval for integral approximation
                dtau = (t_next - t_cur) / N
                prev_t = t_steps[[i - k for k in range(order)]]
                coeff_temp = []
                integrand = cal_integrand(beta_0, beta_1, taus)
                for j in range(order):
                    poly = cal_poly(prev_t, j, taus)
                    coeff_temp.append(torch.sum(integrand * poly) * dtau)
                C.append(coeff_temp)
        return C

    # deis_mode == 'rhoab'
    # Analytical solution, second order
    def get_def_integral_2(a, b, start, end, c):
        coeff = (end**3 - start**3) / 3 - (end**2 - start**2) * (a + b) / 2 + (end - start) * a * b
        return coeff / ((c - a) * (c - b))

    # Analytical solution, third order
    def get_def_integral_3(a, b, c, start, end, d):
        coeff = (end**4 - start**4) / 4 - (end**3 - start**3) * (a + b + c) / 3 \
                + (end**2 - start**2) * (a*b + a*c + b*c) / 2 - (end - start) * a * b * c
        return coeff / ((d - a) * (d - b) * (d - c))

    for i, (t_cur, t_next) in enumerate(zip(t_steps[:-1], t_steps[1:])):
        order = min(i+1, max_order)   # fixed order calcs
        if order == 1:
            C.append([])
            continue

        # prev_t[0] is t_cur, prev_t[k] the k-th previous step. One extra
        # index is gathered; it is never read by the branches below.
        prev_t = t_steps[[i - k for k in range(order+1)]]
        if order == 2:
            coeff_cur = ((t_next - prev_t[1])**2 - (t_cur - prev_t[1])**2) / (2 * (t_cur - prev_t[1]))
            coeff_prev1 = (t_next - t_cur)**2 / (2 * (prev_t[1] - t_cur))
            coeff_temp = [coeff_cur, coeff_prev1]
        elif order == 3:
            coeff_cur = get_def_integral_2(prev_t[1], prev_t[2], t_cur, t_next, t_cur)
            coeff_prev1 = get_def_integral_2(t_cur, prev_t[2], t_cur, t_next, prev_t[1])
            coeff_prev2 = get_def_integral_2(t_cur, prev_t[1], t_cur, t_next, prev_t[2])
            coeff_temp = [coeff_cur, coeff_prev1, coeff_prev2]
        else:  # order == 4
            coeff_cur = get_def_integral_3(prev_t[1], prev_t[2], prev_t[3], t_cur, t_next, t_cur)
            coeff_prev1 = get_def_integral_3(t_cur, prev_t[2], prev_t[3], t_cur, t_next, prev_t[1])
            coeff_prev2 = get_def_integral_3(t_cur, prev_t[1], prev_t[3], t_cur, t_next, prev_t[2])
            coeff_prev3 = get_def_integral_3(t_cur, prev_t[1], prev_t[2], t_cur, t_next, prev_t[3])
            coeff_temp = [coeff_cur, coeff_prev1, coeff_prev2, coeff_prev3]
        C.append(coeff_temp)
    return C
class ControlNetFlux(Flux):
    """Flux-based ControlNet that emits per-block residuals for a main Flux model.

    The network runs its own double/single stream blocks on the image tokens
    plus an embedded control hint, passes each block's image output through a
    dedicated projection, and returns those projections as a dict with
    "input" (double blocks) and "output" (single blocks) entries.
    """

    def __init__(self, latent_input=False, num_union_modes=0, mistoline=False, control_latent_channels=None, image_model=None, dtype=None, device=None, operations=None, **kwargs):
        """Build the base Flux (without final layer) and the control projections.

        Args:
            latent_input: If true the hint is treated as a latent and only
                padded; otherwise it is encoded by a conv stack first.
            num_union_modes: Number of control-type embeddings; 0 disables the
                mode embedder.
            mistoline: Selects the Mistoline variants of the control block and
                hint encoder.
            control_latent_channels: Channel count of the hint latent; when
                given it is multiplied by 2 * 2 (the patch size).
            image_model: Accepted but not used in this constructor.
            dtype, device, operations: Forwarded to every created layer.
            **kwargs: Forwarded to the ``Flux`` base constructor.
        """
        super().__init__(final_layer=False, dtype=dtype, device=device, operations=operations, **kwargs)

        # Block counts of the main model the outputs are tiled/truncated to.
        self.main_model_double = 19
        self.main_model_single = 38

        self.mistoline = mistoline
        # add ControlNet blocks
        if self.mistoline:
            control_block = lambda : MistolineControlnetBlock(self.hidden_size, dtype=dtype, device=device, operations=operations)
        else:
            control_block = lambda : operations.Linear(self.hidden_size, self.hidden_size, dtype=dtype, device=device)

        # One projection per double block of this network.
        self.controlnet_blocks = nn.ModuleList([])
        for _ in range(self.params.depth):
            self.controlnet_blocks.append(control_block())

        # One projection per single block of this network.
        self.controlnet_single_blocks = nn.ModuleList([])
        for _ in range(self.params.depth_single_blocks):
            self.controlnet_single_blocks.append(control_block())

        self.num_union_modes = num_union_modes
        self.controlnet_mode_embedder = None
        if self.num_union_modes > 0:
            self.controlnet_mode_embedder = operations.Embedding(self.num_union_modes, self.hidden_size, dtype=dtype, device=device)

        self.gradient_checkpointing = False
        self.latent_input = latent_input
        if control_latent_channels is None:
            control_latent_channels = self.in_channels
        else:
            control_latent_channels *= 2 * 2 #patch size

        # Projects the patchified hint to the hidden size so it can be added to the image tokens.
        self.pos_embed_input = operations.Linear(control_latent_channels, self.hidden_size, bias=True, dtype=dtype, device=device)
        if not self.latent_input:
            if self.mistoline:
                self.input_cond_block = MistolineCondDownsamplBlock(dtype=dtype, device=device, operations=operations)
            else:
                # Conv encoder with three stride-2 stages (3 -> 16 channels).
                self.input_hint_block = nn.Sequential(
                    operations.Conv2d(3, 16, 3, padding=1, dtype=dtype, device=device),
                    nn.SiLU(),
                    operations.Conv2d(16, 16, 3, padding=1, dtype=dtype, device=device),
                    nn.SiLU(),
                    operations.Conv2d(16, 16, 3, padding=1, stride=2, dtype=dtype, device=device),
                    nn.SiLU(),
                    operations.Conv2d(16, 16, 3, padding=1, dtype=dtype, device=device),
                    nn.SiLU(),
                    operations.Conv2d(16, 16, 3, padding=1, stride=2, dtype=dtype, device=device),
                    nn.SiLU(),
                    operations.Conv2d(16, 16, 3, padding=1, dtype=dtype, device=device),
                    nn.SiLU(),
                    operations.Conv2d(16, 16, 3, padding=1, stride=2, dtype=dtype, device=device),
                    nn.SiLU(),
                    operations.Conv2d(16, 16, 3, padding=1, dtype=dtype, device=device)
                )

    def forward_orig(
        self,
        img: Tensor,
        img_ids: Tensor,
        controlnet_cond: Tensor,
        txt: Tensor,
        txt_ids: Tensor,
        timesteps: Tensor,
        y: Tensor,
        guidance: Tensor = None,
        control_type: Tensor = None,
    ) -> Tensor:
        """Run the control network on already patchified inputs.

        Returns:
            A dict with key "input" (double-block residuals, at most
            ``main_model_double`` entries) and, when single blocks exist,
            "output" (at most ``main_model_single`` entries).

        Raises:
            ValueError: If ``img`` or ``txt`` is not 3-dimensional.
        """
        if img.ndim != 3 or txt.ndim != 3:
            raise ValueError("Input img and txt tensors must have 3 dimensions.")

        # running on sequences img
        img = self.img_in(img)

        # The embedded control hint is added to the image tokens.
        controlnet_cond = self.pos_embed_input(controlnet_cond)
        img = img + controlnet_cond
        vec = self.time_in(timestep_embedding(timesteps, 256))
        if self.params.guidance_embed:
            vec = vec + self.guidance_in(timestep_embedding(guidance, 256))
        vec = vec + self.vector_in(y)
        txt = self.txt_in(txt)

        # NOTE(review): len(control_type) raises TypeError if control_type is
        # left at its None default while a mode embedder exists; forward()
        # always passes a list.
        if self.controlnet_mode_embedder is not None and len(control_type) > 0:
            # Prepend the control-type embedding(s) to the text tokens and
            # extend txt_ids by one position to match.
            control_cond = self.controlnet_mode_embedder(torch.tensor(control_type, device=img.device), out_dtype=img.dtype).unsqueeze(0).repeat((txt.shape[0], 1, 1))
            txt = torch.cat([control_cond, txt], dim=1)
            txt_ids = torch.cat([txt_ids[:,:1], txt_ids], dim=1)

        ids = torch.cat((txt_ids, img_ids), dim=1)
        pe = self.pe_embedder(ids)

        controlnet_double = ()

        for i in range(len(self.double_blocks)):
            img, txt = self.double_blocks[i](img=img, txt=txt, vec=vec, pe=pe)
            controlnet_double = controlnet_double + (self.controlnet_blocks[i](img),)

        img = torch.cat((txt, img), 1)

        controlnet_single = ()

        for i in range(len(self.single_blocks)):
            img = self.single_blocks[i](img, vec=vec, pe=pe)
            # Only the image part (after the text tokens) is projected.
            controlnet_single = controlnet_single + (self.controlnet_single_blocks[i](img[:, txt.shape[1] :, ...]),)

        # Stretch the collected residuals to cover the main model's blocks:
        # latent_input repeats each entry consecutively, otherwise the whole
        # tuple is tiled; the result is truncated to the main block count.
        repeat = math.ceil(self.main_model_double / len(controlnet_double))
        if self.latent_input:
            out_input = ()
            for x in controlnet_double:
                out_input += (x,) * repeat
        else:
            out_input = (controlnet_double * repeat)

        out = {"input": out_input[:self.main_model_double]}
        if len(controlnet_single) > 0:
            repeat = math.ceil(self.main_model_single / len(controlnet_single))
            out_output = ()
            if self.latent_input:
                for x in controlnet_single:
                    out_output += (x,) * repeat
            else:
                out_output = (controlnet_single * repeat)
            out["output"] = out_output[:self.main_model_single]
        return out

    def forward(self, x, timesteps, context, y, guidance=None, hint=None, **kwargs):
        """Prepare hint, image tokens and position ids, then call ``forward_orig``.

        ``kwargs`` may carry "control_type"; an empty list is used when absent.
        """
        patch_size = 2
        if self.latent_input:
            hint = comfy.ldm.common_dit.pad_to_patch_size(hint, (patch_size, patch_size))
        elif self.mistoline:
            # Rescale the hint from [0, 1] to [-1, 1] before encoding.
            hint = hint * 2.0 - 1.0
            hint = self.input_cond_block(hint)
        else:
            hint = hint * 2.0 - 1.0
            hint = self.input_hint_block(hint)

        # Patchify the hint: every 2x2 spatial patch becomes one token.
        hint = rearrange(hint, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=patch_size, pw=patch_size)

        bs, c, h, w = x.shape
        x = comfy.ldm.common_dit.pad_to_patch_size(x, (patch_size, patch_size))

        img = rearrange(x, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=patch_size, pw=patch_size)

        # Token grid size, rounding up for odd heights/widths.
        h_len = ((h + (patch_size // 2)) // patch_size)
        w_len = ((w + (patch_size // 2)) // patch_size)
        # Channel 0 stays zero; channels 1 and 2 hold the row and column index.
        img_ids = torch.zeros((h_len, w_len, 3), device=x.device, dtype=x.dtype)
        img_ids[..., 1] = img_ids[..., 1] + torch.linspace(0, h_len - 1, steps=h_len, device=x.device, dtype=x.dtype)[:, None]
        img_ids[..., 2] = img_ids[..., 2] + torch.linspace(0, w_len - 1, steps=w_len, device=x.device, dtype=x.dtype)[None, :]
        img_ids = repeat(img_ids, "h w c -> b (h w) c", b=bs)

        txt_ids = torch.zeros((bs, context.shape[1], 3), device=x.device, dtype=x.dtype)
        return self.forward_orig(img, img_ids, hint, context, txt_ids, timesteps, y, guidance, control_type=kwargs.get("control_type", []))
class EmbedND(nn.Module):
    """Builds rotary position embeddings from N-axis position ids."""

    def __init__(self, dim: int, theta: int, axes_dim: list):
        super().__init__()
        self.dim = dim
        self.theta = theta
        self.axes_dim = axes_dim

    def forward(self, ids: Tensor) -> Tensor:
        """Concatenate the per-axis ``rope`` tables along dim -3 and add a dim at index 1."""
        per_axis = []
        for axis in range(ids.shape[-1]):
            per_axis.append(rope(ids[..., axis], self.axes_dim[axis], self.theta))
        return torch.cat(per_axis, dim=-3).unsqueeze(1)


def attention_weights(q, k):
    """Return softmax-style attention weights for q and k.

    Implemented with in-place ops on the score tensor to reduce memory
    requirements. No max-subtraction is done before the exponential; NaNs in
    the result are replaced by 0 and infinities by +/-65504.
    """
    weights = torch.matmul(q, k.transpose(-2, -1))
    weights /= math.sqrt(q.size(-1))
    weights.exp_()
    row_totals = weights.sum(dim=-1, keepdim=True)
    weights /= row_totals
    return weights.nan_to_num_(nan=0.0, posinf=65504., neginf=-65504.)


def timestep_embedding(t: Tensor, dim, max_period=10000, time_factor: float = 1000.0):
    """
    Create sinusoidal timestep embeddings.

    :param t: a 1-D Tensor of N indices, one per batch element.
              These may be fractional. It is multiplied by time_factor first.
    :param dim: the dimension of the output.
    :param max_period: controls the minimum frequency of the embeddings.
    :param time_factor: scale applied to t before embedding.
    :return: an (N, D) Tensor of positional embeddings (cosines first, then
             sines, then one zero column when dim is odd).
    """
    t = time_factor * t
    n_freqs = dim // 2
    exponents = torch.arange(start=0, end=n_freqs, dtype=torch.float32, device=t.device)
    freqs = torch.exp(-math.log(max_period) * exponents / n_freqs)

    phases = t[:, None].float() * freqs[None]
    pieces = [torch.cos(phases), torch.sin(phases)]
    embedding = torch.cat(pieces, dim=-1)
    if dim % 2:
        # Odd dim: pad with one zero column.
        embedding = torch.cat([embedding, torch.zeros_like(embedding[:, :1])], dim=-1)
    if torch.is_floating_point(t):
        # Match the dtype/device of the (scaled) input.
        embedding = embedding.to(t)
    return embedding


class MLPEmbedder(nn.Module):
    """Two linear layers with a SiLU in between (in_dim -> hidden_dim -> hidden_dim)."""

    def __init__(self, in_dim: int, hidden_dim: int, dtype=None, device=None, operations=None):
        super().__init__()
        factory = dict(dtype=dtype, device=device)
        self.in_layer = operations.Linear(in_dim, hidden_dim, bias=True, **factory)
        self.silu = nn.SiLU()
        self.out_layer = operations.Linear(hidden_dim, hidden_dim, bias=True, **factory)

    def forward(self, x: Tensor) -> Tensor:
        hidden = self.in_layer(x)
        hidden = self.silu(hidden)
        return self.out_layer(hidden)


class RMSNorm(torch.nn.Module):
    """RMS normalisation with a learned per-channel scale (epsilon 1e-6)."""

    def __init__(self, dim: int, dtype=None, device=None, operations=None):
        super().__init__()
        # Uninitialised storage; values are expected to come from loaded weights.
        self.scale = nn.Parameter(torch.empty((dim), dtype=dtype, device=device))

    def forward(self, x: Tensor):
        return comfy.ldm.common_dit.rms_norm(x, self.scale, 1e-6)


class QKNorm(torch.nn.Module):
    """Applies separate RMSNorms to queries and keys."""

    def __init__(self, dim: int, dtype=None, device=None, operations=None):
        super().__init__()
        shared = dict(dtype=dtype, device=device, operations=operations)
        self.query_norm = RMSNorm(dim, **shared)
        self.key_norm = RMSNorm(dim, **shared)

    def forward(self, q: Tensor, k: Tensor, v: Tensor) -> tuple:
        """Normalise q and k, then cast both to the dtype/device of v."""
        q_normed = self.query_norm(q)
        k_normed = self.key_norm(k)
        return q_normed.to(v), k_normed.to(v)
class SelfAttention(nn.Module):
    """Holds the fused QKV projection, the Q/K norm and the output projection."""

    def __init__(self, dim: int, num_heads: int = 8, qkv_bias: bool = False, dtype=None, device=None, operations=None):
        super().__init__()
        self.num_heads = num_heads
        per_head = dim // num_heads
        factory = dict(dtype=dtype, device=device)

        self.qkv = operations.Linear(dim, dim * 3, bias=qkv_bias, **factory)
        self.norm = QKNorm(per_head, operations=operations, **factory)
        self.proj = operations.Linear(dim, dim, **factory)


@dataclass
class ModulationOut:
    """One (shift, scale, gate) modulation triple."""
    shift: Tensor
    scale: Tensor
    gate: Tensor


class Modulation(nn.Module):
    """Projects the conditioning vector into one or two modulation triples."""

    def __init__(self, dim: int, double: bool, dtype=None, device=None, operations=None):
        super().__init__()
        self.is_double = double
        self.multiplier = 6 if double else 3
        self.lin = operations.Linear(dim, self.multiplier * dim, bias=True, dtype=dtype, device=device)

    def forward(self, vec: Tensor) -> tuple:
        """Return ``(first, second)``; ``second`` is None unless ``double`` was set."""
        projected = self.lin(nn.functional.silu(vec))
        chunks = projected[:, None, :].chunk(self.multiplier, dim=-1)
        first = ModulationOut(*chunks[:3])
        second = ModulationOut(*chunks[3:]) if self.is_double else None
        return (first, second,)


class DoubleStreamBlock(nn.Module):
    """Transformer block with separate image/text weights and joint attention."""

    def __init__(self, hidden_size: int, num_heads: int, mlp_ratio: float, qkv_bias: bool = False, dtype=None, device=None, operations=None, idx=-1):
        super().__init__()

        self.idx = idx

        mlp_hidden_dim = int(hidden_size * mlp_ratio)
        self.num_heads = num_heads
        self.hidden_size = hidden_size

        factory = dict(dtype=dtype, device=device)

        def make_norm():
            return operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, **factory)

        def make_mlp():
            return nn.Sequential(
                operations.Linear(hidden_size, mlp_hidden_dim, bias=True, **factory),
                nn.GELU(approximate="tanh"),
                operations.Linear(mlp_hidden_dim, hidden_size, bias=True, **factory),
            )

        # Construction order below is kept: it fixes parameter registration
        # order and the order in which weights are initialised.
        self.img_mod = Modulation(hidden_size, double=True, operations=operations, **factory)
        self.txt_mod = Modulation(hidden_size, double=True, operations=operations, **factory)

        self.img_attn = SelfAttention(dim=hidden_size, num_heads=num_heads, qkv_bias=qkv_bias, operations=operations, **factory)
        self.txt_attn = SelfAttention(dim=hidden_size, num_heads=num_heads, qkv_bias=qkv_bias, operations=operations, **factory)

        self.img_norm1 = make_norm()
        self.txt_norm1 = make_norm()

        self.img_norm2 = make_norm()
        self.txt_norm2 = make_norm()

        self.img_mlp = make_mlp()
        self.txt_mlp = make_mlp()

    def img_attn_preproc(self, img, img_mod1):
        """Normalise and modulate the image tokens, then return per-head q, k, v."""
        modulated = (1 + img_mod1.scale) * self.img_norm1(img) + img_mod1.shift
        qkv = self.img_attn.qkv(modulated)

        img_q, img_k, img_v = rearrange(qkv, "B L (K H D) -> K B H L D", K=3, H=self.num_heads)
        img_q, img_k = self.img_attn.norm(img_q, img_k, img_v)
        return img_q, img_k, img_v

    def txt_attn_preproc(self, txt, txt_mod1):
        """Normalise and modulate the text tokens, then return per-head q, k, v."""
        modulated = (1 + txt_mod1.scale) * self.txt_norm1(txt) + txt_mod1.shift
        qkv = self.txt_attn.qkv(modulated)

        txt_q, txt_k, txt_v = rearrange(qkv, "B L (K H D) -> K B H L D", K=3, H=self.num_heads)
        txt_q, txt_k = self.txt_attn.norm(txt_q, txt_k, txt_v)
        return txt_q, txt_k, txt_v

    def forward(self, img: Tensor, txt: Tensor, vec: Tensor, pe: Tensor, timestep, transformer_options={}, mask=None, weight=1):
        """Run joint attention plus per-stream MLPs.

        ``img`` and ``txt`` are updated in place and also returned.
        ``timestep``, ``transformer_options`` and ``weight`` are accepted but
        not used in this method.
        """
        img_mod1, img_mod2 = self.img_mod(vec)
        txt_mod1, txt_mod2 = self.txt_mod(vec)

        img_q, img_k, img_v = self.img_attn_preproc(img, img_mod1)
        txt_q, txt_k, txt_v = self.txt_attn_preproc(txt, txt_mod1)

        # Text tokens come first along the sequence axis.
        q = torch.cat((txt_q, img_q), dim=2)
        k = torch.cat((txt_k, img_k), dim=2)
        v = torch.cat((txt_v, img_v), dim=2)

        attn = attention(q, k, v, pe=pe, mask=mask)
        n_txt = txt.shape[1]
        txt_attn = attn[:, :n_txt]
        img_attn = attn[:, n_txt:]

        # Gated residual updates; in-place on purpose (same as before).
        img += img_mod1.gate * self.img_attn.proj(img_attn)
        txt += txt_mod1.gate * self.txt_attn.proj(txt_attn)

        img += img_mod2.gate * self.img_mlp((1 + img_mod2.scale) * self.img_norm2(img) + img_mod2.shift)
        txt += txt_mod2.gate * self.txt_mlp((1 + txt_mod2.scale) * self.txt_norm2(txt) + txt_mod2.shift)

        return img, txt


class SingleStreamBlock(nn.Module):
    """
    A DiT block with parallel linear layers as described in
    https://arxiv.org/abs/2302.05442 and adapted modulation interface.
    """

    def __init__(self, hidden_size: int, num_heads: int, mlp_ratio: float = 4.0, qk_scale: float = None, dtype=None, device=None, operations=None, idx=-1):
        super().__init__()
        self.idx = idx
        self.hidden_dim = hidden_size
        self.num_heads = num_heads
        head_dim = hidden_size // num_heads
        self.scale = qk_scale or head_dim**-0.5

        self.mlp_hidden_dim = int(hidden_size * mlp_ratio)
        factory = dict(dtype=dtype, device=device)

        # qkv and mlp_in
        self.linear1 = operations.Linear(hidden_size, hidden_size * 3 + self.mlp_hidden_dim, **factory)
        # proj and mlp_out
        self.linear2 = operations.Linear(hidden_size + self.mlp_hidden_dim, hidden_size, **factory)

        self.norm = QKNorm(head_dim, operations=operations, **factory)

        self.hidden_size = hidden_size
        self.pre_norm = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, **factory)

        self.mlp_act = nn.GELU(approximate="tanh")
        self.modulation = Modulation(hidden_size, double=False, operations=operations, **factory)

    def img_attn(self, img, mod, pe, mask, weight):
        """Return ``(attn, mlp)``: the attention output and the raw MLP branch input.

        ``weight`` is accepted but not used.
        """
        modulated = (1 + mod.scale) * self.pre_norm(img) + mod.shift
        fused = self.linear1(modulated)
        qkv, mlp = torch.split(fused, [3 * self.hidden_size, self.mlp_hidden_dim], dim=-1)

        q, k, v = rearrange(qkv, "B L (K H D) -> K B H L D", K=3, H=self.num_heads)
        q, k = self.norm(q, k, v)

        attn = attention(q, k, v, pe=pe, mask=mask)
        return attn, mlp

    def forward(self, img: Tensor, vec: Tensor, pe: Tensor, timestep, transformer_options={}, mask=None, weight=1) -> Tensor:
        """Apply the block; ``img`` is updated in place and returned."""
        mod, _ = self.modulation(vec)
        attn, mlp = self.img_attn(img, mod, pe, mask, weight)
        merged = torch.cat((attn, self.mlp_act(mlp)), 2)
        output = self.linear2(merged)
        img += mod.gate * output
        return img
class LastLayer(nn.Module):
    """Final adaLN-modulated projection from hidden size to patch channels."""

    def __init__(self, hidden_size: int, patch_size: int, out_channels: int, dtype=None, device=None, operations=None):
        super().__init__()
        factory = dict(dtype=dtype, device=device)
        self.norm_final = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, **factory)
        self.linear = operations.Linear(hidden_size, patch_size * patch_size * out_channels, bias=True, **factory)
        self.adaLN_modulation = nn.Sequential(
            nn.SiLU(),
            operations.Linear(hidden_size, 2 * hidden_size, bias=True, **factory),
        )

    def forward(self, x: Tensor, vec: Tensor) -> Tensor:
        """Modulate the normalised tokens with shift/scale derived from ``vec`` and project."""
        shift, scale = self.adaLN_modulation(vec).chunk(2, dim=1)
        modulated = (1 + scale[:, None, :]) * self.norm_final(x) + shift[:, None, :]
        return self.linear(modulated)


def attention(q: Tensor, k: Tensor, v: Tensor, pe: Tensor, mask=None) -> Tensor:
    """Apply rotary embeddings to q/k, then run ``optimized_attention``.

    The head count is read from dim 1 of q and the inputs are passed with
    ``skip_reshape=True``.
    """
    q, k = apply_rope(q, k, pe)
    n_heads = q.shape[1]
    return optimized_attention(q, k, v, n_heads, skip_reshape=True, mask=mask)


def rope(pos: Tensor, dim: int, theta: int) -> Tensor:
    """Build 2x2 rotation matrices for every position and frequency.

    The table is computed on the CPU when ``pos`` is on an MPS device or an
    Intel XPU is in use, and is returned as float32 on ``pos.device``.
    ``dim`` must be even.
    """
    assert dim % 2 == 0
    on_limited_backend = comfy.model_management.is_device_mps(pos.device) or comfy.model_management.is_intel_xpu()
    device = torch.device("cpu") if on_limited_backend else pos.device

    scale = torch.linspace(0, (dim - 2) / dim, steps=dim//2, dtype=torch.float64, device=device)
    omega = 1.0 / (theta**scale)
    angles = torch.einsum("...n,d->...nd", pos.to(dtype=torch.float32, device=device), omega)
    cos, sin = torch.cos(angles), torch.sin(angles)
    # Row-major entries of [[cos, -sin], [sin, cos]].
    flat = torch.stack([cos, -sin, sin, cos], dim=-1)
    matrices = rearrange(flat, "b n d (i j) -> b n d i j", i=2, j=2)
    return matrices.to(dtype=torch.float32, device=pos.device)


def apply_rope(xq: Tensor, xk: Tensor, freqs_cis: Tensor):
    """Rotate consecutive channel pairs of xq and xk by the matrices in ``freqs_cis``.

    The computation is done in float32 and each result is cast back to the
    type of its input.
    """
    def rotate(x):
        pairs = x.float().reshape(*x.shape[:-1], -1, 1, 2)
        rotated = freqs_cis[..., 0] * pairs[..., 0] + freqs_cis[..., 1] * pairs[..., 1]
        return rotated.reshape(*x.shape).type_as(x)

    return rotate(xq), rotate(xk)
@dataclass
class FluxParams:
    """Hyper-parameters used to build a ReFlux model (passed as **kwargs)."""
    in_channels: int
    out_channels: int
    vec_in_dim: int
    context_in_dim: int
    hidden_size: int
    mlp_ratio: float
    num_heads: int
    depth: int                  # number of double-stream blocks
    depth_single_blocks: int    # number of single-stream blocks
    axes_dim: list              # must sum to hidden_size // num_heads
    theta: int
    patch_size: int
    qkv_bias: bool
    guidance_embed: bool


class ReFlux(Flux):
    """Flux variant whose attention can be masked for regional conditioning.

    The regional mask, its weight and its floor are read from
    ``transformer_options`` at call time.
    """

    def __init__(self, image_model=None, final_layer=True, dtype=None, device=None, operations=None, **kwargs):
        """Build all sub-modules from ``FluxParams(**kwargs)``.

        Raises:
            ValueError: If hidden_size is not divisible by num_heads, or if
                axes_dim does not sum to the per-head dimension.
        """
        # NOTE(review): the base Flux constructor is called without arguments;
        # confirm that the ComfyUI version in use accepts that.
        super().__init__()
        self.dtype = dtype
        self.timestep = -1.0
        self.threshold_inv = False
        params = FluxParams(**kwargs)

        self.params = params #self.params FluxParams(in_channels=16, out_channels=16, vec_in_dim=768, context_in_dim=4096, hidden_size=3072, mlp_ratio=4.0, num_heads=24, depth=19, depth_single_blocks=38, axes_dim=[16, 56, 56], theta=10000, patch_size=2, qkv_bias=True, guidance_embed=False)
        self.patch_size = params.patch_size
        # Channel counts after patchification (channels * patch_size^2).
        self.in_channels = params.in_channels * params.patch_size * params.patch_size # in_channels 64
        self.out_channels = params.out_channels * params.patch_size * params.patch_size # out_channels 64

        if params.hidden_size % params.num_heads != 0:
            raise ValueError(f"Hidden size {params.hidden_size} must be divisible by num_heads {params.num_heads}")
        pe_dim = params.hidden_size // params.num_heads
        if sum(params.axes_dim) != pe_dim:
            raise ValueError(f"Got {params.axes_dim} but expected positional dim {pe_dim}")

        self.hidden_size = params.hidden_size # 3072
        self.num_heads = params.num_heads # 24
        self.pe_embedder = EmbedND(dim=pe_dim, theta=params.theta, axes_dim=params.axes_dim)

        self.img_in = operations.Linear( self.in_channels, self.hidden_size, bias=True, dtype=dtype, device=device) # in_features= 64, out_features=3072
        self.txt_in = operations.Linear(params.context_in_dim, self.hidden_size, dtype=dtype, device=device) # in_features=4096, out_features=3072, bias=True

        self.time_in = MLPEmbedder( in_dim=256, hidden_dim=self.hidden_size, dtype=dtype, device=device, operations=operations)
        self.vector_in = MLPEmbedder(params.vec_in_dim, self.hidden_size, dtype=dtype, device=device, operations=operations) # in_features=768, out_features=3072 (first layer) second layer 3072,3072

        # Identity when the model has no distilled-guidance embedding.
        self.guidance_in = (MLPEmbedder( in_dim=256, hidden_dim=self.hidden_size, dtype=dtype, device=device, operations=operations) if params.guidance_embed else nn.Identity())

        # The loop variable doubles as each block's idx.
        self.double_blocks = nn.ModuleList([DoubleStreamBlock(self.hidden_size, self.num_heads, mlp_ratio=params.mlp_ratio, qkv_bias=params.qkv_bias, dtype=dtype, device=device, operations=operations, idx=_) for _ in range(params.depth)])
        self.single_blocks = nn.ModuleList([SingleStreamBlock(self.hidden_size, self.num_heads, mlp_ratio=params.mlp_ratio, dtype=dtype, device=device, operations=operations, idx=_) for _ in range(params.depth_single_blocks)])

        if final_layer:
            self.final_layer = LastLayer(self.hidden_size, 1, self.out_channels, dtype=dtype, device=device, operations=operations)

    def forward_blocks(self, img: Tensor, img_ids: Tensor, txt: Tensor, txt_ids: Tensor, timesteps: Tensor, y: Tensor, guidance: Tensor = None, control=None, transformer_options = {},) -> Tensor:
        """Run the transformer stack on patchified tokens for one batch entry.

        Reads 'reg_cond_weight', 'reg_cond_floor' and
        patches['regional_conditioning_mask'] from ``transformer_options``.

        Raises:
            ValueError: If ``img`` or ``txt`` is not 3-dimensional.
        """
        if img.ndim != 3 or txt.ndim != 3:
            raise ValueError("Input img and txt tensors must have 3 dimensions.")

        # running on sequences img
        img = self.img_in(img) # 1,9216,64 == 768x192 # 1,9216,64 == 1,16,128,256 + 1,16,64,64 # 1,8192,64 with uncond/cond #:,:,64 -> :,:,3072
        vec = self.time_in(timestep_embedding(timesteps, 256).to(img.dtype)) # 1 -> 1,3072

        if self.params.guidance_embed:
            if guidance is None:
                print("Guidance strength is none, not using distilled guidance.")
            else:
                vec = vec + self.guidance_in(timestep_embedding(guidance, 256).to(img.dtype))

        vec = vec + self.vector_in(y) #y.shape=1,768 y==all 0s
        txt = self.txt_in(txt) #

        ids = torch.cat((txt_ids, img_ids), dim=1) # img_ids.shape=1,8192,3 txt_ids.shape=1,512,3 #ids.shape=1,8704,3
        pe = self.pe_embedder(ids) # pe.shape 1,1,8704,64,2,2

        weight = transformer_options['reg_cond_weight'] if 'reg_cond_weight' in transformer_options else 0.0
        floor = transformer_options['reg_cond_floor'] if 'reg_cond_floor' in transformer_options else 0.0
        # NOTE(review): the float fallbacks above have no .item()/.to(); the
        # code below works only when tensors were supplied (or the mask patch
        # is absent / weight is negative).

        # NOTE(review): mask_orig and mask_self are computed but never read in
        # this method; the mask object is called a second time below.
        mask_orig, mask_self = None, None
        mask_obj = transformer_options.get('patches', {}).get('regional_conditioning_mask', None)
        if mask_obj is not None and weight >= 0:
            mask_orig = mask_obj[0](transformer_options, weight.item())
            mask_self = mask_orig.clone()
            mask_self[mask_obj[0].text_len:, mask_obj[0].text_len:] = mask_self.max()

        mask_resized_list = []  # unused; left over from the disabled mask_resized path
        mask = None
        mask_obj = transformer_options.get('patches', {}).get('regional_conditioning_mask', None)
        # A negative weight (set for the unconditional pass) disables the mask.
        if mask_obj is not None and weight >= 0:
            mask = mask_obj[0](transformer_options, weight.item())
            text_len = mask_obj[0].text_len
            # Raise the image-to-image part of the mask to at least `floor`.
            mask[text_len:,text_len:] = torch.clamp(mask[text_len:,text_len:], min=floor.to(mask.device))

        for i, block in enumerate(self.double_blocks):
            #img, txt, mask_resized = block(img=img, txt=txt, vec=vec, pe=pe, timestep=timesteps, transformer_options=transformer_options, mask=mask, weight=weight) #, mask=mask)
            img, txt = block(img=img, txt=txt, vec=vec, pe=pe, timestep=timesteps, transformer_options=transformer_options, mask=mask, weight=weight) #, mask=mask)
            #if mask is not None:
            #    mask_resized_list.append(mask_resized)

            if control is not None: # Controlnet
                control_i = control.get("input")
                if i < len(control_i):
                    add = control_i[i]
                    if add is not None:
                        # Only the first batch entry receives the residual.
                        img[:1] += add

        img = torch.cat((txt, img), 1) #first 256 is txt embed

        for i, block in enumerate(self.single_blocks):
            img = block(img, vec=vec, pe=pe, timestep=timesteps, transformer_options=transformer_options, mask=mask, weight=weight)

            if control is not None: # Controlnet
                control_o = control.get("output")
                if i < len(control_o):
                    add = control_o[i]
                    if add is not None:
                        img[:1, txt.shape[1] :, ...] += add

        # Drop the text tokens before the final projection.
        img = img[:, txt.shape[1] :, ...]
        img = self.final_layer(img, vec) # (N, T, patch_size ** 2 * out_channels) 1,8192,3072 -> 1,8192,64
        return img

    def _get_img_ids(self, x, bs, h_len, w_len, h_start, h_end, w_start, w_end):
        """Return position ids of shape (bs, h_len * w_len, 3): channel 0 zero, 1 row, 2 column."""
        img_ids = torch.zeros((h_len, w_len, 3), device=x.device, dtype=x.dtype)
        img_ids[..., 1] = img_ids[..., 1] + torch.linspace(h_start, h_end - 1, steps=h_len, device=x.device, dtype=x.dtype)[:, None]
        img_ids[..., 2] = img_ids[..., 2] + torch.linspace(w_start, w_end - 1, steps=w_len, device=x.device, dtype=x.dtype)[None, :]
        img_ids = repeat(img_ids, "h w c -> b (h w) c", b=bs)
        return img_ids

    def forward(self, x, timestep, context, y, guidance, control=None, transformer_options={}, **kwargs):
        """Process each cond/uncond batch entry separately and stack the results.

        ``transformer_options`` must contain 'cond_or_uncond'; for conditional
        entries it must also contain 'regional_conditioning_weight',
        'regional_conditioning_floor' and
        patches['regional_conditioning_positive']. The dict is written to.
        """
        # NOTE(review): the {} default is mutated below and lacks the required
        # 'cond_or_uncond' key, so callers must always pass their own dict.
        out_list = []
        for i in range(len(transformer_options['cond_or_uncond'])):
            UNCOND = transformer_options['cond_or_uncond'][i] == 1

            # NOTE(review): x is re-assigned to its padded version below, so
            # from the second iteration on h/w (used for the final crop) are
            # read from the padded tensor — confirm this is intended for sizes
            # that are not a multiple of patch_size.
            bs, c, h, w = x.shape
            transformer_options['original_shape'] = x.shape
            patch_size = 2
            x = comfy.ldm.common_dit.pad_to_patch_size(x, (patch_size, patch_size)) # 1,16,192,192
            transformer_options['patch_size'] = patch_size

            #if 'regional_conditioning_weight' not in transformer_options: # this breaks the graph
            #    transformer_options['regional_conditioning_weight'] = timestep[0] / 1.5

            h_len = ((h + (patch_size // 2)) // patch_size) # h_len 96
            w_len = ((w + (patch_size // 2)) // patch_size) # w_len 96

            img = rearrange(x, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=patch_size, pw=patch_size) # img 1,9216,64

            if UNCOND:
                # Negative weight switches the regional mask off in forward_blocks.
                transformer_options['reg_cond_weight'] = -1
                context_tmp = context[i][None,...].clone()
            elif UNCOND == False:
                transformer_options['reg_cond_weight'] = transformer_options['regional_conditioning_weight']
                transformer_options['reg_cond_floor'] = transformer_options['regional_conditioning_floor'] #if "regional_conditioning_floor" in transformer_options else 0.0
                regional_conditioning_positive = transformer_options.get('patches', {}).get('regional_conditioning_positive', None)
                # NOTE(review): fails with TypeError if the patch is missing (None).
                context_tmp = regional_conditioning_positive[0].concat_cond(context[i][None,...], transformer_options)

            txt_ids = torch.zeros((bs, context_tmp.shape[1], 3), device=x.device, dtype=x.dtype) # txt_ids 1, 256,3
            img_ids_orig = self._get_img_ids(x, bs, h_len, w_len, 0, h_len, 0, w_len) # img_ids_orig = 1,9216,3

            # Each argument is sliced to batch entry i and given a leading batch dim of 1.
            out_tmp = self.forward_blocks(img [i][None,...].clone(), img_ids_orig[i][None,...].clone(), context_tmp, txt_ids [i][None,...].clone(), timestep [i][None,...].clone(), y [i][None,...].clone(), guidance [i][None,...].clone(), control, transformer_options=transformer_options) # context 1,256,4096 y 1,768
            out_list.append(out_tmp)

        out = torch.stack(out_list, dim=0).squeeze(dim=1)

        # Un-patchify and crop back to h x w.
        return rearrange(out, "b (h w) (c ph pw) -> b c (h ph) (w pw)", h=h_len, w=w_len, ph=2, pw=2)[:,:,:h,:w]
def safe_get_nested(d, keys, default=None):
    """Follow ``keys`` through nested dicts and return the value found.

    Args:
        d: The outermost object to search; normally a dict.
        keys: Iterable of keys, applied one level at a time.
        default: Value returned when the path cannot be followed.

    Returns:
        The value stored at the end of the key path, or ``default`` if any
        key is missing or an intermediate value is not a dict. With an empty
        ``keys`` the input ``d`` itself is returned.
    """
    current = d
    for key in keys:
        # Stop at the first miss. The default is returned untouched; it must
        # never become the object that the remaining keys are looked up in
        # (which is what happened when ``default`` was itself a dict).
        if not isinstance(current, dict) or key not in current:
            return default
        current = current[key]
    return current
def conditioning_set_values(conditioning, values=None):
    """Return a copy of ``conditioning`` with ``values`` merged into each entry.

    Args:
        conditioning: Iterable of entries whose element 0 is kept as-is and
            whose element 1 is a dict of options.
        values: Mapping of keys to set on every entry's options dict.
            ``None`` (the default) leaves the options unchanged.

    Returns:
        A new list of ``[entry[0], options]`` pairs. Each ``options`` is a
        shallow copy of the original dict with ``values`` applied, so the
        input entries are not modified.
    """
    # ``None`` replaces the former mutable ``{}`` default argument.
    overrides = {} if values is None else values

    updated = []
    for entry in conditioning:
        options = entry[1].copy()
        options.update(overrides)
        updated.append([entry[0], options])
    return updated
def get_orthogonal(x, y):
    """Return the part of ``x`` that is orthogonal to ``y``, per batch element.

    Both tensors are flattened to ``(batch, -1)``. ``y`` is scaled to unit
    norm along the flattened axis, the projection of ``x`` onto it is
    subtracted from ``x``, and the remainder is returned with ``x``'s shape.

    ``reshape`` is used instead of ``view`` so that non-contiguous inputs
    (for example permuted tensors) are accepted. The inputs are not modified.

    Note: a batch element of ``y`` with zero norm is divided by zero here,
    as in the original implementation.
    """
    x_flat = x.reshape(x.size(0), -1)
    y_flat = y.reshape(y.size(0), -1)

    y_unit = y_flat / y_flat.norm(dim=-1, keepdim=True)
    x_proj_y = torch.sum(x_flat * y_unit, dim=-1, keepdim=True) * y_unit

    return (x_flat - x_proj_y).reshape(x.shape)
class OptionsManager:
    """Merge several option dictionaries into one lazily computed view.

    Option dicts are applied in order, so later dicts override earlier ones.
    Dict-valued options are merged recursively. Keys listed in
    ``APPEND_OPTIONS`` are accumulated and joined with newlines instead of
    being overwritten.

    The merged result owns its own dict structure: nested dicts coming from
    the inputs are copied, so merging never modifies the option dicts that
    were passed in. Non-dict values are shared by reference.
    """

    # Keys whose (truthy) values are collected from every options dict and
    # newline-joined in the merged result.
    APPEND_OPTIONS = {"extra_options"}

    def __init__(self, options_inputs=None):
        """Store the initial option dicts (entries may be ``None``)."""
        # Copy the sequence so add_option() never appends to the caller's list.
        self.options_list = list(options_inputs) if options_inputs else []
        self._merged_dict = None  # cache for the ``merged`` property

    def add_option(self, option):
        """Add a single options dictionary; ``None`` is ignored."""
        if option is not None:
            self.options_list.append(option)
            self._merged_dict = None  # invalidate cached merged options

    @property
    def merged(self):
        """Return the merged options dict, building and caching it on first use."""
        if self._merged_dict is None:
            merged = {}
            appended = {key: [] for key in self.APPEND_OPTIONS}

            for options_dict in self.options_list:
                if options_dict is None:
                    continue
                for key, value in options_dict.items():
                    if key in self.APPEND_OPTIONS and value:
                        appended[key].append(value)
                    elif isinstance(value, dict):
                        existing = merged.get(key)
                        if isinstance(existing, dict):
                            self._deep_update(existing, value)
                        else:
                            # First occurrence, or replacing a non-dict value.
                            merged[key] = self._copy_nested_dicts(value)
                    else:
                        merged[key] = value

            # Append special-case string options (e.g. extra_options).
            for key, parts in appended.items():
                if parts:
                    merged[key] = "\n".join(parts)

            # Publish only a fully built result so an exception above cannot
            # leave a partial merge in the cache.
            self._merged_dict = merged

        return self._merged_dict

    def get(self, key, default=None):
        """Return the merged value for ``key``, or ``default`` if absent."""
        return self.merged.get(key, default)

    @staticmethod
    def _copy_nested_dicts(value):
        """Copy a dict and every dict nested in it; other values are shared."""
        return {
            key: OptionsManager._copy_nested_dicts(item) if isinstance(item, dict) else item
            for key, item in value.items()
        }

    def _deep_update(self, target_dict, source_dict):
        """Recursively merge ``source_dict`` into ``target_dict`` in place.

        Dicts taken from ``source_dict`` are copied before being stored, so
        later merges into ``target_dict`` cannot reach back into the source.
        """
        for key, value in source_dict.items():
            if isinstance(value, dict):
                if isinstance(target_dict.get(key), dict):
                    # Recursive dict update.
                    self._deep_update(target_dict[key], value)
                else:
                    target_dict[key] = self._copy_nested_dicts(value)
            else:
                target_dict[key] = value

    def __getitem__(self, key):
        """Allow dictionary-like access to options."""
        return self.merged[key]

    def __contains__(self, key):
        """Allow the 'in' operator for options."""
        return key in self.merged

    def as_dict(self):
        """Return a shallow copy of the merged options."""
        return self.merged.copy()

    def __bool__(self):
        """Return True if at least one non-None options dict is present."""
        return any(opt is not None for opt in self.options_list)

    def debug_print_options(self):
        """Print every stored options dict through ``RESplain`` in debug mode."""
        for i, options_dict in enumerate(self.options_list):
            RESplain(f"Options {i}:", debug=True)
            if options_dict is not None:
                for key, value in options_dict.items():
                    RESplain(f" {key}: {value}", debug=True)
            else:
                RESplain(" None", "\n", debug=True)
class Noise_RandomNoise:
    """Holds noise settings and produces noise for a latent on request."""

    def __init__(self, seed, noise_type, alpha, k):
        self.seed, self.noise_type = seed, noise_type
        self.alpha, self.k = alpha, k

    def generate_noise(self, input_latent):
        """Call ``prepare_noise`` for ``input_latent["samples"]``.

        The latent's ``"batch_index"`` entry is forwarded when present,
        otherwise ``None`` is passed in its place.
        """
        samples = input_latent["samples"]
        batch_index = input_latent.get("batch_index")
        return prepare_noise(samples, self.seed, self.noise_type, batch_index, self.alpha, self.k)
class LatentNoised:
    """Node that adds generated or user-supplied noise to a latent."""

    @classmethod
    def INPUT_TYPES(s):
        return {"required": {
                    "add_noise": ("BOOLEAN", {"default": True}),
                    "noise_is_latent": ("BOOLEAN", {"default": False}),
                    "noise_type": (NOISE_GENERATOR_NAMES, ),
                    "alpha": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step":0.1, "round": 0.01}),
                    "k": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step":2.0, "round": 0.01}),
                    "noise_seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}),
                    "latent_image": ("LATENT", ),
                    "noise_strength": ("FLOAT", {"default": 1.0, "min": -20.0, "max": 20.0, "step": 0.01, "round": 0.01}),
                    "normalize": (["false", "true"], {"default": "false"}),
                },
                "optional": {
                    "latent_noise": ("LATENT", ),
                    "mask": ("MASK", ),
                }
        }

    RETURN_TYPES = ("LATENT",)
    RETURN_NAMES = ("latent_noised",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/noise"

    def main(self, add_noise, noise_is_latent, noise_type, noise_seed, alpha, k, latent_image, noise_strength, normalize, latent_noise=None, mask=None):
        """Return a copy of ``latent_image`` whose samples have noise added.

        Noise source, in priority order: zeros when ``add_noise`` is false;
        ``prepare_noise(...)`` when no ``latent_noise`` is given; otherwise a
        copy of ``latent_noise["samples"]``.

        Processing steps:
          * ``normalize == "true"``: scale the noise by the samples' std and
            shift it by their mean.
          * ``noise_is_latent``: add the samples to the noise, then
            standardize the sum to zero mean and unit std.
          * multiply by ``noise_strength``.
          * ``mask`` (optional): resize bilinearly to the latent's spatial
            size, expand over channels, fit to the batch size, and multiply
            into the noise.

        Neither ``latent_image`` nor ``latent_noise`` is modified. The global
        torch RNG is seeded with ``noise_seed`` as a side effect.
        """
        latent_out = latent_image.copy()
        samples = latent_out["samples"].clone()

        torch.manual_seed(noise_seed)

        if not add_noise:
            noise = torch.zeros(samples.size(), dtype=samples.dtype, layout=samples.layout, device="cpu")
        elif latent_noise is None:
            batch_inds = latent_out.get("batch_index")
            noise = prepare_noise(samples, noise_seed, noise_type, batch_inds, alpha, k)
        else:
            # Work on a private CPU copy: the in-place ops below must not
            # write into the tensor owned by the upstream node, and the
            # result is combined with ``samples.cpu()``.
            noise = latent_noise["samples"].to("cpu", copy=True)

        if normalize == "true":
            latent_mean = samples.mean()
            latent_std = samples.std()
            noise = noise * latent_std + latent_mean

        if noise_is_latent:
            noise += samples.cpu()
            noise.sub_(noise.mean()).div_(noise.std())

        noise = noise * noise_strength

        if mask is not None:
            mask = F.interpolate(mask.reshape((-1, 1, mask.shape[-2], mask.shape[-1])),
                                 size=(samples.shape[2], samples.shape[3]), mode="bilinear")
            # The mask multiplies the noise, so it has to live on the noise's device.
            mask = mask.expand((-1, samples.shape[1], -1, -1)).to(noise.device)
            if mask.shape[0] < samples.shape[0]:
                # Tile the mask to cover the whole batch, then trim the excess.
                mask = mask.repeat((samples.shape[0] - 1) // mask.shape[0] + 1, 1, 1, 1)[:samples.shape[0]]
            elif mask.shape[0] > samples.shape[0]:
                mask = mask[:samples.shape[0]]
            # Blending with an all-zero tensor reduces to scaling by the mask.
            noise = mask * noise

        latent_out["samples"] = samples.cpu() + noise
        return (latent_out,)
cond_neg=None, sigmas=None, latent_image=None, set_default=False): dtype = None match precision: case "bf16": dtype = torch.bfloat16 case "fp16": dtype = torch.float16 case "fp32": dtype = torch.float32 case "fp64": dtype = torch.float64 case "passthrough": return (cond_pos, cond_neg, sigmas, latent_image, ) if cond_pos is not None: cond_pos[0][0] = cond_pos[0][0].clone().to(dtype) cond_pos[0][1]["pooled_output"] = cond_pos[0][1]["pooled_output"].clone().to(dtype) if cond_neg is not None: cond_neg[0][0] = cond_neg[0][0].clone().to(dtype) cond_neg[0][1]["pooled_output"] = cond_neg[0][1]["pooled_output"].clone().to(dtype) if sigmas is not None: sigmas = sigmas.clone().to(dtype) if latent_image is not None: x = latent_image["samples"].clone().to(dtype) latent_image = {"samples": x} if set_default is True: torch.set_default_dtype(dtype) return (cond_pos, cond_neg, sigmas, latent_image, ) class set_precision_advanced: def __init__(self): pass @classmethod def INPUT_TYPES(s): return { "required": { "latent_image": ("LATENT", ), "global_precision": (["64", "32", "16"], ), "shark_precision": (["64", "32", "16"], ), }, } RETURN_TYPES = ("LATENT","LATENT","LATENT","LATENT","LATENT",) RETURN_NAMES = ("PASSTHROUGH","LATENT_CAST_TO_GLOBAL","LATENT_16","LATENT_32","LATENT_64",) CATEGORY = "RES4LYF/precision" FUNCTION = "main" def main(self, global_precision="32", shark_precision="64", latent_image=None): dtype_map = { "16": torch.float16, "32": torch.float32, "64": torch.float64 } precision_map = { "16": 'fp16', "32": 'fp32', "64": 'fp64' } torch.set_default_dtype(dtype_map[global_precision]) precision_tool.set_cast_type(precision_map[shark_precision]) latent_passthrough = latent_image["samples"] latent_out16 = latent_image["samples"].to(torch.float16) latent_out32 = latent_image["samples"].to(torch.float32) latent_out64 = latent_image["samples"].to(torch.float64) target_dtype = dtype_map[global_precision] if latent_image["samples"].dtype != target_dtype: latent_image["samples"] 
class latent_batch:
    """Node that replicates a latent along the batch axis."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "latent": ("LATENT", ),
                "batch_size": ("INT", {"default": 0, "min": -10000, "max": 10000}),
            },
        }

    RETURN_TYPES = ("LATENT",)
    RETURN_NAMES = ("latent_batch",)
    CATEGORY = "RES4LYF/latents"
    FUNCTION = "main"

    def main(self, latent, batch_size):
        """Return a latent whose samples are tiled to ``batch_size`` entries.

        The input samples are repeated along dim 0 and trimmed to exactly
        ``batch_size`` entries. Channel count, trailing dimensions, dtype and
        device follow the input instead of being fixed to four float32
        channels, so latents with any channel count or rank are accepted.

        Raises:
            ValueError: if ``batch_size`` is negative, or if the input has
                no samples while ``batch_size`` is positive.
        """
        samples = latent["samples"]

        if batch_size < 0:
            raise ValueError(f"batch_size must be non-negative, got {batch_size}")

        source_count = samples.shape[0]
        if source_count == 0 and batch_size > 0:
            raise ValueError("cannot build a batch from a latent with no samples")

        # Ceiling division: enough whole copies of the input to reach batch_size.
        copies = -(-batch_size // source_count) if source_count else 0
        trailing_ones = [1] * (samples.ndim - 1)
        batch_latents = samples.repeat(copies, *trailing_ones)[:batch_size]

        return ({"samples": batch_latents}, )
("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.001}), "magnitude_cyan_red": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.001}), "magnitude_lime_purple": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.001}), "magnitude_pattern_structure": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.001}), "latent_0_normal": ("BOOLEAN", {"default": True}), "latent_1_normal": ("BOOLEAN", {"default": True}), "latent_out_normal": ("BOOLEAN", {"default": True}), "latent_0_stdize": ("BOOLEAN", {"default": True}), "latent_1_stdize": ("BOOLEAN", {"default": True}), "latent_out_stdize": ("BOOLEAN", {"default": True}), "latent_0_meancenter": ("BOOLEAN", {"default": True}), "latent_1_meancenter": ("BOOLEAN", {"default": True}), "latent_out_meancenter": ("BOOLEAN", {"default": True}), }, "optional": { "phase_mix_powers": ("SIGMAS", ), "magnitude_mix_powers": ("SIGMAS", ), "phase_luminositys": ("SIGMAS", ), "phase_cyan_reds": ("SIGMAS", ), "phase_lime_purples": ("SIGMAS", ), "phase_pattern_structures": ("SIGMAS", ), "magnitude_luminositys": ("SIGMAS", ), "magnitude_cyan_reds": ("SIGMAS", ), "magnitude_lime_purples": ("SIGMAS", ), "magnitude_pattern_structures": ("SIGMAS", ), } } RETURN_TYPES = ("LATENT",) FUNCTION = "main" CATEGORY = "RES4LYF/latents" @staticmethod def latent_repeat(latent, batch_size): b, c, h, w = latent.shape batch_latents = torch.zeros((batch_size, c, h, w), dtype=latent.dtype, layout=latent.layout, device=latent.device) for i in range(batch_size): batch_latents[i] = latent return batch_latents @staticmethod def mix_latent_phase_magnitude(latent_0, latent_1, power_phase, power_magnitude, phase_luminosity, phase_cyan_red, phase_lime_purple, phase_pattern_structure, magnitude_luminosity, magnitude_cyan_red, magnitude_lime_purple, magnitude_pattern_structure ): dtype = torch.promote_types(latent_0.dtype, latent_1.dtype) # big accuracy problems with fp32 FFT! 
let's avoid that latent_0 = latent_0.double() latent_1 = latent_1.double() latent_0_fft = torch.fft.fft2(latent_0) latent_1_fft = torch.fft.fft2(latent_1) latent_0_phase = torch.angle(latent_0_fft) latent_1_phase = torch.angle(latent_1_fft) latent_0_magnitude = torch.abs(latent_0_fft) latent_1_magnitude = torch.abs(latent_1_fft) # DC corruption...? handle separately?? #dc_index = (0, 0) #dc_0 = latent_0_fft[:, :, dc_index[0], dc_index[1]] #dc_1 = latent_1_fft[:, :, dc_index[0], dc_index[1]] #mixed_dc = dc_0 * 0.5 + dc_1 * 0.5 #mixed_dc = dc_0 * (1 - phase_weight) + dc_1 * phase_weight # create complex FFT using a weighted mix of phases chan_weights_phase = [w for w in [phase_luminosity, phase_cyan_red, phase_lime_purple, phase_pattern_structure ]] chan_weights_magnitude = [w for w in [magnitude_luminosity, magnitude_cyan_red, magnitude_lime_purple, magnitude_pattern_structure]] mixed_phase = torch.zeros_like(latent_0, dtype=latent_0.dtype, layout=latent_0.layout, device=latent_0.device) mixed_magnitude = torch.zeros_like(latent_0, dtype=latent_0.dtype, layout=latent_0.layout, device=latent_0.device) for i in range(4): mixed_phase[:, i] = ( (latent_0_phase[:,i] * (1-chan_weights_phase[i])) ** power_phase + (latent_1_phase[:,i] * chan_weights_phase[i]) ** power_phase) ** (1/power_phase) mixed_magnitude[:, i] = ( (latent_0_magnitude[:,i] * (1-chan_weights_magnitude[i])) ** power_magnitude + (latent_1_magnitude[:,i] * chan_weights_magnitude[i]) ** power_magnitude) ** (1/power_magnitude) new_fft = mixed_magnitude * torch.exp(1j * mixed_phase) #new_fft[:, :, dc_index[0], dc_index[1]] = mixed_dc # inverse FFT to convert back to spatial domain mixed_phase_magnitude = torch.fft.ifft2(new_fft).real return mixed_phase_magnitude.to(dtype) def main(self, #batch_size, latent_1_repeat, latent_0_batch, latent_1_batch, latent_0_normal, latent_1_normal, latent_out_normal, latent_0_stdize, latent_1_stdize, latent_out_stdize, latent_0_meancenter, latent_1_meancenter, 
latent_out_meancenter, phase_mix_power, magnitude_mix_power, phase_luminosity, phase_cyan_red, phase_lime_purple, phase_pattern_structure, magnitude_luminosity, magnitude_cyan_red, magnitude_lime_purple, magnitude_pattern_structure, phase_mix_powers=None, magnitude_mix_powers=None, phase_luminositys=None, phase_cyan_reds=None, phase_lime_purples=None, phase_pattern_structures=None, magnitude_luminositys=None, magnitude_cyan_reds=None, magnitude_lime_purples=None, magnitude_pattern_structures=None ): latent_0_batch = latent_0_batch["samples"].double() latent_1_batch = latent_1_batch["samples"].double().to(latent_0_batch.device) #if batch_size == 0: batch_size = latent_0_batch.shape[0] if latent_1_batch.shape[0] == 1: latent_1_batch = self.latent_repeat(latent_1_batch, batch_size) magnitude_mix_powers = initialize_or_scale(magnitude_mix_powers, magnitude_mix_power, batch_size) phase_mix_powers = initialize_or_scale(phase_mix_powers, phase_mix_power, batch_size) phase_luminositys = initialize_or_scale(phase_luminositys, phase_luminosity, batch_size) phase_cyan_reds = initialize_or_scale(phase_cyan_reds, phase_cyan_red, batch_size) phase_lime_purples = initialize_or_scale(phase_lime_purples, phase_lime_purple, batch_size) phase_pattern_structures = initialize_or_scale(phase_pattern_structures, phase_pattern_structure, batch_size) magnitude_luminositys = initialize_or_scale(magnitude_luminositys, magnitude_luminosity, batch_size) magnitude_cyan_reds = initialize_or_scale(magnitude_cyan_reds, magnitude_cyan_red, batch_size) magnitude_lime_purples = initialize_or_scale(magnitude_lime_purples, magnitude_lime_purple, batch_size) magnitude_pattern_structures = initialize_or_scale(magnitude_pattern_structures, magnitude_pattern_structure, batch_size) mixed_phase_magnitude_batch = torch.zeros(latent_0_batch.shape, device=latent_0_batch.device) if latent_0_normal == True: latent_0_batch = latent_normalize_channels(latent_0_batch) if latent_1_normal == True: latent_1_batch = 
latent_normalize_channels(latent_1_batch) if latent_0_meancenter == True: latent_0_batch = latent_meancenter_channels(latent_0_batch) if latent_1_meancenter == True: latent_1_batch = latent_meancenter_channels(latent_1_batch) if latent_0_stdize == True: latent_0_batch = latent_stdize_channels(latent_0_batch) if latent_1_stdize == True: latent_1_batch = latent_stdize_channels(latent_1_batch) for i in range(batch_size): mixed_phase_magnitude = self.mix_latent_phase_magnitude(latent_0_batch[i:i+1], latent_1_batch[i:i+1], phase_mix_powers[i].item(), magnitude_mix_powers[i].item(), phase_luminositys[i].item(), phase_cyan_reds[i].item(),phase_lime_purples[i].item(),phase_pattern_structures[i].item(), magnitude_luminositys[i].item(), magnitude_cyan_reds[i].item(),magnitude_lime_purples[i].item(),magnitude_pattern_structures[i].item() ) if latent_out_normal == True: mixed_phase_magnitude = latent_normalize_channels(mixed_phase_magnitude) if latent_out_stdize == True: mixed_phase_magnitude = latent_stdize_channels(mixed_phase_magnitude) if latent_out_meancenter == True: mixed_phase_magnitude = latent_meancenter_channels(mixed_phase_magnitude) mixed_phase_magnitude_batch[i, :, :, :] = mixed_phase_magnitude return ({"samples": mixed_phase_magnitude_batch}, ) class LatentPhaseMagnitudeMultiply: @classmethod def INPUT_TYPES(s): return { "required": { "latent_0_batch": ("LATENT",), "phase_luminosity": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.001}), "phase_cyan_red": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.001}), "phase_lime_purple": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.001}), "phase_pattern_structure": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.001}), "magnitude_luminosity": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.001}), "magnitude_cyan_red": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.001}), 
"magnitude_lime_purple": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.001}), "magnitude_pattern_structure": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.001}), "latent_0_normal": ("BOOLEAN", {"default": False}), "latent_out_normal": ("BOOLEAN", {"default": False}), }, "optional": { "phase_luminositys": ("SIGMAS", ), "phase_cyan_reds": ("SIGMAS", ), "phase_lime_purples": ("SIGMAS", ), "phase_pattern_structures": ("SIGMAS", ), "magnitude_luminositys": ("SIGMAS", ), "magnitude_cyan_reds": ("SIGMAS", ), "magnitude_lime_purples": ("SIGMAS", ), "magnitude_pattern_structures": ("SIGMAS", ), } } RETURN_TYPES = ("LATENT",) FUNCTION = "main" CATEGORY = "RES4LYF/latents" @staticmethod def latent_repeat(latent, batch_size): b, c, h, w = latent.shape batch_latents = torch.zeros((batch_size, c, h, w), dtype=latent.dtype, layout=latent.layout, device=latent.device) for i in range(batch_size): batch_latents[i] = latent return batch_latents @staticmethod def mix_latent_phase_magnitude(latent_0, phase_luminosity, phase_cyan_red, phase_lime_purple, phase_pattern_structure, magnitude_luminosity, magnitude_cyan_red, magnitude_lime_purple, magnitude_pattern_structure ): dtype = latent_0.dtype # avoid big accuracy problems with fp32 FFT! 
latent_0 = latent_0.double() latent_0_fft = torch.fft.fft2(latent_0) latent_0_phase = torch.angle(latent_0_fft) latent_0_magnitude = torch.abs(latent_0_fft) # create new complex FFT using weighted mix of phases chan_weights_phase = [w for w in [phase_luminosity, phase_cyan_red, phase_lime_purple, phase_pattern_structure ]] chan_weights_magnitude = [ w for w in [magnitude_luminosity, magnitude_cyan_red, magnitude_lime_purple, magnitude_pattern_structure]] mixed_phase = torch.zeros_like(latent_0, dtype=latent_0.dtype, layout=latent_0.layout, device=latent_0.device) mixed_magnitude = torch.zeros_like(latent_0, dtype=latent_0.dtype, layout=latent_0.layout, device=latent_0.device) for i in range(4): mixed_phase[:, i] = latent_0_phase[:,i] * chan_weights_phase[i] mixed_magnitude[:, i] = latent_0_magnitude[:,i] * chan_weights_magnitude[i] new_fft = mixed_magnitude * torch.exp(1j * mixed_phase) # inverse FFT to convert back to spatial domain mixed_phase_magnitude = torch.fft.ifft2(new_fft).real return mixed_phase_magnitude.to(dtype) def main(self, latent_0_batch, latent_0_normal, latent_out_normal, phase_luminosity, phase_cyan_red, phase_lime_purple, phase_pattern_structure, magnitude_luminosity, magnitude_cyan_red, magnitude_lime_purple, magnitude_pattern_structure, phase_luminositys=None, phase_cyan_reds=None, phase_lime_purples=None, phase_pattern_structures=None, magnitude_luminositys=None, magnitude_cyan_reds=None, magnitude_lime_purples=None, magnitude_pattern_structures=None ): latent_0_batch = latent_0_batch["samples"].double() batch_size = latent_0_batch.shape[0] phase_luminositys = initialize_or_scale(phase_luminositys, phase_luminosity, batch_size) phase_cyan_reds = initialize_or_scale(phase_cyan_reds, phase_cyan_red, batch_size) phase_lime_purples = initialize_or_scale(phase_lime_purples, phase_lime_purple, batch_size) phase_pattern_structures = initialize_or_scale(phase_pattern_structures, phase_pattern_structure, batch_size) magnitude_luminositys = 
class LatentPhaseMagnitudeOffset:
    """ComfyUI node: add per-channel offsets to the FFT phase and magnitude of a latent batch.

    Every batch item is transformed with ``torch.fft.fft2``; a scalar is added
    to the phase and another to the magnitude of each of the first four
    channels, then the spectrum is rebuilt and transformed back with
    ``torch.fft.ifft2`` (only the real part is kept).

    NOTE(review): the widgets default to 1.0, which is not a neutral value for
    an additive offset (0.0 would be) — confirm whether that is intended.
    """

    # Channel names in channel-index order; widget names are "<kind>_<channel>".
    _CHANNELS = ("luminosity", "cyan_red", "lime_purple", "pattern_structure")

    @classmethod
    def INPUT_TYPES(s):
        """Return the node's input spec: 8 FLOAT offsets, 2 BOOLEAN flags, 8 optional SIGMAS."""
        names = [f"{kind}_{chan}" for kind in ("phase", "magnitude") for chan in s._CHANNELS]

        required = {"latent_0_batch": ("LATENT",)}
        for name in names:
            # A fresh options dict per widget so no two entries share state.
            required[name] = ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.001})
        required["latent_0_normal"] = ("BOOLEAN", {"default": False})
        required["latent_out_normal"] = ("BOOLEAN", {"default": False})

        optional = {f"{name}s": ("SIGMAS", ) for name in names}
        return {"required": required, "optional": optional}

    RETURN_TYPES = ("LATENT",)
    FUNCTION = "main"

    CATEGORY = "RES4LYF/latents"

    @staticmethod
    def latent_repeat(latent, batch_size):
        """Return a ``(batch_size, c, h, w)`` tensor whose every item is a copy of ``latent``.

        ``latent`` is unpacked as ``(b, c, h, w)``; the per-item assignment only
        succeeds when it can be broadcast into a ``(c, h, w)`` slot.
        """
        b, c, h, w = latent.shape
        batch_latents = torch.zeros((batch_size, c, h, w), dtype=latent.dtype, layout=latent.layout, device=latent.device)
        for i in range(batch_size):
            batch_latents[i] = latent
        return batch_latents

    @staticmethod
    def mix_latent_phase_magnitude(latent_0,
                                   phase_luminosity, phase_cyan_red, phase_lime_purple, phase_pattern_structure,
                                   magnitude_luminosity, magnitude_cyan_red, magnitude_lime_purple, magnitude_pattern_structure
                                   ):
        """Offset the FFT phase/magnitude of channels 0-3 and return the real inverse FFT.

        The computation runs in float64 and the result is cast back to the
        dtype ``latent_0`` had on entry. Channels beyond the fourth are left
        with zero phase and magnitude, i.e. they come back as zeros.
        """
        dtype = latent_0.dtype
        # avoid big accuracy problems with fp32 FFT!
        latent_0 = latent_0.double()

        latent_0_fft = torch.fft.fft2(latent_0)
        latent_0_phase = torch.angle(latent_0_fft)
        latent_0_magnitude = torch.abs(latent_0_fft)

        phase_offsets = (phase_luminosity, phase_cyan_red, phase_lime_purple, phase_pattern_structure)
        magnitude_offsets = (magnitude_luminosity, magnitude_cyan_red, magnitude_lime_purple, magnitude_pattern_structure)

        mixed_phase = torch.zeros_like(latent_0_phase)
        mixed_magnitude = torch.zeros_like(latent_0_magnitude)
        for i, (phase_offset, magnitude_offset) in enumerate(zip(phase_offsets, magnitude_offsets)):
            mixed_phase[:, i] = latent_0_phase[:, i] + phase_offset
            mixed_magnitude[:, i] = latent_0_magnitude[:, i] + magnitude_offset

        # Rebuild the complex spectrum from the adjusted polar components.
        new_fft = mixed_magnitude * torch.exp(1j * mixed_phase)

        # inverse FFT to convert back to spatial domain
        mixed_phase_magnitude = torch.fft.ifft2(new_fft).real

        return mixed_phase_magnitude.to(dtype)

    def main(self, latent_0_batch, latent_0_normal, latent_out_normal,
             phase_luminosity, phase_cyan_red, phase_lime_purple, phase_pattern_structure,
             magnitude_luminosity, magnitude_cyan_red, magnitude_lime_purple, magnitude_pattern_structure,
             phase_luminositys=None, phase_cyan_reds=None, phase_lime_purples=None, phase_pattern_structures=None,
             magnitude_luminositys=None, magnitude_cyan_reds=None, magnitude_lime_purples=None, magnitude_pattern_structures=None
             ):
        """Apply the phase/magnitude offsets to every item of ``latent_0_batch["samples"]``.

        Each scalar widget is combined with its optional SIGMAS counterpart by
        ``initialize_or_scale`` (defined elsewhere in this file; presumably it
        yields one value per batch item — verify), and item ``i`` uses element
        ``i`` of each result. ``latent_0_normal`` / ``latent_out_normal`` run
        ``latent_normalize_channels`` on the input batch / on each output item.

        Returns a one-tuple holding ``{"samples": tensor}``; the tensor has the
        shape, device and dtype of the input samples.
        """
        samples = latent_0_batch["samples"]
        # Remember the caller's dtype: the math below runs in float64, but the
        # result must not be silently forced to the default float32.
        out_dtype = samples.dtype
        latent_0_batch = samples.double()

        batch_size = latent_0_batch.shape[0]

        # Same order as the positional parameters of mix_latent_phase_magnitude.
        schedules = [
            initialize_or_scale(phase_luminositys, phase_luminosity, batch_size),
            initialize_or_scale(phase_cyan_reds, phase_cyan_red, batch_size),
            initialize_or_scale(phase_lime_purples, phase_lime_purple, batch_size),
            initialize_or_scale(phase_pattern_structures, phase_pattern_structure, batch_size),
            initialize_or_scale(magnitude_luminositys, magnitude_luminosity, batch_size),
            initialize_or_scale(magnitude_cyan_reds, magnitude_cyan_red, batch_size),
            initialize_or_scale(magnitude_lime_purples, magnitude_lime_purple, batch_size),
            initialize_or_scale(magnitude_pattern_structures, magnitude_pattern_structure, batch_size),
        ]

        mixed_phase_magnitude_batch = torch.zeros(latent_0_batch.shape, dtype=out_dtype, device=latent_0_batch.device)

        if latent_0_normal:
            latent_0_batch = latent_normalize_channels(latent_0_batch)

        for i in range(batch_size):
            offsets = [schedule[i].item() for schedule in schedules]
            mixed_phase_magnitude = self.mix_latent_phase_magnitude(latent_0_batch[i:i+1], *offsets)

            if latent_out_normal:
                mixed_phase_magnitude = latent_normalize_channels(mixed_phase_magnitude)

            mixed_phase_magnitude_batch[i, :, :, :] = mixed_phase_magnitude

        return ({"samples": mixed_phase_magnitude_batch}, )
"magnitude_pattern_structure": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.001}), "latent_0_normal": ("BOOLEAN", {"default": False}), "latent_out_normal": ("BOOLEAN", {"default": False}), }, "optional": { "phase_luminositys": ("SIGMAS", ), "phase_cyan_reds": ("SIGMAS", ), "phase_lime_purples": ("SIGMAS", ), "phase_pattern_structures": ("SIGMAS", ), "magnitude_luminositys": ("SIGMAS", ), "magnitude_cyan_reds": ("SIGMAS", ), "magnitude_lime_purples": ("SIGMAS", ), "magnitude_pattern_structures": ("SIGMAS", ), } } RETURN_TYPES = ("LATENT",) FUNCTION = "main" CATEGORY = "RES4LYF/latents" @staticmethod def latent_repeat(latent, batch_size): b, c, h, w = latent.shape batch_latents = torch.zeros((batch_size, c, h, w), dtype=latent.dtype, layout=latent.layout, device=latent.device) for i in range(batch_size): batch_latents[i] = latent return batch_latents @staticmethod def mix_latent_phase_magnitude(latent_0, phase_luminosity, phase_cyan_red, phase_lime_purple, phase_pattern_structure, magnitude_luminosity, magnitude_cyan_red, magnitude_lime_purple, magnitude_pattern_structure ): dtype = latent_0.dtype # avoid big accuracy problems with fp32 FFT! 
latent_0 = latent_0.double() latent_0_fft = torch.fft.fft2(latent_0) latent_0_phase = torch.angle(latent_0_fft) latent_0_magnitude = torch.abs(latent_0_fft) # create new complex FFT using a weighted mix of phases chan_weights_phase = [w for w in [phase_luminosity, phase_cyan_red, phase_lime_purple, phase_pattern_structure ]] chan_weights_magnitude = [ w for w in [magnitude_luminosity, magnitude_cyan_red, magnitude_lime_purple, magnitude_pattern_structure]] mixed_phase = torch.zeros_like(latent_0, dtype=latent_0.dtype, layout=latent_0.layout, device=latent_0.device) mixed_magnitude = torch.zeros_like(latent_0, dtype=latent_0.dtype, layout=latent_0.layout, device=latent_0.device) for i in range(4): mixed_phase[:, i] = latent_0_phase[:,i] ** chan_weights_phase[i] mixed_magnitude[:, i] = latent_0_magnitude[:,i] ** chan_weights_magnitude[i] new_fft = mixed_magnitude * torch.exp(1j * mixed_phase) # inverse FFT to convert back to spatial domain mixed_phase_magnitude = torch.fft.ifft2(new_fft).real return mixed_phase_magnitude.to(dtype) def main(self, latent_0_batch, latent_0_normal, latent_out_normal, phase_luminosity, phase_cyan_red, phase_lime_purple, phase_pattern_structure, magnitude_luminosity, magnitude_cyan_red, magnitude_lime_purple, magnitude_pattern_structure, phase_luminositys=None, phase_cyan_reds=None, phase_lime_purples=None, phase_pattern_structures=None, magnitude_luminositys=None, magnitude_cyan_reds=None, magnitude_lime_purples=None, magnitude_pattern_structures=None ): latent_0_batch = latent_0_batch["samples"].double() batch_size = latent_0_batch.shape[0] phase_luminositys = initialize_or_scale(phase_luminositys, phase_luminosity, batch_size) phase_cyan_reds = initialize_or_scale(phase_cyan_reds, phase_cyan_red, batch_size) phase_lime_purples = initialize_or_scale(phase_lime_purples, phase_lime_purple, batch_size) phase_pattern_structures = initialize_or_scale(phase_pattern_structures, phase_pattern_structure, batch_size) magnitude_luminositys = 
initialize_or_scale(magnitude_luminositys, magnitude_luminosity, batch_size) magnitude_cyan_reds = initialize_or_scale(magnitude_cyan_reds, magnitude_cyan_red, batch_size) magnitude_lime_purples = initialize_or_scale(magnitude_lime_purples, magnitude_lime_purple, batch_size) magnitude_pattern_structures = initialize_or_scale(magnitude_pattern_structures, magnitude_pattern_structure, batch_size) mixed_phase_magnitude_batch = torch.zeros(latent_0_batch.shape, device=latent_0_batch.device) if latent_0_normal == True: latent_0_batch = latent_normalize_channels(latent_0_batch) for i in range(batch_size): mixed_phase_magnitude = self.mix_latent_phase_magnitude(latent_0_batch[i:i+1], phase_luminositys[i].item(), phase_cyan_reds[i].item(),phase_lime_purples[i].item(),phase_pattern_structures[i].item(), magnitude_luminositys[i].item(), magnitude_cyan_reds[i].item(),magnitude_lime_purples[i].item(),magnitude_pattern_structures[i].item() ) if latent_out_normal == True: mixed_phase_magnitude = latent_normalize_channels(mixed_phase_magnitude) mixed_phase_magnitude_batch[i, :, :, :] = mixed_phase_magnitude return ({"samples": mixed_phase_magnitude_batch}, ) class StableCascade_StageC_VAEEncode_Exact: def __init__(self, device="cpu"): self.device = device @classmethod def INPUT_TYPES(s): return {"required": { "image": ("IMAGE",), "vae": ("VAE", ), "width": ("INT", {"default": 24, "min": 1, "max": 1024, "step": 1}), "height": ("INT", {"default": 24, "min": 1, "max": 1024, "step": 1}), }} RETURN_TYPES = ("LATENT",) RETURN_NAMES = ("stage_c",) FUNCTION = "generate" CATEGORY = "RES4LYF/vae" def generate(self, image, vae, width, height): out_width = (width) * vae.downscale_ratio #downscale_ratio = 32 out_height = (height) * vae.downscale_ratio #movedim(-1,1) goes from 1,1024,1024,3 to 1,3,1024,1024 s = comfy.utils.common_upscale(image.movedim(-1,1), out_width, out_height, "lanczos", "center").movedim(1,-1) c_latent = vae.encode(s[:,:,:,:3]) #to slice off alpha channel? 
class StableCascade_StageC_VAEEncode_Exact_Tiled:
    """ComfyUI node that VAE-encodes an image tile by tile through ``tiled_scale_multidim``."""

    def __init__(self, device="cpu"):
        # Device on which the stitched result is accumulated.
        self.device = device

    @classmethod
    def INPUT_TYPES(s):
        return {"required": {
            "image": ("IMAGE",),
            "vae": ("VAE", ),
            "tile_size": ("INT", {"default": 512, "min": 320, "max": 4096, "step": 64}),
            "overlap": ("INT", {"default": 16, "min": 8, "max": 128, "step": 8}),
        }}

    RETURN_TYPES = ("LATENT",)
    RETURN_NAMES = ("stage_c",)
    FUNCTION = "generate"

    CATEGORY = "RES4LYF/vae"

    def generate(self, image, vae, tile_size, overlap):
        """Encode ``image`` in tiles of ``tile_size // 8`` and return ``({"samples": latent},)``.

        NOTE(review): ``upscale_amount`` is set to ``vae.downscale_ratio`` even
        though encoding reduces the spatial size; confirm whether
        ``1 / vae.downscale_ratio`` was intended.
        """
        upscale_amount = vae.downscale_ratio  # downscale_ratio = 32

        image = image.movedim(-1, 1)  # bhwc -> bchw

        # Encode on the VAE's device, hand the tile back on the CPU.
        encode_fn = lambda img: vae.encode(img.to(vae.device)).to("cpu")

        c_latent = tiled_scale_multidim(
            image, encode_fn,
            tile=(tile_size // 8, tile_size // 8),
            overlap=overlap,
            upscale_amount=upscale_amount,
            out_channels=16,
            output_device=self.device
        )

        return ({
            "samples": c_latent,
        },)


@torch.inference_mode()
def tiled_scale_multidim(samples, function, tile=(64, 64), overlap=8, upscale_amount=4, out_channels=3, output_device="cpu", pbar=None):
    """Run ``function`` over overlapping tiles of ``samples`` and blend the results.

    Args:
        samples: tensor laid out as ``(batch, channels, *spatial)``.
        function: callable applied to each ``(1, channels, *tile)`` slice; its
            result must have ``out_channels`` channels and spatial sizes scaled
            by ``upscale_amount``.
        tile: tile size per spatial dimension; its length sets how many
            spatial dimensions are tiled.
        overlap: number of input elements shared by neighbouring tiles.
        upscale_amount: output-size / input-size ratio of ``function``.
        out_channels: channel count of the output tensor.
        output_device: device on which the output is accumulated.
        pbar: optional object with ``update(int)``, called once per tile.

    Returns:
        Tensor of shape ``(batch, out_channels, *round(spatial * upscale_amount))``.
        Overlapping tiles are combined as a feather-weighted average; positions
        that no tile wrote to stay zero.

    Raises:
        ValueError: if any tile size is not larger than ``overlap`` (the tile
            stride would be zero or negative).
    """
    dims = len(tile)
    if any(t <= overlap for t in tile):
        raise ValueError(f"every tile size must be larger than overlap ({overlap}); got tile={tuple(tile)}")

    spatial = samples.shape[2:]
    output_shape = [samples.shape[0], out_channels] + [round(a * upscale_amount) for a in spatial]
    output = torch.zeros(output_shape, device=output_device)
    # Sum of blend weights per output element, used to normalise at the end.
    weight = torch.zeros(output_shape, device=output_device)

    feather = round(overlap * upscale_amount)
    tile_starts = [range(0, size, t - overlap) for size, t in zip(spatial, tile)]

    for b in range(samples.shape[0]):
        for it in itertools.product(*tile_starts):
            s_in = samples[b:b+1]
            upscaled = []

            for d in range(dims):
                pos = max(0, min(s_in.shape[d + 2] - overlap, it[d]))
                l = min(tile[d], s_in.shape[d + 2] - pos)
                s_in = s_in.narrow(d + 2, pos, l)
                upscaled.append(round(pos * upscale_amount))

            ps = function(s_in).to(output_device)

            # Linear ramp at both ends of every tiled dimension; the ramp is
            # clamped so tiles shorter than the feather width do not index
            # out of range.
            mask = torch.ones_like(ps)
            for d in range(2, dims + 2):
                size = mask.shape[d]
                for t in range(min(feather, size)):
                    ramp = (1.0 / feather) * (t + 1)
                    mask.narrow(d, t, 1).mul_(ramp)
                    mask.narrow(d, size - 1 - t, 1).mul_(ramp)

            o = output[b:b+1]
            o_w = weight[b:b+1]
            for d in range(dims):
                o = o.narrow(d + 2, upscaled[d], mask.shape[d + 2])
                o_w = o_w.narrow(d + 2, upscaled[d], mask.shape[d + 2])

            o.add_(ps * mask)
            o_w.add_(mask)

            if pbar is not None:
                pbar.update(1)

    # Without this division the feather ramps darken image borders and the
    # overlaps do not sum to one. Untouched positions (weight 0) stay zero.
    return torch.where(weight > 0, output / weight, output)


class EmptyLatentImageCustom:
    """ComfyUI node that creates an all-zero latent with selectable channels, scale and precision."""

    def __init__(self):
        self.device = comfy.model_management.intermediate_device()

    @classmethod
    def INPUT_TYPES(s):
        return {"required": {
            "width": ("INT", {"default": 24, "min": 1, "max": MAX_RESOLUTION, "step": 1}),
            "height": ("INT", {"default": 24, "min": 1, "max": MAX_RESOLUTION, "step": 1}),
            "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096}),
            "channels": (['4', '16'], {"default": '4'}),
            # The default must be one of the listed options ('default' was not).
            "mode": (['sdxl', 'cascade_b', 'cascade_c', 'exact'], {"default": 'sdxl'}),
            "compression": ("INT", {"default": 42, "min": 4, "max": 128, "step": 1}),
            "precision": (['fp16', 'fp32', 'fp64'], {"default": 'fp32'}),
        }}

    RETURN_TYPES = ("LATENT",)
    FUNCTION = "generate"

    CATEGORY = "RES4LYF/latents"

    def generate(self, width, height, batch_size, channels, mode, compression, precision):
        """Return ``({"samples": zeros},)`` of shape ``(batch_size, channels, height // ratio, width // ratio)``.

        ``ratio`` is 8 for ``sdxl``, 4 for ``cascade_b``, ``compression`` for
        ``cascade_c`` and 1 for ``exact`` or any other mode. An unrecognised
        ``precision`` falls back to float32.
        """
        c = int(channels)

        ratio = {
            "sdxl": 8,
            "cascade_b": 4,
            "cascade_c": compression,
            "exact": 1,
        }.get(mode, 1)

        dtype = {
            "fp16": torch.float16,
            "fp32": torch.float32,
            "fp64": torch.float64,
        }.get(precision, torch.float32)

        latent = torch.zeros([batch_size, c, height // ratio, width // ratio], dtype=dtype, device=self.device)
        return ({"samples": latent}, )
MAX_RESOLUTION = 8192


class LatentNoiseBatch_perlin:
    """ComfyUI node that produces a batch of 4-channel latents filled with Perlin-derived noise."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {"required": {
            "seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}),
            "width": ("INT", {"default": 1024, "min": 8, "max": MAX_RESOLUTION, "step": 8}),
            "height": ("INT", {"default": 1024, "min": 8, "max": MAX_RESOLUTION, "step": 8}),
            "batch_size": ("INT", {"default": 1, "min": 1, "max": 256}),
            "detail_level": ("FLOAT", {"default": 0, "min": -1, "max": 1.0, "step": 0.1}),
            },
            "optional": {
                "details": ("SIGMAS", ),
            }
        }

    RETURN_TYPES = ("LATENT",)
    FUNCTION = "create_noisy_latents_perlin"

    CATEGORY = "RES4LYF/noise"

    # found at https://gist.github.com/vadimkantorov/ac1b097753f217c5c11bc2ff396e0a57
    # which was ported from https://github.com/pvigier/perlin-numpy/blob/master/perlin2d.py
    def rand_perlin_2d(self, shape, res, fade=lambda t: 6*t**5 - 15*t**4 + 10*t**3):
        """Return one 2D Perlin-noise field of size ``shape`` with ``res`` gradient cells per axis.

        Gradient directions are drawn with ``torch.rand`` from the global RNG.
        """
        delta = (res[0] / shape[0], res[1] / shape[1])
        d = (shape[0] // res[0], shape[1] // res[1])

        # Fractional position of every output element inside its grid cell.
        # "ij" is the historical default, now passed explicitly to avoid the
        # deprecation warning.
        grid = torch.stack(
            torch.meshgrid(torch.arange(0, res[0], delta[0]), torch.arange(0, res[1], delta[1]), indexing="ij"),
            dim=-1,
        ) % 1
        angles = 2 * math.pi * torch.rand(res[0] + 1, res[1] + 1)
        gradients = torch.stack((torch.cos(angles), torch.sin(angles)), dim=-1)

        def tile_grads(slice1, slice2):
            # Spread each corner gradient over the output elements of its cell.
            return gradients[slice1[0]:slice1[1], slice2[0]:slice2[1]].repeat_interleave(d[0], 0).repeat_interleave(d[1], 1)

        def dot(grad, shift):
            offsets = torch.stack((grid[:shape[0], :shape[1], 0] + shift[0], grid[:shape[0], :shape[1], 1] + shift[1]), dim=-1)
            return (offsets * grad[:shape[0], :shape[1]]).sum(dim=-1)

        n00 = dot(tile_grads([0, -1], [0, -1]), [0, 0])
        n10 = dot(tile_grads([1, None], [0, -1]), [-1, 0])
        n01 = dot(tile_grads([0, -1], [1, None]), [0, -1])
        n11 = dot(tile_grads([1, None], [1, None]), [-1, -1])
        t = fade(grid[:shape[0], :shape[1]])
        return math.sqrt(2) * torch.lerp(torch.lerp(n00, n10, t[..., 0]), torch.lerp(n01, n11, t[..., 0]), t[..., 1])

    def rand_perlin_2d_octaves(self, shape, res, octaves=1, persistence=0.5):
        """Sum ``octaves`` Perlin fields and scramble the result into values in ``[0, 1)``.

        Each octave doubles the frequency and multiplies the amplitude by
        ``persistence``.
        """
        noise = torch.zeros(shape)
        frequency = 1
        amplitude = 1
        for _ in range(octaves):
            noise += amplitude * self.rand_perlin_2d(shape, (frequency*res[0], frequency*res[1]))
            frequency *= 2
            amplitude *= persistence
        # Keep only the remainder (mod 11) of the heavily magnified values, scaled by 1/11.
        noise = torch.remainder(torch.abs(noise)*1000000, 11)/11
        return noise

    def scale_tensor(self, x):
        """Rescale ``x`` linearly so its minimum maps to 0 and its maximum to 1.

        A constant tensor maps to all zeros instead of NaN.
        """
        min_value = x.min()
        span = x.max() - min_value
        # Guard against 0/0 when every element is equal.
        span = torch.where(span == 0, torch.ones_like(span), span)
        return (x - min_value) / span

    def create_noisy_latents_perlin(self, seed, width, height, batch_size, detail_level, details=None):
        """Build ``({"samples": noise},)`` with ``noise`` of shape ``(batch_size, 4, height // 8, width // 8)``.

        Seeds the global torch RNG with ``seed``. The uniform field from
        ``rand_perlin_2d_octaves`` is pushed through ``erfinv``, scaled by
        ``1 + details[i] / 10`` for batch item ``i`` and clamped to ``[-5, 5]``.
        When ``details`` is given it is multiplied by ``detail_level`` and must
        provide at least ``batch_size`` entries.
        """
        if details is None:
            details = torch.full((batch_size,), detail_level)
        else:
            details = detail_level * details

        torch.manual_seed(seed)

        latent_h, latent_w = height // 8, width // 8
        noise = torch.zeros((batch_size, 4, latent_h, latent_w), dtype=torch.float32, device="cpu")
        for i in range(batch_size):
            gain = 1 + details[i] / 10
            for j in range(4):
                noise_values = self.rand_perlin_2d_octaves((latent_h, latent_w), (1, 1), 1, 1)
                result = gain * torch.erfinv(2 * noise_values - 1) * (2 ** 0.5)
                noise[i, j, :, :] = torch.clamp(result, -5, 5)
        return ({"samples": noise},)
-10000.0, "max": 10000.0, "step": 0.001}), "mean_luminosity": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.001}), "mean_cyan_red": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.001}), "mean_lime_purple": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.001}), "mean_pattern_structure": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.001}), "std": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.001}), "steps": ("INT", {"default": 0, "min": -10000, "max": 10000}), "seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}), }, "optional": { "means": ("SIGMAS", ), "mean_luminositys": ("SIGMAS", ), "mean_cyan_reds": ("SIGMAS", ), "mean_lime_purples": ("SIGMAS", ), "mean_pattern_structures": ("SIGMAS", ), "stds": ("SIGMAS", ), } } RETURN_TYPES = ("LATENT",) FUNCTION = "main" CATEGORY = "RES4LYF/noise" """ @staticmethod def gaussian_noise_channels_like(x, mean=0.0, mean_luminosity = -0.1, mean_cyan_red = 0.0, mean_lime_purple=0.0, mean_pattern_structure=0.0, std_dev=1.0, seed=42): x = x.squeeze(0) noise = torch.randn_like(x) * std_dev + mean luminosity = noise[0:1] + mean_luminosity cyan_red = noise[1:2] + mean_cyan_red lime_purple = noise[2:3] + mean_lime_purple pattern_structure = noise[3:4] + mean_pattern_structure noise = torch.unsqueeze(torch.cat([luminosity, cyan_red, lime_purple, pattern_structure]), 0) return noise.to(x.device)""" @staticmethod def gaussian_noise_channels(x, mean_luminosity = -0.1, mean_cyan_red = 0.0, mean_lime_purple=0.0, mean_pattern_structure=0.0): x = x.squeeze(0) luminosity = x[0:1] + mean_luminosity cyan_red = x[1:2] + mean_cyan_red lime_purple = x[2:3] + mean_lime_purple pattern_structure = x[3:4] + mean_pattern_structure x = torch.unsqueeze(torch.cat([luminosity, cyan_red, lime_purple, pattern_structure]), 0) return x def main(self, latent, steps, seed, mean, mean_luminosity, mean_cyan_red, mean_lime_purple, 
mean_pattern_structure, std, means=None, mean_luminositys=None, mean_cyan_reds=None, mean_lime_purples=None, mean_pattern_structures=None, stds=None): if steps == 0: steps = len(means) x = latent["samples"] b, c, h, w = x.shape noise_latents = torch.zeros([steps, 4, h, w], dtype=x.dtype, layout=x.layout, device=x.device) noise_sampler = NOISE_GENERATOR_CLASSES.get('gaussian')(x=x, seed = seed) means = initialize_or_scale(means, mean, steps) mean_luminositys = initialize_or_scale(mean_luminositys, mean_luminosity, steps) mean_cyan_reds = initialize_or_scale(mean_cyan_reds, mean_cyan_red, steps) mean_lime_purples = initialize_or_scale(mean_lime_purples, mean_lime_purple, steps) mean_pattern_structures = initialize_or_scale(mean_pattern_structures, mean_pattern_structure, steps) stds = initialize_or_scale(stds, std, steps) for i in range(steps): noise = noise_sampler(mean=means[i].item(), std=stds[i].item()) noise = self.gaussian_noise_channels(noise, mean_luminositys[i].item(), mean_cyan_reds[i].item(), mean_lime_purples[i].item(), mean_pattern_structures[i].item()) noise_latents[i] = x + noise return ({"samples": noise_latents}, ) class LatentNoiseBatch_gaussian: @classmethod def INPUT_TYPES(s): return { "required": { "latent": ("LATENT",), "mean": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.001}), "std": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.001}), "steps": ("INT", {"default": 0, "min": -10000, "max": 10000}), "seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}), }, "optional": { "means": ("SIGMAS", ), "stds": ("SIGMAS", ), "steps_": ("SIGMAS", ), } } RETURN_TYPES = ("LATENT",) FUNCTION = "main" CATEGORY = "RES4LYF/noise" def main(self, latent, mean, std, steps, seed, means=None, stds=None, steps_=None): if steps_ is not None: steps = len(steps_) means = initialize_or_scale(means, mean, steps) stds = initialize_or_scale(stds, std, steps) latent_samples = latent["samples"] b, c, h, w = 
latent_samples.shape noise_latents = torch.zeros([steps, c, h, w], dtype=latent_samples.dtype, layout=latent_samples.layout, device=latent_samples.device) noise_sampler = NOISE_GENERATOR_CLASSES.get('gaussian')(x=latent_samples, seed = seed) for i in range(steps): noise_latents[i] = noise_sampler(mean=means[i].item(), std=stds[i].item()) return ({"samples": noise_latents}, ) class LatentNoiseBatch_fractal: @classmethod def INPUT_TYPES(s): return { "required": { "latent": ("LATENT",), "alpha": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.001}), "k_flip": ("BOOLEAN", {"default": False}), "steps": ("INT", {"default": 0, "min": -10000, "max": 10000}), "seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}), }, "optional": { "alphas": ("SIGMAS", ), "ks": ("SIGMAS", ), "steps_": ("SIGMAS", ), } } RETURN_TYPES = ("LATENT",) FUNCTION = "main" CATEGORY = "RES4LYF/noise" def main(self, latent, alpha, k_flip, steps, seed=42, alphas=None, ks=None, sigmas_=None, steps_=None): if steps_ is not None: steps = len(steps_) alphas = initialize_or_scale(alphas, alpha, steps) k_flip = -1 if k_flip else 1 ks = initialize_or_scale(ks, k_flip, steps) latent_samples = latent["samples"] b, c, h, w = latent_samples.shape noise_latents = torch.zeros([steps, c, h, w], dtype=latent_samples.dtype, layout=latent_samples.layout, device=latent_samples.device) noise_sampler = NOISE_GENERATOR_CLASSES.get('fractal')(x=latent_samples, seed = seed) for i in range(steps): noise_latents[i] = noise_sampler(alpha=alphas[i].item(), k=ks[i].item(), scale=0.1) return ({"samples": noise_latents}, ) class LatentNoiseList: @classmethod def INPUT_TYPES(s): return { "required": { "latent": ("LATENT",), "alpha": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.001}), "k_flip": ("BOOLEAN", {"default": False}), "steps": ("INT", {"default": 0, "min": -10000, "max": 10000}), "seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}), }, "optional": { 
class LatentBatch_channels:
    """ComfyUI node that offsets, multiplies or exponentiates the first four channels of a latent batch."""

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "latent": ("LATENT",),
                "mode": (["offset", "multiply", "power"],),
                "luminosity": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.01}),
                "cyan_red": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.01}),
                "lime_purple": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.01}),
                "pattern_structure": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.01}),
            },
            "optional": {
                "luminositys": ("SIGMAS", ),
                "cyan_reds": ("SIGMAS", ),
                "lime_purples": ("SIGMAS", ),
                "pattern_structures": ("SIGMAS", ),
            }
        }

    RETURN_TYPES = ("LATENT",)
    FUNCTION = "main"

    CATEGORY = "RES4LYF/latents"

    @staticmethod
    def _apply_per_channel(x, op, values):
        """Apply ``op(channel, value)`` to channel ``i`` of ``x`` for each value and stack the results.

        ``x`` is indexed along its first dimension; the result carries one
        extra leading dimension of size 1.
        """
        channels = [op(x[i:i + 1], value) for i, value in enumerate(values)]
        return torch.unsqueeze(torch.cat(channels), 0)

    @staticmethod
    def latent_channels_multiply(x, luminosity = -0.1, cyan_red = 0.0, lime_purple=0.0, pattern_structure=0.0):
        """Multiply channels 0-3 of ``x`` by the four given factors."""
        return LatentBatch_channels._apply_per_channel(x, torch.mul, (luminosity, cyan_red, lime_purple, pattern_structure))

    @staticmethod
    def latent_channels_offset(x, luminosity = -0.1, cyan_red = 0.0, lime_purple=0.0, pattern_structure=0.0):
        """Add the four given offsets to channels 0-3 of ``x``."""
        return LatentBatch_channels._apply_per_channel(x, torch.add, (luminosity, cyan_red, lime_purple, pattern_structure))

    @staticmethod
    def latent_channels_power(x, luminosity = -0.1, cyan_red = 0.0, lime_purple=0.0, pattern_structure=0.0):
        """Raise channels 0-3 of ``x`` to the four given exponents."""
        return LatentBatch_channels._apply_per_channel(x, torch.pow, (luminosity, cyan_red, lime_purple, pattern_structure))

    def main(self, latent, mode, luminosity, cyan_red, lime_purple, pattern_structure, luminositys=None, cyan_reds=None, lime_purples=None, pattern_structures=None):
        """Transform every item of ``latent["samples"]`` channel-wise according to ``mode``.

        Each scalar is combined with its optional SIGMAS counterpart through
        ``initialize_or_scale`` (defined elsewhere in this file; presumably one
        value per batch item — verify), and item ``i`` uses element ``i`` of
        each result.

        Returns a one-tuple holding ``{"samples": tensor}`` with the shape,
        dtype and device of the input samples.

        Raises:
            ValueError: if ``mode`` is not "offset", "multiply" or "power".
        """
        transforms = {
            "offset": self.latent_channels_offset,
            "multiply": self.latent_channels_multiply,
            "power": self.latent_channels_power,
        }
        # Fail early: previously an unknown mode left the per-item result unbound.
        if mode not in transforms:
            raise ValueError(f"unknown mode {mode!r}; expected one of {sorted(transforms)}")
        transform = transforms[mode]

        x = latent["samples"]
        b, c, h, w = x.shape

        noise_latents = torch.zeros([b, c, h, w], dtype=x.dtype, layout=x.layout, device=x.device)

        luminositys = initialize_or_scale(luminositys, luminosity, b)
        cyan_reds = initialize_or_scale(cyan_reds, cyan_red, b)
        lime_purples = initialize_or_scale(lime_purples, lime_purple, b)
        pattern_structures = initialize_or_scale(pattern_structures, pattern_structure, b)

        for i in range(b):
            noise_latents[i] = transform(
                x[i],
                luminositys[i].item(),
                cyan_reds[i].item(),
                lime_purples[i].item(),
                pattern_structures[i].item(),
            )

        return ({"samples": noise_latents}, )
{"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.01}), "chan_3": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.01}), "chan_4": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.01}), "chan_5": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.01}), "chan_6": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.01}), "chan_7": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.01}), "chan_8": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.01}), "chan_9": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.01}), "chan_10": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.01}), "chan_11": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.01}), "chan_12": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.01}), "chan_13": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.01}), "chan_14": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.01}), "chan_15": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.01}), "chan_16": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.01}), }, "optional": { "chan_1s": ("SIGMAS", ), "chan_2s": ("SIGMAS", ), "chan_3s": ("SIGMAS", ), "chan_4s": ("SIGMAS", ), "chan_5s": ("SIGMAS", ), "chan_6s": ("SIGMAS", ), "chan_7s": ("SIGMAS", ), "chan_8s": ("SIGMAS", ), "chan_9s": ("SIGMAS", ), "chan_10s": ("SIGMAS", ), "chan_11s": ("SIGMAS", ), "chan_12s": ("SIGMAS", ), "chan_13s": ("SIGMAS", ), "chan_14s": ("SIGMAS", ), "chan_15s": ("SIGMAS", ), "chan_16s": ("SIGMAS", ), } } RETURN_TYPES = ("LATENT",) FUNCTION = "main" CATEGORY = "RES4LYF/latents" @staticmethod def latent_channels_multiply(x, chan_1 = 0.0, chan_2 = 0.0, chan_3 = 0.0, chan_4 = 0.0, chan_5 = 0.0, chan_6 = 0.0, chan_7 = 0.0, chan_8 = 0.0, chan_9 = 0.0, chan_10 = 0.0, chan_11 = 0.0, chan_12 = 0.0, 
chan_13 = 0.0, chan_14 = 0.0, chan_15 = 0.0, chan_16 = 0.0): chan_1 = x[0:1] * chan_1 chan_2 = x[1:2] * chan_2 chan_3 = x[2:3] * chan_3 chan_4 = x[3:4] * chan_4 chan_5 = x[4:5] * chan_5 chan_6 = x[5:6] * chan_6 chan_7 = x[6:7] * chan_7 chan_8 = x[7:8] * chan_8 chan_9 = x[8:9] * chan_9 chan_10 = x[9:10] * chan_10 chan_11 = x[10:11] * chan_11 chan_12 = x[11:12] * chan_12 chan_13 = x[12:13] * chan_13 chan_14 = x[13:14] * chan_14 chan_15 = x[14:15] * chan_15 chan_16 = x[15:16] * chan_16 x = torch.unsqueeze(torch.cat([chan_1, chan_2, chan_3, chan_4, chan_5, chan_6, chan_7, chan_8, chan_9, chan_10, chan_11, chan_12, chan_13, chan_14, chan_15, chan_16]), 0) return x @staticmethod def latent_channels_offset(x, chan_1 = 0.0, chan_2 = 0.0, chan_3 = 0.0, chan_4 = 0.0, chan_5 = 0.0, chan_6 = 0.0, chan_7 = 0.0, chan_8 = 0.0, chan_9 = 0.0, chan_10 = 0.0, chan_11 = 0.0, chan_12 = 0.0, chan_13 = 0.0, chan_14 = 0.0, chan_15 = 0.0, chan_16 = 0.0): chan_1 = x[0:1] + chan_1 chan_2 = x[1:2] + chan_2 chan_3 = x[2:3] + chan_3 chan_4 = x[3:4] + chan_4 chan_5 = x[4:5] + chan_5 chan_6 = x[5:6] + chan_6 chan_7 = x[6:7] + chan_7 chan_8 = x[7:8] + chan_8 chan_9 = x[8:9] + chan_9 chan_10 = x[9:10] + chan_10 chan_11 = x[10:11] + chan_11 chan_12 = x[11:12] + chan_12 chan_13 = x[12:13] + chan_13 chan_14 = x[13:14] + chan_14 chan_15 = x[14:15] + chan_15 chan_16 = x[15:16] + chan_16 x = torch.unsqueeze(torch.cat([chan_1, chan_2, chan_3, chan_4, chan_5, chan_6, chan_7, chan_8, chan_9, chan_10, chan_11, chan_12, chan_13, chan_14, chan_15, chan_16]), 0) return x @staticmethod def latent_channels_power(x, chan_1 = 0.0, chan_2 = 0.0, chan_3 = 0.0, chan_4 = 0.0, chan_5 = 0.0, chan_6 = 0.0, chan_7 = 0.0, chan_8 = 0.0, chan_9 = 0.0, chan_10 = 0.0, chan_11 = 0.0, chan_12 = 0.0, chan_13 = 0.0, chan_14 = 0.0, chan_15 = 0.0, chan_16 = 0.0): chan_1 = x[0:1] ** chan_1 chan_2 = x[1:2] ** chan_2 chan_3 = x[2:3] ** chan_3 chan_4 = x[3:4] ** chan_4 chan_5 = x[4:5] ** chan_5 chan_6 = x[5:6] ** chan_6 chan_7 = x[6:7] 
** chan_7 chan_8 = x[7:8] ** chan_8 chan_9 = x[8:9] ** chan_9 chan_10 = x[9:10] ** chan_10 chan_11 = x[10:11] ** chan_11 chan_12 = x[11:12] ** chan_12 chan_13 = x[12:13] ** chan_13 chan_14 = x[13:14] ** chan_14 chan_15 = x[14:15] ** chan_15 chan_16 = x[15:16] ** chan_16 x = torch.unsqueeze(torch.cat([chan_1, chan_2, chan_3, chan_4, chan_5, chan_6, chan_7, chan_8, chan_9, chan_10, chan_11, chan_12, chan_13, chan_14, chan_15, chan_16]), 0) return x def main(self, latent, mode, chan_1, chan_2, chan_3, chan_4, chan_5, chan_6, chan_7, chan_8, chan_9, chan_10, chan_11, chan_12, chan_13, chan_14, chan_15, chan_16, chan_1s=None, chan_2s=None, chan_3s=None, chan_4s=None, chan_5s=None, chan_6s=None, chan_7s=None, chan_8s=None, chan_9s=None, chan_10s=None, chan_11s=None, chan_12s=None, chan_13s=None, chan_14s=None, chan_15s=None, chan_16s=None): x = latent["samples"] b, c, h, w = x.shape noise_latents = torch.zeros([b, c, h, w], dtype=x.dtype, layout=x.layout, device=x.device) chan_1s = initialize_or_scale(chan_1s, chan_1, b) chan_2s = initialize_or_scale(chan_2s, chan_2, b) chan_3s = initialize_or_scale(chan_3s, chan_3, b) chan_4s = initialize_or_scale(chan_4s, chan_4, b) chan_5s = initialize_or_scale(chan_5s, chan_5, b) chan_6s = initialize_or_scale(chan_6s, chan_6, b) chan_7s = initialize_or_scale(chan_7s, chan_7, b) chan_8s = initialize_or_scale(chan_8s, chan_8, b) chan_9s = initialize_or_scale(chan_9s, chan_9, b) chan_10s = initialize_or_scale(chan_10s, chan_10, b) chan_11s = initialize_or_scale(chan_11s, chan_11, b) chan_12s = initialize_or_scale(chan_12s, chan_12, b) chan_13s = initialize_or_scale(chan_13s, chan_13, b) chan_14s = initialize_or_scale(chan_14s, chan_14, b) chan_15s = initialize_or_scale(chan_15s, chan_15, b) chan_16s = initialize_or_scale(chan_16s, chan_16, b) for i in range(b): if mode == "offset": noise = self.latent_channels_offset(x[i], chan_1s[i].item(), chan_2s[i].item(), chan_3s[i].item(), chan_4s[i].item(), chan_5s[i].item(), chan_6s[i].item(), 
class latent_normalize_channels:
    """Node that centers and/or rescales a latent, globally or per (batch, channel) slice."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "latent": ("LATENT", ),
                "mode": (["full", "channels"],),
                "operation": (["normalize", "center", "standardize"],),
            },
        }

    RETURN_TYPES = ("LATENT",)
    RETURN_NAMES = ("passthrough",)
    CATEGORY = "RES4LYF/latents"
    FUNCTION = "main"

    def main(self, latent, mode, operation):
        """Return ``latent["samples"]`` after the requested statistic adjustment.

        mode:
            ``"full"`` uses one mean/std over the whole tensor; ``"channels"``
            uses a separate mean/std for every (batch, channel) slice.
        operation:
            ``"normalize"`` subtracts the mean and divides by the std,
            ``"center"`` only subtracts the mean, ``"standardize"`` only
            divides by the std.

        Unknown ``mode``/``operation`` values return the samples unchanged.
        The input tensor is never written to: the per-channel path previously
        assigned into ``latent["samples"]`` in place, which altered the
        caller's tensor.
        """
        x = latent["samples"]

        if mode == "full":
            mean, std = x.mean(), x.std()
        elif mode == "channels":
            # Reduce over everything after (batch, channel).
            dims = tuple(range(2, x.ndim))
            mean = x.mean(dim=dims, keepdim=True)
            std = x.std(dim=dims, keepdim=True)
        else:
            return ({"samples": x},)

        if operation == "normalize":
            x = (x - mean) / std
        elif operation == "center":
            x = x - mean
        elif operation == "standardize":
            x = x / std

        return ({"samples": x},)


def hard_light_blend(base_latent, blend_latent):
    """Hard-light blend ``blend_latent`` onto ``base_latent`` (sign-aware), then restore per-channel std.

    ``blend_latent`` is min/max rescaled to [0, 1]. Positive and negative
    parts of ``base_latent`` are blended separately (the negative part on its
    magnitude, then negated) and recombined. Finally every (batch, channel)
    slice is rescaled so its standard deviation matches the same slice of
    ``base_latent``.

    If ``base_latent`` has zero sum and zero std it is returned as-is.

    The final rescale covers every batch entry; it previously touched only
    batch index 0 and left all other entries as zeros.
    """
    if base_latent.sum() == 0 and base_latent.std() == 0:
        return base_latent

    blend_latent = (blend_latent - blend_latent.min()) / (blend_latent.max() - blend_latent.min())

    positive_mask = (base_latent >= 0).float()
    negative_mask = (base_latent < 0).float()

    positive_latent = base_latent * positive_mask
    negative_magnitude = (base_latent * negative_mask).abs()

    lower_half = blend_latent < 0.5
    positive_result = torch.where(lower_half,
                                  2 * positive_latent * blend_latent,
                                  1 - 2 * (1 - positive_latent) * (1 - blend_latent))
    negative_result = -torch.where(lower_half,
                                   2 * negative_magnitude * blend_latent,
                                   1 - 2 * (1 - negative_magnitude) * (1 - blend_latent))

    combined_result = positive_result * positive_mask + negative_result * negative_mask

    # Match each (batch, channel) slice's std to the base latent's.
    dims = tuple(range(2, combined_result.ndim))
    combined_std = combined_result.std(dim=dims, keepdim=True)
    base_std = base_latent.std(dim=dims, keepdim=True)
    return combined_result / combined_std * base_std
def get_epsilon(model, x, sigma, **extra_args):
    """Return the noise estimate ``(x - x0) / sigma`` where ``x0 = model(x, sigma, ...)``.

    ``sigma`` is expanded to one value per batch entry before being passed to
    ``model``. For the division it is reshaped to ``(batch, 1, ..., 1)`` so
    that each sample is divided by its own sigma; dividing by the flat
    ``(batch,)`` tensor would broadcast against the last axis of ``x`` instead
    and fails (or is wrong) whenever batch > 1.
    """
    sigma_batch = sigma * x.new_ones([x.shape[0]])
    x0 = model(x, sigma_batch, **extra_args)
    per_sample_sigma = sigma_batch.reshape(-1, *([1] * (x.ndim - 1)))
    return (x - x0) / per_sample_sigma


def get_denoised(model, x, sigma, **extra_args):
    """Return ``model(x, sigma, ...)`` with ``sigma`` expanded to one value per batch entry."""
    sigma_batch = sigma * x.new_ones([x.shape[0]])
    return model(x, sigma_batch, **extra_args)
# Remainder solution
def __phi(j, neg_h):
    """Tensor form of phi_j: ``(exp(neg_h) - sum_{k<j} neg_h**k / k!) / neg_h**j``."""
    remainder = torch.zeros_like(neg_h)

    for k in range(j):
        remainder += (neg_h)**k / math.factorial(k)
    phi_j_h = ((neg_h).exp() - remainder) / (neg_h)**j

    return phi_j_h


def calculate_gamma(c2, c3):
    """Return the free parameter gamma of the 3-stage exponential RK family.

    With weights ``b3 = phi_2 / (gamma*c2 + c3)`` and ``b2 = gamma * b3`` (as
    used by the ``res_3s`` coefficients), the third-order condition
    ``b2*c2**2 + b3*c3**2 = 1/3`` at ``h -> 0`` gives

        gamma = (3*c3**2 - 2*c3) / (c2 * (2 - 3*c2))

    The exponent on ``c3`` is 2; a cube only agrees with this at ``c3 == 1``.
    Undefined (division by zero) for ``c2 == 0`` or ``c2 == 2/3``.
    """
    return (3*(c3**2) - 2*c3) / (c2*(2 - 3*c2))


from typing import Optional


def _gamma(n: int,) -> int:
    """
    https://en.wikipedia.org/wiki/Gamma_function
    for every positive integer n,
    Γ(n) = (n-1)!
    """
    return math.factorial(n-1)


def _incomplete_gamma(s: int, x: float, gamma_s: Optional[int] = None) -> float:
    """
    https://en.wikipedia.org/wiki/Incomplete_gamma_function#Special_values
    if s is a positive integer,
    Γ(s, x) = (s-1)!*∑{k=0..s-1}(x^k/k!)

    ``gamma_s`` may be supplied to reuse an already computed Γ(s).
    """
    if gamma_s is None:
        gamma_s = _gamma(s)

    # {k=0..s-1} inclusive
    series = sum(x**k / math.factorial(k) for k in range(s))
    return series * math.exp(-x) * gamma_s


# Exact analytic solution originally calculated by Clybius. https://github.com/Clybius/ComfyUI-Extra-Samplers/tree/main
def phi(j: int, neg_h: float, ):
    """
    For j={1,2,3}: you could alternatively use Kat's phi_1, phi_2, phi_3 which perform fewer steps

    Lemma 1
    https://arxiv.org/abs/2308.02157
    ϕj(-h) = 1/h^j*∫{0..h}(e^(τ-h)*(τ^(j-1))/((j-1)!)dτ)

    https://www.wolframalpha.com/input?i=integrate+e%5E%28%CF%84-h%29*%28%CF%84%5E%28j-1%29%2F%28j-1%29%21%29d%CF%84
    = 1/h^j*[(e^(-h)*(-τ)^(-j)*τ(j))/((j-1)!)]{0..h}
    https://www.wolframalpha.com/input?i=integrate+e%5E%28%CF%84-h%29*%28%CF%84%5E%28j-1%29%2F%28j-1%29%21%29d%CF%84+between+0+and+h
    = 1/h^j*((e^(-h)*(-h)^(-j)*h^j*(Γ(j)-Γ(j,-h)))/(j-1)!)
    = (e^(-h)*(-h)^(-j)*h^j*(Γ(j)-Γ(j,-h))/((j-1)!*h^j)
    = (e^(-h)*(-h)^(-j)*(Γ(j)-Γ(j,-h))/(j-1)!
    = (e^(-h)*(-h)^(-j)*(Γ(j)-Γ(j,-h))/Γ(j)
    = (e^(-h)*(-h)^(-j)*(1-Γ(j,-h)/Γ(j))

    requires j>0

    ``neg_h`` is passed to ``math.exp``, so it must be a number or something
    convertible to one (NOTE(review): callers in this file pass torch
    tensors, presumably single-element — confirm).
    """
    assert j > 0
    gamma_: float = _gamma(j)
    incomp_gamma_: float = _incomplete_gamma(j, neg_h, gamma_s=gamma_)

    phi_: float = math.exp(neg_h) * neg_h**-j * (1-incomp_gamma_/gamma_)

    return phi_
/ 70145310854471391), (310937500000000 / 2597974476091533 + 323282178906250 * (739**0.5) / 70145310854471391), (5236016175 / 88357462711 + 44986764863 * (739**0.5) / 21205791050640), (5236016175 / 88357462711 + 470139 * (739**0.5) / 1413719403376)], [4563950663 / 16057595763, 621875000000000 / 2597974476091533, 621875000000000 / 2597974476091533, 10472032350 / 88357462711, 10472032350 / 88357462711] ], [ 1 / 2, 1 / 2 - 99 * (739**0.5) / 10000, 1 / 2 + 99 * (739**0.5) / 10000, 1 / 2 - (739**0.5) / 60, 1 / 2 + (739**0.5) / 60 ] ), "gauss-legendre_4s": ( [ [1/4, 1/4 - 15**0.5 / 6, 1/4 + 15**0.5 / 6, 1/4], [1/4 + 15**0.5 / 6, 1/4, 1/4 - 15**0.5 / 6, 1/4], [1/4, 1/4 + 15**0.5 / 6, 1/4, 1/4 - 15**0.5 / 6], [1/4 - 15**0.5 / 6, 1/4, 1/4 + 15**0.5 / 6, 1/4], [1/8, 3/8, 3/8, 1/8] ], [ 1/2 - 15**0.5 / 10, 1/2 + 15**0.5 / 10, 1/2 + 15**0.5 / 10, 1/2 - 15**0.5 / 10 ] ), "gauss-legendre_3s": ( [ [5/36, 2/9 - 15**0.5 / 15, 5/36 - 15**0.5 / 30], [5/36 + 15**0.5 / 24, 2/9, 5/36 - 15**0.5 / 24], [5/36 + 15**0.5 / 30, 2/9 + 15**0.5 / 15, 5/36], [5/18, 4/9, 5/18] ], [1/2 - 15**0.5 / 10, 1/2, 1/2 + 15**0.5 / 10] ), "gauss-legendre_2s": ( [ [1/4, 1/4 - 3**0.5 / 6], [1/4 + 3**0.5 / 6, 1/4], [1/2, 1/2], ], [1/2 - 3**0.5 / 6, 1/2 + 3**0.5 / 6] ), "radau_iia_3s": ( [ [11/45 - 7*6**0.5 / 360, 37/225 - 169*6**0.5 / 1800, -2/225 + 6**0.5 / 75], [37/225 + 169*6**0.5 / 1800, 11/45 + 7*6**0.5 / 360, -2/225 - 6**0.5 / 75], [4/9 - 6**0.5 / 36, 4/9 + 6**0.5 / 36, 1/9], [4/9 - 6**0.5 / 36, 4/9 + 6**0.5 / 36, 1/9], ], [2/5 - 6**0.5 / 10, 2/5 + 6**0.5 / 10, 1.] 
), "radau_iia_2s": ( [ [5/12, -1/12], [3/4, 1/4], [3/4, 1/4], ], [1/3, 1] ), "lobatto_iiic_3s": ( [ [1/6, -1/3, 1/6], [1/6, 5/12, -1/12], [1/6, 2/3, 1/6], [1/6, 2/3, 1/6], ], [0, 1/2, 1] ), "lobatto_iiic_2s": ( [ [1/2, -1/2], [1/2, 1/2], [1/2, 1/2], ], [0, 1] ), "dormand-prince_13s": ( [ [1/18, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1/48, 1/16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1/32, 0, 3/32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [5/16, 0, -75/64, 75/64, 0, 0, 0, 0, 0, 0, 0, 0, 0], [3/80, 0, 0, 3/16, 3/20, 0, 0, 0, 0, 0, 0, 0, 0], [29443841/614563906, 0, 0, 77736538/692538347, -28693883/1125000000, 23124283/1800000000, 0, 0, 0, 0, 0, 0, 0], [16016141/946692911, 0, 0, 61564180/158732637, 22789713/633445777, 545815736/2771057229, -180193667/1043307555, 0, 0, 0, 0, 0, 0], [39632708/573591083, 0, 0, -433636366/683701615, -421739975/2616292301, 100302831/723423059, 790204164/839813087, 800635310/3783071287, 0, 0, 0, 0, 0], [246121993/1340847787, 0, 0, -37695042795/15268766246, -309121744/1061227803, -12992083/490766935, 6005943493/2108947869, 393006217/1396673457, 123872331/1001029789, 0, 0, 0, 0], [-1028468189/846180014, 0, 0, 8478235783/508512852, 1311729495/1432422823, -10304129995/1701304382, -48777925059/3047939560, 15336726248/1032824649, -45442868181/3398467696, 3065993473/597172653, 0, 0, 0], [185892177/718116043, 0, 0, -3185094517/667107341, -477755414/1098053517, -703635378/230739211, 5731566787/1027545527, 5232866602/850066563, -4093664535/808688257, 3962137247/1805957418, 65686358/487910083, 0, 0], [403863854/491063109, 0, 0, -5068492393/434740067, -411421997/543043805, 652783627/914296604, 11173962825/925320556, -13158990841/6184727034, 3936647629/1978049680, -160528059/685178525, 248638103/1413531060, 0, 0], [14005451/335480064, 0, 0, 0, 0, -59238493/1068277825, 181606767/758867731, 561292985/797845732, -1041891430/1371343529, 760417239/1151165299, 118820643/751138087, -528747749/2220607170, 1/4] ], [0, 1/18, 1/12, 1/8, 5/16, 3/8, 59/400, 93/200, 5490023248 / 
9719169821, 13/20, 1201146811 / 1299019798, 1, 1], ), "dormand-prince_6s": ( [ [1/5, 0, 0, 0, 0, 0, 0], [3/40, 9/40, 0, 0, 0, 0, 0], [44/45, -56/15, 32/9, 0, 0, 0, 0], [19372/6561, -25360/2187, 64448/6561, -212/729, 0, 0, 0], [9017/3168, -355/33, 46732/5247, 49/176, -5103/18656, 0], [35/384, 0, 500/1113, 125/192, -2187/6784, 11/84, 0], ], [0, 1/5, 3/10, 4/5, 8/9, 1], ), "dormand-prince_6s_alt": ( [ [1/5, 0, 0, 0, 0, 0, 0], [3/40, 9/40, 0, 0, 0, 0, 0], [44/45, -56/15, 32/9, 0, 0, 0, 0], [19372/6561, -25360/2187, 64448/6561, -212/729, 0, 0, 0], [9017/3168, -355/33, 46732/5247, 49/176, -5103/18656, 0], [35/384, 0, 500/1113, 125/192, -2187/6784, 11/84, 0], ], [0, 1/5, 3/10, 4/5, 8/9, 1], ), "dormand-prince_7s": ( [ [1/5, 0, 0, 0, 0, 0, 0], [3/40, 9/40, 0, 0, 0, 0, 0], [44/45, -56/15, 32/9, 0, 0, 0, 0], [19372/6561, -25360/2187, 64448/6561, -212/729, 0, 0, 0], [9017/3168, -355/33, 46732/5247, 49/176, -5103/18656, 0], [35/384, 0, 500/1113, 125/192, -2187/6784, 11/84, 0], ], [0, 1/5, 3/10, 4/5, 8/9, 1], ), "bogacki-shampine_7s": ( #5th order [ [1/6, 0, 0, 0, 0, 0, 0], [2/27, 4/27, 0, 0, 0, 0, 0], [183/1372, -162/343, 1053/1372, 0, 0, 0, 0], [68/297, -4/11, 42/143, 1960/3861, 0, 0, 0], [597/22528, 81/352, 63099/585728, 58653/366080, 4617/20480, 0, 0], [174197/959244, -30942/79937, 8152137/19744439, 666106/1039181, -29421/29068, 482048/414219, 0], [587/8064, 0, 4440339/15491840, 24353/124800, 387/44800, 2152/5985, 7267/94080] ], [0, 1/6, 2/9, 3/7, 2/3, 3/4, 1] ), "rk4_4s": ( [ [1/2, 0, 0, 0], [0, 1/2, 0, 0], [0, 0, 1, 0], [1/6, 1/3, 1/3, 1/6] ], [0, 1/2, 1/2, 1], ), "rk38_4s": ( [ [1/3, 0, 0, 0], [-1/3, 1, 0, 0], [1, -1, 1, 0], [1/8, 3/8, 3/8, 1/8] ], [0, 1/3, 2/3, 1], ), "ralston_4s": ( [ [2/5, 0, 0, 0], [(-2889+1428 * 5**0.5)/1024, (3785-1620 * 5**0.5)/1024, 0, 0], [(-3365+2094 * 5**0.5)/6040, (-975-3046 * 5**0.5)/2552, (467040+203968*5**0.5)/240845, 0], [(263+24*5**0.5)/1812, (125-1000*5**0.5)/3828, (3426304+1661952*5**0.5)/5924787, (30-4*5**0.5)/123] ], [0, 2/5, (14-3 * 
5**0.5)/16, 1], ), "heun_3s": ( [ [1/3, 0, 0], [0, 2/3, 0], [1/4, 0, 3/4] ], [0, 1/3, 2/3], ), "kutta_3s": ( [ [1/2, 0, 0], [-1, 2, 0], [1/6, 2/3, 1/6] ], [0, 1/2, 1], ), "ralston_3s": ( [ [1/2, 0, 0], [0, 3/4, 0], [2/9, 1/3, 4/9] ], [0, 1/2, 3/4], ), "houwen-wray_3s": ( [ [8/15, 0, 0], [1/4, 5/12, 0], [1/4, 0, 3/4] ], [0, 8/15, 2/3], ), "ssprk3_3s": ( [ [1, 0, 0], [1/4, 1/4, 0], [1/6, 1/6, 2/3] ], [0, 1, 1/2], ), "midpoint_2s": ( [ [1/2, 0], [0, 1] ], [0, 1/2], ), "heun_2s": ( [ [1, 0], [1/2, 1/2] ], [0, 1], ), "ralston_2s": ( [ [2/3, 0], [1/4, 3/4] ], [0, 2/3], ), "buehler": ( [ [1], ], [0], ), } def get_rk_methods(rk_type, h, c1=0.0, c2=0.5, c3=1.0, h_prev=None, h_prev2=None, stepcount=0, sigmas=None): FSAL = False multistep_stages = 0 if rk_type[:4] == "deis": order = int(rk_type[-2]) if stepcount < order: if order == 4: rk_type = "res_3s" order = 3 elif order == 3: rk_type = "res_3s" elif order == 2: rk_type = "res_2s" else: rk_type = "deis" multistep_stages = order-1 if rk_type[-2:] == "2m": #multistep method if h_prev is not None: multistep_stages = 1 c2 = -h_prev / h rk_type = rk_type[:-2] + "2s" else: rk_type = rk_type[:-2] + "2s" if rk_type[-2:] == "3m": #multistep method if h_prev2 is not None: multistep_stages = 2 c2 = -h_prev2 / h_prev c3 = -h_prev / h rk_type = rk_type[:-2] + "3s" else: rk_type = rk_type[:-2] + "3s" if rk_type in rk_coeff: ab, ci = copy.deepcopy(rk_coeff[rk_type]) ci = ci[:] ci.append(1) alpha_fn = lambda h: 1 t_fn = lambda sigma: sigma sigma_fn = lambda t: t h_fn = lambda sigma_down, sigma: sigma_down - sigma model_call = get_denoised EPS_PRED = False else: alpha_fn = lambda neg_h: torch.exp(neg_h) t_fn = lambda sigma: sigma.log().neg() sigma_fn = lambda t: t.neg().exp() h_fn = lambda sigma_down, sigma: -torch.log(sigma_down/sigma) model_call = get_denoised EPS_PRED = False match rk_type: case "deis": alpha_fn = lambda neg_h: torch.exp(neg_h) t_fn = lambda sigma: sigma.log().neg() sigma_fn = lambda t: t.neg().exp() h_fn = lambda 
sigma_down, sigma: -torch.log(sigma_down/sigma) model_call = get_epsilon EPS_PRED = True coeff_list = get_deis_coeff_list(sigmas, multistep_stages+1, deis_mode="rhoab") coeff_list = [[elem / h for elem in inner_list] for inner_list in coeff_list] if multistep_stages == 1: b1, b2 = coeff_list[stepcount] ab = [ [0, 0], [b1, b2], ] ci = [0, 0, 1] if multistep_stages == 2: b1, b2, b3 = coeff_list[stepcount] ab = [ [0, 0, 0], [0, 0, 0], [b1, b2, b3], ] ci = [0, 0, 0, 1] if multistep_stages == 3: b1, b2, b3, b4 = coeff_list[stepcount] ab = [ [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [b1, b2, b3, b4], ] ci = [0, 0, 0, 0, 1] case "dormand-prince_6s": FSAL = True case "ddim": b1 = phi(1, -h) ab = [ [b1], ] ci = [0, 1] case "res_2s": a2_1 = c2 * phi(1, -h*c2) b1 = phi(1, -h) - phi(2, -h)/c2 b2 = phi(2, -h)/c2 a2_1 /= (1 - torch.exp(-h*c2)) / h b1 /= phi(1, -h) b2 /= phi(1, -h) ab = [ [a2_1, 0], [b1, b2], ] ci = [0, c2, 1] case "res_3s": gamma = calculate_gamma(c2, c3) a2_1 = c2 * phi(1, -h*c2) a3_2 = gamma * c2 * phi(2, -h*c2) + (c3 ** 2 / c2) * phi(2, -h*c3) #phi_2_c3_h # a32 from k2 to k3 a3_1 = c3 * phi(1, -h*c3) - a3_2 # a31 from k1 to k3 b3 = (1 / (gamma * c2 + c3)) * phi(2, -h) b2 = gamma * b3 #simplified version of: b2 = (gamma / (gamma * c2 + c3)) * phi_2_h b1 = phi(1, -h) - b2 - b3 0 a3_2 /= (1 - torch.exp(-h*c3)) / h a3_1 /= (1 - torch.exp(-h*c3)) / h b1 /= phi(1, -h) b2 /= phi(1, -h) b3 /= phi(1, -h) ab = [ [a2_1, 0, 0], [a3_1, a3_2, 0], [b1, b2, b3], ] ci = [c1, c2, c3, 1] #ci = [0, c2, c3, 1] case "dpmpp_2s": #c2 = 0.5 a2_1 = c2 * phi(1, -h*c2) b1 = (1 - 1/(2*c2)) * phi(1, -h) b2 = (1/(2*c2)) * phi(1, -h) a2_1 /= (1 - torch.exp(-h*c2)) / h b1 /= phi(1, -h) b2 /= phi(1, -h) ab = [ [a2_1, 0], [b1, b2], ] ci = [0, c2, 1] case "dpmpp_sde_2s": c2 = 1.0 #hardcoded to 1.0 to more closely emulate the configuration for k-diffusion's implementation a2_1 = c2 * phi(1, -h*c2) b1 = (1 - 1/(2*c2)) * phi(1, -h) b2 = (1/(2*c2)) * phi(1, -h) a2_1 /= (1 - torch.exp(-h*c2)) / h b1 
/= phi(1, -h) b2 /= phi(1, -h) ab = [ [a2_1, 0], [b1, b2], ] ci = [0, c2, 1] case "dpmpp_3s": a2_1 = c2 * phi(1, -h*c2) a3_2 = (c3**2 / c2) * phi(2, -h*c3) a3_1 = c3 * phi(1, -h*c3) - a3_2 b2 = 0 b3 = (1/c3) * phi(2, -h) b1 = phi(1, -h) - b2 - b3 a2_1 /= (1 - torch.exp(-h*c2)) / h a3_2 /= (1 - torch.exp(-h*c3)) / h a3_1 /= (1 - torch.exp(-h*c3)) / h b1 /= phi(1, -h) b2 /= phi(1, -h) b3 /= phi(1, -h) ab = [ [a2_1, 0, 0], [a3_1, a3_2, 0], [b1, b2, b3], ] ci = [0, c2, c3, 1] case "rk_exp_5s": c1, c2, c3, c4, c5 = 0., 0.5, 0.5, 1., 0.5 a2_1 = 0.5 * phi(1, -h * c2) a3_1 = 0.5 * phi(1, -h * c3) - phi(2, -h * c3) a3_2 = phi(2, -h * c3) a4_1 = phi(1, -h * c4) - 2 * phi(2, -h * c4) a4_2 = a4_3 = phi(2, -h * c4) a5_2 = a5_3 = 0.5 * phi(2, -h * c5) - phi(3, -h * c4) + 0.25 * phi(2, -h * c4) - 0.5 * phi(3, -h * c5) a5_4 = 0.25 * phi(2, -h * c5) - a5_2 a5_1 = 0.5 * phi(1, -h * c5) - 2 * a5_2 - a5_4 b1 = phi(1, -h) - 3 * phi(2, -h) + 4 * phi(3, -h) b2 = b3 = 0 b4 = -phi(2, -h) + 4*phi(3, -h) b5 = 4 * phi(2, -h) - 8 * phi(3, -h) a2_1 /= (1 - torch.exp(-h*c2)) / h a3_1 /= (1 - torch.exp(-h*c3)) / h a3_2 /= (1 - torch.exp(-h*c3)) / h a4_1 /= (1 - torch.exp(-h*c4)) / h a4_2 /= (1 - torch.exp(-h*c4)) / h a4_3 /= (1 - torch.exp(-h*c4)) / h a5_1 /= (1 - torch.exp(-h*c5)) / h a5_2 /= (1 - torch.exp(-h*c5)) / h a5_3 /= (1 - torch.exp(-h*c5)) / h a5_4 /= (1 - torch.exp(-h*c5)) / h b1 /= phi(1, -h) b2 /= phi(1, -h) b3 /= phi(1, -h) b4 /= phi(1, -h) b5 /= phi(1, -h) ab = [ [a2_1, 0, 0, 0, 0], [a3_1, a3_2, 0, 0, 0], [a4_1, a4_2, a4_3, 0, 0], [a5_1, a5_2, a5_3, a5_4, 0], [b1, b2, b3, b4, b5], ] ci = [0., 0.5, 0.5, 1., 0.5, 1] return ab, ci, multistep_stages, model_call, alpha_fn, t_fn, sigma_fn, h_fn, FSAL, EPS_PRED def get_rk_methods_order(rk_type): ab, ci, multistep_stages, model_call, alpha_fn, t_fn, sigma_fn, h_fn, FSAL, EPS_PRED = get_rk_methods(rk_type, torch.tensor(1.0).to('cuda').to(torch.float64), c1=0.0, c2=0.5, c3=1.0) return len(ci)-1 def get_rk_methods_order_and_fn(rk_type, 
def get_rk_methods_order_and_fn(rk_type, h=None, c1=None, c2=None, c3=None, h_prev=None, h_prev2=None, stepcount=0, sigmas=None):
    """Return ``(order, model_call, alpha_fn, t_fn, sigma_fn, h_fn, FSAL, EPS_PRED)`` for ``rk_type``.

    ``order`` is ``len(ci) - 1`` from ``get_rk_methods``. When ``h`` is not
    given, a unit step with ``c1=0.0, c2=0.5, c3=1.0`` is used as a probe and
    all other arguments are ignored. The probe tensor is left on the default
    device, so the call also works on hosts without CUDA.
    """
    if h is None:
        probe_h = torch.tensor(1.0, dtype=torch.float64)
        result = get_rk_methods(rk_type, probe_h, c1=0.0, c2=0.5, c3=1.0)
    else:
        result = get_rk_methods(rk_type, h, c1, c2, c3, h_prev, h_prev2, stepcount, sigmas)
    ab, ci, multistep_stages, model_call, alpha_fn, t_fn, sigma_fn, h_fn, FSAL, EPS_PRED = result
    return len(ci)-1, model_call, alpha_fn, t_fn, sigma_fn, h_fn, FSAL, EPS_PRED


def get_rk_methods_coeff(rk_type, h, c1, c2, c3, h_prev=None, h_prev2=None, stepcount=0, sigmas=None):
    """Return only ``(ab, ci, multistep_stages, EPS_PRED)`` from ``get_rk_methods``."""
    ab, ci, multistep_stages, model_call, alpha_fn, t_fn, sigma_fn, h_fn, FSAL, EPS_PRED = get_rk_methods(rk_type, h, c1, c2, c3, h_prev, h_prev2, stepcount, sigmas)
    return ab, ci, multistep_stages, EPS_PRED
= False else: mask = mask.unsqueeze(1) mask = mask.repeat(1, x.shape[1], 1, 1) mask = F.interpolate(mask, size=(x.shape[2], x.shape[3]), mode='bilinear', align_corners=False) mask = mask.to(x.dtype).to(x.device) y0, y0_inv = torch.zeros_like(x), torch.zeros_like(x) if latent_guide is not None: if sigmas[0] > sigmas[1]: y0 = latent_guide = model.inner_model.inner_model.process_latent_in(latent_guide['samples']).clone().to(x.device) else: x = model.inner_model.inner_model.process_latent_in(latent_guide['samples']).clone().to(x.device) if latent_guide_inv is not None: if sigmas[0] > sigmas[1]: y0_inv = latent_guide_inv = model.inner_model.inner_model.process_latent_in(latent_guide_inv['samples']).clone().to(x.device) elif UNSAMPLE and mask is not None: x = mask * x + (1-mask) * model.inner_model.inner_model.process_latent_in(latent_guide_inv['samples']).clone().to(x.device) uncond = [torch.full_like(x, 0.0)] if cfgpp != 0.0: def post_cfg_function(args): uncond[0] = args["uncond_denoised"] return args["denoised"] model_options = extra_args.get("model_options", {}).copy() extra_args["model_options"] = comfy.model_patcher.set_model_options_post_cfg_function(model_options, post_cfg_function, disable_cfg1_optimization=True) if noise_seed == -1: seed = torch.initial_seed() + 1 else: seed = noise_seed if noise_sampler_type == "fractal": noise_sampler = NOISE_GENERATOR_CLASSES.get(noise_sampler_type)(x=x, seed=seed, sigma_min=sigmin, sigma_max=sigmax) noise_sampler.alpha = alpha noise_sampler.k = k noise_sampler.scale = scale else: noise_sampler = NOISE_GENERATOR_CLASSES_SIMPLE.get(noise_sampler_type)(x=x, seed=seed, sigma_min=sigmin, sigma_max=sigmax) if UNSAMPLE and sigmas[0] < sigmas[1]: #sigma_next > sigma: y0 = noise_sampler(sigma=sigmax, sigma_next=sigmin) y0 = (y0 - y0.mean()) / y0.std() y0_inv = noise_sampler(sigma=sigmax, sigma_next=sigmin) y0_inv = (y0_inv - y0_inv.mean()) / y0_inv.std() order, model_call, alpha_fn, t_fn, sigma_fn, h_fn, FSAL, EPS_PRED = 
get_rk_methods_order_and_fn(rk_type) if exp_mode: model_call = get_denoised alpha_fn = lambda neg_h: torch.exp(neg_h) t_fn = lambda sigma: sigma.log().neg() sigma_fn = lambda t: t.neg().exp() xi, ki, ki_u = [torch.zeros_like(x)]*(order+2), [torch.zeros_like(x)]*(order+1), [torch.zeros_like(x)]*(order+1) h, h_prev, h_prev2 = None, None, None xi[0] = x for _ in trange(len(sigmas)-1, disable=disable): sigma, sigma_next = sigmas[_], sigmas[_+1] if sigma_next == 0.0: rk_type = "buehler" eta, eta_var = 0, 0 order, model_call, alpha_fn, t_fn, sigma_fn, h_fn, FSAL, EPS_PRED = get_rk_methods_order_and_fn(rk_type) #sigma_up, sigma, sigma_down, alpha_ratio = get_res4lyf_step_with_model(model, sigma, sigma_next, eta, eta_var, noise_mode, h_fn(sigma_next,sigma) ) sigma_up, sigma, sigma_down, alpha_ratio = get_res4lyf_step_with_model(model, sigma, sigma_next, eta, noise_mode) t_down, t = t_fn(sigma_down), t_fn(sigma) h = h_fn(sigma_down, sigma) c2, c3 = get_res4lyf_half_step3(sigma, sigma_down, c2, c3, t_fn=t_fn, sigma_fn=sigma_fn, t_fn_formula=t_fn_formula, sigma_fn_formula=sigma_fn_formula) ab, ci, multistep_stages, EPS_PRED = get_rk_methods_coeff(rk_type, h, c1, c2, c3, h_prev, h_prev2, _, sigmas) order = len(ci)-1 if exp_mode: for i in range(order): for j in range(order): ab[i][j] = ab[i][j] * phi(1, -h * ci[i+1]) if isinstance(model.inner_model.inner_model.model_sampling, comfy.model_sampling.CONST) == False and noise_mode == "hard" and sigma_next > 0.0: noise = noise_sampler(sigma=sigmas[_], sigma_next=sigmas[_+1]) noise = torch.nan_to_num((noise - noise.mean()) / noise.std(), 0.0) xi[0] = alpha_ratio * xi[0] + noise * s_noise * sigma_up xi_0 = xi[0] # needed for implicit sampling if (MULTISTEP == False and FSAL == False) or _ == 0: ki[0] = model_call(model, xi_0, sigma, **extra_args) if EPS_PRED and rk_type.startswith("deis"): ki[0] = (xi_0 - ki[0]) / sigma ki[0] = ki[0] * (sigma_down-sigma)/(sigma_next-sigma) ki_u[0] = uncond[0] if cfgpp != 0.0: ki[0] = uncond[0] + cfgpp 
* (ki[0] - uncond[0]) ki_u[0] = uncond[0] for iteration in range(implicit_steps+1): for i in range(multistep_stages, order): if implicit_steps > 0 and iteration > 0 and implicit_sampler_name != "default": ab, ci, multistep_stages, EPS_PRED = get_rk_methods_coeff(implicit_sampler_name, h, c1, c2, c3, h_prev, h_prev2, _, sigmas) order = len(ci)-1 if len(ki) < order + 1: last_value_ki = ki[-1] last_value_ki_u = ki_u[-1] ki.extend( [last_value_ki] * ((order + 1) - len(ki))) ki_u.extend([last_value_ki_u] * ((order + 1) - len(ki_u))) if len(xi) < order + 2: xi.extend([torch.zeros_like(xi[0])] * ((order + 2) - len(xi))) ki[0] = model_call(model, xi_0, sigma, **extra_args) ki_u[0] = uncond[0] sigma_mid = sigma_fn(t + h*ci[i+1]) alpha_t_1 = alpha_t_1_inv = torch.exp(torch.log(sigma_down/sigma) * ci[i+1] ) if sigma_next > sigma: alpha_t_1_inv = torch.nan_to_num( torch.exp(torch.log((sigmax - sigma_down)/(sigmax - sigma)) * ci[i+1]), 1.) if LGW_MASK_RESCALE_MIN: lgw_mask = mask * (1 - latent_guide_weights[_]) + latent_guide_weights[_] lgw_mask_inv = (1-mask) * (1 - latent_guide_weights[_]) + latent_guide_weights[_] else: lgw_mask = mask * latent_guide_weights[_] lgw_mask_inv = (1-mask) * latent_guide_weights[_] ks, ks_u, ys, ys_inv = torch.zeros_like(x), torch.zeros_like(x), torch.zeros_like(x), torch.zeros_like(x) for j in range(order): ks += ab[i][j] * ki[j] ks_u += ab[i][j] * ki_u[j] ys += ab[i][j] * y0 ys_inv += ab[i][j] * y0_inv if EPS_PRED and rk_type.startswith("deis"): epsilon = (h * ks) / (sigma_down - sigma) #xi[(i+1)%order] = xi_0 + h*ks ks = xi_0 - epsilon * sigma # denoised else: if implicit_sampler_name.startswith("lobatto") == False: ks /= sum(ab[i]) elif iteration == 0: ks /= sum(ab[i]) if UNSAMPLE == False and latent_guide is not None and latent_guide_weights[_] > 0.0: if guide_mode == "hard_light": lg = latent_guide * sum(ab[i]) if EPS_PRED: lg = (alpha_fn(-h*ci[i+1]) * xi[0] - latent_guide) / (sigma_fn(t + h*ci[i]) + 1e-8) hard_light_blend_1 = 
hard_light_blend(lg, ks) ks = (1 - lgw_mask) * ks + lgw_mask * hard_light_blend_1 elif guide_mode == "mean_std": ks2 = torch.zeros_like(x) for n in range(latent_guide.shape[1]): ks2[0][n] = (ks[0][n] - ks[0][n].mean()) / ks[0][n].std() ks2[0][n] = (ks2[0][n] * latent_guide[0][n].std()) + latent_guide[0][n].mean() ks = (1 - lgw_mask) * ks + lgw_mask * ks2 elif guide_mode == "mean": ks2 = torch.zeros_like(x) for n in range(latent_guide.shape[1]): ks2[0][n] = (ks[0][n] - ks[0][n].mean()) ks2[0][n] = (ks2[0][n]) + latent_guide[0][n].mean() ks3 = torch.zeros_like(x) for n in range(latent_guide.shape[1]): ks3[0][n] = (ks[0][n] - ks[0][n].mean()) ks3[0][n] = (ks3[0][n]) + latent_guide_inv[0][n].mean() ks = (1 - lgw_mask) * ks + lgw_mask * ks2 ks = (1 - lgw_mask_inv) * ks + lgw_mask_inv * ks3 elif guide_mode == "std": ks2 = torch.zeros_like(x) for n in range(latent_guide.shape[1]): ks2[0][n] = (ks[0][n]) / ks[0][n].std() ks2[0][n] = (ks2[0][n] * latent_guide[0][n].std()) ks = (1 - lgw_mask) * ks + lgw_mask * ks2 elif guide_mode == "blend": ks = (1 - lgw_mask) * ks + lgw_mask * ys #+ (1-lgw_mask) * latent_guide_inv ks = (1 - lgw_mask_inv) * ks + lgw_mask_inv * ys_inv elif guide_mode == "inversion": UNSAMPLE = True cfgpp_term = cfgpp*h*(ks - ks_u) xi[(i+1)%order] = (1-UNSAMPLE * lgw_mask) * (alpha_t_1 * (xi_0 + cfgpp_term) + (1 - alpha_t_1) * ks ) \ + UNSAMPLE * lgw_mask * (alpha_t_1_inv * (xi_0 + cfgpp_term) + (1 - alpha_t_1_inv) * ys ) if UNSAMPLE: xi[(i+1)%order] = (1-lgw_mask_inv) * xi[(i+1)%order] + UNSAMPLE * lgw_mask_inv * (alpha_t_1_inv * (xi_0 + cfgpp_term) + (1 - alpha_t_1_inv) * ys_inv ) if (i+1)%order > 0 and (i+1)%order > multistep_stages-1: if GARBAGE_COLLECT: gc.collect(); torch.cuda.empty_cache() ki[i+1] = model_call(model, xi[i+1], sigma_fn(t + h*ci[i+1]), **extra_args) if EPS_PRED and rk_type.startswith("deis"): ki[i+1] = (xi[i+1] - ki[i+1]) / sigma_fn(t + h*ci[i+1]) ki[i+1] = ki[i+1] * (sigma_down-sigma)/(sigma_next-sigma) ki_u[i+1] = uncond[0] if FSAL and 
_ > 0: ki [0] = ki[order-1] ki_u[0] = ki_u[order-1] if MULTISTEP and _ > 0: ki [0] = denoised ki_u[0] = ki_u[order-1] for ms in range(multistep_stages): ki [multistep_stages - ms] = ki [multistep_stages - ms - 1] ki_u[multistep_stages - ms] = ki_u[multistep_stages - ms - 1] if iteration < implicit_steps and implicit_sampler_name == "default": ki [0] = model_call(model, xi[0], sigma_down, **extra_args) ki_u[0] = uncond[0] elif iteration == implicit_steps and implicit_sampler_name != "default" and implicit_steps > 0: ks, ks_u, ys, ys_inv = torch.zeros_like(x), torch.zeros_like(x), torch.zeros_like(x), torch.zeros_like(x) for j in range(order): ks += ab[i+1][j] * ki[j] ks_u += ab[i+1][j] * ki_u[j] ys += ab[i+1][j] * y0 ys_inv += ab[i+1][j] * y0_inv ks /= sum(ab[i+1]) cfgpp_term = cfgpp*h*(ks - ks_u) #GUIDES NOT FULLY IMPLEMENTED HERE WITH IMPLICIT FINAL STEP xi[(i+1)%order] = (1-UNSAMPLE * lgw_mask) * (alpha_t_1 * (xi_0 + cfgpp_term) + (1 - alpha_t_1) * ks ) \ + UNSAMPLE * lgw_mask * (alpha_t_1_inv * (xi_0 + cfgpp_term) + (1 - alpha_t_1_inv) * ys ) if UNSAMPLE: xi[(i+1)%order] = (1-lgw_mask_inv) * xi[(i+1)%order] + UNSAMPLE * lgw_mask_inv * (alpha_t_1_inv * (xi_0 + cfgpp_term) + (1 - alpha_t_1_inv) * ys_inv ) if EPS_PRED == True and exp_mode == False and not rk_type.startswith("deis"): denoised = alpha_fn(-h*ci[i+1]) * xi[0] - sigma * ks elif EPS_PRED == True and rk_type.startswith("deis"): epsilon = (h * ks) / (sigma_down - sigma) denoised = xi_0 - epsilon * sigma # denoised elif iteration == implicit_steps and implicit_sampler_name != "default" and implicit_steps > 0: denoised = ks else: denoised = ks / sum(ab[i]) """if iteration < implicit_steps and implicit_sampler_name != "default": for idx in range(len(ki)): ki[idx] = denoised""" if callback is not None: callback({'x': xi[0], 'i': _, 'sigma': sigma, 'sigma_next': sigma_next, 'denoised': denoised}) if (isinstance(model.inner_model.inner_model.model_sampling, comfy.model_sampling.CONST) or noise_mode != "hard") 
def initialize_or_scale(tensor, value, steps):
    """Build a constant schedule, or scale an existing one.

    Args:
        tensor: Existing schedule tensor, or ``None``.
        value: Fill value when ``tensor`` is ``None``; otherwise the
            multiplier applied to ``tensor``.
        steps: Length of the constant schedule. Ignored when ``tensor`` is
            provided.

    Returns:
        ``torch.full((steps,), value)`` when ``tensor`` is ``None``, else
        ``value * tensor``.
    """
    if tensor is not None:
        return value * tensor
    return torch.full((steps,), value)


def move_to_same_device(*tensors):
    """Move every tensor onto the device of the first one.

    Returns:
        A tuple of the tensors after ``.to(device)``; the (empty) argument
        tuple itself when called without arguments.
    """
    if len(tensors) == 0:
        return tensors

    target_device = tensors[0].device
    relocated = []
    for item in tensors:
        relocated.append(item.to(target_device))
    return tuple(relocated)
class Legacy_ClownsharKSampler:
    """Deprecated all-in-one sampler node built on the "legacy_rk" sampler.

    Prepares sigmas, initial noise, conditioning and guide weights, then runs
    ``comfy.sample.sample_custom`` and returns the sampled and denoised latents.
    """

    @classmethod
    def INPUT_TYPES(s):
        return {"required":
                    {"model": ("MODEL",),
                     "noise_type_init": (NOISE_GENERATOR_NAMES_SIMPLE, {"default": "gaussian"}),
                     "noise_type_sde": (NOISE_GENERATOR_NAMES_SIMPLE, {"default": "brownian"}),
                     "noise_mode_sde": (["hard", "hard_var", "hard_sq", "soft", "softer", "exp"], {"default": 'hard', "tooltip": "How noise scales with the sigma schedule. Hard is the most aggressive, the others start strong and drop rapidly."}),
                     "eta": ("FLOAT", {"default": 0.25, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Calculated noise amount to be added, then removed, after each step."}),
                     "noise_seed": ("INT", {"default": 0, "min": -1, "max": 0xffffffffffffffff}),
                     "sampler_mode": (['standard', 'unsample', 'resample',],),
                     "sampler_name": (RK_SAMPLER_NAMES, {"default": "res_2m"}),
                     "implicit_sampler_name": (["default",
                                                "gauss-legendre_5s",
                                                "gauss-legendre_4s",
                                                "gauss-legendre_3s",
                                                "gauss-legendre_2s",
                                                "crouzeix_2s",
                                                "radau_iia_3s",
                                                "radau_iia_2s",
                                                "lobatto_iiic_3s",
                                                "lobatto_iiic_2s",
                                                ], {"default": "default"}),
                     "scheduler": (get_res4lyf_scheduler_list(), {"default": "beta57"},),
                     "steps": ("INT", {"default": 30, "min": 1, "max": 10000}),
                     "implicit_steps": ("INT", {"default": 0, "min": 0, "max": 10000}),
                     "denoise": ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step":0.01}),
                     "denoise_alt": ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step":0.01}),
                     "cfg": ("FLOAT", {"default": 5.0, "min": -100.0, "max": 100.0, "step":0.1, "round": False, }),
                     "shift": ("FLOAT", {"default": 3.0, "min": -1.0, "max": 100.0, "step":0.1, "round": False, }),
                     "base_shift": ("FLOAT", {"default": 0.85, "min": -1.0, "max": 100.0, "step":0.1, "round": False, }),
                     "truncate_conditioning": (['false', 'true'], {"default": "true"}),
                     },
                "optional":
                    {"positive": ("CONDITIONING", ),
                     "negative": ("CONDITIONING", ),
                     "sigmas": ("SIGMAS", ),
                     "latent_image": ("LATENT", ),
                     "guides": ("GUIDES", ),
                     "options": ("OPTIONS", ),
                     }
                }

    RETURN_TYPES = ("LATENT","LATENT", )
    RETURN_NAMES = ("output", "denoised",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/legacy/samplers"
    DEPRECATED = True

    @staticmethod
    def _truncate_conditioning(conditioning, dtype, zero_out=False):
        """Return a truncated copy of ``conditioning`` without mutating it.

        Every entry is cut to ``[:, :154, :4096]`` tokens and ``[:, :2048]``
        pooled output. As in the previous implementation only the first entry
        is cast to ``dtype``. With ``zero_out`` the tokens (and pooled output,
        when present) are replaced by zeros of the truncated shape.
        """
        truncated = []
        for index, entry in enumerate(conditioning):
            tokens = entry[0]
            extras = entry[1].copy()
            pooled = extras.get("pooled_output", None)

            if index == 0:
                tokens = tokens.clone().to(dtype)
                if pooled is not None:
                    pooled = pooled.clone().to(dtype)

            if pooled is not None:
                if zero_out:
                    extras["pooled_output"] = torch.zeros((1,2048), dtype=tokens.dtype, device=tokens.device)
                else:
                    extras["pooled_output"] = pooled[:, :2048]

            if zero_out:
                tokens = torch.zeros((1,154,4096), dtype=tokens.dtype, device=tokens.device)
            else:
                tokens = tokens[:, :154, :4096]

            truncated.append([tokens, extras])
        return truncated

    def main(self, model, cfg, truncate_conditioning, sampler_mode, scheduler, steps,
             denoise=1.0, denoise_alt=1.0,
             noise_type_init="gaussian", noise_type_sde="brownian", noise_mode_sde="hard", latent_image=None,
             positive=None, negative=None, sigmas=None, latent_noise=None, latent_noise_match=None,
             noise_stdev=1.0, noise_mean=0.0, noise_normalize=True, noise_is_latent=False,
             eta=0.25, eta_var=0.0, d_noise=1.0, s_noise=1.0, alpha_init=-1.0, k_init=1.0, alpha_sde=-1.0, k_sde=1.0, cfgpp=0.0, c1=0.0, c2=0.5, c3=1.0, multistep=False, noise_seed=-1, sampler_name="res_2m", implicit_sampler_name="default",
             exp_mode=False, t_fn_formula=None, sigma_fn_formula=None, implicit_steps=0,
             latent_guide=None, latent_guide_inv=None, latent_guide_weight=0.0, guide_mode="blend", latent_guide_weights=None, latent_guide_mask=None, rescale_floor=True, sigmas_override=None, unsampler_type="linear",
             shift=3.0, base_shift=0.85, guides=None, options=None,
             ):
        """Run the legacy RK sampler and return ``(output, denoised)`` latents.

        ``options`` (a dict) overrides the matching keyword arguments and
        ``guides`` (the tuple produced by the guides node) overrides the
        latent-guide arguments. Both returned latent dicts carry the result in
        the input latent's dtype under ``"samples"`` and a full-precision copy
        under ``"samples_fp64"``.

        ``unsampler_type`` is accepted for backward compatibility but unused.

        Raises:
            ValueError: if ``latent_image`` is not connected.
        """
        if latent_image is None:
            # The input is declared optional, but every code path below reads it.
            raise ValueError("Legacy_ClownsharKSampler requires a latent_image input.")

        default_dtype = torch.float64
        max_steps = 10000

        if noise_seed == -1:
            seed = torch.initial_seed() + 1
        else:
            seed = noise_seed
            torch.manual_seed(noise_seed)
        noise_seed_sde = seed + 1

        if options is not None:
            noise_stdev = options.get('noise_init_stdev', noise_stdev)
            noise_mean = options.get('noise_init_mean', noise_mean)
            noise_type_init = options.get('noise_type_init', noise_type_init)
            noise_type_sde = options.get('noise_type_sde', noise_type_sde)
            noise_mode_sde = options.get('noise_mode_sde', noise_mode_sde)
            eta = options.get('eta', eta)
            s_noise = options.get('s_noise', s_noise)
            d_noise = options.get('d_noise', d_noise)
            alpha_init = options.get('alpha_init', alpha_init)
            k_init = options.get('k_init', k_init)
            alpha_sde = options.get('alpha_sde', alpha_sde)
            k_sde = options.get('k_sde', k_sde)
            # Fall back to the seed resolved above; noise_seed+1 would be 0
            # whenever noise_seed == -1.
            noise_seed_sde = options.get('noise_seed_sde', noise_seed_sde)
            c1 = options.get('c1', c1)
            c2 = options.get('c2', c2)
            c3 = options.get('c3', c3)
            t_fn_formula = options.get('t_fn_formula', t_fn_formula)
            sigma_fn_formula = options.get('sigma_fn_formula', sigma_fn_formula)

        if guides is not None:
            (guide_mode, rescale_floor, latent_guide_weight, latent_guide_weights, t_is,
             latent_guide, latent_guide_inv, latent_guide_mask, scheduler_, steps_, denoise_) = guides
            if scheduler_ != "constant":
                # A scheduled guide derives its per-step weights from a sigma schedule.
                latent_guide_weights = get_sigmas(model, scheduler_, steps_, denoise_).to(default_dtype)

        latent_guide_weights = initialize_or_scale(latent_guide_weights, latent_guide_weight, max_steps).to(default_dtype)
        latent_guide_weights = F.pad(latent_guide_weights, (0, max_steps), value=0.0)

        # Negative shift values leave the model's sampling configuration untouched.
        if shift >= 0:
            if isinstance(model.model.model_config, comfy.supported_models.SD3):
                model = ModelSamplingSD3().patch(model, shift)[0]
            elif isinstance(model.model.model_config, comfy.supported_models.AuraFlow):
                model = ModelSamplingAuraFlow().patch_aura(model, shift)[0]
            elif isinstance(model.model.model_config, comfy.supported_models.Stable_Cascade_C):
                model = ModelSamplingStableCascade().patch(model, shift)[0]
        if shift >= 0 and base_shift >= 0:
            if isinstance(model.model.model_config, comfy.supported_models.Flux) or isinstance(model.model.model_config, comfy.supported_models.FluxSchnell):
                model = ModelSamplingFlux().patch(model, shift, base_shift, latent_image['samples'].shape[3], latent_image['samples'].shape[2])[0]

        latent = latent_image
        latent_image_dtype = latent_image['samples'].dtype

        if positive is None:
            positive = [[
                torch.zeros((1, 154, 4096)),
                {'pooled_output': torch.zeros((1, 2048))}
                ]]
        if negative is None:
            negative = [[
                torch.zeros((1, 154, 4096)),
                {'pooled_output': torch.zeros((1, 2048))}
                ]]

        # A negative denoise_alt is used as d_noise as well as the sigma scale.
        if denoise_alt < 0:
            d_noise = denoise_alt = -denoise_alt
        if options is not None:
            d_noise = options.get('d_noise', d_noise)

        if sigmas is not None:
            sigmas = sigmas.clone().to(default_dtype)
        else:
            sigmas = get_sigmas(model, scheduler, steps, denoise).to(default_dtype)
        sigmas *= denoise_alt

        if sampler_mode.startswith("unsample"):
            null = torch.tensor([0.0], device=sigmas.device, dtype=sigmas.dtype)
            sigmas = torch.flip(sigmas, dims=[0])
            sigmas = torch.cat([sigmas, null])
        elif sampler_mode.startswith("resample"):
            null = torch.tensor([0.0], device=sigmas.device, dtype=sigmas.dtype)
            sigmas = torch.cat([null, sigmas])
            sigmas = torch.cat([sigmas, null])

        x = latent_image["samples"].clone().to(default_dtype)
        # Prefer the stored full-precision latent when it still matches "samples".
        if "samples_fp64" in latent_image:
            if latent_image['samples'].shape == latent_image['samples_fp64'].shape:
                if torch.norm(latent_image['samples'] - latent_image['samples_fp64']) < 0.01:
                    x = latent_image["samples_fp64"].clone()

        # Work on private copies: the noise is modified in place below and the
        # input dicts belong to upstream nodes.
        noise_samples = None
        if latent_noise is not None:
            noise_samples = latent_noise["samples"].clone().to(default_dtype)
        noise_match_samples = None
        if latent_noise_match is not None:
            noise_match_samples = latent_noise_match["samples"].clone().to(default_dtype)

        if truncate_conditioning == "true" or truncate_conditioning == "true_and_zero_neg":
            positive = self._truncate_conditioning(positive, default_dtype)
            negative = self._truncate_conditioning(negative, default_dtype,
                                                   zero_out=(truncate_conditioning == "true_and_zero_neg"))

        sigmin = model.model.model_sampling.sigma_min
        sigmax = model.model.model_sampling.sigma_max

        if noise_type_init == "none":
            noise = torch.zeros_like(x)
        elif noise_samples is None:
            noise_sampler_init = NOISE_GENERATOR_CLASSES_SIMPLE.get(noise_type_init)(x=x, seed=seed, sigma_min=sigmin, sigma_max=sigmax)

            if noise_type_init == "fractal":
                noise_sampler_init.alpha = alpha_init
                noise_sampler_init.k = k_init
                noise_sampler_init.scale = 0.1
            noise = noise_sampler_init(sigma=sigmax, sigma_next=sigmin)
        else:
            noise = noise_samples

        if noise_is_latent:  # add noise and latent together and normalize --> noise
            noise += x.cpu()
            noise.sub_(noise.mean()).div_(noise.std())

        # Skip normalization of constant noise to avoid dividing by zero.
        if noise_normalize and noise.std() > 0:
            noise.sub_(noise.mean()).div_(noise.std())
        noise *= noise_stdev
        noise = (noise - noise.mean()) + noise_mean

        if noise_match_samples is not None:
            # Match each channel's mean to the reference latent.
            for i in range(noise_match_samples.shape[1]):
                noise[0][i] = (noise[0][i] - noise[0][i].mean())
                noise[0][i] = (noise[0][i]) + noise_match_samples[0][i].mean()

        noise_mask = latent["noise_mask"] if "noise_mask" in latent else None

        x0_output = {}
        callback = latent_preview.prepare_callback(model, sigmas.shape[-1] - 1, x0_output)

        disable_pbar = False

        if noise_type_sde == "none":
            eta_var = eta = 0.0
            noise_type_sde = "gaussian"
        if noise_mode_sde == "hard_var":
            eta_var = eta
            eta = 0.0

        # A negative cfg is passed on as the cfgpp scale, with plain cfg set to 1.
        if cfg < 0:
            cfgpp = -cfg
            cfg = 1.0

        sampler = comfy.samplers.ksampler("legacy_rk", {"eta": eta, "eta_var": eta_var, "s_noise": s_noise, "d_noise": d_noise, "alpha": alpha_sde, "k": k_sde, "c1": c1, "c2": c2, "c3": c3, "cfgpp": cfgpp,
                                                        "MULTISTEP": multistep, "noise_sampler_type": noise_type_sde, "noise_mode": noise_mode_sde, "noise_seed": noise_seed_sde, "rk_type": sampler_name, "implicit_sampler_name": implicit_sampler_name,
                                                        "exp_mode": exp_mode, "t_fn_formula": t_fn_formula, "sigma_fn_formula": sigma_fn_formula, "implicit_steps": implicit_steps,
                                                        "latent_guide": latent_guide, "latent_guide_inv": latent_guide_inv, "mask": latent_guide_mask,
                                                        "latent_guide_weights": latent_guide_weights, "guide_mode": guide_mode,
                                                        "LGW_MASK_RESCALE_MIN": rescale_floor, "sigmas_override": sigmas_override})

        samples = comfy.sample.sample_custom(model, noise, cfg, sampler, sigmas, positive, negative, x.clone(),
                                             noise_mask=noise_mask, callback=callback, disable_pbar=disable_pbar, seed=noise_seed)

        out = latent.copy()
        out["samples"] = samples
        if "x0" in x0_output:
            out_denoised = latent.copy()
            out_denoised["samples"] = model.model.process_latent_out(x0_output["x0"].cpu())
        else:
            # Separate dict: aliasing `out` would let the downcast below
            # overwrite the full-precision copy.
            out_denoised = out.copy()

        out["samples_fp64"] = out["samples"].clone()
        out["samples"] = out["samples"].to(latent_image_dtype)

        out_denoised["samples_fp64"] = out_denoised["samples"].clone()
        out_denoised["samples"] = out_denoised["samples"].to(latent_image_dtype)

        return ( out, out_denoised, )
{"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Calculate variance-corrected noise amount (overrides eta/noise_mode settings). Cannot be used at very low sigma values; reverts to eta/noise_mode for final steps."}), "s_noise": ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Ratio of calculated noise amount actually added after each step. >1.0 will leave extra noise behind, <1.0 will remove more noise than it adds."}), "d_noise": ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Ratio of calculated noise amount actually added after each step. >1.0 will leave extra noise behind, <1.0 will remove more noise than it adds."}), "noise_mode": (["hard", "hard_sq", "soft", "softer", "exp"], {"default": 'hard', "tooltip": "How noise scales with the sigma schedule. Hard is the most aggressive, the others start strong and drop rapidly."}), "noise_sampler_type": (NOISE_GENERATOR_NAMES, {"default": "brownian"}), "alpha": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step":0.1, "round": False, "tooltip": "Fractal noise mode: <0 = extra high frequency noise, >0 = extra low frequency noise, 0 = white noise."}), "k": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step":2.0, "round": False, "tooltip": "Fractal noise mode: all that matters is positive vs. negative. Effect unclear."}), "noise_seed": ("INT", {"default": -1, "min": -1, "max": 0xffffffffffffffff, "tooltip": "Seed for the SDE noise that is added after each step if eta or eta_var are non-zero. If set to -1, it will use the increment the seed most recently used by the workflow."}), "rk_type": (RK_SAMPLER_NAMES, {"default": "res_2m"}), "exp_mode": ("BOOLEAN", {"default": False, "tooltip": "Convert linear RK methods to exponential form."}), "multistep": ("BOOLEAN", {"default": False, "tooltip": "For samplers ending in S only. 
Reduces cost by one model call per step by reusing the previous step as the current predictor step."}), "implicit_steps": ("INT", {"default": 0, "min": 0, "max": 100, "step":1, "tooltip": "Number of implicit Runge-Kutta refinement steps to run after each explicit step."}), "cfgpp": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step":0.01, "round": False, "tooltip": "CFG++ scale. Use in place of, or with, CFG. Currently only working with RES, DPMPP, and DDIM samplers."}), "latent_guide_weight": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False}), #"guide_mode": (["hard_light", "mean_std", "mean", "std", "noise_mean", "blend", "inversion"], {"default": 'mean', "tooltip": "The mode used. noise_mean and inversion are currently for test purposes only."}), "guide_mode": (["hard_light", "mean_std", "mean", "std", "blend",], {"default": 'mean', "tooltip": "The mode used. noise_mean and inversion are currently for test purposes only."}), #"guide_mode": (["hard_light", "blend", "mean_std", "mean", "std"], {"default": 'mean', "tooltip": "The mode used."}), "rescale_floor": ("BOOLEAN", {"default": True, "tooltip": "Latent_guide_weight(s) control the minimum value for the latent_guide_mask. 
class Legacy_ClownsharKSamplerGuides:
    """Deprecated node that bundles latent-guide settings into a GUIDES tuple."""

    @classmethod
    def INPUT_TYPES(s):
        return {"required":
                    {"guide_mode": (["hard_light", "mean_std", "mean", "std", "blend"], {"default": 'blend', "tooltip": "The mode used."}),
                     "latent_guide_weight": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False}),
                     "scheduler": (["constant"] + get_res4lyf_scheduler_list(), {"default": "beta57"},),
                     "steps": ("INT", {"default": 30, "min": 1, "max": 10000}),
                     "denoise": ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False}),
                     "rescale_floor": ("BOOLEAN", {"default": False, "tooltip": "If true, latent_guide_weight(s) primarily affect the masked areas. If false, they control the unmasked areas."}),
                     },
                "optional":
                    {"latent_guide": ("LATENT", ),
                     "latent_guide_inv": ("LATENT", ),
                     "latent_guide_mask": ("MASK", ),
                     "latent_guide_weights": ("SIGMAS", ),
                     }
                }

    RETURN_TYPES = ("GUIDES",)
    CATEGORY = "RES4LYF/legacy/samplers"
    FUNCTION = "get_sampler"
    DEPRECATED = True

    def get_sampler(self, model=None, scheduler="constant", steps=30, denoise=1.0, latent_guide=None, latent_guide_inv=None, latent_guide_weight=0.0, guide_mode="blend",
                    latent_guide_weights=None, latent_guide_mask=None, rescale_floor=True, t_is=None,
                    ):
        """Pack the guide settings into the tuple consumed by the sampler node.

        With the "constant" scheduler the weights are rebuilt as ``steps``
        copies of ``latent_guide_weight`` in float64, zero-padded by 10000
        entries; any ``latent_guide_weights`` passed in is replaced. For every
        other scheduler the inputs are forwarded unchanged together with
        ``scheduler``, ``steps`` and ``denoise``.

        ``model`` is accepted but not used here.

        Returns:
            A 1-tuple holding ``(guide_mode, rescale_floor, latent_guide_weight,
            latent_guide_weights, t_is, latent_guide, latent_guide_inv,
            latent_guide_mask, scheduler, steps, denoise)``.
        """
        default_dtype = torch.float64
        max_steps = 10000

        if scheduler == "constant":
            latent_guide_weights = initialize_or_scale(None, latent_guide_weight, steps).to(default_dtype)
            latent_guide_weights = F.pad(latent_guide_weights, (0, max_steps), value=0.0)

        # The guide latents are forwarded as-is; the former float64 clones of
        # their samples were never used and only cost memory.
        guides = (guide_mode, rescale_floor, latent_guide_weight, latent_guide_weights, t_is,
                  latent_guide, latent_guide_inv, latent_guide_mask, scheduler, steps, denoise)
        return (guides, )
("BOOLEAN", {"default": True}), "noise_stdev": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step":0.01, "round": False, }), "noise_mean": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step":0.01, "round": False, }), "noise_is_latent": ("BOOLEAN", {"default": False}), "noise_type": (NOISE_GENERATOR_NAMES, {"default": "gaussian"}), "alpha": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step":0.1, "round": False, }), "k": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step":2.0, "round": False, }), "noise_seed": ("INT", {"default": 0, "min": -1, "max": 0xffffffffffffffff}), "sampler_mode": (['standard', 'unsample', 'resample'],), #"scheduler": (comfy.samplers.SCHEDULER_NAMES, ), "scheduler": (get_res4lyf_scheduler_list(),), "steps": ("INT", {"default": 30, "min": 1, "max": 10000}), "denoise": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10000, "step":0.01}), "cfg": ("FLOAT", {"default": 5.0, "min": 0.0, "max": 100.0, "step":0.5, "round": False, }), "truncate_conditioning": (['false', 'true', 'true_and_zero_neg'], ), "positive": ("CONDITIONING", ), "negative": ("CONDITIONING", ), "sampler": ("SAMPLER", ), "latent_image": ("LATENT", ), }, "optional": { "sigmas": ("SIGMAS", ), "latent_noise": ("LATENT", ), "latent_noise_match": ("LATENT",), } } RETURN_TYPES = ("LATENT","LATENT","LATENT","LATENT") RETURN_NAMES = ("output", "denoised", "output_fp64", "denoised_fp64") FUNCTION = "main" CATEGORY = "RES4LYF/legacy/samplers" DEPRECATED = True def main(self, model, add_noise, noise_stdev, noise_mean, noise_normalize, noise_is_latent, noise_type, noise_seed, cfg, truncate_conditioning, alpha, k, positive, negative, sampler, latent_image, sampler_mode, scheduler, steps, denoise, sigmas=None, latent_noise=None, latent_noise_match=None,): latent = latent_image latent_image_dtype = latent_image['samples'].dtype default_dtype = torch.float64 if positive is None: positive = [[ torch.zeros((1, 154, 4096)), # blah[0][0], a 
tensor of shape (1, 154, 4096) {'pooled_output': torch.zeros((1, 2048))} ]] if negative is None: negative = [[ torch.zeros((1, 154, 4096)), # blah[0][0], a tensor of shape (1, 154, 4096) {'pooled_output': torch.zeros((1, 2048))} ]] if denoise < 0: sampler.extra_options['d_noise'] = -denoise denoise = 1.0 if sigmas is not None: sigmas = sigmas.clone().to(default_dtype) else: sigmas = get_sigmas(model, scheduler, steps, denoise).to(default_dtype) #sigmas = sigmas.clone().to(torch.float64) if sampler_mode == "unsample": null = torch.tensor([0.0], device=sigmas.device, dtype=sigmas.dtype) sigmas = torch.flip(sigmas, dims=[0]) sigmas = torch.cat([sigmas, null]) elif sampler_mode == "resample": null = torch.tensor([0.0], device=sigmas.device, dtype=sigmas.dtype) sigmas = torch.cat([null, sigmas]) sigmas = torch.cat([sigmas, null]) if latent_image is not None: x = latent_image["samples"].clone().to(default_dtype) #x = {"samples": x} if latent_noise is not None: latent_noise["samples"] = latent_noise["samples"].clone().to(default_dtype) if latent_noise_match is not None: latent_noise_match["samples"] = latent_noise_match["samples"].clone().to(default_dtype) if truncate_conditioning == "true" or truncate_conditioning == "true_and_zero_neg": if positive is not None: positive[0][0] = positive[0][0].clone().to(default_dtype) positive[0][1]["pooled_output"] = positive[0][1]["pooled_output"].clone().to(default_dtype) c = [] for t in positive: d = t[1].copy() pooled_output = d.get("pooled_output", None) if pooled_output is not None: d["pooled_output"] = d["pooled_output"][:, :2048] n = [t[0][:, :154, :4096], d] c.append(n) positive = c c = [] for t in negative: if negative is not None: negative[0][0] = negative[0][0].clone().to(default_dtype) negative[0][1]["pooled_output"] = negative[0][1]["pooled_output"].clone().to(default_dtype) d = t[1].copy() pooled_output = d.get("pooled_output", None) if pooled_output is not None: if truncate_conditioning == "true_and_zero_neg": 
d["pooled_output"] = torch.zeros((1,2048), dtype=t[0].dtype, device=t[0].device) n = [torch.zeros((1,154,4096), dtype=t[0].dtype, device=t[0].device), d] else: d["pooled_output"] = d["pooled_output"][:, :2048] n = [t[0][:, :154, :4096], d] c.append(n) negative = c sigmin = model.model.model_sampling.sigma_min sigmax = model.model.model_sampling.sigma_max if noise_seed == -1: seed = torch.initial_seed() + 1 else: seed = noise_seed torch.manual_seed(noise_seed) noise_sampler = NOISE_GENERATOR_CLASSES.get(noise_type)(x=x, seed=seed, sigma_min=sigmin, sigma_max=sigmax) if noise_type == "fractal": noise_sampler.alpha = alpha noise_sampler.k = k noise_sampler.scale = 0.1 if not add_noise: noise = torch.zeros_like(x) elif latent_noise is None: noise = noise_sampler(sigma=sigmax, sigma_next=sigmin) else: noise = latent_noise["samples"] if noise_is_latent: #add noise and latent together and normalize --> noise noise += x.cpu() noise.sub_(noise.mean()).div_(noise.std()) if noise_normalize: noise.sub_(noise.mean()).div_(noise.std()) noise *= noise_stdev noise = (noise - noise.mean()) + noise_mean if latent_noise_match: for i in range(latent_noise_match["samples"].shape[1]): noise[0][i] = (noise[0][i] - noise[0][i].mean()) noise[0][i] = (noise[0][i]) + latent_noise_match["samples"][0][i].mean() noise_mask = latent["noise_mask"] if "noise_mask" in latent else None x0_output = {} callback = latent_preview.prepare_callback(model, sigmas.shape[-1] - 1, x0_output) disable_pbar = False samples = comfy.sample.sample_custom(model, noise, cfg, sampler, sigmas, positive, negative, x, noise_mask=noise_mask, callback=callback, disable_pbar=disable_pbar, seed=noise_seed) out = latent.copy() out["samples"] = samples if "x0" in x0_output: out_denoised = latent.copy() out_denoised["samples"] = model.model.process_latent_out(x0_output["x0"].cpu()) else: out_denoised = out out_orig_dtype = out['samples'].clone().to(latent_image_dtype) out_denoised_orig_dtype = 
class ReFluxPatcher:
    """Node that swaps a Flux diffusion model between the stock and "Re" classes."""

    RETURN_TYPES = ("MODEL",)
    RETURN_NAMES = ("model",)
    CATEGORY = "RES4LYF/model_patches"
    FUNCTION = "main"

    @classmethod
    def INPUT_TYPES(s):
        required = {
            "model": ("MODEL",),
            "enable": ("BOOLEAN", {"default": True}),
        }
        return {"required": required}

    def main(self, model, enable=True):
        """Reassign ``__class__`` on the diffusion model and each of its blocks.

        The model is patched in place (it is not cloned) and returned in a
        1-tuple. Every double/single stream block also receives its position
        as ``idx``; enabling additionally sets ``threshold_inv = False`` on
        the diffusion model.
        """
        net = model.model.diffusion_model

        if enable:
            net_cls, double_cls, single_cls = ReFlux, ReDoubleStreamBlock, ReSingleStreamBlock
        else:
            net_cls, double_cls, single_cls = Flux, DoubleStreamBlock, SingleStreamBlock

        net.__class__ = net_cls
        if enable:
            net.threshold_inv = False

        for position, layer in enumerate(net.double_blocks):
            layer.__class__ = double_cls
            layer.idx = position

        for position, layer in enumerate(net.single_blocks):
            layer.__class__ = single_cls
            layer.idx = position

        return (model,)
@classmethod def INPUT_TYPES(s): return {"required": { "model": ("MODEL",), "enable": ("BOOLEAN", {"default": True}), "ortho_T5": ("BOOLEAN", {"default": True}), "ortho_clip_L": ("BOOLEAN", {"default": True}), "zero_clip_L": ("BOOLEAN", {"default": True}), } } RETURN_TYPES = ("MODEL",) RETURN_NAMES = ("model",) CATEGORY = "RES4LYF/model_patches" FUNCTION = "main" original_forward = Flux.forward @staticmethod def new_forward(self, x, timestep, context, y, guidance, control=None, transformer_options={}, **kwargs): for _ in range(500): if self.ortho_T5 and get_cosine_similarity(context[0], context[1]) != 0: context[0] = get_orthogonal(context[0], context[1]) if self.ortho_clip_L and get_cosine_similarity(y[0], y[1]) != 0: y[0] = get_orthogonal(y[0].unsqueeze(0), y[1].unsqueeze(0)).squeeze(0) RESplain("postcossim1: ", get_cosine_similarity(context[0], context[1])) RESplain("postcossim2: ", get_cosine_similarity(y[0], y[1])) if self.zero_clip_L: y[0] = torch.zeros_like(y[0]) return FluxOrthoCFGPatcher.original_forward(self, x, timestep, context, y, guidance, control, transformer_options, **kwargs) def main(self, model, enable=True, ortho_T5=True, ortho_clip_L=True, zero_clip_L=True): m = model.clone() if enable: m.model.diffusion_model.ortho_T5 = ortho_T5 m.model.diffusion_model.ortho_clip_L = ortho_clip_L m.model.diffusion_model.zero_clip_L = zero_clip_L Flux.forward = types.MethodType(FluxOrthoCFGPatcher.new_forward, m.model.diffusion_model) else: Flux.forward = FluxOrthoCFGPatcher.original_forward return (m,) class FluxGuidanceDisable: @classmethod def INPUT_TYPES(s): return {"required": { "model": ("MODEL",), "disable": ("BOOLEAN", {"default": True}), "zero_clip_L": ("BOOLEAN", {"default": True}), } } RETURN_TYPES = ("MODEL",) RETURN_NAMES = ("model",) FUNCTION = "main" CATEGORY = "RES4LYF/model_patches" original_forward = Flux.forward @staticmethod def new_forward(self, x, timestep, context, y, guidance, control=None, transformer_options={}, **kwargs): y = 
def time_snr_shift_exponential(alpha, t):
    """Exponential timestep shift: ``e^alpha / (e^alpha + (1/t - 1))``.

    ``alpha`` must be a plain number (it goes through ``math.exp``); ``t`` may
    be a number or anything supporting the arithmetic below.
    """
    gain = math.exp(alpha)
    inverse_ratio = (1 / t - 1) ** 1.0
    return gain / (gain + inverse_ratio)


def time_snr_shift_linear(alpha, t):
    """Linear timestep shift: ``alpha * t / (1 + (alpha - 1) * t)``.

    ``alpha == 1.0`` is the identity and returns ``t`` itself untouched.
    """
    if alpha != 1.0:
        return alpha * t / (1 + (alpha - 1) * t)
    return t
class ModelSamplingAdvancedResolution:
    # this is used to set the "shift" using either exponential scaling (default for SD3.5M and Flux) or linear scaling (default for SD3.5L and SD3 2B beta)
    """ComfyUI node that derives the sampling "shift" from the latent size.

    The shift is interpolated linearly between ``base_shift`` and
    ``max_shift`` from ``width * height`` of the latent, then used to rebuild
    the model's ``sigmas`` table with either exponential or linear scaling.
    Supported model configs: Flux / FluxSchnell, AuraFlow and SD3.
    """

    @classmethod
    def INPUT_TYPES(s):
        return {"required": {
                    "model":        ("MODEL",),
                    "scaling":      (["exponential", "linear"], {"default": 'exponential'}),
                    "max_shift":    ("FLOAT", {"default": 1.35, "min": -100.0, "max": 100.0, "step":0.01, "round": False}),
                    "base_shift":   ("FLOAT", {"default": 0.85, "min": -100.0, "max": 100.0, "step":0.01, "round": False}),
                    "latent_image": ("LATENT",),
                    }
                }

    RETURN_TYPES = ("MODEL",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/model_shift"

    def sigma_exponential(self, timestep):
        """Exponentially shifted sigma for ``timestep`` (uses the state set by ``main``)."""
        return time_snr_shift_exponential(self.timestep_shift, timestep / self.multiplier)

    def sigma_linear(self, timestep):
        """Linearly shifted sigma for ``timestep`` (uses the state set by ``main``)."""
        return time_snr_shift_linear(self.timestep_shift, timestep / self.multiplier)

    def main(self, model, scaling, max_shift, base_shift, latent_image):
        """Return ``(patched_model_clone,)`` with a resolution-dependent sigma table.

        Raises:
            ValueError: if ``scaling`` is not "exponential"/"linear", or the
                model config is not one of the supported families.
        """
        # Fail early with a clear message instead of an UnboundLocalError on `ts` below.
        if scaling not in ("exponential", "linear"):
            raise ValueError(f"Unknown scaling mode: {scaling!r}")

        m = model.clone()

        # Last two dims are taken as (height, width); [-2:] keeps this working
        # if the latent carries extra leading dims.
        height, width = latent_image['samples'].shape[-2:]

        # Linear interpolation of the shift between x1 and x2.
        # NOTE(review): width/height here are the LATENT tensor dims, yet the
        # product is divided by 8*8*2*2 — confirm that is the intended unit.
        x1 = 256
        x2 = 4096
        mm = (max_shift - base_shift) / (x2 - x1)
        b = base_shift - mm * x1
        shift = (width * height / (8 * 8 * 2 * 2)) * mm + b

        self.timestep_shift = shift
        self.multiplier = 1000
        timesteps = 1000

        sampling_base = None
        sampling_type = None
        model_config = m.model.model_config

        if isinstance(model_config, (comfy.supported_models.Flux, comfy.supported_models.FluxSchnell)):
            self.multiplier = 1
            timesteps = 10000
            sampling_base = comfy.model_sampling.ModelSamplingFlux
            sampling_type = comfy.model_sampling.CONST
        elif isinstance(model_config, comfy.supported_models.AuraFlow):
            self.multiplier = 1
            timesteps = 1000
            sampling_base = comfy.model_sampling.ModelSamplingDiscreteFlow
            sampling_type = comfy.model_sampling.CONST
        elif isinstance(model_config, comfy.supported_models.SD3):
            self.multiplier = 1000
            timesteps = 1000
            sampling_base = comfy.model_sampling.ModelSamplingDiscreteFlow
            sampling_type = comfy.model_sampling.CONST

        # Same contract as the sibling ModelSamplingAdvanced node: unsupported
        # models get an explicit error rather than an unbound-variable crash.
        if sampling_base is None:
            raise ValueError("Model not supported by ModelSamplingAdvancedResolution")

        class ModelSamplingAdvanced(sampling_base, sampling_type):
            pass

        m.object_patches['model_sampling'] = m.model.model_sampling = ModelSamplingAdvanced(m.model.model_config)

        # Written through __dict__ exactly as before (bypasses attribute hooks).
        m.model.model_sampling.__dict__['shift'] = self.timestep_shift
        m.model.model_sampling.__dict__['multiplier'] = self.multiplier

        s_range = torch.arange(1, timesteps + 1, 1).to(torch.float64)
        if scaling == "exponential":
            ts = self.sigma_exponential((s_range / timesteps) * self.multiplier)
        else:  # "linear" (validated above)
            ts = self.sigma_linear((s_range / timesteps) * self.multiplier)

        m.model.model_sampling.register_buffer('sigmas', ts)
        m.object_patches['model_sampling'].sigmas = m.model.model_sampling.sigmas

        return (m,)
def save(self, model, filename_prefix, prompt=None, extra_pnginfo=None): save_checkpoint(model, clip=None, vae=None, filename_prefix=filename_prefix, output_dir=self.output_dir, prompt=prompt, extra_pnginfo=extra_pnginfo) return {} def save_checkpoint(model, clip=None, vae=None, clip_vision=None, filename_prefix=None, output_dir=None, prompt=None, extra_pnginfo=None): full_output_folder, filename, counter, subfolder, filename_prefix = folder_paths.get_save_image_path(filename_prefix, output_dir) prompt_info = "" if prompt is not None: prompt_info = json.dumps(prompt) metadata = {} enable_modelspec = True if isinstance(model.model, comfy.model_base.SDXL): if isinstance(model.model, comfy.model_base.SDXL_instructpix2pix): metadata["modelspec.architecture"] = "stable-diffusion-xl-v1-edit" else: metadata["modelspec.architecture"] = "stable-diffusion-xl-v1-base" elif isinstance(model.model, comfy.model_base.SDXLRefiner): metadata["modelspec.architecture"] = "stable-diffusion-xl-v1-refiner" elif isinstance(model.model, comfy.model_base.SVD_img2vid): metadata["modelspec.architecture"] = "stable-video-diffusion-img2vid-v1" elif isinstance(model.model, comfy.model_base.SD3): metadata["modelspec.architecture"] = "stable-diffusion-v3-medium" #TODO: other SD3 variants else: enable_modelspec = False if enable_modelspec: metadata["modelspec.sai_model_spec"] = "1.0.0" metadata["modelspec.implementation"] = "sgm" metadata["modelspec.title"] = "{} {}".format(filename, counter) #TODO: # "stable-diffusion-v1", "stable-diffusion-v1-inpainting", "stable-diffusion-v2-512", # "stable-diffusion-v2-768-v", "stable-diffusion-v2-unclip-l", "stable-diffusion-v2-unclip-h", # "v2-inpainting" extra_keys = {} model_sampling = model.get_model_object("model_sampling") if isinstance(model_sampling, comfy.model_sampling.ModelSamplingContinuousEDM): if isinstance(model_sampling, comfy.model_sampling.V_PREDICTION): extra_keys["edm_vpred.sigma_max"] = torch.tensor(model_sampling.sigma_max).float() 
class TorchCompileModelFluxAdvanced: #adapted from https://github.com/kijai/ComfyUI-KJNodes
    """ComfyUI node that wraps selected Flux blocks with ``torch.compile``.

    Block selections are strings such as ``"0-18"`` or ``"0-3, 7, 12-14"``.
    Compiled blocks are registered on a clone of the model as object patches.
    """

    def __init__(self):
        # Once a compile pass has succeeded on this node instance, later calls
        # to `patch` skip the compile step and return an unpatched clone.
        self._compiled = False

    @classmethod
    def INPUT_TYPES(s):
        return {"required": {
                    "model": ("MODEL",),
                    "backend": (["inductor", "cudagraphs"],),
                    "fullgraph": ("BOOLEAN", {"default": False, "tooltip": "Enable full graph mode"}),
                    "mode": (["default", "max-autotune", "max-autotune-no-cudagraphs", "reduce-overhead"], {"default": "default"}),
                    "double_blocks": ("STRING", {"default": "0-18", "multiline": True}),
                    "single_blocks": ("STRING", {"default": "0-37", "multiline": True}),
                    "dynamic": ("BOOLEAN", {"default": False, "tooltip": "Enable dynamic mode"}),
                }}
    RETURN_TYPES = ("MODEL",)
    FUNCTION = "patch"

    CATEGORY = "RES4LYF/model_patches"
    EXPERIMENTAL = True

    def parse_blocks(self, blocks_str):
        """Expand a selection string into a list of block indices.

        Entries are separated by commas or newlines (the widget is multiline);
        ``"a-b"`` is an inclusive range. Empty entries are ignored, so an empty
        string or a trailing comma no longer raises.

        Raises:
            ValueError: if an entry is not an integer or an ``a-b`` range.
        """
        blocks = []
        for part in blocks_str.replace("\n", ",").split(","):
            part = part.strip()
            if not part:
                continue
            if '-' in part:
                start, end = map(int, part.split('-'))
                blocks.extend(range(start, end + 1))
            else:
                blocks.append(int(part))
        return blocks

    def patch(self, model, backend, mode, fullgraph, single_blocks, double_blocks, dynamic):
        """Return ``(model_clone,)`` with the selected blocks replaced by compiled versions.

        Raises:
            ValueError: from ``parse_blocks`` on a malformed selection string.
            RuntimeError: if compiling or registering a block fails (the
                original exception is chained as ``__cause__``).
        """
        # Sets give O(1) membership tests inside the block loops.
        single_block_set = set(self.parse_blocks(single_blocks))
        double_block_set = set(self.parse_blocks(double_blocks))

        m = model.clone()
        diffusion_model = m.get_model_object("diffusion_model")

        if not self._compiled:
            try:
                for i, block in enumerate(diffusion_model.double_blocks):
                    if i in double_block_set:
                        #print("Compiling double_block", i)
                        m.add_object_patch(f"diffusion_model.double_blocks.{i}", torch.compile(block, mode=mode, dynamic=dynamic, fullgraph=fullgraph, backend=backend))
                for i, block in enumerate(diffusion_model.single_blocks):
                    if i in single_block_set:
                        #print("Compiling single block", i)
                        m.add_object_patch(f"diffusion_model.single_blocks.{i}", torch.compile(block, mode=mode, dynamic=dynamic, fullgraph=fullgraph, backend=backend))
                self._compiled = True
                compile_settings = {
                    "backend": backend,
                    "mode": mode,
                    "fullgraph": fullgraph,
                    "dynamic": dynamic,
                }
                setattr(m.model, "compile_settings", compile_settings)
            except Exception as e:
                # `except Exception` (not bare) lets KeyboardInterrupt/SystemExit
                # propagate, and chaining keeps the real failure visible.
                raise RuntimeError("Failed to compile model") from e

        return (m, )
        # rest of the layers that are not patched
        # diffusion_model.final_layer = torch.compile(diffusion_model.final_layer, mode=mode, fullgraph=fullgraph, backend=backend)
        # diffusion_model.guidance_in = torch.compile(diffusion_model.guidance_in, mode=mode, fullgraph=fullgraph, backend=backend)
        # diffusion_model.img_in = torch.compile(diffusion_model.img_in, mode=mode, fullgraph=fullgraph, backend=backend)
        # diffusion_model.time_in = torch.compile(diffusion_model.time_in, mode=mode, fullgraph=fullgraph, backend=backend)
        # diffusion_model.txt_in = torch.compile(diffusion_model.txt_in, mode=mode, fullgraph=fullgraph, backend=backend)
        # diffusion_model.vector_in = torch.compile(diffusion_model.vector_in, mode=mode, fullgraph=fullgraph, backend=backend)
def noise_generator_factory(cls, **fixed_params):
    """Return a keyword-only constructor for ``cls`` with ``fixed_params`` pre-filled.

    Keyword arguments given at call time override the pre-filled ones.
    """
    def create_instance(**kwargs):
        merged = dict(fixed_params)
        merged.update(kwargs)
        return cls(**merged)

    return create_instance


def like(x):
    """Collect ``x``'s shape, dtype, layout and device as tensor-factory keyword arguments."""
    return {
        'size':   x.shape,
        'dtype':  x.dtype,
        'layout': x.layout,
        'device': x.device,
    }


def scale_to_range(x, scaled_min = -1.73, scaled_max = 1.73): #1.73 is roughly the square root of 3
    """Affinely map ``x`` so its minimum lands on ``scaled_min`` and its maximum on ``scaled_max``."""
    lowest = x.min()
    highest = x.max()
    target_span = scaled_max - scaled_min
    return scaled_min + (x - lowest) * target_span / (highest - lowest)


def normalize(x):
    """Shift ``x`` to zero mean and divide by ``x.std()``."""
    centered = x - x.mean()
    return centered / x.std()
class FractalNoiseGenerator(NoiseGenerator):
    """Gaussian noise reshaped in the frequency domain by ``k / freq ** (alpha * scale)``.

    The spectrum is weighted, its zero-frequency entry is removed, and the
    result is transformed back and rescaled to unit standard deviation.
    """

    def __init__(self, x=None, size=None, dtype=None, layout=None, device=None, seed=42, generator=None, sigma_min=None, sigma_max=None,
                 alpha=0.0, k=1.0, scale=0.1):
        super().__init__(x, size, dtype, layout, device, seed, generator, sigma_min, sigma_max)
        self.update(alpha=alpha, k=k, scale=scale)

    def __call__(self, *, alpha=None, k=None, scale=None, **kwargs):
        """Draw one noise tensor of shape ``self.size``.

        ``alpha``, ``k`` and ``scale`` override the stored values when not None.
        ``self.size`` must have 4 entries (b, c, h, w) or 5 (b, c, t, h, w).
        """
        self.update(alpha=alpha, k=k, scale=scale)
        is_video = len(self.size) == 5
        if is_video:
            b, c, t, h, w = self.size
        else:
            b, c, h, w = self.size

        noise = torch.normal(mean=0.0, std=1.0, size=self.size, dtype=self.dtype, layout=self.layout, device=self.device, generator=self.generator)

        y_freq = torch.fft.fftfreq(h, 1/h, device=self.device)
        x_freq = torch.fft.fftfreq(w, 1/w, device=self.device)

        if is_video:
            t_freq = torch.fft.fftfreq(t, 1/t, device=self.device)
            # Axes ordered (t, h, w) so the grid broadcasts against the trailing
            # dims of the (b, c, t, h, w) spectrum. The earlier (h, w, t) order
            # only multiplied cleanly when those three sizes happened to match.
            freq = torch.sqrt(
                t_freq[:, None, None]**2 + y_freq[None, :, None]**2 + x_freq[None, None, :]**2
            ).clamp(min=1e-10)
        else:
            freq = torch.sqrt(y_freq[:, None]**2 + x_freq[None, :]**2).clamp(min=1e-10)

        spectral_density = self.k / torch.pow(freq, self.alpha * self.scale)
        # Zero only the all-zero-frequency entry, whatever the grid's rank
        # (for the 2-D grid this is the same element as the former [0, 0]).
        spectral_density[(0,) * spectral_density.ndim] = 0

        noise_fft = torch.fft.fftn(noise)
        modified_fft = noise_fft * spectral_density
        noise = torch.fft.ifftn(modified_fft).real

        return noise / torch.std(noise)
class HiresPyramidNoiseGenerator(NoiseGenerator):
    """Uniform base noise plus up to four Gaussian layers drawn at growing resolutions.

    Each layer is resampled to the target size with ``nn.Upsample`` and added
    with weight ``discount ** i``; the sum is rescaled to unit standard deviation.
    """

    def __init__(self, x=None, size=None, dtype=None, layout=None, device=None, seed=42, generator=None, sigma_min=None, sigma_max=None,
                 discount=0.7, mode='nearest-exact'):
        super().__init__(x, size, dtype, layout, device, seed, generator, sigma_min, sigma_max)
        self.update(discount=discount, mode=mode)

    def __call__(self, *, discount=None, mode=None, **kwargs):
        """Draw one noise tensor of shape ``self.size`` (4-D or 5-D).

        ``discount`` and ``mode`` override the stored values when not None.
        NOTE(review): ``self.mode`` is passed to ``nn.Upsample`` unchanged for
        5-D sizes as well — confirm the configured mode accepts 5-D input.
        """
        self.update(discount=discount, mode=mode)

        is_video = len(self.size) == 5
        if is_video:
            b, c, t, h, w = self.size
            orig_h, orig_w, orig_t = h, w, t
            # Target size follows the tensor's own axis order (t, h, w), as the
            # sibling InterpolatedPyramidNoiseGenerator does. The previous
            # (h, w, t) order produced a tensor that could not be added to `noise`.
            u = nn.Upsample(size=(orig_t, orig_h, orig_w), mode=self.mode).to(self.device)
        else:
            b, c, h, w = self.size
            orig_h, orig_w = h, w
            orig_t = t = 1
            u = nn.Upsample(size=(orig_h, orig_w), mode=self.mode).to(self.device)

        # Uniform in [-1.73, 1.73).
        noise = ((torch.rand(size=self.size, dtype=self.dtype, layout=self.layout, device=self.device, generator=self.generator) - 0.5) * 2 * 1.73)

        for i in range(4):
            # Random growth factor in [2, 4); each dimension is capped at 15x its original size.
            r = torch.rand(1, device=self.device, generator=self.generator).item() * 2 + 2
            h, w = min(orig_h * 15, int(h * (r ** i))), min(orig_w * 15, int(w * (r ** i)))
            if is_video:
                t = min(orig_t * 15, int(t * (r ** i)))
                new_noise = torch.randn((b, c, t, h, w), dtype=self.dtype, layout=self.layout, device=self.device, generator=self.generator)
            else:
                new_noise = torch.randn((b, c, h, w), dtype=self.dtype, layout=self.layout, device=self.device, generator=self.generator)

            upsampled_noise = u(new_noise)
            noise += upsampled_noise * self.discount ** i

            if h >= orig_h * 15 or w >= orig_w * 15 or t >= orig_t * 15:
                break  # if resolution is too high

        return noise / noise.std()
class CascadeBPyramidNoiseGenerator(NoiseGenerator):
    """Gaussian noise plus coarser Gaussian layers upsampled back to full size.

    Level ``i`` is drawn at ``1 / 2**i`` of the full height and width, weighted
    by ``0.75 ** i``, and the sum is divided by the root of the summed squared
    weights. Only 4-D sizes (b, c, h, w) are handled.
    """

    def __init__(self, x=None, size=None, dtype=None, layout=None, device=None, seed=42, generator=None, sigma_min=None, sigma_max=None,
                 levels=10, mode='nearest', size_range=(1, 16)):
        super().__init__(x, size, dtype, layout, device, seed, generator, sigma_min, sigma_max)
        self.update(epsilon=x, levels=levels, mode=mode, size_range=size_range)

    def __call__(self, *, levels=None, mode=None, size_range=(1, 16), **kwargs):
        """Draw one noise tensor of shape ``self.size``.

        ``levels`` and ``mode`` override the stored values when not None, so a
        plain call keeps the constructor's settings. ``size_range`` is used as
        given (``None`` disables the size filter); the value stored by the
        constructor is not consulted here.
        """
        self.update(levels=levels, mode=mode)

        b, c, h, w = self.size  # also rejects sizes that are not 4-D

        epsilon = torch.randn(self.size, dtype=self.dtype, layout=self.layout, device=self.device, generator=self.generator)
        full_h, full_w = epsilon.size(-2), epsilon.size(-1)

        multipliers = [1]
        for i in range(1, self.levels):
            m = 0.75 ** i

            # Width now comes from the width axis; it was previously derived
            # from the height axis as well. Square sizes are unaffected.
            h, w = int(full_h // (2 ** i)), int(full_w // (2 ** i))
            in_range = size_range is None or (size_range[0] <= h <= size_range[1] or size_range[0] <= w <= size_range[1])
            # A zero-sized layer cannot be interpolated, so it is skipped.
            if in_range and h > 0 and w > 0:
                # dtype matches `epsilon` so the sum keeps the requested precision.
                offset = torch.randn(epsilon.size(0), epsilon.size(1), h, w, dtype=self.dtype, device=self.device, generator=self.generator)
                epsilon = epsilon + torch.nn.functional.interpolate(offset, size=epsilon.shape[-2:], mode=self.mode) * m
                multipliers.append(m)

            if h <= 1 or w <= 1:
                break

        # Divide by the square root of the sum of the squared layer weights.
        epsilon = epsilon / sum([m ** 2 for m in multipliers]) ** 0.5

        return epsilon
class LaplacianNoiseGenerator(NoiseGenerator):
    """Gaussian noise (divided by 4) plus Laplace-distributed noise, rescaled to unit std."""

    def __init__(self, x=None, size=None, dtype=None, layout=None, device=None, seed=42, generator=None, sigma_min=None, sigma_max=None,
                 loc=0, scale=1.0):
        super().__init__(x, size, dtype, layout, device, seed, generator, sigma_min, sigma_max)
        self.update(loc=loc, scale=scale)

    def __call__(self, *, loc=None, scale=None, **kwargs):
        """Draw one noise tensor of shape ``self.size``.

        ``loc`` and ``scale`` override the stored Laplace parameters when not None.
        Each successful call reseeds ``self.generator`` with its seed plus one.
        """
        self.update(loc=loc, scale=scale)

        noise = torch.randn(self.size, dtype=self.dtype, layout=self.layout, device=self.device, generator=self.generator) / 4.0

        # The Laplace draw uses the global RNG, so it is seeded from
        # self.generator for the draw and put back afterwards.
        # NOTE(review): only the state returned by torch.random.get_rng_state()
        # is saved/restored — confirm that covers every RNG torch.manual_seed touches.
        rng_state = torch.random.get_rng_state()
        try:
            torch.manual_seed(self.generator.initial_seed())
            laplacian_noise = Laplace(loc=self.loc, scale=self.scale).rsample(self.size).to(self.device)
        finally:
            # Restore even when sampling raises (e.g. an invalid `scale`), so a
            # failure here cannot leave the global RNG reseeded.
            torch.random.set_rng_state(rng_state)

        self.generator.manual_seed(self.generator.initial_seed() + 1)

        noise += laplacian_noise
        return noise / noise.std()
= torch.copysign(torch.pow(torch.abs(noise), 0.5), noise).to(self.device) self.generator.manual_seed(self.generator.initial_seed() + 1) torch.random.set_rng_state(rng_state) return (noise_latent - noise_latent.mean()) / noise_latent.std() class WaveletNoiseGenerator(NoiseGenerator): def __init__(self, x=None, size=None, dtype=None, layout=None, device=None, seed=42, generator=None, sigma_min=None, sigma_max=None, wavelet='haar'): super().__init__(x, size, dtype, layout, device, seed, generator, sigma_min, sigma_max) self.update(wavelet=wavelet) def __call__(self, *, wavelet=None, **kwargs): self.update(wavelet=wavelet) # b, c, h, w = self.size # orig_h, orig_w = h, w # noise for spatial dimensions only coeffs = pywt.wavedecn(torch.randn(self.size, dtype=self.dtype, layout=self.layout, device=self.device, generator=self.generator).to(self.device), wavelet=self.wavelet, mode='periodization') noise = pywt.waverecn(coeffs, wavelet=self.wavelet, mode='periodization') noise_tensor = torch.tensor(noise, dtype=self.dtype, device=self.device) noise_tensor = (noise_tensor - noise_tensor.mean()) / noise_tensor.std() return noise_tensor class PerlinNoiseGenerator(NoiseGenerator): def __init__(self, x=None, size=None, dtype=None, layout=None, device=None, seed=42, generator=None, sigma_min=None, sigma_max=None, detail=0.0): super().__init__(x, size, dtype, layout, device, seed, generator, sigma_min, sigma_max) self.update(detail=detail) @staticmethod def get_positions(block_shape: Tuple[int, int]) -> Tensor: bh, bw = block_shape positions = torch.stack( torch.meshgrid( [(torch.arange(b) + 0.5) / b for b in (bw, bh)], indexing="xy", ), -1, ).view(1, bh, bw, 1, 1, 2) return positions @staticmethod def unfold_grid(vectors: Tensor) -> Tensor: batch_size, _, gpy, gpx = vectors.shape return ( unfold(vectors, (2, 2)) .view(batch_size, 2, 4, -1) .permute(0, 2, 3, 1) .view(batch_size, 4, gpy - 1, gpx - 1, 2) ) @staticmethod def smooth_step(t: Tensor) -> Tensor: return t * t * (3.0 - 2.0 
class PerlinNoiseGenerator(NoiseGenerator):
    """Gaussian noise with two layers of 2-D Perlin gradient noise added on top.

    ``__call__`` draws half-amplitude Gaussian noise of shape ``self.size``,
    adds two independently sampled Perlin layers per spatial slice, and
    rescales the result by its standard deviation.
    """

    def __init__(self, x=None, size=None, dtype=None, layout=None, device=None, seed=42, generator=None, sigma_min=None, sigma_max=None, detail=0.0):
        super().__init__(x, size, dtype, layout, device, seed, generator, sigma_min, sigma_max)
        self.update(detail=detail)

    @staticmethod
    def get_positions(block_shape: Tuple[int, int]) -> Tensor:
        """Return sample coordinates inside one grid cell.

        For a block of ``bh`` x ``bw`` samples the coordinates are the cell
        centres ``(k + 0.5) / b`` along each axis. The result has shape
        ``(1, bh, bw, 1, 1, 2)`` with the last axis holding ``(x, y)``.
        """
        bh, bw = block_shape
        positions = torch.stack(
            torch.meshgrid(
                [(torch.arange(b) + 0.5) / b for b in (bw, bh)],
                indexing="xy",
            ),
            -1,
        ).view(1, bh, bw, 1, 1, 2)
        return positions

    @staticmethod
    def unfold_grid(vectors: Tensor) -> Tensor:
        """Gather the four corner vectors of every grid cell.

        ``vectors`` has shape ``(batch, 2, gpy, gpx)`` (one 2-D vector per grid
        point); the result has shape ``(batch, 4, gpy - 1, gpx - 1, 2)``.
        """
        batch_size, _, gpy, gpx = vectors.shape
        return (
            unfold(vectors, (2, 2))
            .view(batch_size, 2, 4, -1)
            .permute(0, 2, 3, 1)
            .view(batch_size, 4, gpy - 1, gpx - 1, 2)
        )

    @staticmethod
    def smooth_step(t: Tensor) -> Tensor:
        """Cubic smoothstep ``3t^2 - 2t^3`` used as the interpolation weight."""
        return t * t * (3.0 - 2.0 * t)

    # NOTE: this is a staticmethod that still takes an explicit ``self``;
    # callers pass the instance by hand (see perlin_noise). The signature is
    # kept as-is so existing call sites continue to work.
    @staticmethod
    def perlin_noise_tensor(
        self, vectors: Tensor, positions: Tensor, step: Callable = None
    ) -> Tensor:
        """Interpolate corner gradients into a Perlin noise image.

        Args:
            self: object providing ``smooth_step`` (used when ``step`` is None).
            vectors: corner vectors, shape ``(batch, 4, gh, gw, 2)``.
            positions: in-cell coordinates, shape ``(B, bh, bw, GH, GW, 2)``
                where ``B`` is 1 or ``batch`` and ``GH``/``GW`` are 1 or
                ``gh``/``gw``.
            step: interpolation weight function; defaults to ``smooth_step``.

        Returns:
            Tensor of shape ``(batch, gh * bh, gw * bw)``.

        Raises:
            Exception: if the grid or batch dimensions of ``positions`` are
                incompatible with ``vectors``.
        """
        if step is None:
            step = self.smooth_step

        batch_size = vectors.shape[0]
        # grid height, grid width
        gh, gw = vectors.shape[2:4]
        # block height, block width
        bh, bw = positions.shape[1:3]

        for i in range(2):
            if positions.shape[i + 3] not in (1, vectors.shape[i + 2]):
                # Report the dimensions that are actually being compared
                # (the previous message printed unrelated axes).
                raise Exception(
                    f"Blocks shapes do not match: vectors ({gh}, {gw}), positions ({positions.shape[3]}, {positions.shape[4]})"
                )

        if positions.shape[0] not in (1, batch_size):
            raise Exception(
                f"Batch sizes do not match: vectors ({vectors.shape[0]}), positions ({positions.shape[0]})"
            )

        vectors = vectors.view(batch_size, 4, 1, gh * gw, 2)
        positions = positions.view(positions.shape[0], bh * bw, -1, 2)

        step_x = step(positions[..., 0])
        step_y = step(positions[..., 1])

        # Dot each corner gradient with the offset from that corner, then
        # blend along x (rows) and finally along y.
        row0 = lerp(
            (vectors[:, 0] * positions).sum(dim=-1),
            (vectors[:, 1] * (positions - positions.new_tensor((1, 0)))).sum(dim=-1),
            step_x,
        )
        row1 = lerp(
            (vectors[:, 2] * (positions - positions.new_tensor((0, 1)))).sum(dim=-1),
            (vectors[:, 3] * (positions - positions.new_tensor((1, 1)))).sum(dim=-1),
            step_x,
        )
        noise = lerp(row0, row1, step_y)
        return (
            noise.view(
                batch_size,
                bh,
                bw,
                gh,
                gw,
            )
            .permute(0, 3, 1, 4, 2)
            .reshape(batch_size, gh * bh, gw * bw)
        )

    def perlin_noise(
        self,
        grid_shape: Tuple[int, int],
        out_shape: Tuple[int, int],
        batch_size: int = 1,
        generator: Generator = None,
        *args,
        **kwargs,
    ) -> Tensor:
        """Sample a batch of Perlin noise images.

        Args:
            grid_shape: ``(gh, gw)`` number of grid cells.
            out_shape: ``(oh, ow)`` output size; each must be a multiple of
                the matching grid dimension.
            batch_size: number of independent images.
            generator: accepted for interface compatibility; the random
                angles are drawn from ``self.generator``.
            *args, **kwargs: forwarded to ``torch.empty`` for the angle tensor.

        Returns:
            Tensor of shape ``(batch_size, oh, ow)``; the leading axis is
            squeezed away when ``batch_size == 1``.

        Raises:
            Exception: if an output dimension is not divisible by the grid.
        """
        gh, gw = grid_shape  # grid height and width
        oh, ow = out_shape   # output height and width
        bh, bw = oh // gh, ow // gw  # block height and width

        if oh != bh * gh:
            raise Exception(f"Output height {oh} must be divisible by grid height {gh}")
        # Was `ow != bw * gw != 0`, a chained comparison that silently skipped
        # the error whenever bw * gw == 0; now mirrors the height check.
        if ow != bw * gw:
            raise Exception(f"Output width {ow} must be divisible by grid width {gw}")

        angle = torch.empty(
            [batch_size] + [s + 1 for s in grid_shape], device=self.device, *args, **kwargs
        ).uniform_(to=2.0 * pi, generator=self.generator)
        # random unit vectors on grid points
        vectors = self.unfold_grid(torch.stack((torch.cos(angle), torch.sin(angle)), dim=1))
        # positions inside grid cells [0, 1)
        positions = self.get_positions((bh, bw)).to(vectors)
        return self.perlin_noise_tensor(self, vectors, positions).squeeze(0)

    def __call__(self, *, detail=None, **kwargs):
        """Return Gaussian-plus-Perlin noise of shape ``self.size``.

        Supports 5-D sizes ``(b, c, t, h, w)`` (each time slice gets its own
        Perlin layers) and 4-D sizes ``(b, c, h, w)``. ``detail`` is forwarded
        to ``update()`` but is currently unused by the sampling itself.
        """
        self.update(detail=detail)  # currently unused

        if len(self.size) == 5:
            _, c, t, h, w = self.size
            noise = torch.randn(self.size, dtype=self.dtype, layout=self.layout, device=self.device, generator=self.generator) / 2.0

            for tt in range(t):
                for _ in range(2):
                    perlin_slice = self.perlin_noise((h, w), (h, w), batch_size=c, generator=self.generator).to(self.device)
                    # perlin_noise() drops the channel axis when c == 1, so
                    # reshape explicitly instead of relying on unsqueeze();
                    # for c > 1 this is the same (1, c, 1, h, w) view as before.
                    noise[:, :, tt:tt+1, :, :] += perlin_slice.reshape(1, c, 1, h, w)
        else:
            _, c, h, w = self.size
            noise = torch.randn(self.size, dtype=self.dtype, layout=self.layout, device=self.device, generator=self.generator) / 2.0
            for _ in range(2):
                noise += self.perlin_noise((h, w), (h, w), batch_size=c, generator=self.generator).to(self.device)

        return noise / noise.std()
from functools import partial

# Registry of every selectable noise type: name -> generator constructor.
# Entries built with noise_generator_factory(...) bind a keyword (mode/alpha)
# to a shared generator class; the factory is defined outside this block
# (presumably it returns a constructor with that keyword preset -- verify).
# Insertion order matters: it becomes the order of NOISE_GENERATOR_NAMES.
NOISE_GENERATOR_CLASSES = {
    "fractal"               :  FractalNoiseGenerator,
    "gaussian"              :  GaussianNoiseGenerator,
    "gaussian_backwards"    :  GaussianBackwardsNoiseGenerator,
    "uniform"               :  UniformNoiseGenerator,
    "pyramid-cascade_B"     :  CascadeBPyramidNoiseGenerator,
    "pyramid-interpolated"  :  InterpolatedPyramidNoiseGenerator,
    "pyramid-bilinear"      :  noise_generator_factory(PyramidNoiseGenerator,      mode='bilinear'),
    "pyramid-bicubic"       :  noise_generator_factory(PyramidNoiseGenerator,      mode='bicubic'),
    "pyramid-nearest"       :  noise_generator_factory(PyramidNoiseGenerator,      mode='nearest'),
    "hires-pyramid-bilinear":  noise_generator_factory(HiresPyramidNoiseGenerator, mode='bilinear'),
    "hires-pyramid-bicubic" :  noise_generator_factory(HiresPyramidNoiseGenerator, mode='bicubic'),
    "hires-pyramid-nearest" :  noise_generator_factory(HiresPyramidNoiseGenerator, mode='nearest'),
    "brownian"              :  BrownianNoiseGenerator,
    "laplacian"             :  LaplacianNoiseGenerator,
    "studentt"              :  StudentTNoiseGenerator,
    "wavelet"               :  WaveletNoiseGenerator,
    "perlin"                :  PerlinNoiseGenerator,
}

# Second registry with a different key set and ordering: it adds "none"
# (mapped to Gaussian) and the named fractal colours (brown/pink/white/blue/
# violet as fixed alpha values), and has no raw "fractal" entry.
NOISE_GENERATOR_CLASSES_SIMPLE = {
    "none"                  :  GaussianNoiseGenerator,
    "brownian"              :  BrownianNoiseGenerator,
    "gaussian"              :  GaussianNoiseGenerator,
    "gaussian_backwards"    :  GaussianBackwardsNoiseGenerator,
    "laplacian"             :  LaplacianNoiseGenerator,
    "perlin"                :  PerlinNoiseGenerator,
    "studentt"              :  StudentTNoiseGenerator,
    "uniform"               :  UniformNoiseGenerator,
    "wavelet"               :  WaveletNoiseGenerator,
    "brown"                 :  noise_generator_factory(FractalNoiseGenerator,      alpha=2.0),
    "pink"                  :  noise_generator_factory(FractalNoiseGenerator,      alpha=1.0),
    "white"                 :  noise_generator_factory(FractalNoiseGenerator,      alpha=0.0),
    "blue"                  :  noise_generator_factory(FractalNoiseGenerator,      alpha=-1.0),
    "violet"                :  noise_generator_factory(FractalNoiseGenerator,      alpha=-2.0),
    "hires-pyramid-bicubic" :  noise_generator_factory(HiresPyramidNoiseGenerator, mode='bicubic'),
    "hires-pyramid-bilinear":  noise_generator_factory(HiresPyramidNoiseGenerator, mode='bilinear'),
    "hires-pyramid-nearest" :  noise_generator_factory(HiresPyramidNoiseGenerator, mode='nearest'),
    "pyramid-bicubic"       :  noise_generator_factory(PyramidNoiseGenerator,      mode='bicubic'),
    "pyramid-bilinear"      :  noise_generator_factory(PyramidNoiseGenerator,      mode='bilinear'),
    "pyramid-nearest"       :  noise_generator_factory(PyramidNoiseGenerator,      mode='nearest'),
    "pyramid-interpolated"  :  InterpolatedPyramidNoiseGenerator,
    "pyramid-cascade_B"     :  CascadeBPyramidNoiseGenerator,
}

# "simplex" is registered only in the full registry, and only when the
# OPENSIMPLEX_ENABLE flag (set elsewhere in the file) is truthy.
if OPENSIMPLEX_ENABLE:
    NOISE_GENERATOR_CLASSES.update({
        "simplex": SimplexNoiseGenerator,
    })

# Name tuples derived from the registries, in registry insertion order.
NOISE_GENERATOR_NAMES = tuple(NOISE_GENERATOR_CLASSES.keys())
NOISE_GENERATOR_NAMES_SIMPLE = tuple(NOISE_GENERATOR_CLASSES_SIMPLE.keys())


@precision_tool.cast_tensor
def prepare_noise(latent_image, seed, noise_type, noise_inds=None, alpha=1.0, k=1.0):
    """Create noise for ``latent_image`` using a registered noise generator.

    Args:
        latent_image: tensor passed to the generator as ``x``; its size,
            dtype, layout and device are reused for per-index noise.
        seed: seed handed to the generator constructor.
        noise_type: key into ``NOISE_GENERATOR_CLASSES``. An unknown key makes
            ``.get()`` return None and the call below raise ``TypeError``.
        noise_inds: optional sequence of batch indices; when given, one noise
            sample is produced per entry (see below).
        alpha, k: assigned onto the generator only when ``noise_type`` is
            ``"fractal"``.

    Returns:
        A single generator call result when ``noise_inds`` is None, otherwise
        the selected per-index noises concatenated along dimension 0.
    """
    # adapted from comfy/sample.py: https://github.com/comfyanonymous/ComfyUI
    # (original note: "optional arg skip can be used to skip and discard x
    # number of noise generations for a given seed" -- NOTE(review): there is
    # no `skip` parameter in this signature.)
    noise_func = NOISE_GENERATOR_CLASSES.get(noise_type)(x=latent_image, seed=seed, sigma_min=0.0291675, sigma_max=14.614642)

    if noise_type == "fractal":
        noise_func.alpha = alpha
        noise_func.k = k

    # from here until return is very similar to comfy/sample.py
    if noise_inds is None:
        return noise_func(sigma=14.614642, sigma_next=0.0291675)

    # np.unique returns the sorted unique indices plus, for every original
    # entry, its position in that sorted list.
    unique_inds, inverse = np.unique(noise_inds, return_inverse=True)
    noises = []
    # A sample is generated for every index up to the largest requested one,
    # even for indices that are not kept, so the generator is advanced the
    # same number of times regardless of which indices were asked for.
    for i in range(unique_inds[-1]+1):
        noise = noise_func(size = [1] + list(latent_image.size())[1:], dtype=latent_image.dtype, layout=latent_image.layout, device=latent_image.device)
        if i in unique_inds:
            noises.append(noise)
    # Re-expand to the caller's original order (duplicates share a sample).
    noises = [noises[i] for i in inverse]
    noises = torch.cat(noises, axis=0)
    return noises
def get_alpha_ratio_from_sigma_up(sigma_up, sigma_next, eta, sigma_max=1.0):
    """Derive the latent scale and step-down level from a noise amount.

    If ``sigma_up`` is not strictly below a positive ``sigma_next`` it is
    clamped to "hard" noise: ``sigma_next * eta`` (or ``sigma_next * 0.9999``
    when ``eta >= 1``), and a message is printed.

    Returns:
        ``(alpha_ratio, sigma_up, sigma_down)`` -- note the order.
    """
    if sigma_up >= sigma_next and sigma_next > 0:
        print("Maximum VPSDE noise level exceeded: falling back to hard noise mode.")
        # The theoretical maximum (sigma_up == sigma_next, sigma_down == 0)
        # breaks exponential-integrator step size calculations, so stay just
        # below it; this also keeps the square root below non-negative.
        hard_scale = 0.9999 if eta >= 1 else eta
        sigma_up = sigma_next * hard_scale

    residual = torch.sqrt(sigma_next**2 - sigma_up**2)
    alpha_ratio = (sigma_max - sigma_next) + residual
    return alpha_ratio, sigma_up, residual / alpha_ratio


def get_alpha_ratio_from_sigma_down(sigma_down, sigma_next, eta, sigma_max=1.0):
    """Derive the latent scale and noise amount from a step-down level.

    Falls back to :func:`get_alpha_ratio_from_sigma_up` (which clamps) when
    the implied ``sigma_up`` reaches ``sigma_next``.

    Returns:
        ``(alpha_ratio, sigma_up, sigma_down)`` -- note the order.
    """
    alpha_ratio = (1 - sigma_next) / (1 - sigma_down)
    sigma_up = (sigma_next ** 2 - sigma_down ** 2 * alpha_ratio ** 2) ** 0.5

    # "clamp" noise level to max if max exceeded
    if sigma_up >= sigma_next:
        return get_alpha_ratio_from_sigma_up(sigma_up, sigma_next, eta, sigma_max)

    return alpha_ratio, sigma_up, sigma_down


def get_ancestral_step_RF_var(sigma, sigma_next, eta, sigma_max=1.0):
    """Variance-style step: ``sigma_up = eta * sqrt(|sigma - sigma_next|)``.

    All arithmetic is done in float64 to avoid precision issues and the
    results are cast back to the dtype of ``sigma``.

    Returns:
        ``(sigma_up, sigma_down, alpha_ratio)``.
    """
    out_dtype = sigma.dtype
    f64 = torch.float64

    sigma = sigma.to(f64)
    sigma_next = sigma_next.to(f64)

    # Tiny offset keeps the step strictly positive.
    step = (sigma - sigma_next).abs() + 1e-10
    sigma_up = torch.sqrt(step).to(f64) * eta

    residual_sq = (sigma_next**2 - sigma_up**2).to(f64)
    signal = (1 - sigma_next).to(f64)
    sigma_down = torch.sqrt(residual_sq) / (signal + torch.sqrt(residual_sq).to(f64))

    alpha_ratio = (1 - sigma_next).to(f64) / (1 - sigma_down).to(f64)

    return sigma_up.to(out_dtype), sigma_down.to(out_dtype), alpha_ratio.to(out_dtype)


def get_ancestral_step_RF_lorentzian(sigma, sigma_next, eta, sigma_max=1.0):
    """Lorentzian-shaped step: ``sigma_up = eta * sqrt(1 - 1 / (sigma^2 + 1))``.

    Returns:
        ``(sigma_up, sigma_down, alpha_ratio)`` cast to the dtype of ``sigma``.
    """
    out_dtype = sigma.dtype
    lorentz = 1 / ((sigma.to(torch.float64))**2 + 1)
    raw_up = eta * (1 - lorentz) ** 0.5
    alpha_ratio, sigma_up, sigma_down = get_alpha_ratio_from_sigma_up(raw_up, sigma_next, eta, sigma_max)
    return sigma_up.to(out_dtype), sigma_down.to(out_dtype), alpha_ratio.to(out_dtype)
def get_ancestral_step_EPS(sigma, sigma_next, eta=1.):
    """Ancestral-sampling step sizes.

    Computes the amount of noise to add (``sigma_up``, capped at
    ``sigma_next``) and the level to step down to (``sigma_down``). When
    ``eta`` or ``sigma_next`` is zero no noise is added.

    Returns:
        ``(sigma_up, sigma_down, alpha_ratio)``; ``alpha_ratio`` is all ones.
    """
    unit_ratio = torch.full_like(sigma, 1.0)

    if eta and sigma_next:
        uncapped = eta * (sigma_next ** 2 * (sigma ** 2 - sigma_next ** 2) / sigma ** 2) ** 0.5
        sigma_up = min(sigma_next, uncapped)
        sigma_down = (sigma_next ** 2 - sigma_up ** 2) ** 0.5
        return sigma_up, sigma_down, unit_ratio

    return torch.full_like(sigma, 0.0), sigma_next, unit_ratio


def get_ancestral_step_RF_sinusoidal(sigma_next, eta, sigma_max=1.0):
    """Rectified-flow step with ``sigma_up = eta * sigma_next * sin^2(pi * sigma_next)``.

    Returns:
        ``(sigma_up, sigma_down, alpha_ratio)``.
    """
    envelope = torch.sin(torch.pi * sigma_next) ** 2
    requested_up = eta * sigma_next * envelope
    alpha_ratio, sigma_up, sigma_down = get_alpha_ratio_from_sigma_up(requested_up, sigma_next, eta, sigma_max)
    return sigma_up, sigma_down, alpha_ratio


def get_ancestral_step_RF_softer(sigma, sigma_next, eta, sigma_max=1.0):
    """Rectified-flow step whose ``sigma_down`` follows the EPS ancestral math.

    Returns:
        ``(sigma_up, sigma_down, alpha_ratio)``.
    """
    # math adapted from get_ancestral_step_EPS to work with RF
    shrink = torch.sqrt(1 - (eta**2 * (sigma**2 - sigma_next**2)) / sigma**2)
    target_down = sigma_next * shrink
    alpha_ratio, sigma_up, sigma_down = get_alpha_ratio_from_sigma_down(target_down, sigma_next, eta, sigma_max)
    return sigma_up, sigma_down, alpha_ratio


def get_ancestral_step_RF_soft(sigma, sigma_next, eta, sigma_max=1.0):
    """Rectified-flow step with a linearly blended step-down ratio.

    ``sigma_down`` is ``sigma_next`` scaled by a blend between 1 and
    ``sigma_next / sigma`` weighted by ``eta``; the noise amount and the
    latent mixing ratio (``alpha_ratio``) are derived from it.

    Returns:
        ``(sigma_up, sigma_down, alpha_ratio)``.
    """
    blend = (1 - eta) + eta * ((sigma_next) / sigma)
    target_down = blend * sigma_next
    alpha_ratio, sigma_up, sigma_down = get_alpha_ratio_from_sigma_down(target_down, sigma_next, eta, sigma_max)
    return sigma_up, sigma_down, alpha_ratio


def get_ancestral_step_RF_soft_linear(sigma, sigma_next, eta, sigma_max=1.0):
    """Rectified-flow step with ``sigma_down = sigma_next + eta * (sigma_next - sigma)``.

    If that target is negative, no noise is added and ``sigma_next`` is
    returned unchanged as the step-down level.

    Returns:
        ``(sigma_up, sigma_down, alpha_ratio)``.
    """
    target_down = sigma_next + eta * (sigma_next - sigma)

    if not (target_down < 0):
        alpha_ratio, sigma_up, sigma_down = get_alpha_ratio_from_sigma_down(target_down, sigma_next, eta, sigma_max)
        return sigma_up, sigma_down, alpha_ratio

    return torch.full_like(sigma, 0.), sigma_next, torch.full_like(sigma, 1.)
def get_ancestral_step_RF_exp(sigma, sigma_next, eta, sigma_max=1.0):
    """Rectified-flow step with exponentially shaped noise.

    With ``h = -log(sigma_next / sigma)`` the requested noise amount is
    ``sigma_next * sqrt(1 - exp(-2 * eta * h))``.

    Returns:
        ``(sigma_up, sigma_down, alpha_ratio)``.
    """
    # TODO: fix black image issue with linear RK
    log_step = -torch.log(sigma_next/sigma)
    requested_up = sigma_next * (1 - (-2*eta*log_step).exp())**0.5
    alpha_ratio, sigma_up, sigma_down = get_alpha_ratio_from_sigma_up(requested_up, sigma_next, eta, sigma_max)
    return sigma_up, sigma_down, alpha_ratio


def get_ancestral_step_RF_sqrd(sigma, sigma_next, eta, sigma_max=1.0):
    """Rectified-flow step whose noise lifts ``sigma`` to ``sigma * (1 + eta)``.

    Returns:
        ``(sigma_up, sigma_down, alpha_ratio)``.
    """
    lifted = sigma * (1 + eta)
    requested_up = (lifted ** 2 - sigma ** 2) ** .5
    alpha_ratio, sigma_up, sigma_down = get_alpha_ratio_from_sigma_up(requested_up, sigma_next, eta, sigma_max)
    return sigma_up, sigma_down, alpha_ratio


def get_ancestral_step_RF_hard(sigma_next, eta, sigma_max=1.0):
    """Rectified-flow "hard" step: ``sigma_up = sigma_next * eta``.

    Returns:
        ``(sigma_up, sigma_down, alpha_ratio)``.
    """
    requested_up = sigma_next * eta
    alpha_ratio, sigma_up, sigma_down = get_alpha_ratio_from_sigma_up(requested_up, sigma_next, eta, sigma_max)
    return sigma_up, sigma_down, alpha_ratio


def get_vpsde_step_RF(sigma, sigma_next, eta, sigma_max=1.0):
    """Closed-form VP-SDE style step (no clamping is applied).

    Returns:
        ``(sigma_up, sigma_down, alpha_ratio)``.
    """
    dt = sigma - sigma_next
    noise_amount = eta * sigma * dt**0.5
    latent_scale = 1 - dt * (eta**2/4) * (1 + sigma)
    step_down = sigma_next - (eta/4)*sigma*(1-sigma)*dt
    return noise_amount, step_down, latent_scale


def get_fuckery_step_RF(sigma, sigma_next, eta, sigma_max=1.0):
    """Experimental step: ``sigma_down = (1 - eta) * sigma_next``, unit latent scale.

    Returns:
        ``(sigma_up, sigma_down, alpha_ratio)`` with ``alpha_ratio`` all ones.
    """
    step_down = (1-eta) * sigma_next
    noise_amount = torch.sqrt(sigma_next**2 - step_down**2)
    return noise_amount, step_down, torch.ones_like(sigma_next)
def get_res4lyf_step_with_model(model, sigma, sigma_next, eta=0.0, noise_mode="hard"):
    """Pick the noise/step-down schedule for one sampler step.

    The model's ``model_sampling`` object decides the family: if it is a
    ``comfy.model_sampling.CONST`` instance one of the rectified-flow helpers
    is used (selected by ``noise_mode``); otherwise the values are computed
    inline with ``alpha_ratio`` fixed to ones.

    Args:
        model: wrapper exposing ``inner_model.inner_model.model_sampling`` or
            ``model.model_sampling``.
        sigma, sigma_next: current and next noise levels (tensors).
        eta: noise strength.
        noise_mode: name of the schedule; unrecognised names fall through to
            the last ``else`` of the active family.

    Returns:
        ``(su, sigma, sd, alpha_ratio)``. ``sigma`` is the input value except
        in the non-CONST "hard_sq" mode, where it is replaced by
        ``sigma * (1 + eta)``. NaNs in ``su``/``sd``/``alpha_ratio`` are
        replaced by 0, ``sigma_next`` and 1 respectively.
    """
    # Defaults: no added noise, step straight to sigma_next, unit scaling.
    su, sd, alpha_ratio = torch.zeros_like(sigma), sigma_next.clone(), torch.ones_like(sigma)

    # NOTE(review): if neither attribute path exists, model_sampling is never
    # bound and the isinstance() check below raises UnboundLocalError.
    if has_nested_attr(model, "inner_model.inner_model.model_sampling"):
        model_sampling = model.inner_model.inner_model.model_sampling
    elif has_nested_attr(model, "model.model_sampling"):
        model_sampling = model.model.model_sampling

    if isinstance(model_sampling, comfy.model_sampling.CONST):
        # Positive root of x^2 + x = sigma; "hard_var" is only used while
        # sigma_next is above this threshold.
        # Equivalent forms noted in the original:
        #   sigma_var = (torch.sqrt(1 + 4 * sigma) - 1) / 2
        #   sigma_var = ((4*sigma+1)**0.5 - 1) / 2
        sigma_var = (-1 + torch.sqrt(1 + 4 * sigma)) / 2
        if noise_mode == "hard_var" and eta > 0.0 and sigma_next > sigma_var:
            su, sd, alpha_ratio = get_ancestral_step_RF_var(sigma, sigma_next, eta)
        else:
            if noise_mode == "soft":
                su, sd, alpha_ratio = get_ancestral_step_RF_soft(sigma, sigma_next, eta)
            elif noise_mode == "softer":
                su, sd, alpha_ratio = get_ancestral_step_RF_softer(sigma, sigma_next, eta)
            elif noise_mode == "hard_sq":
                su, sd, alpha_ratio = get_ancestral_step_RF_sqrd(sigma, sigma_next, eta)
            elif noise_mode == "sinusoidal":
                su, sd, alpha_ratio = get_ancestral_step_RF_sinusoidal(sigma_next, eta)
            elif noise_mode == "exp":
                su, sd, alpha_ratio = get_ancestral_step_RF_exp(sigma, sigma_next, eta)
            elif noise_mode == "soft-linear":
                su, sd, alpha_ratio = get_ancestral_step_RF_soft_linear(sigma, sigma_next, eta)
            elif noise_mode == "lorentzian":
                su, sd, alpha_ratio = get_ancestral_step_RF_lorentzian(sigma, sigma_next, eta)
            elif noise_mode == "vpsde":
                su, sd, alpha_ratio = get_vpsde_step_RF(sigma, sigma_next, eta)
            elif noise_mode == "fuckery":
                su, sd, alpha_ratio = get_fuckery_step_RF(sigma, sigma_next, eta)
            else: #elif noise_mode == "hard": #fall back to hard noise from hard_var
                su, sd, alpha_ratio = get_ancestral_step_RF_hard(sigma_next, eta)
    else:
        alpha_ratio = torch.full_like(sigma, 1.0)
        if noise_mode == "hard_sq":
            sd = sigma_next
            sigma_hat = sigma * (1 + eta)
            su = (sigma_hat ** 2 - sigma ** 2) ** .5
            # The returned sigma is the inflated one in this mode.
            sigma = sigma_hat
        elif noise_mode == "hard":
            su = eta * sigma_next
            sd = (sigma_next ** 2 - su ** 2) ** 0.5
        elif noise_mode == "exp":
            h = -torch.log(sigma_next/sigma)
            su = sigma_next * (1 - (-2*eta*h).exp())**0.5
            sd = (sigma_next ** 2 - su ** 2) ** 0.5
        else: #if noise_mode == "soft" or noise_mode == "softer":
            # NOTE(review): only su is computed here; sd keeps its default
            # (sigma_next) rather than sqrt(sigma_next^2 - su^2) as in
            # get_ancestral_step_EPS -- confirm this is intended.
            su = min(sigma_next, eta * (sigma_next ** 2 * (sigma ** 2 - sigma_next ** 2) / sigma ** 2) ** 0.5)
            #su, sd, alpha_ratio = get_ancestral_step_EPS(sigma, sigma_next, eta)

    # Replace NaNs (e.g. from square roots of negative values) with the
    # "no noise" defaults.
    su = torch.nan_to_num(su, 0.0)
    sd = torch.nan_to_num(sd, float(sigma_next))
    alpha_ratio = torch.nan_to_num(alpha_ratio, 1.0)

    return su, sigma, sd, alpha_ratio


# Noise mode names exported by this module; "hard_down" and "fuckery" are
# commented out of the list (get_res4lyf_step_with_model still handles
# "fuckery" for CONST models).
NOISE_MODE_NAMES = ["none",
                    "hard_sq",
                    "hard",
                    #"hard_down",
                    "lorentzian",
                    "soft",
                    "soft-linear",
                    "softer",
                    "eps",
                    "sinusoidal",
                    "exp",
                    "vpsde",
                    #"fuckery",
                    "hard_var",
                    ]


def get_res4lyf_half_step3(sigma, sigma_next, c2=0.5, c3=1.0, t_fn=None, sigma_fn=None, t_fn_formula="", sigma_fn_formula="", ):
    """Re-express two substep fractions in the coordinates of ``t_fn``.

    The substeps are first placed at fractions ``c2`` and ``c3`` of the
    interval in an alternative time coordinate (given by the formula strings,
    or by ``t_fn``/``sigma_fn`` when a formula is empty), mapped back to
    sigma, and then converted to fractions of the step measured with
    ``t_fn``.

    Args:
        sigma, sigma_next: endpoints of the step.
        c2, c3: substep fractions in the alternative coordinate.
        t_fn: callable sigma -> t; always required, since it is used for the
            final conversion even when ``t_fn_formula`` is given.
        sigma_fn: callable t -> sigma; used when ``sigma_fn_formula`` is empty.
        t_fn_formula: Python expression in ``sigma`` (may use ``torch``).
        sigma_fn_formula: Python expression in ``t`` (may use ``torch``).

    Returns:
        ``(c2, c3)`` as fractions of ``t_fn(sigma_next) - t_fn(sigma)``.
    """
    # SECURITY NOTE(review): the formula strings are passed to eval(); they
    # must never come from an untrusted source.
    t_fn_x     = eval(f"lambda sigma: {t_fn_formula}", {"torch": torch}) if t_fn_formula else t_fn
    sigma_fn_x = eval(f"lambda t: {sigma_fn_formula}", {"torch": torch}) if sigma_fn_formula else sigma_fn

    t_x, t_next_x = t_fn_x(sigma), t_fn_x(sigma_next)
    h_x = t_next_x - t_x

    # Substep positions in the alternative coordinate, mapped back to sigma.
    s2 = t_x + h_x * c2
    s3 = t_x + h_x * c3
    sigma_2 = sigma_fn_x(s2)
    sigma_3 = sigma_fn_x(s3)

    # Fractions of the full step as measured by t_fn.
    h = t_fn(sigma_next) - t_fn(sigma)
    c2 = (t_fn(sigma_2) - t_fn(sigma)) / h
    c3 = (t_fn(sigma_3) - t_fn(sigma)) / h

    return c2, c3
# Remainder solution
def _phi(j, neg_h):
    """Evaluate phi_j(neg_h) as (exp(z) - sum_{k<j} z^k / k!) / z^j for tensor z."""
    taylor_head = torch.zeros_like(neg_h)
    for order in range(j):
        taylor_head += (neg_h)**order / math.factorial(order)

    return ((neg_h).exp() - taylor_head) / (neg_h)**j


def calculate_gamma(c2, c3):
    """Return (3*c3^3 - 2*c3) / (c2 * (2 - 3*c2))."""
    numerator = 3*(c3**3) - 2*c3
    denominator = c2*(2 - 3*c2)
    return numerator / denominator


# Exact analytic solution originally calculated by Clybius. https://github.com/Clybius/ComfyUI-Extra-Samplers/tree/main
def _gamma(n: int,) -> int:
    """
    Gamma function at a positive integer: Γ(n) = (n-1)!
    https://en.wikipedia.org/wiki/Gamma_function
    """
    return math.factorial(n-1)


def _incomplete_gamma(s: int, x: float, gamma_s: Optional[int] = None) -> float:
    """
    Upper incomplete gamma function for a positive integer s:
    Γ(s, x) = (s-1)! * e^(-x) * ∑{k=0..s-1}(x^k/k!)
    https://en.wikipedia.org/wiki/Incomplete_gamma_function#Special_values

    ``gamma_s`` may carry a precomputed Γ(s); it is computed when omitted.
    """
    if gamma_s is None:
        gamma_s = _gamma(s)

    # k runs over 0..s-1 inclusive
    series = sum(x**k / math.factorial(k) for k in range(s))
    return series * math.exp(-x) * gamma_s


def phi(j: int, neg_h: float, ):
    """
    Analytic phi_j for j > 0 (Lemma 1, https://arxiv.org/abs/2308.02157):

        ϕj(-h) = e^(-h) * (-h)^(-j) * (1 - Γ(j, -h) / Γ(j))

    For j in {1, 2, 3} dedicated closed forms with fewer steps exist; this
    version works for any positive integer j.
    """
    assert j > 0
    gamma_j = _gamma(j)
    upper_gamma = _incomplete_gamma(j, neg_h, gamma_s=gamma_j)
    return math.exp(neg_h) * neg_h**-j * (1-upper_gamma/gamma_j)


class Phi:
    """Memoised phi-function evaluator for one step size and node list.

    ``Phi(j)`` evaluates at ``-h``; ``Phi(j, i)`` evaluates at ``-h * c[i - 1]``
    and returns 0 when that node is 0. ``analytic_solution`` selects
    :func:`phi` instead of the remainder-based :func:`_phi`.
    """

    def __init__(self, h, c, analytic_solution=False):
        self.h = h
        self.c = c
        self.cache = {}
        # remainder method unless the analytic one is requested
        self.phi_f = phi if analytic_solution else _phi

    def __call__(self, j, i=-1):
        key = (j, i)
        if key in self.cache:
            return self.cache[key]

        # A negative stage index means "full step" (node = 1).
        node = 1 if i < 0 else self.c[i - 1]
        if node == 0:
            self.cache[key] = 0
            return 0

        if j == 0:
            value = torch.exp(-self.h * node)
        else:
            value = self.phi_f(j, -self.h * node)

        self.cache[key] = value
        return value
from itertools import permutations #, combinations import random RK_SAMPLER_NAMES = ["none", "res_2m", "res_3m", "res_2s", "res_3s", "res_3s_alt", "res_3s_cox_matthews", "res_3s_lie", "res_3s_strehmel_weiner", "res_4s_krogstad", "res_4s_strehmel_weiner", "res_4s_cox_matthews", "res_4s_munthe-kaas", "res_5s", "res_6s", "res_8s", "res_10s", "res_15s", "res_16s", "etdrk2_2s", "etdrk3_a_3s", "etdrk3_b_3s", #"etdrk4_4s" "deis_2m", "deis_3m", "deis_4m", "ralston_2s", "ralston_3s", "ralston_4s", "dpmpp_2m", "dpmpp_3m", "dpmpp_2s", "dpmpp_sde_2s", "dpmpp_3s", "lawson4_4s", "genlawson41_4s", "modgenlawson41_4s", "midpoint_2s", "heun_2s", "heun_3s", "houwen-wray_3s", "kutta_3s", "ssprk3_3s", "rk38_4s", "rk4_4s", "rk5_7s", "rk6_7s", "bogacki-shampine_4s", "bogacki-shampine_7s", "dormand-prince_6s", "dormand-prince_13s", "tsi_7s", #"verner_robust_16s", "ddim", "buehler", ] IRK_SAMPLER_NAMES = ["none", "explicit_diagonal", "explicit_full", "irk_exp_diag_2s", "gauss-legendre_2s", "gauss-legendre_3s", "gauss-legendre_4s", "gauss-legendre_5s", "radau_ia_2s", "radau_ia_3s", "radau_iia_2s", "radau_iia_3s", "lobatto_iiia_2s", "lobatto_iiia_3s", "lobatto_iiib_2s", "lobatto_iiib_3s", "lobatto_iiic_2s", "lobatto_iiic_3s", "lobatto_iiic_star_2s", "lobatto_iiic_star_3s", "lobatto_iiid_2s", "lobatto_iiid_3s", "kraaijevanger_spijker_2s", "qin_zhang_2s", "pareschi_russo_2s", "pareschi_russo_alt_2s", "crouzeix_2s", "crouzeix_3s", ] alpha_crouzeix = (2/(3**0.5)) * math.cos(math.pi / 18) rk_coeff = { "gauss-legendre_5s": ( [ [4563950663 / 32115191526, (310937500000000 / 2597974476091533 + 45156250000 * (739**0.5) / 8747388808389), (310937500000000 / 2597974476091533 - 45156250000 * (739**0.5) / 8747388808389), (5236016175 / 88357462711 + 709703235 * (739**0.5) / 353429850844), (5236016175 / 88357462711 - 709703235 * (739**0.5) / 353429850844)], [(4563950663 / 32115191526 - 38339103 * (739**0.5) / 6250000000), (310937500000000 / 2597974476091533 + 9557056475401 * (739**0.5) / 
3498955523355600000), (310937500000000 / 2597974476091533 - 14074198220719489 * (739**0.5) / 3498955523355600000), (5236016175 / 88357462711 + 5601362553163918341 * (739**0.5) / 2208936567775000000000), (5236016175 / 88357462711 - 5040458465159165409 * (739**0.5) / 2208936567775000000000)], [(4563950663 / 32115191526 + 38339103 * (739**0.5) / 6250000000), (310937500000000 / 2597974476091533 + 14074198220719489 * (739**0.5) / 3498955523355600000), (310937500000000 / 2597974476091533 - 9557056475401 * (739**0.5) / 3498955523355600000), (5236016175 / 88357462711 + 5040458465159165409 * (739**0.5) / 2208936567775000000000), (5236016175 / 88357462711 - 5601362553163918341 * (739**0.5) / 2208936567775000000000)], [(4563950663 / 32115191526 - 38209 * (739**0.5) / 7938810), (310937500000000 / 2597974476091533 - 359369071093750 * (739**0.5) / 70145310854471391), (310937500000000 / 2597974476091533 - 323282178906250 * (739**0.5) / 70145310854471391), (5236016175 / 88357462711 - 470139 * (739**0.5) / 1413719403376), (5236016175 / 88357462711 - 44986764863 * (739**0.5) / 21205791050640)], [(4563950663 / 32115191526 + 38209 * (739**0.5) / 7938810), (310937500000000 / 2597974476091533 + 359369071093750 * (739**0.5) / 70145310854471391), (310937500000000 / 2597974476091533 + 323282178906250 * (739**0.5) / 70145310854471391), (5236016175 / 88357462711 + 44986764863 * (739**0.5) / 21205791050640), (5236016175 / 88357462711 + 470139 * (739**0.5) / 1413719403376)], ], [ [4563950663 / 16057595763, 621875000000000 / 2597974476091533, 621875000000000 / 2597974476091533, 10472032350 / 88357462711, 10472032350 / 88357462711] ], [ 1 / 2, 1 / 2 - 99 * (739**0.5) / 10000, 1 / 2 + 99 * (739**0.5) / 10000, 1 / 2 - (739**0.5) / 60, 1 / 2 + (739**0.5) / 60 ] ), "gauss-legendre_4s": ( [ [1/4, 1/4 - 15**0.5 / 6, 1/4 + 15**0.5 / 6, 1/4], [1/4 + 15**0.5 / 6, 1/4, 1/4 - 15**0.5 / 6, 1/4], [1/4, 1/4 + 15**0.5 / 6, 1/4, 1/4 - 15**0.5 / 6], [1/4 - 15**0.5 / 6, 1/4, 1/4 + 15**0.5 / 6, 1/4], ], [ [1/8, 
3/8, 3/8, 1/8] ], [ 1/2 - 15**0.5 / 10, 1/2 + 15**0.5 / 10, 1/2 + 15**0.5 / 10, 1/2 - 15**0.5 / 10 ] ), "gauss-legendre_3s": ( [ [5/36, 2/9 - 15**0.5 / 15, 5/36 - 15**0.5 / 30], [5/36 + 15**0.5 / 24, 2/9, 5/36 - 15**0.5 / 24], [5/36 + 15**0.5 / 30, 2/9 + 15**0.5 / 15, 5/36], ], [ [5/18, 4/9, 5/18] ], [1/2 - 15**0.5 / 10, 1/2, 1/2 + 15**0.5 / 10] ), "gauss-legendre_2s": ( [ [1/4, 1/4 - 3**0.5 / 6], [1/4 + 3**0.5 / 6, 1/4], ], [ [1/2, 1/2], ], [1/2 - 3**0.5 / 6, 1/2 + 3**0.5 / 6] ), "radau_iia_4s": ( [ [], [], [], [], ], [ [1/4, 1/4, 1/4, 1/4], ], [(1/11)*(4-6**0.5), (1/11)*(4+6**0.5), 1/2, 1] ), "radau_iia_3s": ( [ [11/45 - 7*6**0.5 / 360, 37/225 - 169*6**0.5 / 1800, -2/225 + 6**0.5 / 75], [37/225 + 169*6**0.5 / 1800, 11/45 + 7*6**0.5 / 360, -2/225 - 6**0.5 / 75], [4/9 - 6**0.5 / 36, 4/9 + 6**0.5 / 36, 1/9], ], [ [4/9 - 6**0.5 / 36, 4/9 + 6**0.5 / 36, 1/9], ], [2/5 - 6**0.5 / 10, 2/5 + 6**0.5 / 10, 1.] ), "radau_iia_2s": ( [ [5/12, -1/12], [3/4, 1/4], ], [ [3/4, 1/4], ], [1/3, 1] ), "radau_ia_3s": ( [ [1/9, (-1-6**0.5)/18, (-1+6**0.5)/18], [1/9, 11/45 + 7*6**0.5/360, 11/45-43*6**0.5/360], [1/9, 11/45-43*6**0.5/360, 11/45 + 7*6**0.5/360], ], [ [1/9, 4/9 + 6**0.5/36, 4/9 - 6**0.5/36], ], [0, 3/5-6**0.5/10, 3/5+6**0.5/10] ), "radau_ia_2s": ( [ [1/4, -1/4], [1/4, 5/12], ], [ [1/4, 3/4], ], [0, 2/3] ), "lobatto_iiia_3s": ( [ [0, 0, 0], [5/24, 1/3, -1/24], [1/6, 2/3, 1/6], ], [ [1/6, 2/3, 1/6], ], [0, 1/2, 1] ), "lobatto_iiia_2s": ( [ [0, 0], [1/2, 1/2], ], [ [1/2, 1/2], ], [0, 1] ), "lobatto_iiib_3s": ( [ [1/6, -1/6, 0], [1/6, 1/3, 0], [1/6, 5/6, 0], ], [ [1/6, 2/3, 1/6], ], [0, 1/2, 1] ), "lobatto_iiib_2s": ( [ [1/2, 0], [1/2, 0], ], [ [1/2, 1/2], ], [0, 1] ), "lobatto_iiic_3s": ( [ [1/6, -1/3, 1/6], [1/6, 5/12, -1/12], [1/6, 2/3, 1/6], ], [ [1/6, 2/3, 1/6], ], [0, 1/2, 1] ), "lobatto_iiic_2s": ( [ [1/2, -1/2], [1/2, 1/2], ], [ [1/2, 1/2], ], [0, 1] ), "lobatto_iiic_star_3s": ( [ [0, 0, 0], [1/4, 1/4, 0], [0, 1, 0], ], [ [1/6, 2/3, 1/6], ], [0, 1/2, 1] ), 
"lobatto_iiic_star_2s": ( [ [0, 0], [1, 0], ], [ [1/2, 1/2], ], [0, 1] ), "lobatto_iiid_3s": ( [ [1/6, 0, -1/6], [1/12, 5/12, 0], [1/2, 1/3, 1/6], ], [ [1/6, 2/3, 1/6], ], [0, 1/2, 1] ), "lobatto_iiid_2s": ( [ [1/2, 1/2], [-1/2, 1/2], ], [ [1/2, 1/2], ], [0, 1] ), "kraaijevanger_spijker_2s": ( [ [1/2, 0], [-1/2, 2], ], [ [-1/2, 3/2], ], [1/2, 3/2] ), "qin_zhang_2s": ( [ [1/4, 0], [1/2, 1/4], ], [ [1/2, 1/2], ], [1/4, 3/4] ), "pareschi_russo_2s": ( [ [(1-2**0.5/2), 0], [1-2*(1-2**0.5/2), (1-2**0.5/2)], ], [ [1/2, 1/2], ], [(1-2**0.5/2), 1-(1-2**0.5/2)] ), "pareschi_russo_alt_2s": ( [ [(1-2**0.5/2), 0], [1-(1-2**0.5/2), (1-2**0.5/2)], ], [ [1-(1-2**0.5/2), (1-2**0.5/2)], ], [(1-2**0.5/2), 1] ), "crouzeix_3s": ( [ [(1+alpha_crouzeix)/2, 0, 0], [-alpha_crouzeix/2, (1+alpha_crouzeix)/2, 0], [1+alpha_crouzeix, -(1+2*alpha_crouzeix), (1+alpha_crouzeix)/2], ], [ [1/(6*alpha_crouzeix**2), 1-(1/(3*alpha_crouzeix**2)), 1/(6*alpha_crouzeix**2)], ], [(1+alpha_crouzeix)/2, 1/2, (1-alpha_crouzeix)/2], ), "crouzeix_2s": ( [ [1/2 + 3**0.5 / 6, 0], [-(3**0.5 / 3), 1/2 + 3**0.5 / 6] ], [ [1/2, 1/2], ], [1/2 + 3**0.5 / 6, 1/2 - 3**0.5 / 6], ), "verner_13s": ( #verner9. 
some values are missing, need to revise [ [], ], [ [], ], [ 0.03462, 0.09702435063878045, 0.14553652595817068, 0.561, 0.22900791159048503, 0.544992088409515, 0.645, 0.48375, 0.06757, 0.25, 0.6590650618730999, 0.8206, 0.9012, ] ), "verner_robust_16s": ( [ [], [0.04], [-0.01988527319182291, 0.11637263332969652], [0.0361827600517026, 0, 0.10854828015510781], [2.272114264290177, 0, -8.526886447976398, 6.830772183686221], [0.050943855353893744, 0, 0, 0.1755865049809071, 0.007022961270757467], [0.1424783668683285, 0, 0, -0.3541799434668684, 0.07595315450295101, 0.6765157656337123], [0.07111111111111111, 0, 0, 0, 0, 0.3279909287605898, 0.24089796012829906], [0.07125, 0, 0, 0, 0, 0.32688424515752457, 0.11561575484247544, -0.03375], [0.0482267732246581, 0, 0, 0, 0, 0.039485599804954, 0.10588511619346581, -0.021520063204743093, -0.10453742601833482], [-0.026091134357549235, 0, 0, 0, 0, 0.03333333333333333, -0.1652504006638105, 0.03434664118368617, 0.1595758283215209, 0.21408573218281934], [-0.03628423396255658, 0, 0, 0, 0, -1.0961675974272087, 0.1826035504321331, 0.07082254444170683, -0.02313647018482431, 0.2711204726320933, 1.3081337494229808], [-0.5074635056416975, 0, 0, 0, 0, -6.631342198657237, -0.2527480100908801, -0.49526123800360955, 0.2932525545253887, 1.440108693768281, 6.237934498647056, 0.7270192054526988], [0.6130118256955932, 0, 0, 0, 0, 9.088803891640463, -0.40737881562934486, 1.7907333894903747, 0.714927166761755, -1.4385808578417227, -8.26332931206474, -1.537570570808865, 0.34538328275648716], [-1.2116979103438739, 0, 0, 0, 0, -19.055818715595954, 1.263060675389875, -6.913916969178458, -0.6764622665094981, 3.367860445026608, 18.00675164312591, 6.83882892679428, -1.0315164519219504, 0.4129106232130623], [2.1573890074940536, 0, 0, 0, 0, 23.807122198095804, 0.8862779249216555, 13.139130397598764, -2.604415709287715, -5.193859949783872, -20.412340711541507, -12.300856252505723, 1.5215530950085394], ], [ 0.014588852784055396, 0, 0, 0, 0, 0, 0, 
0.0020241978878893325, 0.21780470845697167, 0.12748953408543898, 0.2244617745463132, 0.1787254491259903, 0.07594344758096556, 0.12948458791975614, 0.029477447612619417, 0 ], [ 0, 0.04, 0.09648736013787361, 0.1447310402068104, 0.576, 0.2272326564618766, 0.5407673435381234, 0.64, 0.48, 0.06754, 0.25, 0.6770920153543243, 0.8115, 0.906, 1, 1 ], ), "dormand-prince_13s": ( [ [], [1/18], [1/48, 1/16], [1/32, 0, 3/32], [5/16, 0, -75/64, 75/64], [3/80, 0, 0, 3/16, 3/20], [29443841/614563906, 0, 0, 77736538/692538347, -28693883/1125000000, 23124283/1800000000], [16016141/946692911, 0, 0, 61564180/158732637, 22789713/633445777, 545815736/2771057229, -180193667/1043307555], [39632708/573591083, 0, 0, -433636366/683701615, -421739975/2616292301, 100302831/723423059, 790204164/839813087, 800635310/3783071287], [246121993/1340847787, 0, 0, -37695042795/15268766246, -309121744/1061227803, -12992083/490766935, 6005943493/2108947869, 393006217/1396673457, 123872331/1001029789], [-1028468189/846180014, 0, 0, 8478235783/508512852, 1311729495/1432422823, -10304129995/1701304382, -48777925059/3047939560, 15336726248/1032824649, -45442868181/3398467696, 3065993473/597172653], [185892177/718116043, 0, 0, -3185094517/667107341, -477755414/1098053517, -703635378/230739211, 5731566787/1027545527, 5232866602/850066563, -4093664535/808688257, 3962137247/1805957418, 65686358/487910083], [403863854/491063109, 0, 0, -5068492393/434740067, -411421997/543043805, 652783627/914296604, 11173962825/925320556, -13158990841/6184727034, 3936647629/1978049680, -160528059/685178525, 248638103/1413531060], ], [ [14005451/335480064, 0, 0, 0, 0, -59238493/1068277825, 181606767/758867731, 561292985/797845732, -1041891430/1371343529, 760417239/1151165299, 118820643/751138087, -528747749/2220607170, 1/4], ], [0, 1/18, 1/12, 1/8, 5/16, 3/8, 59/400, 93/200, 5490023248 / 9719169821, 13/20, 1201146811 / 1299019798, 1, 1], ), "dormand-prince_6s": ( [ [], [1/5], [3/40, 9/40], [44/45, -56/15, 32/9], [19372/6561, 
-25360/2187, 64448/6561, -212/729], [9017/3168, -355/33, 46732/5247, 49/176, -5103/18656], ], [ [35/384, 0, 500/1113, 125/192, -2187/6784, 11/84], ], [0, 1/5, 3/10, 4/5, 8/9, 1], ), "bogacki-shampine_7s": ( #5th order [ [], [1/6], [2/27, 4/27], [183/1372, -162/343, 1053/1372], [68/297, -4/11, 42/143, 1960/3861], [597/22528, 81/352, 63099/585728, 58653/366080, 4617/20480], [174197/959244, -30942/79937, 8152137/19744439, 666106/1039181, -29421/29068, 482048/414219], ], [ [587/8064, 0, 4440339/15491840, 24353/124800, 387/44800, 2152/5985, 7267/94080], ], [0, 1/6, 2/9, 3/7, 2/3, 3/4, 1] ), "bogacki-shampine_4s": ( #5th order [ [], [1/2], [0, 3/4], [2/9, 1/3, 4/9], ], [ [2/9, 1/3, 4/9, 0], ], [0, 1/2, 3/4, 1] ), "tsi_7s": ( #5th order [ [], [0.161], [-0.008480655492356989, 0.335480655492357], [2.8971530571054935, -6.359448489975075, 4.3622954328695815], [5.325864828439257, -11.748883564062828, 7.4955393428898365, -0.09249506636175525], [5.86145544294642, -12.92096931784711, 8.159367898576159, -0.071584973281401, -0.02826905039406838], [0.09646076681806523, 0.01, 0.4798896504144996, 1.379008574103742, -3.290069515436081, 2.324710524099774], ], [ [0.09646076681806523, 0.01, 0.4798896504144996, 1.379008574103742, -3.290069515436081, 2.324710524099774, 0.0], ], [0.0, 0.161, 0.327, 0.9, 0.9800255409045097, 1.0, 1.0], ), "rk6_7s": ( #5th order [ [], [1/3], [0, 2/3], [1/12, 1/3, -1/12], [-1/16, 9/8, -3/16, -3/8], [0, 9/8, -3/8, -3/4, 1/2], [9/44, -9/11, 63/44, 18/11, 0, -16/11], ], [ [11/120, 0, 27/40, 27/40, -4/15, -4/15, 11/120], ], [0, 1/3, 2/3, 1/3, 1/2, 1/2, 1], ), "rk5_7s": ( #5th order [ [], [1/5], [3/40, 9/40], [44/45, -56/15, 32/9], [19372/6561, -25360/2187, 64448/6561, 212/729], #flipped 212 sign [-9017/3168, -355/33, 46732/5247, 49/176, -5103/18656], [35/384, 0, 500/1113, 125/192, -2187/6784, 11/84], ], [ [5179/57600, 0, 7571/16695, 393/640, -92097/339200, 187/2100, 1/40], ], [0, 1/5, 3/10, 4/5, 8/9, 1, 1], ), "ssprk_4s": ( 
#https://link.springer.com/article/10.1007/s41980-022-00731-x [ [], [1/2], [1/2, 1/2], [1/6, 1/6, 1/6], ], [ [1/6, 1/6, 1/6, 1/2], ], [0, 1/2, 1, 1/2], ), "rk4_4s": ( [ [], [1/2], [0, 1/2], [0, 0, 1], ], [ [1/6, 1/3, 1/3, 1/6], ], [0, 1/2, 1/2, 1], ), "rk38_4s": ( [ [], [1/3], [-1/3, 1], [1, -1, 1], ], [ [1/8, 3/8, 3/8, 1/8], ], [0, 1/3, 2/3, 1], ), "ralston_4s": ( [ [], [2/5], [(-2889+1428 * 5**0.5)/1024, (3785-1620 * 5**0.5)/1024], [(-3365+2094 * 5**0.5)/6040, (-975-3046 * 5**0.5)/2552, (467040+203968*5**0.5)/240845], ], [ [(263+24*5**0.5)/1812, (125-1000*5**0.5)/3828, (3426304+1661952*5**0.5)/5924787, (30-4*5**0.5)/123], ], [0, 2/5, (14-3 * 5**0.5)/16, 1], ), "heun_3s": ( [ [], [1/3], [0, 2/3], ], [ [1/4, 0, 3/4], ], [0, 1/3, 2/3], ), "kutta_3s": ( [ [], [1/2], [-1, 2], ], [ [1/6, 2/3, 1/6], ], [0, 1/2, 1], ), "ralston_3s": ( [ [], [1/2], [0, 3/4], ], [ [2/9, 1/3, 4/9], ], [0, 1/2, 3/4], ), "houwen-wray_3s": ( [ [], [8/15], [1/4, 5/12], ], [ [1/4, 0, 3/4], ], [0, 8/15, 2/3], ), "ssprk3_3s": ( [ [], [1], [1/4, 1/4], ], [ [1/6, 1/6, 2/3], ], [0, 1, 1/2], ), "midpoint_2s": ( [ [], [1/2], ], [ [0, 1], ], [0, 1/2], ), "heun_2s": ( [ [], [1], ], [ [1/2, 1/2], ], [0, 1], ), "ralston_2s": ( [ [], [2/3], ], [ [1/4, 3/4], ], [0, 2/3], ), "buehler": ( [ [], ], [ [1], ], [0], ), } def get_rk_methods(rk_type, h, c1=0.0, c2=0.5, c3=1.0, h_prev=None, h_prev2=None, step=0, sigmas=None, sigma=None, sigma_next=None, sigma_down=None, extra_options=None): FSAL = False multistep_stages = 0 if rk_type.startswith(("res", "dpmpp", "ddim" )): h_no_eta = -torch.log(sigma_next/sigma) h_prev_no_eta = -torch.log(sigmas[step] /sigmas[step-1]) if step >= 1 else None h_prev2_no_eta = -torch.log(sigmas[step-1]/sigmas[step-2]) if step >= 2 else None else: h_no_eta = sigma_next - sigma h_prev_no_eta = sigmas[step] - sigmas[step-1] if step >= 1 else None h_prev2_no_eta = sigmas[step-1] - sigmas[step-2] if step >= 2 else None if type(c1) == torch.Tensor: c1 = c1.item() if type(c2) == torch.Tensor: 
c2 = c2.item() if type(c3) == torch.Tensor: c3 = c3.item() if c1 == -1: c1 = random.uniform(0, 1) if c2 == -1: c2 = random.uniform(0, 1) if c3 == -1: c3 = random.uniform(0, 1) if rk_type[:4] == "deis": order = int(rk_type[-2]) if step < order: if order == 4: rk_type = "res_3s" order = 3 elif order == 3: rk_type = "res_3s" elif order == 2: rk_type = "res_2s" else: rk_type = "deis" multistep_stages = order-1 if rk_type[-2:] == "2m": #multistep method rk_type = rk_type[:-2] + "2s" if h_prev is not None: multistep_stages = 1 c2 = (-h_prev / h).item() #print("c2: ", c2, h_prev, h) if rk_type[-2:] == "3m": #multistep method rk_type = rk_type[:-2] + "3s" if h_prev2 is not None: multistep_stages = 2 #print("3m") #c2 = (-h_prev2 / (h_prev + h)).item() c2 = (-h_prev2 / h).item() #c3 = (-h_prev / h).item() c3 = (-(h_prev2 + h_prev) / h).item() #print(c2, h_prev2, h_prev) #print(c3, h_prev, h) if rk_type in rk_coeff: a, b, ci = copy.deepcopy(rk_coeff[rk_type]) a, b, ci = rk_coeff[rk_type] a = [row + [0] * (len(ci) - len(row)) for row in a] match rk_type: case "deis": coeff_list = get_deis_coeff_list(sigmas, multistep_stages+1, deis_mode="rhoab") coeff_list = [[elem / h for elem in inner_list] for inner_list in coeff_list] if multistep_stages == 1: b1, b2 = coeff_list[step] a = [ [0, 0], [0, 0], ] b = [ [b1, b2], ] ci = [0, 0] if multistep_stages == 2: b1, b2, b3 = coeff_list[step] a = [ [0, 0, 0], [0, 0, 0], [0, 0, 0], ] b = [ [b1, b2, b3], ] ci = [0, 0, 0] if multistep_stages == 3: b1, b2, b3, b4 = coeff_list[step] a = [ [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], ] b = [ [b1, b2, b3, b4], ] ci = [0, 0, 0, 0] if multistep_stages > 0: for i in range(len(b[0])): b[0][i] *= ((sigma_down - sigma) / (sigma_next - sigma)) case "dormand-prince_6s": FSAL = True case "ddim": b1 = phi(1, -h) a = [ [0], ] b = [ [b1], ] ci = [0] case "res_2s": c2 = float(get_extra_options_kv("c2", str(c2), extra_options)) ci = [0, c2] φ = Phi(h, ci) a2_1 = c2 * φ(1,2) b2 = φ(2)/c2 b1 = φ(1) - 
b2 a = [ [0,0], [a2_1, 0], ] b = [ [b1, b2], ] case "res_3s": c2 = float(get_extra_options_kv("c2", str(c2), extra_options)) c3 = float(get_extra_options_kv("c3", str(c3), extra_options)) gamma = calculate_gamma(c2, c3) a2_1 = c2 * phi(1, -h*c2) a3_2 = gamma * c2 * phi(2, -h*c2) + (c3 ** 2 / c2) * phi(2, -h*c3) #phi_2_c3_h # a32 from k2 to k3 a3_1 = c3 * phi(1, -h*c3) - a3_2 # a31 from k1 to k3 b3 = (1 / (gamma * c2 + c3)) * phi(2, -h) b2 = gamma * b3 #simplified version of: b2 = (gamma / (gamma * c2 + c3)) * phi_2_h b1 = phi(1, -h) - b2 - b3 a = [ [0, 0, 0], [a2_1, 0, 0], [a3_1, a3_2, 0], ] b = [ [b1, b2, b3], ] ci = [c1, c2, c3] case "res_3s_alt": c2 = 1/3 c2 = float(get_extra_options_kv("c2", str(c2), extra_options)) c1,c2,c3 = 0, c2, 2/3 ci = [c1,c2,c3] φ = Phi(h, ci) a = [ [0, 0, 0], [0, 0, 0], [0, (4/(9*c2)) * φ(2,3), 0], ] b = [ [0, 0, (1/c3)*φ(2)], ] a, b = gen_first_col_exp(a,b,ci,φ) case "res_3s_strehmel_weiner": # weak 4th order, Krogstad c2 = 1/2 c2 = float(get_extra_options_kv("c2", str(c2), extra_options)) ci = [0,c2,1] φ = Phi(h, ci) a = [ [0, 0, 0], [0, 0, 0], [0, (1/c2) * φ(2,3), 0], ] b = [ [0, 0, φ(2)], ] a, b = gen_first_col_exp(a,b,ci,φ) case "res_3s_cox_matthews": # Cox & Matthews; known as ETD3RK c1,c2,c3 = 0,1/2,1 ci = [0,c2,1] φ = Phi(h, ci) a = [ [0, 0, 0], [0, 0, 0], [0, (1/c2) * φ(1,3), 0], # paper said 2 * φ(1,3), but this is the same and more consistent with res_3s_strehmel_weiner ] b = [ [0, -8*φ(3) + 4*φ(2), 4*φ(3) - φ(2)], ] a, b = gen_first_col_exp(a,b,ci,φ) case "res_3s_lie": # Lie; known as ETD2CF3 c1,c2,c3 = 0, 1/3, 2/3 ci = [c1,c2,c3] φ = Phi(h, ci) a = [ [0, 0, 0], [0, 0, 0], [0, (4/3)*φ(2,3), 0], # paper said 2 * φ(1,3), but this is the same and more consistent with res_3s_strehmel_weiner ] b = [ [0, 6*φ(2) - 18*φ(3), (-3/2)*φ(2) + 9*φ(3)], ] a, b = gen_first_col_exp(a,b,ci,φ) case "res_4s_cox_matthews": # weak 4th order, Cox & Matthews; unresolved issue, see below c1,c2,c3,c4 = 0, 1/2, 1/2, 1 ci = [c1,c2,c3,c4] φ = Phi(h, 
ci) a2_1 = c2 * φ(1,2) a3_2 = c3 * φ(1,3) a4_1 = (1/2) * φ(1,3) * (φ(0,3) - 1) # φ(0,3) == torch.exp(-h*c3) a4_3 = φ(1,3) b1 = φ(1) - 3*φ(2) + 4*φ(3) b2 = 2*φ(2) - 4*φ(3) b3 = 2*φ(2) - 4*φ(3) b4 = 4*φ(3) - φ(2) a = [ [0, 0,0,0], [a2_1, 0,0,0], [0, a3_2,0,0], [a4_1, 0,a4_3,0], ] b = [ [b1, b2, b3, b4], ] case "res_4s_munthe-kaas": # unstable RKMK4t c1,c2,c3,c4 = 0, 1/2, 1/2, 1 ci = [c1,c2,c3,c4] φ = Phi(h, ci) a = [ [0, 0, 0, 0], [c2*φ(1,2), 0, 0, 0], [(h/8)*φ(1,2), (1/2)*(1-h/4)*φ(1,2), 0, 0], [0, 0, φ(1), 0], ] b = [ [(1/6)*φ(1)*(1+h/2), (1/3)*φ(1), (1/3)*φ(1), (1/6)*φ(1)*(1-h/2)], ] case "res_4s_krogstad": # weak 4th order, Krogstad c1,c2,c3,c4 = 0, 1/2, 1/2, 1 ci = [c1,c2,c3,c4] φ = Phi(h, ci) a = [ [0, 0, 0, 0], [0, 0, 0, 0], [0, φ(2,3), 0, 0], [0, 0, 2*φ(2,4), 0], ] b = [ [0, 2*φ(2) - 4*φ(3), 2*φ(2) - 4*φ(3), -φ(2) + 4*φ(3)], ] #a = [row + [0] * (len(ci) - len(row)) for row in a] a, b = gen_first_col_exp(a,b,ci,φ) case "res_4s_strehmel_weiner": # weak 4th order, Strehmel & Weiner c1,c2,c3,c4 = 0, 1/2, 1/2, 1 ci = [c1,c2,c3,c4] φ = Phi(h, ci) a = [ [0, 0, 0, 0], [0, 0, 0, 0], [0, c3*φ(2,3), 0, 0], [0, -2*φ(2,4), 4*φ(2,4), 0], ] b = [ [0, 0, 4*φ(2) - 8*φ(3), -φ(2) + 4*φ(3)], ] a, b = gen_first_col_exp(a,b,ci,φ) case "lawson4_4s": c1,c2,c3,c4 = 0, 1/2, 1/2, 1 ci = [c1,c2,c3,c4] φ = Phi(h, ci) a2_1 = c2 * φ(0,2) a3_2 = 1/2 a4_3 = φ(0,2) b1 = (1/6) * φ(0) b2 = (1/3) * φ(0,2) b3 = (1/3) * φ(0,2) b4 = 1/6 a = [ [0, 0, 0, 0], [a2_1, 0, 0, 0], [0, a3_2, 0, 0], [0, 0, a4_3, 0], ] b = [ [b1,b2,b3,b4], ] case "genlawson41_4s": # GenLawson4 https://ora.ox.ac.uk/objects/uuid:cc001282-4285-4ca2-ad06-31787b540c61/files/m611df1a355ca243beb09824b70e5e774 c1,c2,c3,c4 = 0, 1/2, 1/2, 1 ci = [c1,c2,c3,c4] φ = Phi(h, ci) a3_2 = 1/2 a4_3 = φ(0,2) b2 = (1/3) * φ(0,2) b3 = (1/3) * φ(0,2) b4 = 1/6 a = [ [0, 0, 0, 0], [0, 0, 0, 0], [0, a3_2, 0, 0], [0, 0, a4_3, 0], ] b = [ [0, b2, b3, b4,], ] a, b = gen_first_col_exp(a,b,ci,φ) case "modgenlawson41_4s": # GenLawson4 
https://ora.ox.ac.uk/objects/uuid:cc001282-4285-4ca2-ad06-31787b540c61/files/m611df1a355ca243beb09824b70e5e774 c1,c2,c3,c4 = 0, 1/2, 1/2, 1 ci = [c1,c2,c3,c4] φ = Phi(h, ci) a3_2 = 1/2 a4_3 = φ(0,2) b2 = (1/3) * φ(0,2) b3 = (1/3) * φ(0,2) b4 = φ(2) - (1/3)*φ(0,2) a = [ [0, 0, 0, 0], [0, 0, 0, 0], [0, a3_2, 0, 0], [0, 0, a4_3, 0], ] b = [ [0, b2, b3, b4,], ] a, b = gen_first_col_exp(a,b,ci,φ) case "etdrk2_2s": # https://arxiv.org/pdf/2402.15142v1 c1,c2 = 0, 1 ci = [c1,c2] φ = Phi(h, ci) a = [ [0, 0], [φ(1), 0], ] b = [ [φ(1)-φ(2), φ(2)], ] case "etdrk3_a_3s": # https://arxiv.org/pdf/2402.15142v1 c1,c2,c3 = 0, 1, 2/3 ci = [c1,c2,c3] φ = Phi(h, ci) a2_1 = c2*φ(1) a3_2 = (4/9)*φ(2,3) a3_1 = c3*φ(1,3) - a3_2 b2 = φ(2) - (1/2)*φ(1) b3 = (3/4) * φ(1) b1 = φ(1) - b2 - b3 a = [ [0, 0, 0], [a2_1, 0, 0], [a3_1, a3_2, 0 ] ] b = [ [b1, b2, b3], ] case "etdrk3_b_3s": # https://arxiv.org/pdf/2402.15142v1 c1,c2,c3 = 0, 4/9, 2/3 ci = [c1,c2,c3] φ = Phi(h, ci) a2_1 = c2*φ(1,2) a3_2 = φ(2,3) a3_1 = c3*φ(1,3) - a3_2 b2 = 0 b3 = (3/2) * φ(2) b1 = φ(1) - b2 - b3 a = [ [0, 0, 0], [a2_1, 0, 0], [a3_1, a3_2, 0 ] ] b = [ [b1, b2, b3], ] case "dpmpp_2s": c2 = float(get_extra_options_kv("c2", str(c2), extra_options)) a2_1 = c2 * phi(1, -h*c2) b1 = (1 - 1/(2*c2)) * phi(1, -h) b2 = (1/(2*c2)) * phi(1, -h) a = [ [0, 0], [a2_1, 0], ] b = [ [b1, b2], ] ci = [0, c2] case "dpmpp_sde_2s": c2 = 1.0 #hardcoded to 1.0 to more closely emulate the configuration for k-diffusion's implementation a2_1 = c2 * phi(1, -h*c2) b1 = (1 - 1/(2*c2)) * phi(1, -h) b2 = (1/(2*c2)) * phi(1, -h) a = [ [0, 0], [a2_1, 0], ] b = [ [b1, b2], ] ci = [0, c2] case "dpmpp_3s": c2 = float(get_extra_options_kv("c2", str(c2), extra_options)) c3 = float(get_extra_options_kv("c3", str(c3), extra_options)) a2_1 = c2 * phi(1, -h*c2) a3_2 = (c3**2 / c2) * phi(2, -h*c3) a3_1 = c3 * phi(1, -h*c3) - a3_2 b2 = 0 b3 = (1/c3) * phi(2, -h) b1 = phi(1, -h) - b2 - b3 a = [ [0, 0, 0], [a2_1, 0, 0], [a3_1, a3_2, 0], ] b = [ [b1, b2, b3], ] ci = 
[0, c2, c3] case "res_5s": #4th order c1, c2, c3, c4, c5 = 0, 1/2, 1/2, 1, 1/2 a2_1 = c2 * phi(1, -h * c2) a3_2 = phi(2, -h * c3) a3_1 = c3 * phi(1, -h * c3) - a3_2 #a3_1 = c3 * phi(1, -h * c3) - phi(2, -h * c3) a4_2 = a4_3 = phi(2, -h * c4) a4_1 = c4 * phi(1, -h * c4) - a4_2 - a4_3 #a4_1 = phi(1, -h * c4) - 2 * phi(2, -h * c4) a5_2 = a5_3 = 0.5 * phi(2, -h * c5) - phi(3, -h * c4) + 0.25 * phi(2, -h * c4) - 0.5 * phi(3, -h * c5) a5_4 = 0.25 * phi(2, -h * c5) - a5_2 a5_1 = c5 * phi(1, -h * c5) - a5_2 - a5_3 - a5_4 b2 = b3 = 0 b4 = -phi(2, -h) + 4*phi(3, -h) b5 = 4 * phi(2, -h) - 8 * phi(3, -h) #b1 = phi(1, -h) - 3 * phi(2, -h) + 4 * phi(3, -h) b1 = phi(1,-h) - b2 - b3 - b4 - b5 a = [ [0, 0, 0, 0, 0], [a2_1, 0, 0, 0, 0], [a3_1, a3_2, 0, 0, 0], [a4_1, a4_2, a4_3, 0, 0], [a5_1, a5_2, a5_3, a5_4, 0], ] b = [ [b1, b2, b3, b4, b5], ] ci = [0., 0.5, 0.5, 1., 0.5] case "res_6s": #4th order c1, c2, c3, c4, c5, c6 = 0, 1/2, 1/2, 1/3, 1/3, 5/6 ci = [c1, c2, c3, c4, c5, c6] φ = Phi(h, ci) a2_1 = c2 * φ(1,2) a3_1 = 0 a3_2 = (c3**2 / c2) * φ(2,3) a4_1 = 0 a4_2 = (c4**2 / c2) * φ(2,4) a4_3 = (c4**2 * φ(2,4) - a4_2 * c2) / c3 a5_1 = 0 a5_2 = 0 #zero a5_3 = (-c4 * c5**2 * φ(2,5) + 2*c5**3 * φ(3,5)) / (c3 * (c3 - c4)) a5_4 = (-c3 * c5**2 * φ(2,5) + 2*c5**3 * φ(3,5)) / (c4 * (c4 - c3)) a6_1 = 0 a6_2 = 0 #zero a6_3 = (-c4 * c6**2 * φ(2,6) + 2*c6**3 * φ(3,6)) / (c3 * (c3 - c4)) a6_4 = (-c3 * c6**2 * φ(2,6) + 2*c6**3 * φ(3,6)) / (c4 * (c4 - c3)) a6_5 = (c6**2 * φ(2,6) - a6_3*c3 - a6_4*c4) / c5 #a6_5_alt = (2*c6**3 * φ(3,6) - a6_3*c3**2 - a6_4*c4**2) / c5**2 b1 = 0 b2 = 0 b3 = 0 b4 = 0 b5 = (-c6*φ(2) + 2*φ(3)) / (c5 * (c5 - c6)) b6 = (-c5*φ(2) + 2*φ(3)) / (c6 * (c6 - c5)) a = [ [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, a3_2, 0, 0, 0, 0], [0, a4_2, a4_3, 0, 0, 0], [0, a5_2, a5_3, a5_4, 0, 0], [0, a6_2, a6_3, a6_4, a6_5, 0], ] b = [ [0, b2, b3, b4, b5, b6], ] for i in range(len(ci)): a[i][0] = ci[i] * φ(1,i+1) - sum(a[i]) for i in range(len(b)): b[i][0] = φ(1) - sum(b[i]) case "res_8s": 
#todo: add EKPRK5S8 c1, c2, c3, c4, c5, c6, c7, c8 = 0, 1/2, 1/2, 1/4, 1/2, 1/5, 2/3, 1 ci = [c1, c2, c3, c4, c5, c6, c7, c8] φ = Phi(h, ci, analytic_solution=True) a3_2 = (1/2) * φ(2,3) a4_3 = (1/8) * φ(2,4) a5_3 = (-1/2) * φ(2,5) + 2 * φ(3,5) a5_4 = 2 * φ(2,5) - 4 * φ(3,5) a6_4 = (8/25) * φ(2,6) - (32/125) * φ(3,6) a6_5 = (2/25) * φ(2,6) - (1/2) * a6_4 a7_4 = (-125/162) * a6_4 a7_5 = (125/1944) * a6_4 - (16/27) * φ(2,7) + (320/81) * φ(3,7) a7_6 = (3125/3888) * a6_4 + (100/27) * φ(2,7) - (800/81) * φ(3,7) Φ = (5/32)*a6_4 - (1/28)*φ(2,6) + (36/175)*φ(2,7) - (48/25)*φ(3,7) + (6/175)*φ(4,6) + (192/35)*φ(4,7) + 6*φ(4,8) a8_5 = (208/3)*φ(3,8) - (16/3) *φ(2,8) - 40*Φ a8_6 = (-250/3)*φ(3,8) + (250/21)*φ(2,8) + (250/7)*Φ a8_7 = -27*φ(3,8) + (27/14)*φ(2,8) + (135/7)*Φ b6 = (125/14)*φ(2) - (625/14)*φ(3) + (1125/14)*φ(4) b7 = (-27/14)*φ(2) + (162/7) *φ(3) - (405/7) *φ(4) b8 = (1/2) *φ(2) - (13/2) *φ(3) + (45/2) *φ(4) a2_1 = c2*φ(1,2) a3_1 = c3*φ(1,3) - a3_2 a4_1 = c4*φ(1,4) - a4_3 a5_1 = c5*φ(1,5) - a5_3 - a5_4 a6_1 = c6*φ(1,6) - a6_4 - a6_5 a7_1 = c7*φ(1,7) - a7_4 - a7_5 - a7_6 a8_1 = c8*φ(1,8) - a8_5 - a8_6 - a8_7 b1 = φ(1) - b6 - b7 - b8 a = [ [0, 0, 0, 0, 0, 0, 0, 0], [a2_1, 0, 0, 0, 0, 0, 0, 0], [a3_1, a3_2, 0, 0, 0, 0, 0, 0], [a4_1, 0, a4_3, 0, 0, 0, 0, 0], [a5_1, 0, a5_3, a5_4, 0, 0, 0, 0], [a6_1, 0, 0, a6_4, a6_5, 0, 0, 0], [a7_1 , 0, 0, a7_4, a7_5, a7_6, 0, 0], [a8_1 , 0, 0, 0, a8_5, a8_6, a8_7, 0], ] b = [ [b1, 0, 0, 0, 0, b6, b7, b8], ] a = [ [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0], [0, a3_2, 0, 0, 0, 0, 0, 0], [0, 0, a4_3, 0, 0, 0, 0, 0], [0, 0, a5_3, a5_4, 0, 0, 0, 0], [0, 0, 0, a6_4, a6_5, 0, 0, 0], [0 , 0, 0, a7_4, a7_5, a7_6, 0, 0], [0 , 0, 0, 0, a8_5, a8_6, a8_7, 0], ] b = [ [0, 0, 0, 0, 0, b6, b7, b8], ] for i in range(len(a)): a[i][0] = ci[i] * φ(1,i+1) - sum(a[i]) for i in range(len(b)): b[i][0] = φ(1) - sum(b[i]) case "res_10s": c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 = 0, 1/2, 1/2, 1/3, 1/2, 1/3, 1/4, 3/10, 3/4, 1 ci = [c1, c2, c3, c4, c5, 
c6, c7, c8, c9, c10] φ = Phi(h, ci, analytic_solution=True) a3_2 = (c3**2 / c2) * φ(2,3) a4_2 = (c4**2 / c2) * φ(2,4) b8 = (c9*c10*φ(2) - 2*(c9+c10)*φ(3) + 6*φ(4)) / (c8 * (c8-c9) * (c8-c10)) b9 = (c8*c10*φ(2) - 2*(c8+c10)*φ(3) + 6*φ(4)) / (c9 * (c9-c8) * (c9-c10)) b10 = (c8*c9*φ(2) - 2*(c8+c9) *φ(3) + 6*φ(4)) / (c10 * (c10-c8) * (c10-c9)) a = [ [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, a3_2, 0, 0, 0, 0, 0, 0, 0, 0], [0, a4_2, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], ] b = [ [0, 0, 0, 0, 0, 0, 0, b8, b9, b10], ] # a5_3, a5_4 # a6_3, a6_4 # a7_3, a7_4 for i in range(5, 8): # i=5,6,7 j,k ∈ {3, 4}, j != k jk = [(3, 4), (4, 3)] jk = list(permutations([3, 4], 2)) for j,k in jk: a[i-1][j-1] = (-ci[i-1]**2 * ci[k-1] * φ(2,i) + 2*ci[i-1]**3 * φ(3,i)) / (ci[j-1] * (ci[j-1] - ci[k-1])) for i in range(8, 11): # i=8,9,10 j,k,l ∈ {5, 6, 7}, j != k != l [ (5, 6, 7), (5, 7, 6), (6, 5, 7), (6, 7, 5), (7, 5, 6), (7, 6, 5)] 6 total coeff jkl = list(permutations([5, 6, 7], 3)) for j,k,l in jkl: a[i-1][j-1] = (ci[i-1]**2 * ci[k-1] * ci[l-1] * φ(2,i) - 2*ci[i-1]**3 * (ci[k-1] + ci[l-1]) * φ(3,i) + 6*ci[i-1]**4 * φ(4,i)) / (ci[j-1] * (ci[j-1] - ci[k-1]) * (ci[j-1] - ci[l-1])) for i in range(len(a)): a[i][0] = ci[i] * φ(1,i+1) - sum(a[i]) for i in range(len(b)): b[i][0] = φ(1) - sum(b[i]) case "res_15s": c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15 = 0, 1/2, 1/2, 1/3, 1/2, 1/5, 1/4, 18/25, 1/3, 3/10, 1/6, 90/103, 1/3, 3/10, 1/5 c1 = 0 c2 = c3 = c5 = 1/2 c4 = c9 = c13 = 1/3 c6 = c15 = 1/5 c7 = 1/4 c8 = 18/25 c10 = c14 = 3/10 c11 = 1/6 c12 = 90/103 c15 = 1/5 ci = [c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15] φ = Phi(h, ci, analytic_solution=True) a = [[0 for _ in range(15)] for _ in range(15)] b = [[0 for _ in range(15)]] for i in range(3, 5): # i=3,4 j=2 j=2 
a[i-1][j-1] = (ci[i-1]**2 / ci[j-1]) * φ(j,i) for i in range(5, 8): # i=5,6,7 j,k ∈ {3, 4}, j != k jk = list(permutations([3, 4], 2)) for j,k in jk: a[i-1][j-1] = (-ci[i-1]**2 * ci[k-1] * φ(2,i) + 2*ci[i-1]**3 * φ(3,i)) / prod_diff(ci[j-1], ci[k-1]) for i in range(8, 12): # i=8,9,10,11 j,k,l ∈ {5, 6, 7}, j != k != l [ (5, 6, 7), (5, 7, 6), (6, 5, 7), (6, 7, 5), (7, 5, 6), (7, 6, 5)] 6 total coeff jkl = list(permutations([5, 6, 7], 3)) for j,k,l in jkl: a[i-1][j-1] = (ci[i-1]**2 * ci[k-1] * ci[l-1] * φ(2,i) - 2*ci[i-1]**3 * (ci[k-1] + ci[l-1]) * φ(3,i) + 6*ci[i-1]**4 * φ(4,i)) / (ci[j-1] * (ci[j-1] - ci[k-1]) * (ci[j-1] - ci[l-1])) for i in range(12,16): # i=12,13,14,15 jkld = list(permutations([8,9,10,11], 4)) for j,k,l,d in jkld: numerator = -ci[i-1]**2 * ci[d-1]*ci[k-1]*ci[l-1] * φ(2,i) + 2*ci[i-1]**3 * (ci[d-1]*ci[k-1] + ci[d-1]*ci[l-1] + ci[k-1]*ci[l-1]) * φ(3,i) - 6*ci[i-1]**4 * (ci[d-1] + ci[k-1] + ci[l-1]) * φ(4,i) + 24*ci[i-1]**5 * φ(5,i) a[i-1][j-1] = numerator / prod_diff(ci[j-1], ci[k-1], ci[l-1], ci[d-1]) """ijkl = list(permutations([12,13,14,15], 4)) for i,j,k,l in ijkl: #numerator = -ci[j-1]*ci[k-1]*ci[l-1]*φ(2) + 2*(ci[j-1]*ci[k-1] + ci[j-1]*ci[l-1] + ci[k-1]*ci[l-1])*φ(3) - 6*(ci[j-1] + ci[k-1] + ci[l-1])*φ(4) + 24*φ(5) #b[0][i-1] = numerator / prod_diff(ci[i-1], ci[j-1], ci[k-1], ci[l-1]) for jjj in range (2, 6): # 2,3,4,5 b[0][i-1] += mu_numerator(jjj, ci[j-1], ci[i-1], ci[k-1], ci[l-1]) * φ(jjj) b[0][i-1] /= prod_diff(ci[i-1], ci[j-1], ci[k-1], ci[l-1])""" ijkl = list(permutations([12,13,14,15], 4)) for i,j,k,l in ijkl: numerator = 0 for jjj in range(2, 6): # 2, 3, 4, 5 numerator += mu_numerator(jjj, ci[j-1], ci[i-1], ci[k-1], ci[l-1]) * φ(jjj) #print(i,j,k,l) b[0][i-1] = numerator / prod_diff(ci[i-1], ci[j-1], ci[k-1], ci[l-1]) ijkl = list(permutations([12, 13, 14, 15], 4)) selected_permutations = {} sign = 1 for i in range(12, 16): results = [] for j, k, l, d in ijkl: if i != j and i != k and i != l and i != d: numerator = 0 for jjj in range(2, 
6): # 2, 3, 4, 5 numerator += mu_numerator(jjj, ci[j-1], ci[i-1], ci[k-1], ci[l-1]) * φ(jjj) theta_value = numerator / prod_diff(ci[i-1], ci[j-1], ci[k-1], ci[l-1]) results.append((theta_value, (i, j, k, l, d))) results.sort(key=lambda x: abs(x[0])) for theta_value, permutation in results: if sign == 1 and theta_value > 0: selected_permutations[i] = (theta_value, permutation) sign *= -1 break elif sign == -1 and theta_value < 0: selected_permutations[i] = (theta_value, permutation) sign *= -1 break for i in range(12, 16): if i in selected_permutations: theta_value, (i, j, k, l, d) = selected_permutations[i] b[0][i-1] = theta_value for i in selected_permutations: theta_value, permutation = selected_permutations[i] #print(f"i={i}") #print(f" Selected Theta: {theta_value:.6f}, Permutation: {permutation}") for i in range(len(a)): a[i][0] = ci[i] * φ(1,i+1) - sum(a[i]) for i in range(len(b)): b[i][0] = φ(1) - sum(b[i]) case "res_16s": # 6th order without weakened order conditions c1 = 0 c2 = c3 = c5 = c8 = c12 = 1/2 c4 = c11 = c15 = 1/3 c6 = c9 = c13 = 1/5 c7 = c10 = c14 = 1/4 c16 = 1 ci = [c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16] φ = Phi(h, ci, analytic_solution=True) a3_2 = (1/2) * φ(2,3) a = [[0 for _ in range(16)] for _ in range(16)] b = [[0 for _ in range(16)]] for i in range(3, 5): # i=3,4 j=2 j=2 a[i-1][j-1] = (ci[i-1]**2 / ci[j-1]) * φ(j,i) for i in range(5, 8): # i=5,6,7 j,k ∈ {3, 4}, j != k jk = list(permutations([3, 4], 2)) for j,k in jk: a[i-1][j-1] = (-ci[i-1]**2 * ci[k-1] * φ(2,i) + 2*ci[i-1]**3 * φ(3,i)) / prod_diff(ci[j-1], ci[k-1]) for i in range(8, 12): # i=8,9,10,11 j,k,l ∈ {5, 6, 7}, j != k != l [ (5, 6, 7), (5, 7, 6), (6, 5, 7), (6, 7, 5), (7, 5, 6), (7, 6, 5)] 6 total coeff jkl = list(permutations([5, 6, 7], 3)) for j,k,l in jkl: a[i-1][j-1] = (ci[i-1]**2 * ci[k-1] * ci[l-1] * φ(2,i) - 2*ci[i-1]**3 * (ci[k-1] + ci[l-1]) * φ(3,i) + 6*ci[i-1]**4 * φ(4,i)) / (ci[j-1] * (ci[j-1] - ci[k-1]) * (ci[j-1] - ci[l-1])) for i in 
range(12,17): # i=12,13,14,15,16 jkld = list(permutations([8,9,10,11], 4)) for j,k,l,d in jkld: numerator = -ci[i-1]**2 * ci[d-1]*ci[k-1]*ci[l-1] * φ(2,i) + 2*ci[i-1]**3 * (ci[d-1]*ci[k-1] + ci[d-1]*ci[l-1] + ci[k-1]*ci[l-1]) * φ(3,i) - 6*ci[i-1]**4 * (ci[d-1] + ci[k-1] + ci[l-1]) * φ(4,i) + 24*ci[i-1]**5 * φ(5,i) a[i-1][j-1] = numerator / prod_diff(ci[j-1], ci[k-1], ci[l-1], ci[d-1]) """ijdkl = list(permutations([12,13,14,15,16], 5)) for i,j,d,k,l in ijdkl: #numerator = -ci[j-1]*ci[k-1]*ci[l-1]*φ(2) + 2*(ci[j-1]*ci[k-1] + ci[j-1]*ci[l-1] + ci[k-1]*ci[l-1])*φ(3) - 6*(ci[j-1] + ci[k-1] + ci[l-1])*φ(4) + 24*φ(5) b[0][i-1] = theta(2, ci[d-1], ci[i-1], ci[k-1], ci[j-1], ci[l-1]) * φ(2) + theta(3, ci[d-1], ci[i-1], ci[k-1], ci[j-1], ci[l-1])*φ(3) + theta(4, ci[d-1], ci[i-1], ci[k-1], ci[j-1], ci[l-1])*φ(4) + theta(5, ci[d-1], ci[i-1], ci[k-1], ci[j-1], ci[l-1])*φ(5) + theta(6, ci[d-1], ci[i-1], ci[k-1], ci[j-1], ci[l-1]) * φ(6) #b[0][i-1] = numerator / prod_diff(ci[i-1], ci[j-1], ci[k-1], ci[l-1])""" ijdkl = list(permutations([12,13,14,15,16], 5)) for i,j,d,k,l in ijdkl: #numerator = -ci[j-1]*ci[k-1]*ci[l-1]*φ(2) + 2*(ci[j-1]*ci[k-1] + ci[j-1]*ci[l-1] + ci[k-1]*ci[l-1])*φ(3) - 6*(ci[j-1] + ci[k-1] + ci[l-1])*φ(4) + 24*φ(5) #numerator = theta_numerator(2, ci[d-1], ci[i-1], ci[k-1], ci[j-1], ci[l-1]) * φ(2) + theta_numerator(3, ci[d-1], ci[i-1], ci[k-1], ci[j-1], ci[l-1])*φ(3) + theta_numerator(4, ci[d-1], ci[i-1], ci[k-1], ci[j-1], ci[l-1])*φ(4) + theta_numerator(5, ci[d-1], ci[i-1], ci[k-1], ci[j-1], ci[l-1])*φ(5) + theta_numerator(6, ci[d-1], ci[i-1], ci[k-1], ci[j-1], ci[l-1]) * φ(6) #b[0][i-1] = numerator / (ci[i-1] *, ci[d-1], ci[j-1], ci[k-1], ci[l-1]) #b[0][i-1] = numerator / denominator(ci[i-1], ci[d-1], ci[j-1], ci[k-1], ci[l-1]) b[0][i-1] = theta(2, ci[d-1], ci[i-1], ci[k-1], ci[j-1], ci[l-1]) * φ(2) + theta(3, ci[d-1], ci[i-1], ci[k-1], ci[j-1], ci[l-1])*φ(3) + theta(4, ci[d-1], ci[i-1], ci[k-1], ci[j-1], ci[l-1])*φ(4) + theta(5, ci[d-1], ci[i-1], ci[k-1], 
def _unsupported_order(name, j, supported):
    """Build the error raised when a coefficient helper has no formula for order ``j``."""
    return ValueError(f"{name}: unsupported order j={j!r}; expected one of {supported}")


def gen_first_col_exp(a, b, c, φ):
    """Fill the first column of ``a`` and ``b`` in place and return ``(a, b)``.

    For every node ``c[i]`` the entry ``a[i][0]`` is set to
    ``c[i] * φ(1, i + 1) - sum(a[i])``, and for every row of ``b`` the entry
    ``b[i][0]`` is set to ``φ(1) - sum(b[i])``.

    The row sum is taken *before* the assignment and includes whatever is
    currently stored in column 0, so rows are expected to arrive with a zero
    there (the callers in this file build them that way).

    Args:
        a: square list-of-lists tableau, at least ``len(c)`` rows; mutated.
        b: list of weight rows; mutated.
        c: sequence of stage nodes.
        φ: callable used as ``φ(1, stage_index)`` and ``φ(1)``.

    Returns:
        The same ``a`` and ``b`` objects that were passed in.
    """
    for i, node in enumerate(c):
        row = a[i]
        row[0] = node * φ(1, i + 1) - sum(row)
    for row in b:
        row[0] = φ(1) - sum(row)
    return a, b


def rho(j, ci, ck, cl):
    """Return the order-``j`` weight for node ``ci`` against two other nodes.

    The numerator is ``ck*cl`` (j=2), ``-2*(ck+cl)`` (j=3) or ``6`` (j=4); it
    is divided by ``ci * (ci - ck) * (ci - cl)``.

    Raises:
        ValueError: if ``j`` is not 2, 3 or 4.
    """
    if j == 2:
        numerator = ck * cl
    elif j == 3:
        numerator = -2 * (ck + cl)
    elif j == 4:
        numerator = 6
    else:
        raise _unsupported_order("rho", j, (2, 3, 4))
    return numerator / denominator(ci, ck, cl)


def mu_numerator(j, cd, ci, ck, cl):
    """Return only the numerator of :func:`mu` for order ``j``.

    ``ci`` is accepted so the signature lines up with :func:`mu`, but it does
    not enter the numerator.

    Raises:
        ValueError: if ``j`` is not 2, 3, 4 or 5.
    """
    if j == 2:
        return -cd * ck * cl
    if j == 3:
        return 2 * (cd * ck + cd * cl + ck * cl)
    if j == 4:
        return -6 * (cd + ck + cl)
    if j == 5:
        return 24
    raise _unsupported_order("mu_numerator", j, (2, 3, 4, 5))


def mu(j, cd, ci, ck, cl):
    """Return the order-``j`` weight for node ``ci`` against three other nodes.

    Equal to ``mu_numerator(j, cd, ci, ck, cl)`` divided by
    ``ci * (ci - cd) * (ci - ck) * (ci - cl)``.

    Raises:
        ValueError: if ``j`` is not 2, 3, 4 or 5.
    """
    if j not in (2, 3, 4, 5):
        raise _unsupported_order("mu", j, (2, 3, 4, 5))
    return mu_numerator(j, cd, ci, ck, cl) / denominator(ci, cd, ck, cl)


def theta_numerator(j, cd, ci, ck, cj, cl):
    """Return only the numerator of :func:`theta` for order ``j``.

    The numerator is symmetric in ``cd``, ``ck``, ``cj`` and ``cl``; ``ci`` is
    accepted for signature parity with :func:`theta` and is not used.

    Raises:
        ValueError: if ``j`` is not 2, 3, 4, 5 or 6.
    """
    if j == 2:
        return -cj * cd * ck * cl
    if j == 3:
        return 2 * (cj * ck * cd + cj*ck*cl + ck*cd*cl + cd*cl*cj)
    if j == 4:
        return -6*(cj*ck + cj*cd + cj*cl + ck*cd + ck*cl + cd*cl)
    if j == 5:
        return 24 * (cj + ck + cl + cd)
    if j == 6:
        return -120
    raise _unsupported_order("theta_numerator", j, (2, 3, 4, 5, 6))


def theta(j, cd, ci, ck, cj, cl):
    """Return the order-``j`` weight for node ``ci`` against four other nodes.

    Equal to ``theta_numerator(...)`` divided by
    ``ci * (ci - cj) * (ci - ck) * (ci - cl) * (ci - cd)``. The factor order
    handed to :func:`denominator` matches the previously hand-expanded
    product, so floating-point results are unchanged.

    Raises:
        ValueError: if ``j`` is not 2, 3, 4, 5 or 6.
    """
    if j not in (2, 3, 4, 5, 6):
        raise _unsupported_order("theta", j, (2, 3, 4, 5, 6))
    return theta_numerator(j, cd, ci, ck, cj, cl) / denominator(ci, cj, ck, cl, cd)


def prod_diff(cj, ck, cl=None, cd=None, cblah=None):
    """Return ``cj`` times its differences to ``ck`` and, if given, ``cl`` and ``cd``.

    * ``prod_diff(cj, ck)``         -> ``cj * (cj - ck)``
    * ``prod_diff(cj, ck, cl)``     -> ``cj * (cj - ck) * (cj - cl)``
    * ``prod_diff(cj, ck, cl, cd)`` -> ``cj * (cj - ck) * (cj - cl) * (cj - cd)``

    ``cblah`` is ignored; it is kept only so existing call signatures keep
    working. Passing ``cd`` without ``cl`` is not supported and fails on the
    ``cj - None`` subtraction, as before.
    """
    if cd is not None:
        return cj * (cj - ck) * (cj - cl) * (cj - cd)
    if cl is None:
        return cj * (cj - ck)
    return cj * (cj - ck) * (cj - cl)


def denominator(ci, *args):
    """Return ``ci * (ci - a1) * (ci - a2) * ...`` for every ``a`` in ``args``.

    With no extra arguments the result is ``ci`` itself. The product is built
    with out-of-place multiplication: an in-place ``*=`` on an alias of ``ci``
    would overwrite a mutable ``ci`` (NumPy array, torch tensor) and corrupt
    every later ``ci - a`` factor.
    """
    result = ci
    for arg in args:
        result = result * (ci - arg)
    return result


def check_condition_4_2(nodes):
    """Check a scalar condition on four nodes to a tolerance of ``1e-6``.

    With ``e1..e4`` the elementary symmetric sums of the four nodes (sum,
    pairwise products, triple products, full product), returns whether
    ``e1/5 - e2/4 + e3/3 - e4/2`` is within ``1e-6`` of ``1/6``.

    Args:
        nodes: iterable of exactly four numbers ``(c12, c13, c14, c15)``.
    """
    c12, c13, c14, c15 = nodes

    singles = c12 + c13 + c14 + c15
    pairs = c12 * c13 + c12 * c14 + c12 * c15 + c13 * c14 + c13 * c15 + c14 * c15
    triples = c12 * c13 * c14 + c12 * c13 * c15 + c12 * c14 * c15 + c13 * c14 * c15
    quad = c12 * c13 * c14 * c15

    term_1 = (1 / 5) * singles
    term_2 = (1 / 4) * pairs
    term_3 = (1 / 3) * triples
    term_4 = (1 / 2) * quad

    result = term_1 - term_2 + term_3 - term_4
    return abs(result - (1 / 6)) < 1e-6
guide_mode, extra_options): if guide_mode == "epsilon_guide_mean_std_from_bkg": y0 = normalize_latent(y0, y0_inv) input_norm = get_extra_options_kv("input_norm", "", extra_options) input_std = float(get_extra_options_kv("input_std", "1.0", extra_options)) if input_norm == "input_ch_mean_set_std_to": x = normalize_latent(x, set_std=input_std) if input_norm == "input_ch_set_std_to": x = normalize_latent(x, set_std=input_std, mean=False) if input_norm == "input_mean_set_std_to": x = normalize_latent(x, set_std=input_std, channelwise=False) if input_norm == "input_std_set_std_to": x = normalize_latent(x, set_std=input_std, mean=False, channelwise=False) return x, y0, y0_inv class LatentGuide: def __init__(self, guides, x, model, sigmas, UNSAMPLE, LGW_MASK_RESCALE_MIN, extra_options, device='cuda', dtype=torch.float64, max_steps=10000): self.model = model self.sigma_min = model.inner_model.inner_model.model_sampling.sigma_min.to(dtype) self.sigma_max = model.inner_model.inner_model.model_sampling.sigma_max.to(dtype) self.sigmas = sigmas self.UNSAMPLE = UNSAMPLE self.SAMPLE = (sigmas[0] > sigmas[1]) self.extra_options = extra_options self.y0 = torch.zeros_like(x) self.y0_inv = torch.zeros_like(x) self.guide_mode = "" self.mask = None self.mask_inv = None self.latent_guide = None self.latent_guide_inv = None self.lgw_masks = [] self.lgw_masks_inv = [] self.lgw, self.lgw_inv = [torch.full_like(sigmas, 0.) for _ in range(2)] self.guide_cossim_cutoff_, self.guide_bkg_cossim_cutoff_ = 1.0, 1.0 latent_guide_weight, latent_guide_weight_inv = 0.,0. 
    def init_guides(self, x, noise_sampler, latent_guide=None, latent_guide_inv=None):
        """Set up ``self.y0`` / ``self.y0_inv`` for a run and return the start latent.

        ``self.y0`` and ``self.y0_inv`` are first reset to zeros shaped like
        ``x``. Each guide (explicit argument, else the one stored on the
        instance) is then applied according to the run direction:

        * ``self.SAMPLE`` true: the guide becomes ``self.y0`` / ``self.y0_inv``
          and ``x`` is left alone.
        * otherwise, ``self.UNSAMPLE`` true: the guide is blended into ``x``
          through ``self.mask`` / ``self.mask_inv``.
        * otherwise: the guide replaces ``x`` outright.

        When unsampling (``self.UNSAMPLE and not self.SAMPLE``) both targets
        are overwritten with freshly drawn noise standardised to zero mean and
        unit std. Finally everything is passed through ``normalize_inputs``.

        Args:
            x: current latent; also supplies the shape and device for the guides.
            noise_sampler: callable invoked as
                ``noise_sampler(sigma=..., sigma_next=...)``.
            latent_guide: optional override for ``self.latent_guide``; either a
                dict with a ``'samples'`` entry or an already-prepared value.
            latent_guide_inv: same, for ``self.latent_guide_inv``.

        Returns:
            The latent to start from (``x``, possibly blended or replaced).
        """
        # NOTE(review): block nesting below was reconstructed from a
        # whitespace-collapsed listing — confirm against the repository copy.
        self.y0, self.y0_inv = torch.zeros_like(x), torch.zeros_like(x)
        # Explicit arguments win over the guides stored on the instance.
        latent_guide = self.latent_guide if latent_guide is None else latent_guide
        latent_guide_inv = self.latent_guide_inv if latent_guide_inv is None else latent_guide_inv

        if latent_guide is not None:
            if type(latent_guide) == dict:
                # Dict guides go through the model's process_latent_in first
                # (presumably the model-specific latent scaling — TODO confirm).
                latent_guide_samples = self.model.inner_model.inner_model.process_latent_in(latent_guide['samples']).clone().to(x.device)
            else:
                latent_guide_samples = latent_guide

            if self.SAMPLE:
                self.y0 = latent_guide_samples
            elif self.UNSAMPLE: # and self.mask is not None:
                # Masked blend: guide inside the mask, original x outside it.
                x = (1-self.mask) * x + self.mask * latent_guide_samples
            else:
                x = latent_guide_samples

        if latent_guide_inv is not None:
            if type(latent_guide_inv) == dict:
                latent_guide_inv_samples = self.model.inner_model.inner_model.process_latent_in(latent_guide_inv['samples']).clone().to(x.device)
            else:
                latent_guide_inv_samples = latent_guide_inv

            if self.SAMPLE:
                self.y0_inv = latent_guide_inv_samples
            elif self.UNSAMPLE: # and self.mask is not None:
                x = (1-self.mask_inv) * x + self.mask_inv * latent_guide_inv_samples #fixed old approach, which was mask, (1-mask)
            else:
                # Known hazard flagged by the original author: this overwrites
                # an x that the latent_guide branch above may already have set.
                x = latent_guide_inv_samples #THIS COULD LEAD TO WEIRD BEHAVIOR! OVERWRITING X WITH LG_INV AFTER SETTING TO LG above!

        if self.UNSAMPLE and not self.SAMPLE: #sigma_next > sigma:
            # Unsampling: both targets become independent noise draws,
            # standardised over the whole tensor (global mean and std).
            self.y0 = noise_sampler(sigma=self.sigma_max, sigma_next=self.sigma_min)
            self.y0 = (self.y0 - self.y0.mean()) / self.y0.std()
            self.y0_inv = noise_sampler(sigma=self.sigma_max, sigma_next=self.sigma_min)
            self.y0_inv = (self.y0_inv - self.y0_inv.mean()) / self.y0_inv.std()

        # Optional renormalisation selected by guide_mode / extra_options.
        x, self.y0, self.y0_inv = normalize_inputs(x, self.y0, self.y0_inv, self.guide_mode, self.extra_options)

        return x
x_ else: return eps_, x_ if self.UNSAMPLE and RK_Method.is_exponential(rk_type): if not (extra_options_flag("disable_power_unsample", extra_options) or extra_options_flag("disable_power_resample", extra_options)): extra_options += "\npower_unsample\npower_resample\n" if not extra_options_flag("disable_lgw_scaling_substep_ch_mean_std", extra_options): extra_options += "\nsubstep_eps_ch_mean_std\n" s_in = x_0.new_ones([x_0.shape[0]]) eps_orig = eps_.clone() if extra_options_flag("dynamic_guides_mean_std", extra_options): y_shift, y_inv_shift = normalize_latent([y0, y0_inv], [data_, data_]) y0 = y_shift if extra_options_flag("dynamic_guides_inv", extra_options): y0_inv = y_inv_shift if extra_options_flag("dynamic_guides_mean", extra_options): y_shift, y_inv_shift = normalize_latent([y0, y0_inv], [data_, data_], std=False) y0 = y_shift if extra_options_flag("dynamic_guides_inv", extra_options): y0_inv = y_inv_shift if "data" == guide_mode: y0_tmp = y0.clone() if latent_guide_inv is not None: y0_tmp = (1-lgw_mask) * data_[row] + lgw_mask * y0 y0_tmp = (1-lgw_mask_inv) * y0_tmp + lgw_mask_inv * y0_inv x_[row+1] = y0_tmp + eps_[row] if guide_mode == "data_projection": d_lerp = data_[row] + lgw_mask * (y0-data_[row]) + lgw_mask_inv * (y0_inv-data_[row]) d_collinear_d_lerp = get_collinear(data_[row], d_lerp) d_lerp_ortho_d = get_orthogonal(d_lerp, data_[row]) data_[row] = d_collinear_d_lerp + d_lerp_ortho_d x_[row+1] = data_[row] + eps_[row] * sigma elif "epsilon" in guide_mode: if sigma > sigma_next: tol_value = float(get_extra_options_kv("tol", "-1.0", extra_options)) if tol_value >= 0 and (lgw > 0 or lgw_inv > 0): for b, c in itertools.product(range(x_0.shape[0]), range(x_0.shape[1])): current_diff = torch.norm(data_[row][b][c] - y0 [b][c]) current_diff_inv = torch.norm(data_[row][b][c] - y0_inv[b][c]) lgw_scaled = torch.nan_to_num(1-(tol_value/current_diff), 0) lgw_scaled_inv = torch.nan_to_num(1-(tol_value/current_diff_inv), 0) lgw_tmp = min(lgw , lgw_scaled) 
lgw_tmp_inv = min(lgw_inv, lgw_scaled_inv) lgw_mask_clamp = torch.clamp(lgw_mask, max=lgw_tmp) lgw_mask_clamp_inv = torch.clamp(lgw_mask_inv, max=lgw_tmp_inv) eps_row, eps_row_inv = get_guide_epsilon_substep(x_0, x_, y0, y0_inv, s_, row, rk_type, b, c) eps_[row][b][c] = eps_[row][b][c] + lgw_mask_clamp[b][c] * (eps_row - eps_[row][b][c]) + lgw_mask_clamp_inv[b][c] * (eps_row_inv - eps_[row][b][c]) elif guide_mode == "epsilon_projection": eps_row, eps_row_inv = get_guide_epsilon_substep(x_0, x_, y0, y0_inv, s_, row, rk_type) if extra_options_flag("eps_proj_v2", extra_options): eps_row_lerp_fg = eps_[row] + lgw_mask * (eps_row-eps_[row]) eps_row_lerp_bg = eps_[row] + lgw_mask_inv * (eps_row_inv-eps_[row]) eps_collinear_eps_lerp_fg = get_collinear(eps_[row], eps_row_lerp_fg) eps_lerp_ortho_eps_fg = get_orthogonal(eps_row_lerp_fg, eps_[row]) eps_collinear_eps_lerp_bg = get_collinear(eps_[row], eps_row_lerp_bg) eps_lerp_ortho_eps_bg = get_orthogonal(eps_row_lerp_bg, eps_[row]) eps_[row] = eps_[row] + lgw_mask * (eps_collinear_eps_lerp_fg + eps_lerp_ortho_eps_fg - eps_[row]) + lgw_mask_inv * (eps_collinear_eps_lerp_bg + eps_lerp_ortho_eps_bg - eps_[row]) elif extra_options_flag("eps_proj_v3", extra_options): eps_collinear_eps_lerp_fg = get_collinear(eps_[row], eps_row) eps_lerp_ortho_eps_fg = get_orthogonal(eps_row, eps_[row]) eps_collinear_eps_lerp_bg = get_collinear(eps_[row], eps_row_inv) eps_lerp_ortho_eps_bg = get_orthogonal(eps_row_inv, eps_[row]) eps_[row] = eps_[row] + lgw_mask * (eps_collinear_eps_lerp_fg + eps_lerp_ortho_eps_fg - eps_[row]) + lgw_mask_inv * (eps_collinear_eps_lerp_bg + eps_lerp_ortho_eps_bg - eps_[row]) elif extra_options_flag("eps_proj_v5", extra_options): eps2g_collin = get_collinear(eps_[row], eps_row) g2eps_ortho = get_orthogonal(eps_row, eps_[row]) g2eps_collin = get_collinear(eps_row, eps_[row]) eps2g_ortho = get_orthogonal(eps_[row], eps_row) eps2i_collin = get_collinear(eps_[row], eps_row_inv) i2eps_ortho = get_orthogonal(eps_row_inv, 
eps_[row]) i2eps_collin = get_collinear(eps_row_inv, eps_[row]) eps2i_ortho = get_orthogonal(eps_[row], eps_row_inv) #eps_[row] = (eps2g_collin+g2eps_ortho) + (g2eps_collin+eps2g_ortho) + (eps2i_collin+i2eps_ortho) + (i2eps_collin+eps2i_ortho) #eps_[row] = eps_[row] + lgw_mask * (eps2g_collin+g2eps_ortho) + (1-lgw_mask) * (g2eps_collin+eps2g_ortho) + lgw_mask_inv * (eps2i_collin+i2eps_ortho) + (1-lgw_mask_inv) * (i2eps_collin+eps2i_ortho) eps_[row] = lgw_mask * (eps2g_collin+g2eps_ortho) - lgw_mask * (g2eps_collin+eps2g_ortho) + lgw_mask_inv * (eps2i_collin+i2eps_ortho) - lgw_mask_inv * (i2eps_collin+eps2i_ortho) #eps_[row] = eps_[row] + lgw_mask * (eps_collinear_eps_lerp_fg + eps_lerp_ortho_eps_fg - eps_[row]) + lgw_mask_inv * (eps_collinear_eps_lerp_bg + eps_lerp_ortho_eps_bg - eps_[row]) elif extra_options_flag("eps_proj_v4a", extra_options): eps_row_lerp = eps_[row] + lgw_mask * (eps_row-eps_[row]) + lgw_mask_inv * (eps_row_inv-eps_[row]) eps_collinear_eps_lerp = get_collinear(eps_[row], eps_row_lerp) eps_lerp_ortho_eps = get_orthogonal(eps_row_lerp, eps_[row]) eps_[row] = (1 - torch.clamp(lgw_mask + lgw_mask_inv, max=1.0)) * eps_[row] + torch.clamp((lgw_mask + lgw_mask_inv), max=1.0) * (eps_collinear_eps_lerp + eps_lerp_ortho_eps) elif extra_options_flag("eps_proj_v4b", extra_options): eps_row_lerp = eps_[row] + lgw_mask * (eps_row-eps_[row]) + lgw_mask_inv * (eps_row_inv-eps_[row]) eps_collinear_eps_lerp = get_collinear(eps_[row], eps_row_lerp) eps_lerp_ortho_eps = get_orthogonal(eps_row_lerp, eps_[row]) eps_[row] = (1 - (lgw_mask + lgw_mask_inv)/2) * eps_[row] + ((lgw_mask + lgw_mask_inv)/2) * (eps_collinear_eps_lerp + eps_lerp_ortho_eps) elif extra_options_flag("eps_proj_v4c", extra_options): eps_row_lerp = eps_[row] + lgw_mask * (eps_row-eps_[row]) + lgw_mask_inv * (eps_row_inv-eps_[row]) eps_collinear_eps_lerp = get_collinear(eps_[row], eps_row_lerp) eps_lerp_ortho_eps = get_orthogonal(eps_row_lerp, eps_[row]) lgw_mask_sum = (lgw_mask + lgw_mask_inv) 
eps_[row] = (1 - (lgw_mask + lgw_mask_inv)/2) * eps_[row] + ((lgw_mask + lgw_mask_inv)/2) * (eps_collinear_eps_lerp + eps_lerp_ortho_eps) elif extra_options_flag("eps_proj_v4e", extra_options): eps_row_lerp = eps_[row] + lgw_mask * (eps_row-eps_[row]) + lgw_mask_inv * (eps_row_inv-eps_[row]) eps_collinear_eps_lerp = get_collinear(eps_[row], eps_row_lerp) eps_lerp_ortho_eps = get_orthogonal(eps_row_lerp, eps_[row]) eps_sum = eps_collinear_eps_lerp + eps_lerp_ortho_eps eps_[row] = eps_[row] + self.mask * (eps_sum - eps_[row]) + self.mask_inv * (eps_sum - eps_[row]) elif extra_options_flag("eps_proj_self1", extra_options): eps_row_lerp = eps_[row] + self.mask * (eps_row-eps_[row]) + self.mask_inv * (eps_row_inv-eps_[row]) eps_collinear_eps_lerp = get_collinear(eps_[row], eps_[row]) eps_lerp_ortho_eps = get_orthogonal(eps_[row], eps_[row]) eps_[row] = eps_collinear_eps_lerp + eps_lerp_ortho_eps elif extra_options_flag("eps_proj_v4z", extra_options): eps_row_lerp = eps_[row] + self.mask * (eps_row-eps_[row]) + self.mask_inv * (eps_row_inv-eps_[row]) eps_collinear_eps_lerp = get_collinear(eps_[row], eps_row_lerp) eps_lerp_ortho_eps = get_orthogonal(eps_row_lerp, eps_[row]) peak = max(lgw, lgw_inv) lgw_mask_sum = (lgw_mask + lgw_mask_inv) eps_sum = eps_collinear_eps_lerp + eps_lerp_ortho_eps #NOT FINISHED!!! 
#eps_[row] = eps_[row] + lgw_mask * (eps_sum - eps_[row]) + lgw_mask_inv * (eps_sum - eps_[row]) elif extra_options_flag("eps_proj_v5", extra_options): eps_row_lerp = eps_[row] + lgw_mask * (eps_row-eps_[row]) + lgw_mask_inv * (eps_row_inv-eps_[row]) eps_collinear_eps_lerp = get_collinear(eps_[row], eps_row_lerp) eps_lerp_ortho_eps = get_orthogonal(eps_row_lerp, eps_[row]) eps_[row] = ((lgw_mask + lgw_mask_inv)==0) * eps_[row] + ((lgw_mask + lgw_mask_inv)>0) * (eps_collinear_eps_lerp + eps_lerp_ortho_eps) elif extra_options_flag("eps_proj_v6", extra_options): eps_row_lerp = eps_[row] + lgw_mask * (eps_row-eps_[row]) + lgw_mask_inv * (eps_row_inv-eps_[row]) eps_collinear_eps_lerp = get_collinear(eps_[row], eps_row_lerp) eps_lerp_ortho_eps = get_orthogonal(eps_row_lerp, eps_[row]) eps_[row] = ((lgw_mask * lgw_mask_inv)==0) * eps_[row] + ((lgw_mask * lgw_mask_inv)>0) * (eps_collinear_eps_lerp + eps_lerp_ortho_eps) elif extra_options_flag("eps_proj_old_default", extra_options): eps_row_lerp = eps_[row] + lgw_mask * (eps_row-eps_[row]) + lgw_mask_inv * (eps_row_inv-eps_[row]) #eps_row_lerp = eps_[row] + lgw_mask * (eps_row-eps_[row]) + (1-lgw_mask) * (eps_row_inv-eps_[row]) eps_collinear_eps_lerp = get_collinear(eps_[row], eps_row_lerp) eps_lerp_ortho_eps = get_orthogonal(eps_row_lerp, eps_[row]) eps_[row] = eps_collinear_eps_lerp + eps_lerp_ortho_eps else: #elif extra_options_flag("eps_proj_v4d", extra_options): #if row > 0: #lgw_mask_factor = float(get_extra_options_kv("substep_lgw_mask_factor", "1.0", extra_options)) #lgw_mask_inv_factor = float(get_extra_options_kv("substep_lgw_mask_inv_factor", "1.0", extra_options)) lgw_mask_factor = 1 if extra_options_flag("substep_eps_proj_scaling", extra_options): lgw_mask_factor = 1/(row+1) if extra_options_flag("substep_eps_proj_factors", extra_options): #value_str = get_extra_options_list("substep_eps_proj_factors", "", extra_options) #float_list = [float(item.strip()) for item in value_str.split(',') if item.strip()] 
float_list = get_extra_options_list("substep_eps_proj_factors", "", extra_options, ret_type=float) lgw_mask_factor = float_list[row] eps_row_lerp = eps_[row] + self.mask * (eps_row-eps_[row]) + (1-self.mask) * (eps_row_inv-eps_[row]) eps_collinear_eps_lerp = get_collinear(eps_[row], eps_row_lerp) eps_lerp_ortho_eps = get_orthogonal(eps_row_lerp, eps_[row]) eps_sum = eps_collinear_eps_lerp + eps_lerp_ortho_eps eps_[row] = eps_[row] + lgw_mask_factor*lgw_mask * (eps_sum - eps_[row]) + lgw_mask_factor*lgw_mask_inv * (eps_sum - eps_[row]) elif extra_options_flag("disable_lgw_scaling", extra_options): eps_row, eps_row_inv = get_guide_epsilon_substep(x_0, x_, y0, y0_inv, s_, row, rk_type) eps_[row] = eps_[row] + lgw_mask * (eps_row - eps_[row]) + lgw_mask_inv * (eps_row_inv - eps_[row]) elif (lgw > 0 or lgw_inv > 0): # default old channelwise epsilon avg, avg_inv = 0, 0 for b, c in itertools.product(range(x_0.shape[0]), range(x_0.shape[1])): avg += torch.norm(data_[row][b][c] - y0 [b][c]) avg_inv += torch.norm(data_[row][b][c] - y0_inv[b][c]) avg /= x_0.shape[1] avg_inv /= x_0.shape[1] for b, c in itertools.product(range(x_0.shape[0]), range(x_0.shape[1])): ratio = torch.nan_to_num(torch.norm(data_[row][b][c] - y0 [b][c]) / avg, 0) ratio_inv = torch.nan_to_num(torch.norm(data_[row][b][c] - y0_inv[b][c]) / avg_inv, 0) eps_row, eps_row_inv = get_guide_epsilon_substep(x_0, x_, y0, y0_inv, s_, row, rk_type, b, c) eps_[row][b][c] = eps_[row][b][c] + ratio * lgw_mask[b][c] * (eps_row - eps_[row][b][c]) + ratio_inv * lgw_mask_inv[b][c] * (eps_row_inv - eps_[row][b][c]) temporal_smoothing = float(get_extra_options_kv("temporal_smoothing", "0.0", extra_options)) if temporal_smoothing > 0: eps_[row] = apply_temporal_smoothing(eps_[row], temporal_smoothing) elif (UNSAMPLE or guide_mode in {"resample", "unsample"}) and (lgw > 0 or lgw_inv > 0): cvf = rk.get_epsilon(x_0, x_[row+1], y0, sigma, s_[row], sigma_down, unsample_resample_scale, extra_options) if UNSAMPLE and sigma > 
sigma_next and latent_guide_inv is not None: cvf_inv = rk.get_epsilon(x_0, x_[row+1], y0_inv, sigma, s_[row], sigma_down, unsample_resample_scale, extra_options) else: cvf_inv = torch.zeros_like(cvf) tol_value = float(get_extra_options_kv("tol", "-1.0", extra_options)) if tol_value >= 0: for b, c in itertools.product(range(x_0.shape[0]), range(x_0.shape[1])): current_diff = torch.norm(data_[row][b][c] - y0 [b][c]) current_diff_inv = torch.norm(data_[row][b][c] - y0_inv[b][c]) lgw_scaled = torch.nan_to_num(1-(tol_value/current_diff), 0) lgw_scaled_inv = torch.nan_to_num(1-(tol_value/current_diff_inv), 0) lgw_tmp = min(lgw , lgw_scaled) lgw_tmp_inv = min(lgw_inv, lgw_scaled_inv) lgw_mask_clamp = torch.clamp(lgw_mask, max=lgw_tmp) lgw_mask_clamp_inv = torch.clamp(lgw_mask_inv, max=lgw_tmp_inv) eps_[row][b][c] = eps_[row][b][c] + lgw_mask_clamp[b][c] * (cvf[b][c] - eps_[row][b][c]) + lgw_mask_clamp_inv[b][c] * (cvf_inv[b][c] - eps_[row][b][c]) elif extra_options_flag("disable_lgw_scaling", extra_options): eps_[row] = eps_[row] + lgw_mask * (cvf - eps_[row]) + lgw_mask_inv * (cvf_inv - eps_[row]) else: avg, avg_inv = 0, 0 for b, c in itertools.product(range(x_0.shape[0]), range(x_0.shape[1])): avg += torch.norm(lgw_mask [b][c] * data_[row][b][c] - lgw_mask [b][c] * y0 [b][c]) avg_inv += torch.norm(lgw_mask_inv[b][c] * data_[row][b][c] - lgw_mask_inv[b][c] * y0_inv[b][c]) avg /= x_0.shape[1] avg_inv /= x_0.shape[1] for b, c in itertools.product(range(x_0.shape[0]), range(x_0.shape[1])): ratio = torch.nan_to_num(torch.norm(lgw_mask [b][c] * data_[row][b][c] - lgw_mask [b][c] * y0 [b][c]) / avg, 0) ratio_inv = torch.nan_to_num(torch.norm(lgw_mask_inv[b][c] * data_[row][b][c] - lgw_mask_inv[b][c] * y0_inv[b][c]) / avg_inv, 0) eps_[row][b][c] = eps_[row][b][c] + ratio * lgw_mask[b][c] * (cvf[b][c] - eps_[row][b][c]) + ratio_inv * lgw_mask_inv[b][c] * (cvf_inv[b][c] - eps_[row][b][c]) if extra_options_flag("substep_eps_ch_mean_std", extra_options): eps_[row] = 
@torch.no_grad()
def process_guides_poststep(self, x, denoised, eps, step, extra_options):
    """Apply post-step guidance to the state after a full sampler step.

    The denoised prediction is shifted toward the guides (``self.y0`` /
    ``self.y0_inv``) according to ``self.guide_mode`` and the step-weighted
    masks, then recombined with ``eps`` to produce the new ``x``.

    Args:
        x: Current state; returned unchanged when no post-step mode applies.
        denoised: Denoised prediction for this step.
        eps: Residual added back to the shifted prediction (``x = shifted + eps``).
        step: Step index used to select guide frame and masks.
        extra_options: Newline-separated option string (``mean_weight``,
            ``mean_tile``, ``poststep_*`` normalisation flags).

    Returns:
        The (possibly) guided state tensor.
    """
    x_orig = x.clone()
    mean_weight = float(get_extra_options_kv("mean_weight", "0.01", extra_options))

    y0 = self.y0
    if self.y0.shape[0] > 1:
        y0 = self.y0[min(step, self.y0.shape[0]-1)].unsqueeze(0)
    y0_inv = self.y0_inv

    lgw_mask = self.lgw_masks[step].clone()
    lgw_mask_inv = self.lgw_masks_inv[step].clone() if self.lgw_masks_inv is not None else None
    mask = self.mask  # needed for the boolean region selection below

    latent_guide = self.latent_guide
    latent_guide_inv = self.latent_guide_inv
    guide_mode = self.guide_mode
    UNSAMPLE = self.UNSAMPLE

    if self.guide_mode:
        data_norm = denoised - denoised.mean(dim=(-2,-1), keepdim=True)
        y0_norm = y0 - y0.mean(dim=(-2,-1), keepdim=True)
        y0_inv_norm = y0_inv - y0_inv.mean(dim=(-2,-1), keepdim=True)

        y0_cossim = get_cosine_similarity(data_norm*lgw_mask, y0_norm *lgw_mask)
        y0_cossim_inv = get_cosine_similarity(data_norm*lgw_mask_inv, y0_inv_norm*lgw_mask_inv)

        if y0_cossim < self.guide_cossim_cutoff_ or y0_cossim_inv < self.guide_bkg_cossim_cutoff_:
            # Disable only the guide whose similarity already met its cutoff.
            lgw_mask_cossim, lgw_mask_cossim_inv = lgw_mask, lgw_mask_inv
            if y0_cossim >= self.guide_cossim_cutoff_:
                lgw_mask_cossim = torch.zeros_like(lgw_mask)
            if y0_cossim_inv >= self.guide_bkg_cossim_cutoff_:
                lgw_mask_cossim_inv = torch.zeros_like(lgw_mask_inv)
            lgw_mask = lgw_mask_cossim
            lgw_mask_inv = lgw_mask_cossim_inv
        else:
            return x

    if guide_mode in {"epsilon_dynamic_mean_std", "epsilon_dynamic_mean", "epsilon_dynamic_std", "epsilon_dynamic_mean_from_bkg"}:

        # Only fully-on (==1) / fully-off (==0) mask pixels define the regions.
        denoised_masked = denoised * ((mask==1)*mask)
        denoised_masked_inv = denoised * ((mask==0)*(1-mask))

        d_shift, d_shift_inv = torch.zeros_like(x), torch.zeros_like(x)

        for b, c in itertools.product(range(x.shape[0]), range(x.shape[1])):
            denoised_mask = denoised[b][c][mask[b][c] == 1]
            denoised_mask_inv = denoised[b][c][mask[b][c] == 0]

            if guide_mode == "epsilon_dynamic_mean_std":
                d_shift[b][c] = (denoised_masked[b][c] - denoised_mask.mean()) / denoised_mask.std()
                d_shift[b][c] = (d_shift[b][c] * denoised_mask_inv.std()) + denoised_mask_inv.mean()

            elif guide_mode == "epsilon_dynamic_mean":
                d_shift[b][c] = denoised_masked[b][c] - denoised_mask.mean() + denoised_mask_inv.mean()
                d_shift_inv[b][c] = denoised_masked_inv[b][c] - denoised_mask_inv.mean() + denoised_mask.mean()

            elif guide_mode == "epsilon_dynamic_mean_from_bkg":
                d_shift[b][c] = denoised_masked[b][c] - denoised_mask.mean() + denoised_mask_inv.mean()

            elif guide_mode == "epsilon_dynamic_std":
                # This mode was accepted above but never handled, which left
                # `denoised_shifted` unbound. NOTE(review): implemented by
                # analogy with the mean_std variant (match the background std,
                # keep the masked region's own mean) - confirm intent.
                d_shift[b][c] = (denoised_masked[b][c] - denoised_mask.mean()) / denoised_mask.std()
                d_shift[b][c] = (d_shift[b][c] * denoised_mask_inv.std()) + denoised_mask.mean()

        if guide_mode in {"epsilon_dynamic_mean_std", "epsilon_dynamic_std", "epsilon_dynamic_mean_from_bkg"}:
            denoised_shifted = denoised + mean_weight * lgw_mask * (d_shift - denoised_masked)
        elif guide_mode == "epsilon_dynamic_mean":
            denoised_shifted = denoised + mean_weight * lgw_mask * (d_shift - denoised_masked) + mean_weight * lgw_mask_inv * (d_shift_inv - denoised_masked_inv)

        x = denoised_shifted + eps

    # `== False` kept on purpose: a None flag must not enable this branch.
    if UNSAMPLE == False and (latent_guide is not None or latent_guide_inv is not None) and guide_mode in ("hard_light", "blend", "blend_projection", "mean_std", "mean", "mean_tiled", "std"):
        if guide_mode == "hard_light":
            d_shift, d_shift_inv = hard_light_blend(y0, denoised), hard_light_blend(y0_inv, denoised)
        elif guide_mode == "blend":
            d_shift, d_shift_inv = y0, y0_inv

        elif guide_mode == "blend_projection":
            d_lerp = denoised + lgw_mask * (y0-denoised) + lgw_mask_inv * (y0_inv-denoised)

            d_collinear_d_lerp = get_collinear(denoised, d_lerp)
            d_lerp_ortho_d = get_orthogonal(d_lerp, denoised)

            denoised_shifted = d_collinear_d_lerp + d_lerp_ortho_d
            x = denoised_shifted + eps
            # Projection mode bypasses the poststep_* normalisation flags.
            return x

        elif guide_mode == "mean_std":
            d_shift, d_shift_inv = normalize_latent([denoised, denoised], [y0, y0_inv])
        elif guide_mode == "mean":
            d_shift, d_shift_inv = normalize_latent([denoised, denoised], [y0, y0_inv], std=False)
        elif guide_mode == "std":
            d_shift, d_shift_inv = normalize_latent([denoised, denoised], [y0, y0_inv], mean=False)
        elif guide_mode == "mean_tiled":
            mean_tile_size = int(get_extra_options_kv("mean_tile", "8", extra_options))
            y0_tiled = rearrange(y0, "b c (h t1) (w t2) -> (t1 t2) b c h w", t1=mean_tile_size, t2=mean_tile_size)
            y0_inv_tiled = rearrange(y0_inv, "b c (h t1) (w t2) -> (t1 t2) b c h w", t1=mean_tile_size, t2=mean_tile_size)
            denoised_tiled = rearrange(denoised, "b c (h t1) (w t2) -> (t1 t2) b c h w", t1=mean_tile_size, t2=mean_tile_size)
            d_shift_tiled, d_shift_inv_tiled = torch.zeros_like(y0_tiled), torch.zeros_like(y0_tiled)
            for i in range(y0_tiled.shape[0]):
                d_shift_tiled[i], d_shift_inv_tiled[i] = normalize_latent([denoised_tiled[i], denoised_tiled[i]], [y0_tiled[i], y0_inv_tiled[i]], std=False)
            d_shift = rearrange(d_shift_tiled, "(t1 t2) b c h w -> b c (h t1) (w t2)", t1=mean_tile_size, t2=mean_tile_size)
            d_shift_inv = rearrange(d_shift_inv_tiled, "(t1 t2) b c h w -> b c (h t1) (w t2)", t1=mean_tile_size, t2=mean_tile_size)

        if guide_mode in ("hard_light", "blend", "mean_std", "mean", "mean_tiled", "std"):
            if latent_guide_inv is None:
                denoised_shifted = denoised + lgw_mask * (d_shift - denoised)
            else:
                denoised_shifted = denoised + lgw_mask * (d_shift - denoised) + lgw_mask_inv * (d_shift_inv - denoised)

            # Optional re-normalisation of the shifted prediction.
            if extra_options_flag("poststep_denoised_ch_mean_std", extra_options):
                denoised_shifted = normalize_latent(denoised_shifted, denoised)
            if extra_options_flag("poststep_denoised_ch_mean", extra_options):
                denoised_shifted = normalize_latent(denoised_shifted, denoised, std=False)
            if extra_options_flag("poststep_denoised_ch_std", extra_options):
                denoised_shifted = normalize_latent(denoised_shifted, denoised, mean=False)
            if extra_options_flag("poststep_denoised_mean_std", extra_options):
                denoised_shifted = normalize_latent(denoised_shifted, denoised, channelwise=False)
            if extra_options_flag("poststep_denoised_mean", extra_options):
                denoised_shifted = normalize_latent(denoised_shifted, denoised, std=False, channelwise=False)
            if extra_options_flag("poststep_denoised_std", extra_options):
                denoised_shifted = normalize_latent(denoised_shifted, denoised, mean=False, channelwise=False)

            x = denoised_shifted + eps

            # Optional re-normalisation of the new state against the incoming one.
            if extra_options_flag("poststep_x_ch_mean_std", extra_options):
                x = normalize_latent(x, x_orig)
            if extra_options_flag("poststep_x_ch_mean", extra_options):
                x = normalize_latent(x, x_orig, std=False)
            if extra_options_flag("poststep_x_ch_std", extra_options):
                x = normalize_latent(x, x_orig, mean=False)
            if extra_options_flag("poststep_x_mean_std", extra_options):
                x = normalize_latent(x, x_orig, channelwise=False)
            if extra_options_flag("poststep_x_mean", extra_options):
                x = normalize_latent(x, x_orig, std=False, channelwise=False)
            if extra_options_flag("poststep_x_std", extra_options):
                x = normalize_latent(x, x_orig, mean=False, channelwise=False)
    return x
def apply_frame_weights(mask, frame_weights):
    """Scale each frame of ``mask`` in place by its entry in ``frame_weights``.

    The number of frames is read from ``mask.shape[2]`` and the slice is taken
    on the third-from-last axis, so the two agree for 5-D masks.

    Args:
        mask: Tensor to modify in place, or ``None`` (nothing to weight).
        frame_weights: Indexable sequence with at least ``mask.shape[2]``
            weights, or ``None`` to leave the mask untouched.

    Returns:
        ``None``; ``mask`` is updated in place.

    Raises:
        IndexError: If ``frame_weights`` has fewer entries than frames.
    """
    # Callers pass an optional inverse mask, so a missing mask is a no-op
    # rather than an AttributeError.
    if mask is None or frame_weights is None:
        return
    for f in range(mask.shape[2]):
        mask[..., f:f+1, :, :] *= frame_weights[f]
def prepare_mask(x, mask, LGW_MASK_RESCALE_MIN) -> Tuple[torch.Tensor, bool]:
    """Resize and broadcast a spatial mask to the shape of ``x``.

    Args:
        x: Reference tensor; its last two dims are the target height/width and
            its dims between batch and height/width are tiled into the mask.
        mask: ``(batch, H, W)`` mask, a single ``(H, W)`` mask, or ``None``.
            Non-floating masks (bool/int) are converted to float32 first,
            because bilinear interpolation requires a floating dtype.
        LGW_MASK_RESCALE_MIN: Flag passed through; forced to ``False`` when no
            mask is given.

    Returns:
        ``(mask, LGW_MASK_RESCALE_MIN)`` where ``mask`` has ``x``'s dtype and
        device. With ``mask=None`` the mask is all ones, shaped like ``x``.
    """
    if mask is None:
        return torch.ones_like(x), False

    if mask.dim() == 2:
        mask = mask.unsqueeze(0)  # single mask without a batch axis
    if not mask.is_floating_point():
        mask = mask.to(torch.float32)

    target_size = (x.shape[-2], x.shape[-1])
    spatial_mask = F.interpolate(mask.unsqueeze(1), size=target_size, mode='bilinear', align_corners=False)

    # Insert singleton axes (e.g. frames) until the rank matches x.
    while spatial_mask.dim() < x.dim():
        spatial_mask = spatial_mask.unsqueeze(2)

    # Keep the mask's own batch size and spatial size; tile everything between.
    repeat_shape = [1, *x.shape[1:-2], 1, 1]
    # repeat() (not expand()) so the result owns its memory and can be edited in place.
    mask = spatial_mask.repeat(*repeat_shape).to(x.dtype).to(x.device)
    return mask, LGW_MASK_RESCALE_MIN


def prepare_weighted_masks(mask, mask_inv, lgw_, lgw_inv_, latent_guide, latent_guide_inv, LGW_MASK_RESCALE_MIN):
    """Combine the spatial masks with the guide weights of one step.

    With ``LGW_MASK_RESCALE_MIN`` the masks are rescaled so that their minimum
    becomes the guide weight (``mask * (1 - w) + w``). Otherwise the masks are
    multiplied by the weights, and a missing guide yields an all-zero mask.

    Returns:
        ``(lgw_mask, lgw_mask_inv)``.
    """
    if LGW_MASK_RESCALE_MIN:
        lgw_mask = mask * (1-lgw_) + lgw_
        lgw_mask_inv = (1-mask) * (1-lgw_inv_) + lgw_inv_
        return lgw_mask, lgw_mask_inv

    lgw_mask = mask * lgw_ if latent_guide is not None else torch.zeros_like(mask)

    if latent_guide_inv is None:
        lgw_mask_inv = torch.zeros_like(mask)
    elif mask_inv is not None:
        # Never exceed what the explicit inverse mask leaves available.
        lgw_mask_inv = torch.minimum(1-mask_inv, (1-mask) * lgw_inv_)
    else:
        lgw_mask_inv = (1-mask) * lgw_inv_
    return lgw_mask, lgw_mask_inv


def apply_temporal_smoothing(tensor, temporal_smoothing):
    """Smooth a 5-D ``(b, c, f, h, w)`` tensor along its frame axis.

    A fixed 5-tap kernel is blended with the identity by
    ``temporal_smoothing`` and renormalised to sum to one. Tensors that are
    not 5-D, or a non-positive strength, are returned unchanged.
    """
    if temporal_smoothing <= 0 or tensor.dim() != 5:
        return tensor

    kernel_size = 5
    padding = kernel_size // 2
    temporal_kernel = torch.tensor(
        [0.1, 0.2, 0.4, 0.2, 0.1],
        device=tensor.device, dtype=tensor.dtype
    ) * temporal_smoothing
    temporal_kernel[kernel_size//2] += (1 - temporal_smoothing)
    temporal_kernel = temporal_kernel / temporal_kernel.sum()

    # reshape for conv1d: every (b, c, h, w) position becomes one 1-D signal over frames
    b, c, f, h, w = tensor.shape
    data_flat = tensor.permute(0, 1, 3, 4, 2).reshape(-1, f)

    # apply smoothing (zero padding at the clip boundaries)
    data_smooth = F.conv1d(
        data_flat.unsqueeze(1),
        temporal_kernel.view(1, 1, -1),
        padding=padding
    ).squeeze(1)

    return data_smooth.view(b, c, h, w, f).permute(0, 1, 4, 2, 3)
def _guide_index(b, c):
    """Build the index tuple selecting batch ``b`` and, optionally, channel ``c``."""
    if b is None:
        return ()
    if c is None:
        return (b,)
    return (b, c)


def get_guide_epsilon_substep(x_0, x_, y0, y0_inv, s_, row, rk_type, b=None, c=None):
    """Return the epsilons that would steer substep ``row`` toward both guides.

    For exponential RK types the epsilon is ``guide - x_0``; otherwise it is
    ``(x_[row+1] - guide) / s_[row]``.

    Args:
        x_0: State at the start of the step.
        x_: Per-substep states; ``x_[row+1]`` is read for non-exponential types.
        y0, y0_inv: Guide and inverse guide.
        s_: Per-substep sigmas.
        row: Substep index.
        rk_type: RK scheme name passed to ``RK_Method.is_exponential``.
        b, c: Optional batch / channel indices restricting the computation.

    Returns:
        ``(eps_row, eps_row_inv)``.
    """
    index = _guide_index(b, c)

    if RK_Method.is_exponential(rk_type):
        eps_row = y0 [index] - x_0[index]
        eps_row_inv = y0_inv[index] - x_0[index]
    else:
        # Divide by the sigma itself. The previous `s_[row] * ones([batch])`
        # divisor had shape [B] and broadcast against the last spatial axis,
        # which raises for batch sizes > 1.
        eps_row = (x_[row+1][index] - y0 [index]) / s_[row]
        eps_row_inv = (x_[row+1][index] - y0_inv[index]) / s_[row]

    return eps_row, eps_row_inv


def get_guide_epsilon(x_0, x_, y0, sigma, rk_type, b=None, c=None):
    """Return the epsilon that would steer ``x_`` toward the guide ``y0``.

    For exponential RK types the epsilon is ``y0 - x_0``; otherwise it is
    ``(x_ - y0) / sigma``. ``b`` / ``c`` optionally restrict the computation
    to one batch item / channel.
    """
    index = _guide_index(b, c)

    if RK_Method.is_exponential(rk_type):
        eps = y0 [index] - x_0[index]
    else:
        # See get_guide_epsilon_substep: a [B]-shaped divisor breaks batches > 1.
        eps = (x_[index] - y0 [index]) / sigma

    return eps
def _select_cossim_indices(cossim, cossim_mode, step):
    """Pick, per tile, which candidate to keep from a ``[n_candidates, n_tiles]`` score table.

    Modes joined by an underscore (e.g. ``forward_reverse``) alternate: the
    first strategy is used on even ``step`` values, the second on odd ones.
    Any other string is parsed as a float target and the candidate whose
    similarity is closest to it is chosen.

    Raises:
        ValueError: If ``cossim_mode`` is neither a known mode nor a number.
    """
    def forward():
        return cossim.argmax(dim=0)

    def reverse():
        return cossim.argmin(dim=0)

    def orthogonal():
        return torch.abs(cossim).argmin(dim=0)

    def smallest_positive():
        # Non-positive scores are pushed to +inf so they can never win.
        return torch.where(cossim > 0, cossim, torch.full_like(cossim, float('inf'))).argmin(dim=0)

    def largest_negative():
        return torch.where(cossim < 0, cossim, torch.full_like(cossim, float('-inf'))).argmax(dim=0)

    # mode -> (strategy on even steps, strategy on odd steps)
    strategies = {
        "forward":            (forward, forward),
        "reverse":            (reverse, reverse),
        "orthogonal":         (orthogonal, orthogonal),
        "orthogonal_pos":     (smallest_positive, smallest_positive),
        "orthogonal_neg":     (largest_negative, largest_negative),
        "orthogonal_posneg":  (smallest_positive, largest_negative),
        "orthogonal_negpos":  (largest_negative, smallest_positive),
        "forward_reverse":    (forward, reverse),
        "reverse_forward":    (reverse, forward),
        "orthogonal_reverse": (orthogonal, reverse),
        "reverse_orthogonal": (reverse, orthogonal),
    }
    if cossim_mode in strategies:
        return strategies[cossim_mode][step % 2]()

    try:
        target_value = float(cossim_mode)
    except (TypeError, ValueError) as err:
        raise ValueError(f"Unknown cossim_mode: {cossim_mode}") from err
    return torch.abs(cossim - target_value).argmin(dim=0)


def _tile_first_batch(t, tile_size):
    """Split ``t`` into ``tile_size x tile_size`` interleaved tiles; keep batch item 0 -> [n_tiles, c, h, w]."""
    return rearrange(t, "b c (h t1) (w t2) -> b (t1 t2) c h w", t1=tile_size, t2=tile_size)[0]


def _assemble_selected_tiles(x_tiled_stack, indices, tile_size):
    """Take tile ``i`` from candidate ``indices[i]`` and stitch the tiles back into one image."""
    tile_ids = torch.arange(indices.size(0), device=indices.device)
    x_tiled_out = x_tiled_stack[indices, tile_ids].unsqueeze(0)  # [1, n_tiles, c, h, w]
    return rearrange(x_tiled_out, "b (t1 t2) c h w -> b c (h t1) (w t2)", t1=tile_size, t2=tile_size)


@torch.no_grad()
def noise_cossim_guide_tiled(x_list, guide, cossim_mode="forward", tile_size=2, step=0):
    """Build a latent tile-by-tile from candidates, scored against ``guide``.

    Each candidate in ``x_list`` is tiled; per tile, the candidate whose tile
    has the cosine similarity to the guide tile requested by ``cossim_mode``
    is kept. Only batch item 0 of every tensor is used.

    Returns:
        A ``[1, c, H, W]`` tensor assembled from the selected tiles.
    """
    guide_tiles = _tile_first_batch(guide, tile_size)
    x_tiled_stack = torch.stack([_tile_first_batch(x, tile_size) for x in x_list])  # [n_x, n_tiles, c, h, w]

    guide_flat = guide_tiles.reshape(guide_tiles.shape[0], -1).unsqueeze(0)                 # [1, n_tiles, c*h*w]
    x_flat = x_tiled_stack.reshape(x_tiled_stack.size(0), x_tiled_stack.size(1), -1)        # [n_x, n_tiles, c*h*w]

    cossim_tmp_all = F.cosine_similarity(x_flat, guide_flat, dim=-1)  # [n_x, n_tiles]
    indices = _select_cossim_indices(cossim_tmp_all, cossim_mode, step)
    return _assemble_selected_tiles(x_tiled_stack, indices, tile_size)


@torch.no_grad()
def noise_cossim_eps_tiled(x_list, eps, noise_list, cossim_mode="forward", tile_size=2, step=0):
    """Build a latent tile-by-tile from candidates, scoring their noise against ``eps``.

    ``noise_list[i]`` is the noise belonging to candidate ``x_list[i]``. Per
    tile, the candidate whose noise tile has the requested cosine similarity
    to the ``eps`` tile is kept. Only batch item 0 of every tensor is used.

    Returns:
        A ``[1, c, H, W]`` tensor assembled from the selected tiles.
    """
    eps_tiles = _tile_first_batch(eps, tile_size)
    noise_tiled_stack = torch.stack([_tile_first_batch(noise, tile_size) for noise in noise_list])  # [n_x, n_tiles, c, h, w]

    eps_flat = eps_tiles.reshape(eps_tiles.shape[0], -1).unsqueeze(0)                                # [1, n_tiles, c*h*w]
    noise_flat = noise_tiled_stack.reshape(noise_tiled_stack.size(0), noise_tiled_stack.size(1), -1) # [n_x, n_tiles, c*h*w]

    cossim_tmp_all = F.cosine_similarity(noise_flat, eps_flat, dim=-1)  # [n_x, n_tiles]
    indices = _select_cossim_indices(cossim_tmp_all, cossim_mode, step)

    x_tiled_stack = torch.stack([_tile_first_batch(x, tile_size) for x in x_list])  # [n_x, n_tiles, c, h, w]
    return _assemble_selected_tiles(x_tiled_stack, indices, tile_size)


@torch.no_grad()
def noise_cossim_guide_eps_tiled(x_0, x_list, y0, noise_list, cossim_mode="forward", tile_size=2, step=0, sigma=None, rk_type=None):
    """Build a latent tile-by-tile, scoring each candidate's noise against its own guide residual.

    For every candidate ``x`` the reference is ``x - y0``; per tile, the
    candidate whose noise tile has the requested cosine similarity to that
    reference is kept. Only batch item 0 of every tensor is used.
    ``x_0``, ``sigma`` and ``rk_type`` are accepted for interface
    compatibility and are not read.

    Returns:
        A ``[1, c, H, W]`` tensor assembled from the selected tiles.
    """
    x_tiled_stack = torch.stack([_tile_first_batch(x, tile_size) for x in x_list])               # [n_x, n_tiles, c, h, w]
    eps_guide_stack = torch.stack([_tile_first_batch(x - y0, tile_size) for x in x_list])        # [n_x, n_tiles, c, h, w]
    noise_tiled_stack = torch.stack([_tile_first_batch(noise, tile_size) for noise in noise_list])  # [n_x, n_tiles, c, h, w]

    noise_flat = noise_tiled_stack.reshape(noise_tiled_stack.size(0), noise_tiled_stack.size(1), -1)   # [n_x, n_tiles, c*h*w]
    eps_guide_flat = eps_guide_stack.reshape(eps_guide_stack.size(0), eps_guide_stack.size(1), -1)     # [n_x, n_tiles, c*h*w]

    cossim_tmp_all = F.cosine_similarity(noise_flat, eps_guide_flat, dim=-1)  # [n_x, n_tiles]
    # Free the large intermediates before the gather allocates its output.
    del noise_tiled_stack, noise_flat, eps_guide_stack, eps_guide_flat

    indices = _select_cossim_indices(cossim_tmp_all, cossim_mode, step)
    return _assemble_selected_tiles(x_tiled_stack, indices, tile_size)
def _project_onto(x, y):
    """Project each batch item of ``x`` (flattened) onto the matching item of ``y``.

    Returns ``(x_flat, x_proj_y)``, both shaped ``[batch, -1]``.
    """
    # reshape (not view) so non-contiguous inputs, e.g. permuted tensors, work.
    x_flat = x.reshape(x.size(0), -1)
    y_flat = y.reshape(y.size(0), -1)

    y_norm = y_flat.norm(dim=-1, keepdim=True)
    # A zero reference has no direction: divide by 1 so its projection is 0
    # instead of NaN.
    y_unit = y_flat / y_norm.masked_fill(y_norm == 0, 1)

    x_proj_y = torch.sum(x_flat * y_unit, dim=-1, keepdim=True) * y_unit
    return x_flat, x_proj_y


def get_collinear(x, y):
    """Return the component of ``x`` that is collinear with ``y``.

    Both tensors are flattened per batch item (dim 0). The result has the
    shape of ``x``; it is all zeros for batch items where ``y`` is zero.
    """
    _, x_proj_y = _project_onto(x, y)
    return x_proj_y.reshape(x.shape)


def get_orthogonal(x, y):
    """Return the component of ``x`` that is orthogonal to ``y``.

    Both tensors are flattened per batch item (dim 0). The result has the
    shape of ``x``; it equals ``x`` for batch items where ``y`` is zero.
    """
    x_flat, x_proj_y = _project_onto(x, y)
    return (x_flat - x_proj_y).reshape(x.shape)
def gram_schmidt_channels_optimized(A, *refs):
    """Remove from each channel of ``A`` its projection onto each reference.

    ``A`` and every reference are viewed as ``(batch, channels, -1)``.  For
    each reference in turn, the per-channel projection of ``A`` onto that
    reference is subtracted.  References are processed sequentially, so if
    they are not mutually orthogonal a single pass may leave a residual
    component along an earlier reference.

    Args:
        A: Tensor of rank >= 3 laid out as ``(batch, channels, ...)``.  It is
            updated IN PLACE (the subtraction is applied through a view), so
            pass a clone if the original must be preserved.  It must be
            viewable as ``(batch, channels, -1)`` without a copy.
        *refs: Reference tensors with the same number of elements per
            ``(batch, channel)`` slice as ``A``.  They are not modified.

    Returns:
        ``A`` itself (same storage), reshaped to its original shape.

    Raises:
        ValueError: If ``A`` has fewer than three dimensions.
    """
    if A.dim() < 3:
        raise ValueError(
            f"Expected a tensor laid out as (batch, channels, ...), got shape {tuple(A.shape)}"
        )

    # Only the two leading sizes are needed, so any trailing rank is accepted.
    b, c = A.shape[:2]

    # view() on purpose: the in-place subtraction below must write through to A.
    A_flat = A.view(b, c, -1)

    for ref in refs:
        ref_flat = ref.reshape(b, c, -1)
        ref_norm = ref_flat.norm(dim=-1, keepdim=True)
        # A zero reference channel has no direction; divide by 1 so that its
        # projection is zero instead of NaN.
        ref_unit = ref_flat / torch.where(ref_norm > 0, ref_norm, torch.ones_like(ref_norm))

        proj_coeff = torch.sum(A_flat * ref_unit, dim=-1, keepdim=True)
        A_flat -= proj_coeff * ref_unit

    return A_flat.view_as(A)
def handle_tiled_etc_noise_steps(x_0, x, x_prenoise, x_init, eps, denoised, y0, y0_inv, step,
                                 rk_type, rk, sigma_up, sigma, sigma_next, alpha_ratio, s_noise, noise_mode,
                                 SDE_NOISE_EXTERNAL, sde_noise_t,
                                 NOISE_COSSIM_SOURCE, NOISE_COSSIM_MODE, noise_cossim_tile_size, noise_cossim_iterations,
                                 extra_options):
    """Noise ``x`` several times and keep the candidate picked by a cosine-similarity rule.

    ``noise_cossim_iterations`` candidates are produced with
    ``rk.add_noise_post``.  Depending on ``NOISE_COSSIM_SOURCE`` either

    * a tiled helper (``noise_cossim_eps_tiled``, ``noise_cossim_guide_eps_tiled``
      or ``noise_cossim_guide_tiled``, defined elsewhere in this module) assembles
      the result from the candidates, or
    * one whole candidate is chosen by comparing a per-candidate cosine
      similarity according to ``NOISE_COSSIM_MODE`` ("forward" = largest,
      "reverse" = smallest, "orthogonal" = smallest absolute value; any other
      mode falls back to the first candidate).

    Options read from ``extra_options``:
        noise_cossim_end_step: after this step the ``noise_cossim_takeover_*``
            options (source, mode, tile, iterations) replace the arguments.
        noise_cossim_start_step: before this step the first candidate is used.
        noise_noise_zscore_norm / noise_eps_zscore_norm: z-score the candidate
            noise / ``eps`` before the similarity is measured.

    Returns:
        The selected (or tile-assembled) noised tensor.  If no branch selects a
        candidate, ``x`` is returned as passed in.
    """
    x_tmp, cossim_tmp, noise_tmp_list = [], [], []

    if step > int(get_extra_options_kv("noise_cossim_end_step", "10000", extra_options)):
        NOISE_COSSIM_SOURCE = get_extra_options_kv("noise_cossim_takeover_source", "eps", extra_options)
        NOISE_COSSIM_MODE = get_extra_options_kv("noise_cossim_takeover_mode", "forward", extra_options)
        noise_cossim_tile_size = int(get_extra_options_kv("noise_cossim_takeover_tile", str(noise_cossim_tile_size), extra_options))
        noise_cossim_iterations = int(get_extra_options_kv("noise_cossim_takeover_iterations", str(noise_cossim_iterations), extra_options))

    for i in range(noise_cossim_iterations):
        # y0, lgw, sigma_down are currently unused
        x_tmp.append(rk.add_noise_post(x, sigma_up, sigma, sigma_next, alpha_ratio, s_noise, noise_mode, SDE_NOISE_EXTERNAL, sde_noise_t))
        # The difference to the un-noised input is what gets compared below.
        # Note that add_noise_post may also rescale x by alpha_ratio.
        noise_tmp = x_tmp[i] - x

        if extra_options_flag("noise_noise_zscore_norm", extra_options):
            noise_tmp = (noise_tmp - noise_tmp.mean()) / noise_tmp.std()
        if extra_options_flag("noise_eps_zscore_norm", extra_options):
            # Rebinds the local name only; re-applied on every pass as in the
            # original code.
            eps = (eps - eps.mean()) / eps.std()

        if NOISE_COSSIM_SOURCE in ("eps_tiled", "guide_epsilon_tiled", "guide_bkg_epsilon_tiled", "iig_tiled"):
            noise_tmp_list.append(noise_tmp)

        # Sources are mutually exclusive strings, so a single chain is
        # equivalent to the original separate "eps" test.
        if NOISE_COSSIM_SOURCE == "eps":
            cossim_tmp.append(get_cosine_similarity(eps, noise_tmp))
        elif NOISE_COSSIM_SOURCE == "eps_ch":
            # Sum of the per-channel similarities of the first batch item.
            cossim_total = torch.zeros_like(eps[0][0][0][0])
            for ch in range(eps.shape[1]):
                cossim_total += get_cosine_similarity(eps[0][ch], noise_tmp[0][ch])
            cossim_tmp.append(cossim_total)
        elif NOISE_COSSIM_SOURCE == "data":
            cossim_tmp.append(get_cosine_similarity(denoised, noise_tmp))
        elif NOISE_COSSIM_SOURCE == "latent":
            cossim_tmp.append(get_cosine_similarity(x_prenoise, noise_tmp))
        elif NOISE_COSSIM_SOURCE == "x_prenoise":
            cossim_tmp.append(get_cosine_similarity(x_prenoise, x_tmp[i]))
        elif NOISE_COSSIM_SOURCE == "x":
            cossim_tmp.append(get_cosine_similarity(x, x_tmp[i]))
        elif NOISE_COSSIM_SOURCE == "x_data":
            cossim_tmp.append(get_cosine_similarity(denoised, x_tmp[i]))
        elif NOISE_COSSIM_SOURCE == "x_init_vs_noise":
            cossim_tmp.append(get_cosine_similarity(x_init, noise_tmp))
        elif NOISE_COSSIM_SOURCE == "mom":
            cossim_tmp.append(get_cosine_similarity(denoised, x + sigma_next*noise_tmp))
        elif NOISE_COSSIM_SOURCE == "guide":
            cossim_tmp.append(get_cosine_similarity(y0, x_tmp[i]))
        elif NOISE_COSSIM_SOURCE == "guide_bkg":
            cossim_tmp.append(get_cosine_similarity(y0_inv, x_tmp[i]))

    if step < int(get_extra_options_kv("noise_cossim_start_step", "0", extra_options)):
        x = x_tmp[0]

    elif NOISE_COSSIM_SOURCE == "eps_tiled":
        x = noise_cossim_eps_tiled(x_tmp, eps, noise_tmp_list, cossim_mode=NOISE_COSSIM_MODE, tile_size=noise_cossim_tile_size, step=step)
    elif NOISE_COSSIM_SOURCE == "guide_epsilon_tiled":
        x = noise_cossim_guide_eps_tiled(x_0, x_tmp, y0, noise_tmp_list, cossim_mode=NOISE_COSSIM_MODE, tile_size=noise_cossim_tile_size, step=step, sigma=sigma, rk_type=rk_type)
    elif NOISE_COSSIM_SOURCE == "guide_bkg_epsilon_tiled":
        x = noise_cossim_guide_eps_tiled(x_0, x_tmp, y0_inv, noise_tmp_list, cossim_mode=NOISE_COSSIM_MODE, tile_size=noise_cossim_tile_size, step=step, sigma=sigma, rk_type=rk_type)
    elif NOISE_COSSIM_SOURCE == "guide_tiled":
        x = noise_cossim_guide_tiled(x_tmp, y0, cossim_mode=NOISE_COSSIM_MODE, tile_size=noise_cossim_tile_size, step=step)
    elif NOISE_COSSIM_SOURCE == "guide_bkg_tiled":
        # Fix: forward `step` like the "guide_tiled" branch does, so that
        # step-alternating modes see the real step for the background guide.
        x = noise_cossim_guide_tiled(x_tmp, y0_inv, cossim_mode=NOISE_COSSIM_MODE, tile_size=noise_cossim_tile_size, step=step)

    else:
        # Whole-candidate selection: take the first candidate that attains the
        # extreme value required by the mode.
        for i in range(len(x_tmp)):
            if (NOISE_COSSIM_MODE == "forward") and (cossim_tmp[i] == max(cossim_tmp)):
                x = x_tmp[i]
                break
            elif (NOISE_COSSIM_MODE == "reverse") and (cossim_tmp[i] == min(cossim_tmp)):
                x = x_tmp[i]
                break
            elif (NOISE_COSSIM_MODE == "orthogonal") and (abs(cossim_tmp[i]) == min(abs(val) for val in cossim_tmp)):
                x = x_tmp[i]
                break
            elif NOISE_COSSIM_MODE not in ("forward", "reverse", "orthogonal"):
                # Unknown mode: no ranking, just use the first candidate.
                x = x_tmp[0]
                break

    return x
def model_epsilon(self, x, sigma, **extra_args):
    """Run the model and return ``(eps, denoised)`` for input ``x`` at ``sigma``.

    The model output is treated as the denoised prediction, passed through
    ``self.calc_cfg_channelwise``, and converted to
    ``eps = (x - denoised) / sigma``.

    Args:
        x: Latent tensor whose first dimension is the batch; any rank >= 1.
        sigma: Noise level; multiplied by a ones vector of batch length before
            being handed to the model.
        **extra_args: Forwarded unchanged to ``self.model``.

    Returns:
        Tuple ``(eps, denoised)``.
    """
    s_in = x.new_ones([x.shape[0]])
    denoised = self.model(x, sigma * s_in, **extra_args)
    denoised = self.calc_cfg_channelwise(denoised)

    # Original author's note: THIS WORKS ONLY WITH THE MODEL SAMPLING PATCH.
    # Reshape the per-sample sigma to (B, 1, ..., 1) with as many singleton
    # dims as x has trailing dims. The previous hard-coded (B, 1, 1, 1) only
    # broadcast correctly for 4-D latents and failed for 5-D latents with
    # batch > 1.
    sigma_b = (sigma * s_in).view(x.shape[0], *([1] * (x.dim() - 1)))
    eps = (x - denoised) / sigma_b

    return eps, denoised
alpha_ratio, s_noise, SDE_NOISE_EXTERNAL, sde_noise_t) else: return x def add_noise_post(self, x, sigma_up, sigma, sigma_next, alpha_ratio, s_noise, noise_mode, SDE_NOISE_EXTERNAL=False, sde_noise_t=None): if isinstance(self.model_sampling, comfy.model_sampling.CONST) == True or (isinstance(self.model_sampling, comfy.model_sampling.CONST) == False and noise_mode != "hard"): return self.add_noise(x, sigma_up, sigma, sigma_next, alpha_ratio, s_noise, SDE_NOISE_EXTERNAL, sde_noise_t) else: return x def add_noise(self, x, sigma_up, sigma, sigma_next, alpha_ratio, s_noise, SDE_NOISE_EXTERNAL, sde_noise_t): if sigma_next > 0.0: noise = self.noise_sampler(sigma=sigma, sigma_next=sigma_next) noise = torch.nan_to_num((noise - noise.mean()) / noise.std(), 0.0) if SDE_NOISE_EXTERNAL: noise = (1-s_noise) * noise + s_noise * sde_noise_t return alpha_ratio * x + noise * sigma_up * s_noise else: return x def set_coeff(self, rk_type, h, c1=0.0, c2=0.5, c3=1.0, stepcount=0, sigmas=None, sigma=None, sigma_down=None, extra_options=None): if rk_type == "default": return sigma = sigmas[stepcount] sigma_next = sigmas[stepcount+1] a, b, ci, multistep_stages, FSAL = get_rk_methods(rk_type, h, c1, c2, c3, self.h_prev, self.h_prev2, stepcount, sigmas, sigma, sigma_next, sigma_down, extra_options) self.multistep_stages = multistep_stages self.a = torch.tensor(a, dtype=h.dtype, device=h.device) self.a = self.a.view(*self.a.shape, 1, 1, 1, 1, 1) self.b = torch.tensor(b, dtype=h.dtype, device=h.device) self.b = self.b.view(*self.b.shape, 1, 1, 1, 1, 1) self.c = torch.tensor(ci, dtype=h.dtype, device=h.device) self.rows = self.a.shape[0] self.cols = self.a.shape[1] def a_k_sum(self, k, row): if len(k.shape) == 4: a_coeff = self.a[row].squeeze(-1) ks = k * a_coeff.sum(dim=0) elif len(k.shape) == 5: a_coeff = self.a[row].squeeze(-1) ks = (k[0:self.cols] * a_coeff).sum(dim=0) elif len(k.shape) == 6: a_coeff = self.a[row] ks = (k[0:self.cols] * a_coeff).sum(dim=0) else: raise ValueError(f"Unexpected 
class RK_Method_Exponential(RK_Method):
    """RK method variant that works in ``t = -log(sigma)`` and uses ``denoised - x_0`` as epsilon."""

    def __init__(self, model, name="", method="explicit", device='cuda', dtype=torch.float64):
        # Pass device/dtype by keyword: RK_Method.__init__ takes `dynamic_method`
        # between `method` and `device`, so the former positional call stored the
        # device in `dynamic_method` and the dtype in `device`.
        super().__init__(model, name, method, device=device, dtype=dtype)
        self.exponential = True
        self.eps_pred = True

    @staticmethod
    def alpha_fn(neg_h):
        """Return ``exp(neg_h)``."""
        return torch.exp(neg_h)

    @staticmethod
    def sigma_fn(t):
        """Map ``t`` to sigma: ``exp(-t)``."""
        return t.neg().exp()

    @staticmethod
    def t_fn(sigma):
        """Map sigma to ``t``: ``-log(sigma)`` (inverse of ``sigma_fn``)."""
        return sigma.log().neg()

    @staticmethod
    def h_fn(sigma_down, sigma):
        """Step size in ``t``: ``-log(sigma_down / sigma)``."""
        return -torch.log(sigma_down/sigma)

    def __call__(self, x_0, x, sigma, h, **extra_args):
        """Evaluate the model at ``(x, sigma)`` and return ``(epsilon, denoised)``.

        ``epsilon`` is ``denoised - x_0``.  When ``h`` is given, the step-size
        history (``h_prev`` / ``h_prev2``) is shifted and updated.
        """
        denoised = self.model_denoised(x, sigma, **extra_args)
        epsilon = denoised - x_0

        if h is not None:
            self.h_prev2 = self.h_prev
            self.h_prev = h

        return epsilon, denoised

    def data_to_vel(self, x, data, sigma):
        """Return ``data - x``; ``sigma`` is accepted for interface parity and unused."""
        return data - x

    def get_epsilon(self, x_0, x, y, sigma, sigma_cur, sigma_down=None, unsample_resample_scale=None, extra_options=None):
        """Return the epsilon that points from ``x_0`` toward ``y`` (or away from it).

        Args:
            x_0: Reference state.
            x: Unused here; kept for interface parity.
            y: Target tensor.
            sigma: Current sigma, compared against ``sigma_down``.
            sigma_cur: Scale applied to the difference.
            sigma_down: When given and greater than ``sigma`` the sign of the
                difference is flipped and ``sigma_cur`` is replaced by
                ``self.sigma_max - sigma_cur``.
            unsample_resample_scale: If not None, overrides ``sigma_cur``.
            extra_options: Option string.  If it contains the word
                ``power_unsample`` or ``power_resample`` the difference is
                multiplied by ``sigma_cur`` instead of divided (and returned
                unscaled when ``sigma_down`` is None).

        Returns:
            The scaled difference tensor.
        """
        # Guard the comparison: sigma_down defaults to None, which previously
        # raised TypeError before the `is None` branches were ever reached.
        unsampling = sigma_down is not None and sigma_down > sigma

        if unsampling:
            sigma_cur = self.sigma_max - sigma_cur.clone()
        if unsample_resample_scale is not None:
            sigma_cur = unsample_resample_scale

        # extra_options=None is treated as "no power flag". The reconstructed
        # original fell through and implicitly returned None in that case.
        power_mode = extra_options is not None and bool(
            re.search(r"\bpower_unsample\b", extra_options) or re.search(r"\bpower_resample\b", extra_options)
        )

        difference = (x_0 - y) if unsampling else (y - x_0)

        if power_mode:
            if sigma_down is None:
                return difference
            return difference * sigma_cur
        return difference / sigma_cur
#print("MODEL SIGMA: ", round(float(sigma),3)) return epsilon, denoised def data_to_vel(self, x, data, sigma): return (data - x) / sigma def get_epsilon(self, x_0, x, y, sigma, sigma_cur, sigma_down=None, unsample_resample_scale=None, extra_options=None): if sigma_down > sigma: sigma_cur = self.sigma_max - sigma_cur.clone() sigma_cur = unsample_resample_scale if unsample_resample_scale is not None else sigma_cur if sigma_down is None: return (x - y) / sigma_cur else: if sigma_down > sigma: return (y - x) / sigma_cur else: return (x - y) / sigma_cur ================================================ FILE: legacy/rk_sampler.py ================================================ import torch import torch.nn.functional as F from tqdm.auto import trange from .noise_classes import * from .noise_sigmas_timesteps_scaling import get_res4lyf_step_with_model, get_res4lyf_half_step3 from .rk_method import RK_Method from .rk_guide_func import * from .latents import normalize_latent, initialize_or_scale, latent_normalize_channels from .helper import get_extra_options_kv, extra_options_flag, get_cosine_similarity, is_RF_model from .sigmas import get_sigmas PRINT_DEBUG=False def prepare_sigmas(model, sigmas): if sigmas[0] == 0.0: #remove padding used to prevent comfy from adding noise to the latent (for unsampling, etc.) 
def prepare_step_to_sigma_zero(rk, irk, rk_type, irk_type, model, x, extra_options, alpha, k, noise_sampler_type, cfg_cw=1.0, **extra_args):
    """Build the sampler objects used for the last step, the one that lands on sigma = 0.

    A replacement explicit method is always created.  Its type defaults to
    "buehler" for "ddim", "deis_2m" for types ending in 2m/3m/4m,
    "ralston_2s"/"ralston_3s" for types ending in 2s/3s and "ralston_3s"
    otherwise; the "rk_type_final_step" extra option overrides the default.
    The incoming ``rk`` object itself is not used.

    The implicit method is replaced only when any entry of ``irk.c`` is >= 1.
    Its default is "deis_2m" for 2m/3m/4m suffixes, "gauss-legendre_<suffix>"
    for 2s..5s suffixes and "gauss-legendre_2s" otherwise, overridable with
    "irk_type_final_step".  Otherwise ``irk`` and ``irk_type`` pass through.

    Returns:
        ``(rk, irk, rk_type_final_step, irk_type_final_step, eta, eta_var, extra_args)``
        with ``eta`` and ``eta_var`` both 0 and ``extra_args`` as returned by
        the samplers' ``init_cfg_channelwise``.
    """
    suffix = rk_type[-2:]
    is_multistep = suffix in {"2m", "3m", "4m"}

    # Same precedence as the original chained reassignments:
    # ddim > multistep > ralston.
    if rk_type == "ddim":
        explicit_default = "buehler"
    elif is_multistep:
        explicit_default = "deis_2m"
    elif suffix in {"2s", "3s"}:
        explicit_default = f"ralston_{suffix}"
    else:
        explicit_default = "ralston_3s"
    rk_type_final_step = get_extra_options_kv("rk_type_final_step", explicit_default, extra_options)

    rk = RK_Method.create(model, rk_type_final_step, x.device)
    rk.init_noise_sampler(x, torch.initial_seed() + 1, noise_sampler_type, alpha=alpha, k=k)
    extra_args = rk.init_cfg_channelwise(x, cfg_cw, **extra_args)

    replace_implicit = any(element >= 1 for element in irk.c)
    if not replace_implicit:
        irk_type_final_step = irk_type
    else:
        if is_multistep:
            implicit_default = "deis_2m"
        elif suffix in {"2s", "3s", "4s", "5s"}:
            implicit_default = f"gauss-legendre_{suffix}"
        else:
            implicit_default = "gauss-legendre_2s"
        irk_type_final_step = get_extra_options_kv("irk_type_final_step", implicit_default, extra_options)

        irk = RK_Method.create(model, irk_type_final_step, x.device)
        irk.init_noise_sampler(x, torch.initial_seed() + 100, noise_sampler_type, alpha=alpha, k=k)
        extra_args = irk.init_cfg_channelwise(x, cfg_cw, **extra_args)

    eta = eta_var = 0
    return rk, irk, rk_type_final_step, irk_type_final_step, eta, eta_var, extra_args
latent_guide_inv=None, latent_guide_weight=0.0, latent_guide_weight_inv=0.0, latent_guide_weights=None, latent_guide_weights_inv=None, guide_mode="", GARBAGE_COLLECT=False, mask=None, mask_inv=None, LGW_MASK_RESCALE_MIN=True, sigmas_override=None, unsample_resample_scales=None,regional_conditioning_weights=None, sde_noise=[], extra_options="", etas=None, s_noises=None, momentums=None, guides=None, cfgpp=0.0, cfg_cw = 1.0,regional_conditioning_floors=None, frame_weights_grp=None, eta_substep=0.0, noise_mode_sde_substep="hard", guide_cossim_cutoff_=1.0, guide_bkg_cossim_cutoff_=1.0, ): extra_args = {} if extra_args is None else extra_args noise_cossim_iterations = int(get_extra_options_kv("noise_cossim_iterations", "1", extra_options)) noise_substep_cossim_iterations = int(get_extra_options_kv("noise_substep_cossim_iterations", "1", extra_options)) NOISE_COSSIM_MODE = get_extra_options_kv("noise_cossim_mode", "orthogonal", extra_options) NOISE_COSSIM_SOURCE = get_extra_options_kv("noise_cossim_source", "x_eps_data_xinit_orthogonal", extra_options) NOISE_SUBSTEP_COSSIM_MODE = get_extra_options_kv("noise_substep_cossim_mode", "orthogonal", extra_options) NOISE_SUBSTEP_COSSIM_SOURCE = get_extra_options_kv("noise_substep_cossim_source", "x_eps_data_xinit_orthogonal", extra_options) SUBSTEP_SKIP_LAST = get_extra_options_kv("substep_skip_last", "false", extra_options) == "true" noise_cossim_tile_size = int(get_extra_options_kv("noise_cossim_tile", "2", extra_options)) noise_substep_cossim_tile_size = int(get_extra_options_kv("noise_substep_cossim_tile", "2", extra_options)) substep_eta = float(get_extra_options_kv("substep_eta", str(eta_substep), extra_options)) substep_noise_scaling = float(get_extra_options_kv("substep_noise_scaling", "0.0", extra_options)) substep_noise_mode = get_extra_options_kv("substep_noise_mode", noise_mode_sde_substep, extra_options) substep_eta_start_step = int(get_extra_options_kv("substep_noise_start_step", "-1", extra_options)) 
substep_eta_final_step = int(get_extra_options_kv("substep_noise_final_step", "-1", extra_options)) noise_substep_cossim_max_iter = int(get_extra_options_kv("noise_substep_cossim_max_iter", "5", extra_options)) noise_cossim_max_iter = int(get_extra_options_kv("noise_cossim_max_iter", "5", extra_options)) noise_substep_cossim_max_score = float(get_extra_options_kv("noise_substep_cossim_max_score", "1e-7", extra_options)) noise_cossim_max_score = float(get_extra_options_kv("noise_cossim_max_score", "1e-7", extra_options)) c1 = c1_ = float(get_extra_options_kv("c1", str(c1), extra_options)) c2 = c2_ = float(get_extra_options_kv("c2", str(c2), extra_options)) c3 = c3_ = float(get_extra_options_kv("c3", str(c3), extra_options)) guide_skip_steps = int(get_extra_options_kv("guide_skip_steps", 0, extra_options)) cfg_cw = float(get_extra_options_kv("cfg_cw", str(cfg_cw), extra_options)) MODEL_SAMPLING = model.inner_model.inner_model.model_sampling s_in, s_one = x.new_ones([x.shape[0]]), x.new_ones([1]) default_dtype = getattr(torch, get_extra_options_kv("default_dtype", "float64", extra_options), torch.float64) max_steps=10000 if sigmas_override is not None: sigmas = sigmas_override.clone() sigmas = sigmas.clone() * d_noise sigmas, UNSAMPLE = prepare_sigmas(model, sigmas) SDE_NOISE_EXTERNAL = False if sde_noise is not None: if len(sde_noise) > 0 and sigmas[1] > sigmas[2]: SDE_NOISE_EXTERNAL = True sigma_up_total = torch.zeros_like(sigmas[0]) for i in range(len(sde_noise)-1): sigma_up_total += sigmas[i+1] eta = eta / sigma_up_total irk_type = implicit_sampler_name if implicit_sampler_name in ("explicit_full", "explicit_diagonal", "none"): irk_type = rk_type rk_type = "buehler" if implicit_steps > 0 and implicit_sampler_name == "explicit_full" else rk_type rk_type = get_extra_options_kv("rk_type", rk_type, extra_options) print("rk_type: ", rk_type) rk = RK_Method.create(model, rk_type, x.device) irk = RK_Method.create(model, irk_type, x.device) extra_args = 
irk.init_cfg_channelwise(x, cfg_cw, **extra_args) extra_args = rk.init_cfg_channelwise(x, cfg_cw, **extra_args) rk. init_noise_sampler(x, noise_seed, noise_sampler_type, alpha=alpha, k=k) irk.init_noise_sampler(x, noise_seed+100, noise_sampler_type, alpha=alpha, k=k) frame_weights, frame_weights_inv = None, None if frame_weights_grp is not None and frame_weights_grp[0] is not None: frame_weights = initialize_or_scale(frame_weights_grp[0], 1.0, max_steps).to(default_dtype) frame_weights = F.pad(frame_weights, (0, max_steps), value=0.0) if frame_weights_grp is not None and frame_weights_grp[1] is not None: frame_weights_inv = initialize_or_scale(frame_weights_grp[1], 1.0, max_steps).to(default_dtype) frame_weights_inv = F.pad(frame_weights_inv, (0, max_steps), value=0.0) frame_weights_grp = (frame_weights, frame_weights_inv) LG = LatentGuide(guides, x, model, sigmas, UNSAMPLE, LGW_MASK_RESCALE_MIN, extra_options) x = LG.init_guides(x, rk.noise_sampler) y0, y0_inv = LG.y0, LG.y0_inv lgw, lgw_inv = LG.lgw, LG.lgw_inv guide_mode = LG.guide_mode denoised, denoised_prev, eps, eps_prev = [torch.zeros_like(x) for _ in range(4)] prev_noises = [] x_init = x.clone() for step in trange(len(sigmas)-1, disable=disable): sigma, sigma_next = sigmas[step], sigmas[step+1] unsample_resample_scale = float(unsample_resample_scales[step]) if unsample_resample_scales is not None else None if regional_conditioning_weights is not None: extra_args['model_options']['transformer_options']['regional_conditioning_weight'] = regional_conditioning_weights[step] extra_args['model_options']['transformer_options']['regional_conditioning_floor'] = regional_conditioning_floors [step] else: extra_args['model_options']['transformer_options']['regional_conditioning_weight'] = 0.0 extra_args['model_options']['transformer_options']['regional_conditioning_floor'] = 0.0 eta = eta_var = etas[step] if etas is not None else eta s_noise = s_noises[step] if s_noises is not None else s_noise if sigma_next == 0: rk, 
irk, rk_type, irk_type, eta, eta_var, extra_args = prepare_step_to_sigma_zero(rk, irk, rk_type, irk_type, model, x, extra_options, alpha, k, noise_sampler_type, cfg_cw=cfg_cw, **extra_args) sigma_up, sigma, sigma_down, alpha_ratio = get_res4lyf_step_with_model(model, sigma, sigma_next, eta, noise_mode) h = rk.h_fn(sigma_down, sigma) h_irk = irk.h_fn(sigma_down, sigma) c2, c3 = get_res4lyf_half_step3(sigma, sigma_down, c2_, c3_, t_fn=rk.t_fn, sigma_fn=rk.sigma_fn, t_fn_formula=t_fn_formula, sigma_fn_formula=sigma_fn_formula) rk. set_coeff(rk_type, h, c1, c2, c3, step, sigmas, sigma, sigma_down, extra_options) irk.set_coeff(irk_type, h_irk, c1, c2, c3, step, sigmas, sigma, sigma_down, extra_options) s_ = [( rk.sigma_fn( rk.t_fn(sigma) + h*c_)) * s_one for c_ in rk.c] s_irk_rk = [( rk.sigma_fn( rk.t_fn(sigma) + h*c_)) * s_one for c_ in irk.c] s_irk = [( irk.sigma_fn(irk.t_fn(sigma) + h_irk*c_)) * s_one for c_ in irk.c] if step == 0 or step == guide_skip_steps: x_, data_, data_u, eps_ = (torch.zeros(max(rk.rows, irk.rows) + 2, *x.shape, dtype=x.dtype, device=x.device) for step in range(4)) sde_noise_t = None if SDE_NOISE_EXTERNAL: if step >= len(sde_noise): SDE_NOISE_EXTERNAL=False else: sde_noise_t = sde_noise[step] x_prenoise = x.clone() x_[0] = x if sigma_up > 0: x_[0] = rk.add_noise_pre(x, sigma_up, sigma, sigma_next, alpha_ratio, s_noise, noise_mode, SDE_NOISE_EXTERNAL, sde_noise_t) #y0, lgw, sigma_down are currently unused x_0 = x_[0].clone() for ms in range(rk.multistep_stages): if RK_Method.is_exponential(rk_type): eps_ [rk.multistep_stages - ms] = -(x_0 - data_ [rk.multistep_stages - ms]) else: eps_ [rk.multistep_stages - ms] = (x_0 - data_ [rk.multistep_stages - ms]) / sigma if implicit_steps == 0 or implicit_sampler_name == "explicit_diagonal": for row in range(rk.rows - rk.multistep_stages): for exim_iter in range(implicit_steps+1): sub_sigma_up, sub_sigma, sub_sigma_next, sub_sigma_down, sub_alpha_ratio = 0, s_[row], s_[row+1], s_[row+1], 1 if 
(substep_eta_final_step < 0 and step == len(sigmas)-1+substep_eta_final_step) or (substep_eta_final_step > 0 and step > substep_eta_final_step): sub_sigma_up, sub_sigma, sub_sigma_down, sub_alpha_ratio = 0, s_[row], s_[row+1], 1 edsef=1 if extra_options_flag("explicit_diagonal_eta_substep_factors", extra_options): #value_str = get_extra_options_list("explicit_diagonal_eta_substep_factors", "", extra_options) #float_list = [float(item.strip()) for item in value_str.split(',') if item.strip()] float_list = get_extra_options_list("explicit_diagonal_eta_substep_factors", "", extra_options, ret_type=float) edsef = float_list[exim_iter] nsef = 1 if extra_options_flag("noise_eta_substep_factors", extra_options): #value_str = get_extra_options_list("noise_eta_substep_factors", "", extra_options) #nsef_list = [float(item.strip()) for item in value_str.split(',') if item.strip()] nsef_list = get_extra_options_list("noise_eta_substep_factors", "", extra_options, ret_type=float) nsef = nsef_list[row] if exim_iter > 0 and rk_type.endswith("m") and step >= int(rk_type[-2]): sub_sigma_up, sub_sigma, sub_sigma_down, sub_alpha_ratio = get_res4lyf_step_with_model(model, sigma, sigma_next, substep_eta*edsef*nsef, substep_noise_mode) sub_sigma_next = sigma_next if (row > 0 and not extra_options_flag("disable_rough_noise", extra_options)): # and s_[row-1] >= s_[row]: sub_sigma_up, sub_sigma, sub_sigma_down, sub_alpha_ratio = get_res4lyf_step_with_model(model, s_[row-1], s_[row], substep_eta*edsef*nsef, substep_noise_mode) sub_sigma_next = s_[row] if row > 0 and substep_eta*edsef*nsef > 0 and row < rk.rows and ((SUBSTEP_SKIP_LAST == False) or (row < rk.rows - rk.multistep_stages - 1)) and (sub_sigma_down > 0) and sigma_next > 0: substep_noise_scaling_ratio = s_[row+1]/sub_sigma_down eps_[row-1] *= 1 + substep_noise_scaling*(substep_noise_scaling_ratio-1) h_new = h.clone() if (rk_type.endswith("m") and step >= int(rk_type[-2]) and sub_sigma_up > 0) or (row > 0 and sub_sigma_up > 0): if 
extra_options_flag("substep_eta_c_row_plus_one", extra_options): h_new = (rk.h_fn(sub_sigma_down, sigma) / rk.c[row+1])[0] else: if exim_iter > 0 and rk_type.endswith("m") and step >= int(rk_type[-2]): c_val = -rk.h_prev/h h_new = (rk.h_fn(sub_sigma_down, sigma)) / c_val else: h_new = (rk.h_fn(sub_sigma_down, sigma) / rk.c[row])[0] #used to be rk.c[row+1] s_new_ = [( rk.sigma_fn( rk.t_fn(sigma) + h_new*c_)) * s_one for c_ in rk.c] """print("step, row: ", step, row) print("h, h_new: ", h.item(), h_new.item()) print("s_: ", s_) print("s_new_: ", s_new_) print("sub_sigma_up, sub_sigma, sub_sigma_next, sub_sigma_down, sub_alpha_ratio: ", sub_sigma_up.item(), sub_sigma.item(), sub_sigma_next.item(), sub_sigma_down.item(), sub_alpha_ratio.item())""" # UPDATE #print("UPDATE: step,row,h_new: ", step, row, h_new.item()) x_[row+1] = x_0 + h_new * rk.a_k_sum(eps_, row) if row > 0: if PRINT_DEBUG: print("A: step,row,h,h_new: \n", step, row, round(float(h.item()),3), round(float(h_new.item()),3)) #print("step, row, exim_iter: ", step, row, exim_iter) # NOISE ADD if is_RF_model(model) == True or (is_RF_model(model) == False and noise_mode != "hard"): if (exim_iter < implicit_steps and sub_sigma_up > 0) or ((row > 0) and (sub_sigma_up > 0) and ((SUBSTEP_SKIP_LAST == False) or (row < rk.rows - rk.multistep_stages - 1))): if PRINT_DEBUG: print("A: sub_sigma_up, sub_sigma, sub_sigma_next, sub_sigma_down, sub_alpha_ratio: \n", round(float(sub_sigma_up),3), round(float(sub_sigma),3), round(float(sub_sigma_next),3), round(float(sub_sigma_down),3), round(float(sub_alpha_ratio),3)) data_tmp = denoised_prev if data_[row-1].sum() == 0 else data_[row-1] eps_tmp = eps_prev if eps_[row-1].sum() == 0 else eps_ [row-1] Osde = NoiseStepHandlerOSDE(x_[row+1], eps_tmp, data_tmp, x_init, y0, y0_inv) if Osde.check_cossim_source(NOISE_SUBSTEP_COSSIM_SOURCE): noise = rk.noise_sampler(sigma=sub_sigma, sigma_next=sub_sigma_next) noise_osde = Osde.get_ortho_noise(noise, prev_noises, 
max_iter=noise_substep_cossim_max_iter, max_score=noise_substep_cossim_max_score, NOISE_COSSIM_SOURCE=NOISE_SUBSTEP_COSSIM_SOURCE) x_[row+1] = sub_alpha_ratio * x_[row+1] + sub_sigma_up * noise_osde * s_noise elif extra_options_flag("noise_substep_cossim", extra_options): x_[row+1] = handle_tiled_etc_noise_steps(x_0, x_[row+1], x_prenoise, x_init, eps_tmp, data_tmp, y0, y0_inv, row, rk_type, rk, sub_sigma_up, s_[row-1], s_[row], sub_alpha_ratio, s_noise, substep_noise_mode, SDE_NOISE_EXTERNAL, sde_noise_t, NOISE_SUBSTEP_COSSIM_SOURCE, NOISE_SUBSTEP_COSSIM_MODE, noise_substep_cossim_tile_size, noise_substep_cossim_iterations, extra_options) else: x_[row+1] = rk.add_noise_post(x_[row+1], sub_sigma_up, sub_sigma, sub_sigma_next, sub_alpha_ratio, s_noise, substep_noise_mode, SDE_NOISE_EXTERNAL, sde_noise_t) # MODEL CALL if step < guide_skip_steps: eps_row, eps_row_inv = get_guide_epsilon_substep(x_0, x_, y0, y0_inv, s_, row, rk_type) eps_[row] = LG.mask * eps_row + (1-LG.mask) * eps_row_inv else: if implicit_steps == 0 or row > 0 or (row == 0 and not extra_options_flag("explicit_diagonal_implicit_predictor", extra_options)): eps_[row], data_[row] = rk(x_0, x_[row+1], s_[row], h, **extra_args) #print("exim: ", step, row, exim_iter) else: if extra_options_flag("explicit_diagonal_implicit_predictor_disable_noise", extra_options): sub_sigma_up, sub_sigma_down, sub_alpha_ratio = sub_sigma_up*0, sub_sigma_next, sub_alpha_ratio/sub_alpha_ratio eps_[row], data_[row] = rk(x_0, x_[row+1], s_[row], h, **extra_args) eps_, x_ = LG.process_guides_substep(x_0, x_, eps_, data_, row, step, sigma, sigma_next, sigma_down, s_, unsample_resample_scale, rk, rk_type, extra_options, frame_weights_grp) h_mini = rk.h_fn(sub_sigma_down, sub_sigma) x_[row+1] = x_0 + h_mini * eps_[row] Osde = NoiseStepHandlerOSDE(x_[row+1], eps_[row], data_[row], x_init, y0, y0_inv) if Osde.check_cossim_source(NOISE_SUBSTEP_COSSIM_SOURCE): noise = rk.noise_sampler(sigma=sub_sigma, sigma_next=sub_sigma_next) 
noise_osde = Osde.get_ortho_noise(noise, prev_noises, max_iter=noise_substep_cossim_max_iter, max_score=noise_substep_cossim_max_score, NOISE_COSSIM_SOURCE=NOISE_SUBSTEP_COSSIM_SOURCE) x_[row+1] = sub_alpha_ratio * x_[row+1] + sub_sigma_up * noise_osde * s_noise else: x_[row+1] = rk.add_noise_post(x_[row+1], sub_sigma_up, sub_sigma, sub_sigma_next, sub_alpha_ratio, s_noise, substep_noise_mode, SDE_NOISE_EXTERNAL, sde_noise_t) for inner_exim_iter in range(implicit_steps): # implicit buehler update to find Yn+1 #print("inner_exim: ", step, row, inner_exim_iter) eps_[row], data_[row] = rk(x_0, x_[row+1], s_[row+1], h, **extra_args) eps_, x_ = LG.process_guides_substep(x_0, x_, eps_, data_, row, step, sigma, sigma_next, sigma_down, s_, unsample_resample_scale, rk, rk_type, extra_options, frame_weights_grp) x_[row+1] = x_0 + h_mini * eps_[row] Osde = NoiseStepHandlerOSDE(x_[row+1], eps_[row], data_[row], x_init, y0, y0_inv) if Osde.check_cossim_source(NOISE_SUBSTEP_COSSIM_SOURCE): noise = rk.noise_sampler(sigma=sub_sigma, sigma_next=sub_sigma_next) noise_osde = Osde.get_ortho_noise(noise, prev_noises, max_iter=noise_substep_cossim_max_iter, max_score=noise_substep_cossim_max_score, NOISE_COSSIM_SOURCE=NOISE_SUBSTEP_COSSIM_SOURCE) x_[row+1] = sub_alpha_ratio * x_[row+1] + sub_sigma_up * noise_osde * s_noise else: x_[row+1] = rk.add_noise_post(x_[row+1], sub_sigma_up, sub_sigma, sub_sigma_next, sub_alpha_ratio, s_noise, substep_noise_mode, SDE_NOISE_EXTERNAL, sde_noise_t) if extra_options_flag("rk_linear_straight", extra_options): eps_[row] = (x_0 - data_[row]) / sigma if sub_sigma_up > 0 and not RK_Method.is_exponential(rk_type): eps_[row] = (x_0 - data_[row]) / sigma # GUIDES eps_row_tmp, x_row_tmp = eps_[row].clone(), x_[row+1].clone() eps_, x_ = LG.process_guides_substep(x_0, x_, eps_, data_, row, step, sigma, sigma_next, sigma_down, s_, unsample_resample_scale, rk, rk_type, extra_options, frame_weights_grp) if extra_options_flag("explicit_diagonal_eps_proj_factors", 
extra_options): #value_str = get_extra_options_list("explicit_diagonal_eps_proj_factors", "", extra_options) #float_list = [float(item.strip()) for item in value_str.split(',') if item.strip()] value_str = get_extra_options_list("explicit_diagonal_eps_proj_factors", "", extra_options, ret_type=float) eps_[row] = (float_list[exim_iter]) * eps_[row] + (1-float_list[exim_iter]) * eps_row_tmp x_[row+1] = (float_list[exim_iter]) * x_[row+1] + (1-float_list[exim_iter]) * x_row_tmp if row > 0 and exim_iter <= implicit_steps and implicit_steps > 0: eps_[row-1] = eps_[row] if implicit_steps > 0 and row == 0: break if PRINT_DEBUG: print("B: step,h,h_new: \n", step, round(float(h.item()),3), round(float(h_new.item()),3)) print("B: sub_sigma_up, sub_sigma, sub_sigma_next, sub_sigma_down, sub_alpha_ratio: \n", round(float(sub_sigma_up),3), round(float(sub_sigma),3), round(float(sub_sigma_next),3), round(float(sub_sigma_down),3), round(float(sub_alpha_ratio),3)) x = x_0 + h * rk.b_k_sum(eps_, 0) denoised = x_0 + ((sigma / (sigma - sigma_down)) * h) * rk.b_k_sum(eps_, 0) eps = x - denoised x = LG.process_guides_poststep(x, denoised, eps, step, extra_options) # DIAGONALLY IMPLICIT elif implicit_sampler_name=="explicit_diagonal_alt" or any(irk_type.startswith(prefix) for prefix in {"crouzeix", "irk_exp_diag", "pareschi_russo", "kraaijevanger_spijker", "qin_zhang",}): s_irk = [torch.full_like(s_irk[0], sigma.item())] + s_irk for row in range(irk.rows - irk.multistep_stages): sub_sigma_up, sub_sigma, sub_sigma_next, sub_sigma_down, sub_alpha_ratio = 0.0, s_irk[row], s_irk[row+1], s_irk[row+1], 1.0 if irk.c[row] > 0: sub_sigma_up, sub_sigma, sub_sigma_down, sub_alpha_ratio = get_res4lyf_step_with_model(model, s_irk[row], s_irk[row+1], substep_eta, substep_noise_mode) if not extra_options_flag("diagonal_implicit_skip_initial", extra_options): # MODEL CALL eps_[row], data_[row] = irk(x_0, x_[row], s_irk[row], h_irk, **extra_args) # GUIDES eps_, x_ = LG.process_guides_substep(x_0, x_, 
eps_, data_, row, step, sigma, sigma_next, sigma_down, s_irk, unsample_resample_scale, irk, irk_type, extra_options, frame_weights_grp) for diag_iter in range(implicit_steps): h_new_irk = h.clone() if irk.c[row] > 0: h_new_irk = (irk.h_fn(sub_sigma_down, sigma) / irk.c[row])[0] # UPDATE x_[row+1] = x_0 + h_new_irk * irk.a_k_sum(eps_, row) # NOISE ADD if is_RF_model(model) == True or (is_RF_model(model) == False and noise_mode != "hard"): if (row > 0) and (sub_sigma_up > 0) and ((SUBSTEP_SKIP_LAST == False) or (row < irk.rows - irk.multistep_stages - 1)): data_tmp = denoised_prev if data_[row-1].sum() == 0 else data_[row-1] eps_tmp = eps_prev if eps_[row-1].sum() == 0 else eps_ [row-1] Osde = NoiseStepHandlerOSDE(x_[row+1], eps_tmp, data_tmp, x_init, y0, y0_inv) if Osde.check_cossim_source(NOISE_SUBSTEP_COSSIM_SOURCE): noise = irk.noise_sampler(sigma=sub_sigma, sigma_next=sub_sigma_next) noise_osde = Osde.get_ortho_noise(noise, prev_noises, max_iter=noise_substep_cossim_max_iter, max_score=noise_substep_cossim_max_score, NOISE_COSSIM_SOURCE=NOISE_SUBSTEP_COSSIM_SOURCE) x_[row+1] = sub_alpha_ratio * x_[row+1] + sub_sigma_up * noise_osde * s_noise elif extra_options_flag("noise_substep_cossim", extra_options): x_[row+1] = handle_tiled_etc_noise_steps(x_0, x_[row+1], x_prenoise, x_init, eps_tmp, data_tmp, y0, y0_inv, row, irk_type, irk, sub_sigma_up, s_irk[row-1], s_irk[row], sub_alpha_ratio, s_noise, substep_noise_mode, SDE_NOISE_EXTERNAL, sde_noise_t, NOISE_SUBSTEP_COSSIM_SOURCE, NOISE_SUBSTEP_COSSIM_MODE, noise_substep_cossim_tile_size, noise_substep_cossim_iterations, extra_options) else: x_[row+1] = irk.add_noise_post(x_[row+1], sub_sigma_up, sub_sigma, sub_sigma_next, sub_alpha_ratio, s_noise, substep_noise_mode, SDE_NOISE_EXTERNAL, sde_noise_t) # MODEL CALL eps_[row], data_[row] = irk(x_0, x_[row+1], s_irk[row+1], h_irk, **extra_args) if sub_sigma_up > 0 and not RK_Method.is_exponential(irk_type): eps_[row] = (x_0 - data_[row]) / sigma # GUIDES eps_, x_ = 
LG.process_guides_substep(x_0, x_, eps_, data_, row, step, sigma, sigma_next, sigma_down, s_irk, unsample_resample_scale, irk, irk_type, extra_options, frame_weights_grp) x = x_0 + h_irk * irk.b_k_sum(eps_, 0) denoised = x_0 + (sigma / (sigma - sigma_down)) * h_irk * irk.b_k_sum(eps_, 0) eps = x - denoised x = LG.process_guides_poststep(x, denoised, eps, step, extra_options) # FULLY IMPLICIT else: s2 = s_irk_rk[:] s2.append(sigma.unsqueeze(dim=0)) s_all = torch.sort(torch.stack(s2, dim=0).squeeze(dim=1).unique(), descending=True)[0] sigmas_and = torch.cat( (sigmas[0:step], s_all), dim=0) data_[0].zero_() eps_ [0].zero_() eps_list = [] if extra_options_flag("fast_implicit_guess", extra_options): if denoised.sum() == 0: if extra_options_flag("fast_implicit_guess_use_guide", extra_options): data_s = y0 eps_s = x_0 - data_s else: eps_s, data_s = rk(x_0, x_0, sigma, h, **extra_args) else: eps_s, data_s = eps, denoised for i in range(len(s_all)-1): eps_list.append(eps_s * s_all[i]/sigma) if torch.allclose(s_all[-1], sigma_down, atol=1e-8): eps_list.append(eps_s * sigma_down/sigma) else: # EXPLICIT GUESS x_mid = x for i in range(len(s_all)-1): x_mid, eps_, data_ = get_explicit_rk_step(rk, rk_type, x_mid, LG, step, s_all[i], s_all[i+1], eta, eta_var, s_noise, noise_mode, c2, c3, step+i, sigmas_and, x_, eps_, data_, unsample_resample_scale, extra_options, frame_weights_grp, x_init, x_prenoise, NOISE_COSSIM_SOURCE, NOISE_COSSIM_MODE, noise_cossim_max_iter, noise_cossim_max_score, noise_cossim_tile_size, noise_cossim_iterations,SDE_NOISE_EXTERNAL,sde_noise_t,MODEL_SAMPLING, **extra_args) eps_list.append(eps_[0]) data_[0].zero_() eps_ [0].zero_() if torch.allclose(s_all[-1], sigma_down, atol=1e-8): eps_down, data_down = rk(x_0, x_mid, sigma_down, h, **extra_args) #should h_irk = h? going to change it for now. 
eps_list.append(eps_down) s_all = [s for s in s_all if s in s_irk_rk] eps_list = [eps_list[s_all.index(s)].clone() for s in s_irk_rk] eps2_ = torch.stack(eps_list, dim=0) # FULLY IMPLICIT LOOP for implicit_iter in range(implicit_steps): for row in range(irk.rows): x_[row+1] = x_0 + h_irk * irk.a_k_sum(eps2_, row) eps2_[row], data_[row] = irk(x_0, x_[row+1], s_irk[row], h_irk, **extra_args) if not extra_options_flag("implicit_loop_skip_guide", extra_options): eps2_, x_ = LG.process_guides_substep(x_0, x_, eps2_, data_, row, step, sigma, sigma_next, sigma_down, s_irk, unsample_resample_scale, irk, irk_type, extra_options, frame_weights_grp) x = x_0 + h_irk * irk.b_k_sum(eps2_, 0) denoised = x_0 + (sigma / (sigma - sigma_down)) * h_irk * irk.b_k_sum(eps2_, 0) eps = x - denoised x = LG.process_guides_poststep(x, denoised, eps, step, extra_options) preview_callback(x, eps, denoised, x_, eps_, data_, step, sigma, sigma_next, callback, extra_options) sde_noise_t = None if SDE_NOISE_EXTERNAL: if step >= len(sde_noise): SDE_NOISE_EXTERNAL=False else: sde_noise_t = sde_noise[step] if is_RF_model(model) == True or (is_RF_model(model) == False and noise_mode != "hard"): if sigma_up > 0: #print("NOISE_FULL: sigma_up, sigma, sigma_next, sigma_down, alpha_ratio: ", sigma_up.item(), sigma.item(), sigma_next.item(), sigma_down.item(), alpha_ratio.item()) if implicit_steps==0: rk_or_irk = rk rk_or_irk_type = rk_type else: rk_or_irk = irk rk_or_irk_type = irk_type Osde = NoiseStepHandlerOSDE(x, eps, denoised, x_init, y0, y0_inv) if Osde.check_cossim_source(NOISE_COSSIM_SOURCE): noise = rk_or_irk.noise_sampler(sigma=sigma, sigma_next=sigma_next) noise_osde = Osde.get_ortho_noise(noise, prev_noises, max_iter=noise_cossim_max_iter, max_score=noise_cossim_max_score, NOISE_COSSIM_SOURCE=NOISE_COSSIM_SOURCE) x = alpha_ratio * x + sigma_up * noise_osde * s_noise elif extra_options_flag("noise_cossim", extra_options): x = handle_tiled_etc_noise_steps(x_0, x, x_prenoise, x_init, eps, 
def get_explicit_rk_step(rk, rk_type, x, LG, step, sigma, sigma_next, eta, eta_var, s_noise, noise_mode, c2, c3, stepcount, sigmas, x_, eps_, data_, unsample_resample_scale, extra_options, frame_weights_grp,
                         x_init, x_prenoise, NOISE_COSSIM_SOURCE, NOISE_COSSIM_MODE, noise_cossim_max_iter, noise_cossim_max_score, noise_cossim_tile_size, noise_cossim_iterations, SDE_NOISE_EXTERNAL, sde_noise_t, MODEL_SAMPLING,
                         **extra_args):
    """Advance `x` by one explicit RK step from `sigma` toward `sigma_next`.

    The step size, stage sigmas and coefficients are recomputed on `rk` for
    this step, every non-multistep stage is evaluated through `rk(...)`, guides
    are applied per stage and after the step, and SDE noise is re-added when
    `sigma_up > 0`.

    `x_`, `eps_` and `data_` are written to in place (stage states, stage
    epsilons, stage denoised predictions) and are also returned.

    `eta` is only a fallback: the "implicit_substep_eta" extra option, when
    present, replaces it. `eta_var` and `MODEL_SAMPLING` are accepted for
    signature compatibility but are not read in this function.

    Returns:
        (x, eps_, data_) -- the new state and the updated stage buffers.
    """
    # `extra_args` is a **kwargs dict and therefore never None; no guard needed.
    s_in = x.new_ones([x.shape[0]])

    eta = float(get_extra_options_kv("implicit_substep_eta", eta, extra_options))

    sigma_up, sigma, sigma_down, alpha_ratio = get_res4lyf_step_with_model(rk.model, sigma, sigma_next, eta, noise_mode)
    h = rk.h_fn(sigma_down, sigma)
    c2, c3 = get_res4lyf_half_step3(sigma, sigma_down, c2, c3, t_fn=rk.t_fn, sigma_fn=rk.sigma_fn)

    rk.set_coeff(rk_type, h, c2=c2, c3=c3, stepcount=stepcount, sigmas=sigmas, sigma_down=sigma_down, extra_options=extra_options)

    # One sigma per stage node, broadcast over the batch dimension.
    s_ = [(sigma + h * c_) * s_in for c_ in rk.c]
    x_[0] = rk.add_noise_pre(x, sigma_up, sigma, sigma_next, alpha_ratio, s_noise, noise_mode)

    x_0 = x_[0].clone()

    # Rebuild the history epsilons of multistep methods relative to the new x_0.
    for ms in range(rk.multistep_stages):
        slot = rk.multistep_stages - ms
        if RK_Method.is_exponential(rk_type):
            eps_[slot] = data_[slot] - x_0
        else:
            eps_[slot] = (x_0 - data_[slot]) / sigma

    # Explicit stages: each one only depends on stages that were already evaluated.
    for row in range(rk.rows - rk.multistep_stages):
        x_[row+1] = x_0 + h * rk.a_k_sum(eps_, row)
        eps_[row], data_[row] = rk(x_0, x_[row+1], s_[row], h, **extra_args)

        eps_, x_ = LG.process_guides_substep(x_0, x_, eps_, data_, row, step, sigma, sigma_next, sigma_down, s_, unsample_resample_scale, rk, rk_type, extra_options, frame_weights_grp)

    x = x_0 + h * rk.b_k_sum(eps_, 0)

    denoised = x_0 + (sigma / (sigma - sigma_down)) * h * rk.b_k_sum(eps_, 0)
    eps = x - denoised

    # With several guide entries, use the one for this step (clamped to the last).
    y0 = LG.y0
    if LG.y0.shape[0] > 1:
        y0 = LG.y0[min(step, LG.y0.shape[0]-1)].unsqueeze(0)

    x = LG.process_guides_poststep(x, denoised, eps, step, extra_options)

    # Same condition as before, with is_RF_model evaluated once instead of twice.
    is_rf = is_RF_model(rk.model)
    if is_rf == True or (is_rf == False and noise_mode != "hard"):
        if sigma_up > 0:
            Osde = NoiseStepHandlerOSDE(x, eps, denoised, x_init, y0, LG.y0_inv)
            if Osde.check_cossim_source(NOISE_COSSIM_SOURCE):
                noise = rk.noise_sampler(sigma=sigma, sigma_next=sigma_next)
                noise_osde = Osde.get_ortho_noise(noise, [], max_iter=noise_cossim_max_iter, max_score=noise_cossim_max_score, NOISE_COSSIM_SOURCE=NOISE_COSSIM_SOURCE)
                x = alpha_ratio * x + sigma_up * noise_osde * s_noise
            elif extra_options_flag("noise_cossim", extra_options):
                x = handle_tiled_etc_noise_steps(x_0, x, x_prenoise, x_init, eps, denoised, y0, LG.y0_inv, step,
                                                 rk_type, rk, sigma_up, sigma, sigma_next, alpha_ratio, s_noise, noise_mode, SDE_NOISE_EXTERNAL, sde_noise_t,
                                                 NOISE_COSSIM_SOURCE, NOISE_COSSIM_MODE, noise_cossim_tile_size, noise_cossim_iterations,
                                                 extra_options)
            else:
                x = rk.add_noise_post(x, sigma_up, sigma, sigma_next, alpha_ratio, s_noise, noise_mode, SDE_NOISE_EXTERNAL, sde_noise_t)

    # Shift the stage history by one slot for the next step.
    # NOTE(review): the original author marked this loop "NEEDS ADJUSTING?".
    for ms in range(rk.multistep_stages):
        slot = rk.multistep_stages - ms
        eps_[slot] = eps_[slot - 1]
        data_[slot] = data_[slot - 1]

    return x, eps_, data_


def preview_callback(x, eps, denoised, x_, eps_, data_, step, sigma, sigma_next, callback, extra_options, FINAL_STEP=False):
    """Send the current sampler state to `callback`, if one was supplied.

    The tensor reported under the 'denoised' key is selected as follows, first
    match wins:
      * FINAL_STEP                     -> `denoised`
      * "eps_substep_preview" option   -> `eps_[row]`
      * "denoised_substep_preview"     -> `data_[row]`
      * "x_substep_preview"            -> `x_[row]`
      * "eps_preview"                  -> `eps`
      * "denoised_preview"             -> `denoised`
      * "x_preview"                    -> `x`
      * otherwise                      -> `data_[0]`
    where `row` is the integer value of the matching option (default "0").
    The selected tensor is cast to float32 before it is handed over.

    Returns None.
    """
    # Nothing to report to: skip option parsing, indexing and the dtype cast.
    if callback is None:
        return

    if FINAL_STEP:
        denoised_callback = denoised

    elif extra_options_flag("eps_substep_preview", extra_options):
        row_callback = int(get_extra_options_kv("eps_substep_preview", "0", extra_options))
        denoised_callback = eps_[row_callback]

    elif extra_options_flag("denoised_substep_preview", extra_options):
        row_callback = int(get_extra_options_kv("denoised_substep_preview", "0", extra_options))
        denoised_callback = data_[row_callback]

    elif extra_options_flag("x_substep_preview", extra_options):
        row_callback = int(get_extra_options_kv("x_substep_preview", "0", extra_options))
        denoised_callback = x_[row_callback]

    elif extra_options_flag("eps_preview", extra_options):
        denoised_callback = eps

    elif extra_options_flag("denoised_preview", extra_options):
        denoised_callback = denoised

    elif extra_options_flag("x_preview", extra_options):
        denoised_callback = x

    else:
        denoised_callback = data_[0]

    callback({'x': x, 'i': step, 'sigma': sigma, 'sigma_next': sigma_next, 'denoised': denoised_callback.to(torch.float32)})


def _sample_rk_preset(model, x, sigmas, extra_args, callback, disable, rk_type, **noise_kwargs):
    """Call `sample_rk` with the settings shared by every preset sampler below.

    All presets use gaussian noise, the "hard" noise mode and noise_seed=-1;
    they differ only in `rk_type` and in the eta values passed via `noise_kwargs`.
    """
    return sample_rk(model, x, sigmas, extra_args, callback, disable,
                     noise_sampler_type="gaussian", noise_mode="hard", noise_seed=-1,
                     rk_type=rk_type, **noise_kwargs)


# Preset entry points with eta=0.0.

def sample_res_2m(model, x, sigmas, extra_args=None, callback=None, disable=None):
    """`sample_rk` preset: rk_type "res_2m", eta=0.0."""
    return _sample_rk_preset(model, x, sigmas, extra_args, callback, disable, "res_2m", eta=0.0)

def sample_res_2s(model, x, sigmas, extra_args=None, callback=None, disable=None):
    """`sample_rk` preset: rk_type "res_2s", eta=0.0."""
    return _sample_rk_preset(model, x, sigmas, extra_args, callback, disable, "res_2s", eta=0.0)

def sample_res_3s(model, x, sigmas, extra_args=None, callback=None, disable=None):
    """`sample_rk` preset: rk_type "res_3s", eta=0.0."""
    return _sample_rk_preset(model, x, sigmas, extra_args, callback, disable, "res_3s", eta=0.0)

def sample_res_5s(model, x, sigmas, extra_args=None, callback=None, disable=None):
    """`sample_rk` preset: rk_type "res_5s", eta=0.0."""
    return _sample_rk_preset(model, x, sigmas, extra_args, callback, disable, "res_5s", eta=0.0)

def sample_res_6s(model, x, sigmas, extra_args=None, callback=None, disable=None):
    """`sample_rk` preset: rk_type "res_6s", eta=0.0."""
    return _sample_rk_preset(model, x, sigmas, extra_args, callback, disable, "res_6s", eta=0.0)


# Preset entry points with eta=0.5 and eta_substep=0.5.

def sample_res_2m_sde(model, x, sigmas, extra_args=None, callback=None, disable=None):
    """`sample_rk` preset: rk_type "res_2m", eta=0.5, eta_substep=0.5."""
    return _sample_rk_preset(model, x, sigmas, extra_args, callback, disable, "res_2m", eta=0.5, eta_substep=0.5)

def sample_res_2s_sde(model, x, sigmas, extra_args=None, callback=None, disable=None):
    """`sample_rk` preset: rk_type "res_2s", eta=0.5, eta_substep=0.5."""
    return _sample_rk_preset(model, x, sigmas, extra_args, callback, disable, "res_2s", eta=0.5, eta_substep=0.5)

def sample_res_3s_sde(model, x, sigmas, extra_args=None, callback=None, disable=None):
    """`sample_rk` preset: rk_type "res_3s", eta=0.5, eta_substep=0.5."""
    return _sample_rk_preset(model, x, sigmas, extra_args, callback, disable, "res_3s", eta=0.5, eta_substep=0.5)

def sample_res_5s_sde(model, x, sigmas, extra_args=None, callback=None, disable=None):
    """`sample_rk` preset: rk_type "res_5s", eta=0.5, eta_substep=0.5."""
    return _sample_rk_preset(model, x, sigmas, extra_args, callback, disable, "res_5s", eta=0.5, eta_substep=0.5)

def sample_res_6s_sde(model, x, sigmas, extra_args=None, callback=None, disable=None):
    """`sample_rk` preset: rk_type "res_6s", eta=0.5, eta_substep=0.5."""
    return _sample_rk_preset(model, x, sigmas, extra_args, callback, disable, "res_6s", eta=0.5, eta_substep=0.5)


def sample_deis_2m(model, x, sigmas, extra_args=None, callback=None, disable=None):
    """`sample_rk` preset: rk_type "deis_2m", eta=0.0."""
    return _sample_rk_preset(model, x, sigmas, extra_args, callback, disable, "deis_2m", eta=0.0)

def sample_deis_3m(model, x, sigmas, extra_args=None, callback=None, disable=None):
    """`sample_rk` preset: rk_type "deis_3m", eta=0.0."""
    return _sample_rk_preset(model, x, sigmas, extra_args, callback, disable, "deis_3m", eta=0.0)

def sample_deis_4m(model, x, sigmas, extra_args=None, callback=None, disable=None):
    """`sample_rk` preset: rk_type "deis_4m", eta=0.0."""
    return _sample_rk_preset(model, x, sigmas, extra_args, callback, disable, "deis_4m", eta=0.0)

def sample_deis_2m_sde(model, x, sigmas, extra_args=None, callback=None, disable=None):
    """`sample_rk` preset: rk_type "deis_2m", eta=0.5, eta_substep=0.5."""
    return _sample_rk_preset(model, x, sigmas, extra_args, callback, disable, "deis_2m", eta=0.5, eta_substep=0.5)

def sample_deis_3m_sde(model, x, sigmas, extra_args=None, callback=None, disable=None):
    """`sample_rk` preset: rk_type "deis_3m", eta=0.5, eta_substep=0.5."""
    return _sample_rk_preset(model, x, sigmas, extra_args, callback, disable, "deis_3m", eta=0.5, eta_substep=0.5)

def sample_deis_4m_sde(model, x, sigmas, extra_args=None, callback=None, disable=None):
    """`sample_rk` preset: rk_type "deis_4m", eta=0.5, eta_substep=0.5."""
    return _sample_rk_preset(model, x, sigmas, extra_args, callback, disable, "deis_4m", eta=0.5, eta_substep=0.5)
def move_to_same_device(*tensors):
    """Return `tensors` as a tuple, each moved to the device of the first one.

    With no arguments the (empty) argument tuple is returned unchanged.
    """
    if not tensors:
        return tensors

    device = tensors[0].device
    return tuple(tensor.to(device) for tensor in tensors)


class ClownSamplerAdvanced:
    """Deprecated legacy node that wraps the "rk" sampler settings in a SAMPLER object."""

    @classmethod
    def INPUT_TYPES(s):
        return {"required":
                    {
                    "noise_type_sde":         (NOISE_GENERATOR_NAMES_SIMPLE, {"default": "gaussian"}),
                    "noise_type_sde_substep": (NOISE_GENERATOR_NAMES_SIMPLE, {"default": "gaussian"}),
                    "noise_mode_sde":         (NOISE_MODE_NAMES, {"default": 'hard', "tooltip": "How noise scales with the sigma schedule. Hard is the most aggressive, the others start strong and drop rapidly."}),
                    "noise_mode_sde_substep": (NOISE_MODE_NAMES, {"default": 'hard', "tooltip": "How noise scales with the sigma schedule. Hard is the most aggressive, the others start strong and drop rapidly."}),
                    "eta":                    ("FLOAT", {"default": 0.5, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Calculated noise amount to be added, then removed, after each step."}),
                    "eta_substep":            ("FLOAT", {"default": 0.5, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Calculated noise amount to be added, then removed, after each step."}),
                    "s_noise":                ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step":0.01, "tooltip": "Adds extra SDE noise. Values around 1.03-1.07 can lead to a moderate boost in detail and paint textures."}),
                    "d_noise":                ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step":0.01, "tooltip": "Downscales the sigma schedule. Values around 0.98-0.95 can lead to a large boost in detail and paint textures."}),
                    "noise_seed_sde":         ("INT", {"default": -1, "min": -1, "max": 0xffffffffffffffff}),
                    "sampler_name":           (RK_SAMPLER_NAMES, {"default": "res_2m"}),
                    "implicit_sampler_name":  (IRK_SAMPLER_NAMES, {"default": "explicit_diagonal"}),
                    "implicit_steps":         ("INT", {"default": 0, "min": 0, "max": 10000}),
                    },
                "optional":
                    {
                    "guides":        ("GUIDES", ),
                    "options":       ("OPTIONS", ),
                    "automation":    ("AUTOMATION", ),
                    "extra_options": ("STRING", {"default": "", "multiline": True}),
                    }
                }

    RETURN_TYPES = ("SAMPLER",)
    RETURN_NAMES = ("sampler", )
    FUNCTION = "main"
    CATEGORY = "RES4LYF/legacy/samplers"
    DEPRECATED = True

    def main(self,
             noise_type_sde="gaussian", noise_type_sde_substep="gaussian", noise_mode_sde="hard",
             eta=0.25, eta_var=0.0, d_noise=1.0, s_noise=1.0, alpha_sde=-1.0, k_sde=1.0, cfgpp=0.0, c1=0.0, c2=0.5, c3=1.0, noise_seed_sde=-1, sampler_name="res_2m", implicit_sampler_name="gauss-legendre_2s",
             t_fn_formula=None, sigma_fn_formula=None, implicit_steps=0,
             latent_guide=None, latent_guide_inv=None, guide_mode="", latent_guide_weights=None, latent_guide_weights_inv=None, latent_guide_mask=None, latent_guide_mask_inv=None, rescale_floor=True, sigmas_override=None,
             guides=None, options=None, sde_noise=None, sde_noise_steps=1,
             extra_options="", automation=None, etas=None, s_noises=None, unsample_resample_scales=None, regional_conditioning_weights=None, frame_weights_grp=None, eta_substep=0.5, noise_mode_sde_substep="hard",
             ):
        """Build the "rk" sampler object from the node inputs.

        Order in which values are resolved:
          1. "none" for `implicit_sampler_name` disables implicit steps; "none"
             for `noise_mode_sde` zeroes `eta`/`eta_var` and falls back to "hard".
          2. Keys present in `options` replace the matching arguments.
          3. `rescale_floor` is always taken from the "rescale_floor" extra
             option; the argument value is not used.
          4. When `automation` is given, `etas`, `s_noises`,
             `unsample_resample_scales` and `frame_weights_grp` are read from
             it (None when a key is missing). An `unsample_resample_scales`
             argument that was not None still wins over the automation value.

        `sde_noise` is deep-copied before normalisation, so the caller's
        tensors are not modified.

        NOTE(review): `noise_type_sde_substep` and `sde_noise_steps` are
        accepted but never forwarded to the sampler -- confirm this is intended.

        Returns:
            A 1-tuple holding the object returned by `comfy.samplers.ksampler`.
        """
        if implicit_sampler_name == "none":
            implicit_steps = 0
            implicit_sampler_name = "gauss-legendre_2s"

        if noise_mode_sde == "none":
            eta, eta_var = 0.0, 0.0
            noise_mode_sde = "hard"

        # Unknown dtype names in the extra options fall back to float64.
        default_dtype = getattr(torch, get_extra_options_kv("default_dtype", "float64", extra_options), torch.float64)

        unsample_resample_scales_override = unsample_resample_scales

        if options is not None:
            noise_type_sde    = options.get('noise_type_sde', noise_type_sde)
            noise_mode_sde    = options.get('noise_mode_sde', noise_mode_sde)
            eta               = options.get('eta', eta)
            s_noise           = options.get('s_noise', s_noise)
            d_noise           = options.get('d_noise', d_noise)
            alpha_sde         = options.get('alpha_sde', alpha_sde)
            k_sde             = options.get('k_sde', k_sde)
            c1                = options.get('c1', c1)
            c2                = options.get('c2', c2)
            c3                = options.get('c3', c3)
            t_fn_formula      = options.get('t_fn_formula', t_fn_formula)
            sigma_fn_formula  = options.get('sigma_fn_formula', sigma_fn_formula)
            frame_weights_grp = options.get('frame_weights_grp', frame_weights_grp)
            sde_noise         = options.get('sde_noise', sde_noise)
            sde_noise_steps   = options.get('sde_noise_steps', sde_noise_steps)

        rescale_floor = extra_options_flag("rescale_floor", extra_options)

        if automation is not None:
            etas                     = automation.get('etas')
            s_noises                 = automation.get('s_noises')
            unsample_resample_scales = automation.get('unsample_resample_scales')
            frame_weights_grp        = automation.get('frame_weights_grp')

        # Per-step schedules, zero-padded by a further MAX_STEPS entries.
        etas = initialize_or_scale(etas, eta, MAX_STEPS).to(default_dtype)
        etas = F.pad(etas, (0, MAX_STEPS), value=0.0)
        s_noises = initialize_or_scale(s_noises, s_noise, MAX_STEPS).to(default_dtype)
        s_noises = F.pad(s_noises, (0, MAX_STEPS), value=0.0)

        if sde_noise is None:
            sde_noise = []
        else:
            sde_noise = copy.deepcopy(sde_noise)
            # Standardise (zero mean, unit std) every index along dim 1 of the
            # first batch entry only; other batch entries are left untouched.
            for noise in sde_noise:
                for j in range(noise.shape[1]):
                    channel = noise[0][j]
                    noise[0][j] = (channel - channel.mean()) / channel.std()

        if unsample_resample_scales_override is not None:
            unsample_resample_scales = unsample_resample_scales_override

        sampler = comfy.samplers.ksampler("rk", {"eta": eta, "eta_var": eta_var, "s_noise": s_noise, "d_noise": d_noise, "alpha": alpha_sde, "k": k_sde, "c1": c1, "c2": c2, "c3": c3, "cfgpp": cfgpp,
                                                 "noise_sampler_type": noise_type_sde, "noise_mode": noise_mode_sde, "noise_seed": noise_seed_sde, "rk_type": sampler_name, "implicit_sampler_name": implicit_sampler_name,
                                                 "t_fn_formula": t_fn_formula, "sigma_fn_formula": sigma_fn_formula, "implicit_steps": implicit_steps,
                                                 "latent_guide": latent_guide, "latent_guide_inv": latent_guide_inv, "mask": latent_guide_mask, "mask_inv": latent_guide_mask_inv,
                                                 "latent_guide_weights": latent_guide_weights, "latent_guide_weights_inv": latent_guide_weights_inv, "guide_mode": guide_mode,
                                                 "LGW_MASK_RESCALE_MIN": rescale_floor, "sigmas_override": sigmas_override, "sde_noise": sde_noise,
                                                 "extra_options": extra_options,
                                                 "etas": etas, "s_noises": s_noises, "unsample_resample_scales": unsample_resample_scales, "regional_conditioning_weights": regional_conditioning_weights,
                                                 "guides": guides, "frame_weights_grp": frame_weights_grp, "eta_substep": eta_substep, "noise_mode_sde_substep": noise_mode_sde_substep,
                                                 })

        return (sampler, )


class ClownSampler:
    """Deprecated legacy node: `ClownSamplerAdvanced` with substep noise tied to the step settings."""

    @classmethod
    def INPUT_TYPES(s):
        return {"required":
                    {
                    "noise_type_sde":        (NOISE_GENERATOR_NAMES_SIMPLE, {"default": "gaussian"}),
                    "noise_mode_sde":        (NOISE_MODE_NAMES, {"default": 'hard', "tooltip": "How noise scales with the sigma schedule. Hard is the most aggressive, the others start strong and drop rapidly."}),
                    "eta":                   ("FLOAT", {"default": 0.5, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Calculated noise amount to be added, then removed, after each step."}),
                    "s_noise":               ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step":0.01}),
                    "d_noise":               ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step":0.01}),
                    "noise_seed_sde":        ("INT", {"default": -1, "min": -1, "max": 0xffffffffffffffff}),
                    "sampler_name":          (RK_SAMPLER_NAMES, {"default": "res_2m"}),
                    "implicit_sampler_name": (IRK_SAMPLER_NAMES, {"default": "explicit_diagonal"}),
                    "implicit_steps":        ("INT", {"default": 0, "min": 0, "max": 10000}),
                    },
                "optional":
                    {
                    "guides":        ("GUIDES", ),
                    "options":       ("OPTIONS", ),
                    "automation":    ("AUTOMATION", ),
                    "extra_options": ("STRING", {"default": "", "multiline": True}),
                    }
                }

    RETURN_TYPES = ("SAMPLER",)
    RETURN_NAMES = ("sampler", )
    FUNCTION = "main"
    CATEGORY = "RES4LYF/legacy/samplers"
    DEPRECATED = True

    def main(self,
             noise_type_sde="gaussian", noise_type_sde_substep="gaussian", noise_mode_sde="hard",
             eta=0.25, eta_var=0.0, d_noise=1.0, s_noise=1.0, alpha_sde=-1.0, k_sde=1.0, cfgpp=0.0, c1=0.0, c2=0.5, c3=1.0, noise_seed_sde=-1, sampler_name="res_2m", implicit_sampler_name="gauss-legendre_2s",
             t_fn_formula=None, sigma_fn_formula=None, implicit_steps=0,
             latent_guide=None, latent_guide_inv=None, guide_mode="", latent_guide_weights=None, latent_guide_weights_inv=None, latent_guide_mask=None, latent_guide_mask_inv=None, rescale_floor=True, sigmas_override=None,
             guides=None, options=None, sde_noise=None, sde_noise_steps=1,
             extra_options="", automation=None, etas=None, s_noises=None, unsample_resample_scales=None, regional_conditioning_weights=None, frame_weights_grp=None, eta_substep=0.0, noise_mode_sde_substep="hard",
             ):
        """Delegate to `ClownSamplerAdvanced.main` with the substep settings mirrored.

        Whatever is passed for `eta_substep`, `noise_mode_sde_substep` and
        `noise_type_sde_substep` is replaced by `eta`, `noise_mode_sde` and
        `noise_type_sde` respectively before delegating.

        Returns:
            The 1-tuple produced by `ClownSamplerAdvanced.main`, unchanged.
        """
        # The simple node has no separate substep controls: reuse the step values.
        eta_substep = eta
        noise_mode_sde_substep = noise_mode_sde
        noise_type_sde_substep = noise_type_sde

        sampler = ClownSamplerAdvanced().main(
            noise_type_sde=noise_type_sde, noise_type_sde_substep=noise_type_sde_substep, noise_mode_sde=noise_mode_sde,
            eta=eta, eta_var=eta_var, d_noise=d_noise, s_noise=s_noise, alpha_sde=alpha_sde, k_sde=k_sde, cfgpp=cfgpp, c1=c1, c2=c2, c3=c3, noise_seed_sde=noise_seed_sde, sampler_name=sampler_name, implicit_sampler_name=implicit_sampler_name,
            t_fn_formula=t_fn_formula, sigma_fn_formula=sigma_fn_formula, implicit_steps=implicit_steps,
            latent_guide=latent_guide, latent_guide_inv=latent_guide_inv, guide_mode=guide_mode, latent_guide_weights=latent_guide_weights, latent_guide_weights_inv=latent_guide_weights_inv, latent_guide_mask=latent_guide_mask, latent_guide_mask_inv=latent_guide_mask_inv, rescale_floor=rescale_floor, sigmas_override=sigmas_override,
            guides=guides, options=options, sde_noise=sde_noise, sde_noise_steps=sde_noise_steps,
            extra_options=extra_options, automation=automation, etas=etas, s_noises=s_noises, unsample_resample_scales=unsample_resample_scales, regional_conditioning_weights=regional_conditioning_weights, frame_weights_grp=frame_weights_grp, eta_substep=eta_substep, noise_mode_sde_substep=noise_mode_sde_substep,
            )

        return sampler


def process_sampler_name(selected_value):
    """Split a sampler menu entry into `(sampler_name, implicit_sampler_name)`.

    Only the text after the last "/" is used as the name. Entries starting with
    "fully_implicit" or "diag_implicit" name an implicit sampler and are paired
    with the explicit sampler "buehler"; every other entry names an explicit
    sampler and is paired with the placeholder "use_explicit".
    """
    processed_name = selected_value.split("/")[-1]

    if selected_value.startswith(("fully_implicit", "diag_implicit")):
        return "buehler", processed_name

    return processed_name, "use_explicit"


def copy_cond(positive):
    """Return a copy of a conditioning list that is safe to modify.

    Each entry is an `(embedding, cond_dict)` pair. The embedding and every
    tensor value of the dict are cloned; all other dict values are shared by
    reference so that large attached objects are not duplicated.

    `None` is passed through as `None`, so an unconnected conditioning input
    no longer raises TypeError here.
    """
    if positive is None:
        return None

    new_positive = []
    for embedding, cond in positive:
        cond_copy = {
            k: (v.clone() if isinstance(v, torch.Tensor) else v)  # share non-tensors (e.g. controlnets) by reference
            for k, v in cond.items()
        }
        new_positive.append([embedding.clone(), cond_copy])

    return new_positive
def main(self, model, cfg, scheduler, steps, sampler_mode="standard",denoise=1.0, denoise_alt=1.0,
         noise_type_init="gaussian", latent_image=None,
         positive=None, negative=None, sampler=None, sigmas=None, latent_noise=None, latent_noise_match=None,
         noise_stdev=1.0, noise_mean=0.0, noise_normalize=True,
         d_noise=1.0, alpha_init=-1.0, k_init=1.0, cfgpp=0.0, noise_seed=-1,
         options=None, sde_noise=None,sde_noise_steps=1,
         extra_options="",
         ):
    """Sample every latent of the batch with ``comfy.sample.sample_custom``.

    The input latent is split along dim 0; each item is sampled
    ``sde_noise_steps`` times and the per-call results are re-stacked.

    Returns:
        ``(out, out_denoised, sde_noise)``. ``out``/``out_denoised`` carry
        ``samples`` (cast back to the input latent dtype) and ``samples_fp64``
        (the uncast clone); ``out`` additionally gets ``raw_x`` and
        ``last_seed``. When no ``"x0"`` was captured, ``out_denoised`` is the
        very same dict object as ``out``.

    Raises:
        ValueError: if ``sampler`` is None.

    NOTE(review): this source arrived with its whitespace collapsed; the block
    nesting below is reconstructed from the token stream -- confirm against the
    repository before relying on it.
    NOTE(review): ``cfgpp`` is accepted but not read in this method, and
    ``total_steps`` is assigned but never read.
    """
    # blame comfy here
    # NOTE(review): latent_image defaults to None, but it is dereferenced
    # unconditionally here, so calling without a latent raises TypeError.
    raw_x = latent_image['raw_x'] if 'raw_x' in latent_image else None
    last_seed = latent_image['last_seed'] if 'last_seed' in latent_image else None

    pos_cond = copy_cond(positive)
    neg_cond = copy_cond(negative)

    if sampler is None:
        raise ValueError("sampler is required")
    else:
        # Work on a private copy: extra_options is mutated further down.
        sampler = copy.deepcopy(sampler)

    # dtype name is looked up on torch; unknown names fall back to float64.
    default_dtype = getattr(torch, get_extra_options_kv("default_dtype", "float64", extra_options), torch.float64)

    model = model.clone()
    # NOTE(review): pos_cond is indexed here, before the `pos_cond is None`
    # fallbacks further down; whether copy_cond can return None is not visible.
    if pos_cond[0][1] is not None:
        if "regional_conditioning_weights" in pos_cond[0][1]:
            sampler.extra_options['regional_conditioning_weights'] = pos_cond[0][1]['regional_conditioning_weights']
            sampler.extra_options['regional_conditioning_floors'] = pos_cond[0][1]['regional_conditioning_floors']
            regional_generate_conditionings_and_masks_fn = pos_cond[0][1]['regional_generate_conditionings_and_masks_fn']
            regional_conditioning, regional_mask = regional_generate_conditionings_and_masks_fn(latent_image['samples'])
            regional_conditioning = copy.deepcopy(regional_conditioning)
            regional_mask = copy.deepcopy(regional_mask)
            model.set_model_patch(regional_conditioning, 'regional_conditioning_positive')
            model.set_model_patch(regional_mask, 'regional_conditioning_mask')

    if "noise_seed" in sampler.extra_options:
        # Only fill in the sampler's seed when it was left at -1 and this node has a real seed.
        if sampler.extra_options['noise_seed'] == -1 and noise_seed != -1:
            sampler.extra_options['noise_seed'] = noise_seed + 1
            #print("Shark: setting clown noise seed to: ", sampler.extra_options['noise_seed'])

    if "sampler_mode" in sampler.extra_options:
        sampler.extra_options['sampler_mode'] = sampler_mode

    if "extra_options" in sampler.extra_options:
        # This node's extra_options come first, the sampler's own are appended.
        extra_options += " "
        extra_options += sampler.extra_options['extra_options']
        sampler.extra_options['extra_options'] = extra_options

    batch_size = int(get_extra_options_kv("batch_size", "1", extra_options))
    if batch_size > 1:
        # NOTE(review): this writes back into the caller's latent_image dict.
        latent_image['samples'] = latent_image['samples'].repeat(batch_size, 1, 1, 1)

    latent_image_batch = {"samples": latent_image['samples']}
    out_samples, out_samples_fp64, out_denoised_samples, out_denoised_samples_fp64 = [], [], [], []
    for batch_num in range(latent_image_batch['samples'].shape[0]):
        latent_unbatch = copy.deepcopy(latent_image)
        latent_unbatch['samples'] = latent_image_batch['samples'][batch_num].clone().unsqueeze(0)

        if noise_seed == -1:
            seed = torch.initial_seed() + 1 + batch_num
        else:
            seed = noise_seed + batch_num
            # NOTE(review): re-seeding is presumably scoped to this else branch -- confirm.
            torch.manual_seed(seed)
            torch.cuda.manual_seed(seed)
            #torch.cuda.manual_seed_all(seed)

        if options is not None:
            # Values supplied through `options` take precedence over the arguments.
            noise_stdev = options.get('noise_init_stdev', noise_stdev)
            noise_mean = options.get('noise_init_mean', noise_mean)
            noise_type_init = options.get('noise_type_init', noise_type_init)
            d_noise = options.get('d_noise', d_noise)
            alpha_init = options.get('alpha_init', alpha_init)
            k_init = options.get('k_init', k_init)
            sde_noise = options.get('sde_noise', sde_noise)
            sde_noise_steps = options.get('sde_noise_steps', sde_noise_steps)

        latent_image_dtype = latent_unbatch['samples'].dtype

        # Missing conditionings are replaced by zero tensors; the shapes differ
        # between the Flux branch and the fallback branch.
        if isinstance(model.model.model_config, comfy.supported_models.Flux) or isinstance(model.model.model_config, comfy.supported_models.FluxSchnell):
            if pos_cond is None:
                pos_cond = [[
                    torch.zeros((1, 256, 4096)),
                    {'pooled_output': torch.zeros((1, 768))}
                    ]]

            if extra_options_flag("uncond_ortho_flux", extra_options):
                if neg_cond is None:
                    print("uncond_ortho_flux: using random negative conditioning...")
                    neg_cond = [[
                        torch.randn((1, 256, 4096)),
                        {'pooled_output': torch.randn((1, 768))}
                        ]]
                #neg_cond[0][0] = get_orthogonal(neg_cond[0][0].to(torch.bfloat16), pos_cond[0][0].to(torch.bfloat16))
                #neg_cond[0][1]['pooled_output'] = get_orthogonal(neg_cond[0][1]['pooled_output'].to(torch.bfloat16), pos_cond[0][1]['pooled_output'].to(torch.bfloat16))
                neg_cond[0][0] = get_orthogonal(neg_cond[0][0], pos_cond[0][0])
                neg_cond[0][1]['pooled_output'] = get_orthogonal(neg_cond[0][1]['pooled_output'], pos_cond[0][1]['pooled_output'])

            if neg_cond is None:
                neg_cond = [[
                    torch.zeros((1, 256, 4096)),
                    {'pooled_output': torch.zeros((1, 768))}
                    ]]
        else:
            if pos_cond is None:
                pos_cond = [[
                    torch.zeros((1, 154, 4096)),
                    {'pooled_output': torch.zeros((1, 2048))}
                    ]]

            if extra_options_flag("uncond_ortho_sd35", extra_options):
                if neg_cond is None:
                    neg_cond = [[
                        torch.randn((1, 154, 4096)),
                        {'pooled_output': torch.randn((1, 2048))}
                        ]]

                neg_cond[0][0] = get_orthogonal(neg_cond[0][0], pos_cond[0][0])
                neg_cond[0][1]['pooled_output'] = get_orthogonal(neg_cond[0][1]['pooled_output'], pos_cond[0][1]['pooled_output'])

            if neg_cond is None:
                neg_cond = [[
                    torch.zeros((1, 154, 4096)),
                    {'pooled_output': torch.zeros((1, 2048))}
                    ]]

        if extra_options_flag("zero_uncond_t5", extra_options):
            neg_cond[0][0] = torch.zeros_like(neg_cond[0][0])

        if extra_options_flag("zero_uncond_pooled_output", extra_options):
            neg_cond[0][1]['pooled_output'] = torch.zeros_like(neg_cond[0][1]['pooled_output'])

        if extra_options_flag("zero_pooled_output", extra_options):
            pos_cond[0][1]['pooled_output'] = torch.zeros_like(pos_cond[0][1]['pooled_output'])
            neg_cond[0][1]['pooled_output'] = torch.zeros_like(neg_cond[0][1]['pooled_output'])

        # A negative denoise_alt is flipped positive and also used as d_noise
        # (which `options` may then override again).
        if denoise_alt < 0:
            d_noise = denoise_alt = -denoise_alt
        if options is not None:
            d_noise = options.get('d_noise', d_noise)

        if sigmas is not None:
            sigmas = sigmas.clone().to(default_dtype)
        else:
            sigmas = get_sigmas(model, scheduler, steps, denoise).to(default_dtype)
        sigmas *= denoise_alt

        # unsample: reversed schedule with a trailing 0; resample: a 0 on both ends.
        if sampler_mode.startswith("unsample"):
            null = torch.tensor([0.0], device=sigmas.device, dtype=sigmas.dtype)
            sigmas = torch.flip(sigmas, dims=[0])
            sigmas = torch.cat([sigmas, null])
        elif sampler_mode.startswith("resample"):
            null = torch.tensor([0.0], device=sigmas.device, dtype=sigmas.dtype)
            sigmas = torch.cat([null, sigmas])
            sigmas = torch.cat([sigmas, null])

        x = latent_unbatch["samples"].clone().to(default_dtype)
        if latent_unbatch is not None:
            if "samples_fp64" in latent_unbatch:
                if latent_unbatch['samples'].shape == latent_unbatch['samples_fp64'].shape:
                    # Prefer the stored high-precision latent when it still matches `samples`.
                    if torch.norm(latent_unbatch['samples'] - latent_unbatch['samples_fp64']) < 0.01:
                        x = latent_unbatch["samples_fp64"].clone()

        if latent_noise is not None:
            latent_noise_samples = latent_noise["samples"].clone().to(default_dtype)
        if latent_noise_match is not None:
            latent_noise_match_samples = latent_noise_match["samples"].clone().to(default_dtype)

        # NOTE(review): the flag helper's result is compared against strings; if
        # extra_options_flag returns a bool this whole branch never runs -- verify.
        truncate_conditioning = extra_options_flag("truncate_conditioning", extra_options)
        if truncate_conditioning == "true" or truncate_conditioning == "true_and_zero_neg":
            if pos_cond is not None:
                pos_cond[0][0] = pos_cond[0][0].clone().to(default_dtype)
                pos_cond[0][1]["pooled_output"] = pos_cond[0][1]["pooled_output"].clone().to(default_dtype)
            if neg_cond is not None:
                neg_cond[0][0] = neg_cond[0][0].clone().to(default_dtype)
                neg_cond[0][1]["pooled_output"] = neg_cond[0][1]["pooled_output"].clone().to(default_dtype)
            c = []
            # NOTE(review): this loop only assigns locals; it has no lasting effect.
            for t in pos_cond:
                d = t[1].copy()
                pooled_output = d.get("pooled_output", None)

            for t in neg_cond:
                d = t[1].copy()
                pooled_output = d.get("pooled_output", None)
                if pooled_output is not None:
                    if truncate_conditioning == "true_and_zero_neg":
                        d["pooled_output"] = torch.zeros((1,2048), dtype=t[0].dtype, device=t[0].device)
                        n = [torch.zeros((1,154,4096), dtype=t[0].dtype, device=t[0].device), d]
                    else:
                        d["pooled_output"] = d["pooled_output"][:, :2048]
                        n = [t[0][:, :154, :4096], d]
                c.append(n)
            neg_cond = c

        sigmin = model.model.model_sampling.sigma_min
        sigmax = model.model.model_sampling.sigma_max

        if sde_noise is None and sampler_mode.startswith("unsample"):
            total_steps = len(sigmas)+1
            sde_noise = []
        else:
            total_steps = 1

        for total_steps_iter in range (sde_noise_steps):

            # Initial noise: zeros, freshly generated, or taken from latent_noise.
            if noise_type_init == "none":
                noise = torch.zeros_like(x)
            elif latent_noise is None:
                print("Initial latent noise seed: ", seed)
                noise_sampler_init = NOISE_GENERATOR_CLASSES_SIMPLE.get(noise_type_init)(x=x, seed=seed, sigma_min=sigmin, sigma_max=sigmax)

                if noise_type_init == "fractal":
                    noise_sampler_init.alpha = alpha_init
                    noise_sampler_init.k = k_init
                    noise_sampler_init.scale = 0.1
                noise = noise_sampler_init(sigma=sigmax, sigma_next=sigmin)
            else:
                noise = latent_noise_samples

            if noise_normalize and noise.std() > 0:
                # Standardise over the last two dims.
                noise = (noise - noise.mean(dim=(-2, -1), keepdim=True)) / noise.std(dim=(-2, -1), keepdim=True)
                #noise.sub_(noise.mean()).div_(noise.std())
            noise *= noise_stdev
            noise = (noise - noise.mean()) + noise_mean

            if latent_noise_match is not None:
                # Shift each dim-1 slice of the noise to the mean of the matching reference slice.
                for i in range(latent_noise_match_samples.shape[1]):
                    noise[0][i] = (noise[0][i] - noise[0][i].mean())
                    noise[0][i] = (noise[0][i]) + latent_noise_match_samples[0][i].mean()

            noise_mask = latent_unbatch["noise_mask"] if "noise_mask" in latent_unbatch else None

            x0_output = {}

            # Negative cfg: its magnitude is handed to the sampler as 'cfg_cw' and cfg itself becomes 1.0.
            if cfg < 0:
                sampler.extra_options['cfg_cw'] = -cfg
                cfg = 1.0
            else:
                sampler.extra_options.pop("cfg_cw", None)

            if sde_noise is None:
                sde_noise = []
            else:
                sde_noise = copy.deepcopy(sde_noise)
                for i in range(len(sde_noise)):
                    sde_noise[i] = sde_noise[i]
                    # Standardise every dim-1 slice of the first batch item.
                    for j in range(sde_noise[i].shape[1]):
                        sde_noise[i][0][j] = ((sde_noise[i][0][j] - sde_noise[i][0][j].mean()) / sde_noise[i][0][j].std())

            callback = latent_preview.prepare_callback(model, sigmas.shape[-1] - 1, x0_output)

            disable_pbar = not comfy.utils.PROGRESS_BAR_ENABLED

            # State is passed to (and later read back from) the diffusion model via attributes.
            model.model.diffusion_model.raw_x = raw_x
            model.model.diffusion_model.last_seed = last_seed

            samples = comfy.sample.sample_custom(model, noise, cfg, sampler, sigmas, pos_cond, neg_cond, x.clone(), noise_mask=noise_mask, callback=callback, disable_pbar=disable_pbar, seed=noise_seed)

            out = latent_unbatch.copy()
            out["samples"] = samples
            if "x0" in x0_output:
                out_denoised = latent_unbatch.copy()
                out_denoised["samples"] = model.model.process_latent_out(x0_output["x0"].cpu())
            else:
                out_denoised = out

            out["samples_fp64"] = out["samples"].clone()
            out["samples"] = out["samples"].to(latent_image_dtype)

            out_denoised["samples_fp64"] = out_denoised["samples"].clone()
            out_denoised["samples"] = out_denoised["samples"].to(latent_image_dtype)

            out_samples. append(out["samples"])
            out_samples_fp64.append(out["samples_fp64"])

            out_denoised_samples. append(out_denoised["samples"])
            out_denoised_samples_fp64.append(out_denoised["samples_fp64"])

            seed += 1
            torch.manual_seed(seed)
            # NOTE(review): compares the loop index, so nothing is appended on the first two iterations.
            if total_steps_iter > 1:
                sde_noise.append(out["samples_fp64"])

    # Drop the singleton batch dim of every result and re-stack into one batch.
    out_samples = [tensor.squeeze(0) for tensor in out_samples]
    out_samples_fp64 = [tensor.squeeze(0) for tensor in out_samples_fp64]
    out_denoised_samples = [tensor.squeeze(0) for tensor in out_denoised_samples]
    out_denoised_samples_fp64 = [tensor.squeeze(0) for tensor in out_denoised_samples_fp64]

    out['samples'] = torch.stack(out_samples, dim=0)
    out['samples_fp64'] = torch.stack(out_samples_fp64, dim=0)

    out_denoised['samples'] = torch.stack(out_denoised_samples, dim=0)
    out_denoised['samples_fp64'] = torch.stack(out_denoised_samples_fp64, dim=0)

    # Read back the attributes set before sampling, then remove them from the model.
    out['raw_x'] = None
    if hasattr(model.model.diffusion_model, "raw_x"):
        if model.model.diffusion_model.raw_x is not None:
            out['raw_x'] = model.model.diffusion_model.raw_x.clone()
            del model.model.diffusion_model.raw_x

    out['last_seed'] = None
    if hasattr(model.model.diffusion_model, "last_seed"):
        if model.model.diffusion_model.last_seed is not None:
            out['last_seed'] = model.model.diffusion_model.last_seed
            del model.model.diffusion_model.last_seed

    return ( out, out_denoised, sde_noise,)
class ClownsharKSampler:
    """Deprecated all-in-one node: builds a sampler, then runs it.

    ``main`` first asks ``ClownSamplerAdvanced`` for a sampler object and then
    forwards that sampler, together with the sampling arguments, to
    ``SharkSamplerAlpha``; the latter's result is returned unchanged.
    """

    RETURN_TYPES = ("LATENT","LATENT", "LATENT",)
    RETURN_NAMES = ("output", "denoised","sde_noise",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/legacy/samplers"
    DEPRECATED = True

    @classmethod
    def INPUT_TYPES(s):
        """Describe the node's required and optional inputs."""
        required = {
            "model": ("MODEL",),
            "noise_type_init": (NOISE_GENERATOR_NAMES_SIMPLE, {"default": "gaussian"}),
            "noise_type_sde": (NOISE_GENERATOR_NAMES_SIMPLE, {"default": "gaussian"}),
            "noise_mode_sde": (NOISE_MODE_NAMES, {"default": 'hard', "tooltip": "How noise scales with the sigma schedule. Hard is the most aggressive, the others start strong and drop rapidly."}),
            "eta": ("FLOAT", {"default": 0.5, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Calculated noise amount to be added, then removed, after each step."}),
            "noise_seed": ("INT", {"default": 0, "min": -1, "max": 0xffffffffffffffff}),
            "sampler_mode": (['standard', 'unsample', 'resample'],),
            "sampler_name": (RK_SAMPLER_NAMES, {"default": "res_2m"}),
            "implicit_sampler_name": (IRK_SAMPLER_NAMES, {"default": "explicit_diagonal"}),
            "scheduler": (get_res4lyf_scheduler_list(), {"default": "beta57"},),
            "steps": ("INT", {"default": 30, "min": 1, "max": 10000}),
            "implicit_steps": ("INT", {"default": 0, "min": 0, "max": 10000}),
            "denoise": ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step":0.01}),
            "denoise_alt": ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step":0.01}),
            "cfg": ("FLOAT", {"default": 3.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, }),
            "extra_options": ("STRING", {"default": "", "multiline": True}),
        }
        optional = {
            "positive": ("CONDITIONING", ),
            "negative": ("CONDITIONING", ),
            "sigmas": ("SIGMAS", ),
            "latent_image": ("LATENT", ),
            "guides": ("GUIDES", ),
            "options": ("OPTIONS", ),
            "automation": ("AUTOMATION", ),
        }
        return {"required": required, "optional": optional}

    def main(self, model, cfg, sampler_mode, scheduler, steps, denoise=1.0, denoise_alt=1.0,
             noise_type_init="gaussian", noise_type_sde="brownian", noise_mode_sde="hard", latent_image=None,
             positive=None, negative=None, sigmas=None, latent_noise=None, latent_noise_match=None,
             noise_stdev=1.0, noise_mean=0.0, noise_normalize=True, noise_is_latent=False,
             eta=0.25, eta_var=0.0, d_noise=1.0, s_noise=1.0, alpha_init=-1.0, k_init=1.0, alpha_sde=-1.0, k_sde=1.0, cfgpp=0.0, c1=0.0, c2=0.5, c3=1.0, noise_seed=-1, sampler_name="res_2m", implicit_sampler_name="default",
             t_fn_formula=None, sigma_fn_formula=None, implicit_steps=0,
             latent_guide=None, latent_guide_inv=None, guide_mode="blend", latent_guide_weights=None, latent_guide_weights_inv=None, latent_guide_mask=None, latent_guide_mask_inv=None, rescale_floor=True, sigmas_override=None,
             shift=3.0, base_shift=0.85,
             guides=None, options=None, sde_noise=None,sde_noise_steps=1, shift_scaling="exponential",
             extra_options="", automation=None, etas=None, s_noises=None,unsample_resample_scales=None, regional_conditioning_weights=None,frame_weights_grp=None,
             ):
        """Build a sampler from the SDE settings and run it on the latent.

        Returns whatever ``SharkSamplerAlpha().main`` returns.
        """
        # The SDE noise stream gets the next seed; a negative seed is passed on as -1.
        noise_seed_sde = noise_seed + 1 if noise_seed >= 0 else -1

        # The substep settings simply mirror the per-step ones.
        sampler_settings = dict(
            noise_type_sde=noise_type_sde,
            noise_type_sde_substep=noise_type_sde,
            noise_mode_sde=noise_mode_sde,
            eta=eta,
            eta_var=eta_var,
            d_noise=d_noise,
            s_noise=s_noise,
            alpha_sde=alpha_sde,
            k_sde=k_sde,
            cfgpp=cfgpp,
            c1=c1,
            c2=c2,
            c3=c3,
            noise_seed_sde=noise_seed_sde,
            sampler_name=sampler_name,
            implicit_sampler_name=implicit_sampler_name,
            t_fn_formula=t_fn_formula,
            sigma_fn_formula=sigma_fn_formula,
            implicit_steps=implicit_steps,
            latent_guide=latent_guide,
            latent_guide_inv=latent_guide_inv,
            guide_mode=guide_mode,
            latent_guide_weights=latent_guide_weights,
            latent_guide_weights_inv=latent_guide_weights_inv,
            latent_guide_mask=latent_guide_mask,
            latent_guide_mask_inv=latent_guide_mask_inv,
            rescale_floor=rescale_floor,
            sigmas_override=sigmas_override,
            guides=guides,
            options=options,
            sde_noise=sde_noise,
            sde_noise_steps=sde_noise_steps,
            extra_options=extra_options,
            automation=automation,
            etas=etas,
            s_noises=s_noises,
            unsample_resample_scales=unsample_resample_scales,
            regional_conditioning_weights=regional_conditioning_weights,
            frame_weights_grp=frame_weights_grp,
            eta_substep=eta,
            noise_mode_sde_substep=noise_mode_sde,
        )
        built = ClownSamplerAdvanced().main(**sampler_settings)

        # Only the first element of the builder's result is the sampler itself.
        return SharkSamplerAlpha().main(
            model=model,
            cfg=cfg,
            sampler_mode=sampler_mode,
            scheduler=scheduler,
            steps=steps,
            denoise=denoise,
            denoise_alt=denoise_alt,
            noise_type_init=noise_type_init,
            latent_image=latent_image,
            positive=positive,
            negative=negative,
            sampler=built[0],
            sigmas=sigmas,
            latent_noise=latent_noise,
            latent_noise_match=latent_noise_match,
            noise_stdev=noise_stdev,
            noise_mean=noise_mean,
            noise_normalize=noise_normalize,
            d_noise=d_noise,
            alpha_init=alpha_init,
            k_init=k_init,
            cfgpp=cfgpp,
            noise_seed=noise_seed,
            options=options,
            sde_noise=sde_noise,
            sde_noise_steps=sde_noise_steps,
            extra_options=extra_options,
        )
class UltraSharkSampler:
    """Deprecated sampler node for Stable Cascade / UltraCascade models.

    For use with https://github.com/ClownsharkBatwing/UltraCascade
    """

    @classmethod
    def INPUT_TYPES(s):
        """Describe the node's required and optional inputs."""
        return {
            "required": {
                "model": ("MODEL",),
                "add_noise": ("BOOLEAN", {"default": True}),
                "normalize_noise": ("BOOLEAN", {"default": False}),
                "noise_type": (NOISE_GENERATOR_NAMES, ),
                "alpha": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step":0.1, "round": 0.01}),
                "k": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step":2.0, "round": 0.01}),
                "noise_seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}),
                "cfg": ("FLOAT", {"default": 6.0, "min": 0.0, "max": 100.0, "step":0.5, "round": 0.01}),
                "positive": ("CONDITIONING", ),
                "negative": ("CONDITIONING", ),
                "sampler": ("SAMPLER", ),
                "sigmas": ("SIGMAS", ),
                "latent_image": ("LATENT", ),
                "guide_type": (['residual', 'weighted'], ),
                "guide_weight": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": 0.01}),
            },
            "optional": {
                "guide": ("LATENT",),
                "guide_weights": ("SIGMAS",),
            }
        }

    RETURN_TYPES = ("LATENT","LATENT","LATENT")
    RETURN_NAMES = ("output", "denoised_output", "latent_batch")

    FUNCTION = "main"

    CATEGORY = "RES4LYF/legacy/samplers/UltraCascade"
    DESCRIPTION = "For use with Stable Cascade and UltraCascade."
    DEPRECATED = True

    def main(self, model, add_noise, normalize_noise, noise_type, noise_seed, cfg, alpha, k, positive, negative, sampler,
             sigmas, guide_type, guide_weight, latent_image, latent_noise=None, guide=None, guide_weights=None, style=None, img_style=None):
        """Configure the model for its cascade stage, then sample the latent.

        Stage ``'up'``: the model is cloned and the guide settings are pushed
        either into an ``ultracascade`` replace-patch (when one is registered in
        the model options) or directly onto the diffusion model.
        Stage ``'b'``: ``guide['samples']`` is attached to every positive
        conditioning as ``stable_cascade_prior`` and the negative conditioning
        is replaced by a zeroed copy of the positive one.

        Returns:
            ``(out, out_denoised)``; ``out_denoised`` is ``out`` itself when the
            preview callback captured no ``"x0"``.

        Raises:
            ValueError: stage ``'b'`` was selected without a ``guide`` latent.
        """
        # Read the stage once with .get(): a config that has no
        # 'stable_cascade_stage' key now skips both branches instead of raising
        # KeyError in the second comparison.
        stage = model.model.model_config.unet_config.get('stable_cascade_stage')

        if stage == 'up':
            model = model.clone()
            x_lr = guide['samples'] if guide is not None else None
            guide_weights = initialize_or_scale(guide_weights, guide_weight, 10000)

            patch = model.model_options.get("transformer_options", {}).get("patches_replace", {}).get("ultracascade", {}).get("main")
            if patch is not None:
                patch.update(x_lr=x_lr, guide_weights=guide_weights, guide_type=guide_type)
            else:
                diffusion_model = model.model.diffusion_model
                diffusion_model.set_sigmas_schedule(sigmas_schedule=sigmas)
                diffusion_model.set_sigmas_prev(sigmas_prev=sigmas[:1])
                diffusion_model.set_guide_weights(guide_weights=guide_weights)
                diffusion_model.set_guide_type(guide_type=guide_type)
                diffusion_model.set_x_lr(x_lr=x_lr)

        elif stage == 'b':
            if guide is None:
                # Previously surfaced as "'NoneType' object is not subscriptable".
                raise ValueError("UltraSharkSampler: stage 'b' requires a 'guide' latent (it is used as stable_cascade_prior).")
            prior = guide['samples']
            c_pos, c_neg = [], []
            for t in positive:
                d_pos = t[1].copy()
                d_neg = t[1].copy()

                d_pos['stable_cascade_prior'] = prior

                pooled_output = d_neg.get("pooled_output", None)
                if pooled_output is not None:
                    d_neg["pooled_output"] = torch.zeros_like(pooled_output)

                c_pos.append([t[0], d_pos])
                c_neg.append([torch.zeros_like(t[0]), d_neg])
            positive = c_pos
            negative = c_neg

        # NOTE(review): outside the 'up' stage the model has not been cloned, so
        # these patches land on the caller's model object.
        if style is not None:
            model.set_model_patch(style, 'style_cond')
        if img_style is not None:
            model.set_model_patch(img_style,'img_style_cond')

        latent = latent_image
        latent_image = latent["samples"]
        torch.manual_seed(noise_seed)

        if not add_noise:
            noise = torch.zeros(latent_image.size(), dtype=latent_image.dtype, layout=latent_image.layout, device="cpu")
        elif latent_noise is None:
            batch_inds = latent.get("batch_index")
            noise = prepare_noise(latent_image, noise_seed, noise_type, batch_inds, alpha, k)
        else:
            noise = latent_noise["samples"]

        if normalize_noise and noise.std() > 0:
            # Standardise over the last two dims.
            noise = (noise - noise.mean(dim=(-2, -1), keepdim=True)) / noise.std(dim=(-2, -1), keepdim=True)

        noise_mask = latent.get("noise_mask")

        x0_output = {}
        callback = latent_preview.prepare_callback(model, sigmas.shape[-1] - 1, x0_output)
        disable_pbar = False

        samples = comfy.sample.sample_custom(model, noise, cfg, sampler, sigmas, positive, negative, latent_image,
                                             noise_mask=noise_mask, callback=callback, disable_pbar=disable_pbar, seed=noise_seed)

        out = latent.copy()
        out["samples"] = samples
        if "x0" in x0_output:
            out_denoised = latent.copy()
            out_denoised["samples"] = model.model.process_latent_out(x0_output["x0"].cpu())
        else:
            out_denoised = out

        # NOTE(review): RETURN_TYPES declares three outputs ("latent_batch" being
        # the third) but only two values are returned; what the third should
        # hold cannot be determined from this file -- confirm and align.
        return (out, out_denoised)
def move_to_same_device(*tensors):
    """Return the given tensors as a tuple, all moved to the first one's device.

    Called without arguments, the (empty) argument tuple is returned as is.
    """
    if len(tensors) == 0:
        return tensors

    target = tensors[0].device
    moved = [item.to(target) for item in tensors]
    return tuple(moved)
class SamplerOptions_TimestepScaling:
    """Node that stores custom sigma <-> timestep formulas on a sampler.

    Used for patching the t_fn and sigma_fn formulas to allow picking
    Runge-Kutta Ci values ("midpoints") with different scaling. The formulas
    are written into ``extra_options`` of a deep copy, so the sampler passed
    in is left untouched.
    """

    RETURN_TYPES = ("SAMPLER",)
    RETURN_NAMES = ("sampler",)
    FUNCTION = "set_sampler_extra_options"
    CATEGORY = "RES4LYF/legacy/sampler_extensions"
    DESCRIPTION = "Patches ClownSampler's t_fn and sigma_fn (sigma <-> timestep) formulas to allow picking Runge-Kutta Ci values (midpoints) with different scaling."
    DEPRECATED = True

    @classmethod
    def INPUT_TYPES(s):
        """Describe the node's inputs: a sampler plus the two formula strings."""
        required = {
            "sampler": ("SAMPLER", ),
            "t_fn_formula": ("STRING", {"default": "1/((sigma).exp()+1)", "multiline": True}),
            "sigma_fn_formula": ("STRING", {"default": "((1-t)/t).log()", "multiline": True}),
        }
        return {"required": required, "optional": {}}

    def set_sampler_extra_options(self, sampler, t_fn_formula=None, sigma_fn_formula=None, ):
        """Return a 1-tuple holding a copy of ``sampler`` with both formulas set."""
        patched = copy.deepcopy(sampler)
        for key, formula in (("t_fn_formula", t_fn_formula), ("sigma_fn_formula", sigma_fn_formula)):
            patched.extra_options[key] = formula
        return (patched, )
class SamplerOptions_GarbageCollection:
    """Node that toggles the sampler's per-step garbage-collection option.

    The flag is stored under ``'GARBAGE_COLLECT'`` in ``extra_options`` of a
    deep copy, so the sampler passed in is left untouched.
    """

    RETURN_TYPES = ("SAMPLER",)
    RETURN_NAMES = ("sampler",)
    FUNCTION = "set_sampler_extra_options"
    CATEGORY = "RES4LYF/legacy/sampler_extensions"
    DESCRIPTION = "Patches ClownSampler to use garbage collection after every step. This can help with OOM issues during inference for large models like Flux. The tradeoff is slower sampling."
    DEPRECATED = True

    @classmethod
    def INPUT_TYPES(s):
        """Describe the node's inputs: a sampler and the on/off switch."""
        required = {
            "sampler": ("SAMPLER", ),
            "garbage_collection": ("BOOLEAN", {"default": True}),
        }
        return {"required": required, "optional": {}}

    def set_sampler_extra_options(self, sampler, garbage_collection):
        """Return a 1-tuple holding a copy of ``sampler`` with the flag set."""
        patched = copy.deepcopy(sampler)
        patched.extra_options.__setitem__('GARBAGE_COLLECT', garbage_collection)
        return (patched, )
class ClownInpaint:
    """Deprecated inpainting helper node.

    From one latent and a mask it produces a patched model, a regional
    positive conditioning, the unchanged negative conditioning, a zeroed
    latent of the same shape, and a ``guides`` tuple that uses the input
    latent both as the guide and (cloned) as the background guide.
    ``guide_mode`` is not exposed as a node input; it keeps its ``"epsilon"``
    default unless ``main`` is called directly.
    """

    @classmethod
    def INPUT_TYPES(s):
        """Describe the node's required and optional inputs."""
        return {"required":
                    {
                     "guide_weight": ("FLOAT", {"default": 0.10, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide."}),
                     "guide_weight_bkg": ("FLOAT", {"default": 1.00, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide_bkg."}),
                     "guide_weight_scheduler": (["constant"] + get_res4lyf_scheduler_list(), {"default": "beta57"},),
                     "guide_weight_scheduler_bkg": (["constant"] + get_res4lyf_scheduler_list(), {"default": "constant"},),
                     "guide_end_step": ("INT", {"default": 15, "min": 1, "max": 10000}),
                     "guide_bkg_end_step": ("INT", {"default": 10000, "min": 1, "max": 10000}),
                    },
                "optional":
                    {
                     "model": ("MODEL", ),
                     "positive_inpaint": ("CONDITIONING", ),
                     "positive_bkg": ("CONDITIONING", ),
                     "negative": ("CONDITIONING", ),
                     "latent_image": ("LATENT", ),
                     "mask": ("MASK", ),
                     "guide_weights": ("SIGMAS", ),
                     "guide_weights_bkg": ("SIGMAS", ),
                    }
                }

    RETURN_TYPES = ("MODEL","CONDITIONING","CONDITIONING","LATENT","GUIDES",)
    RETURN_NAMES = ("model","positive" ,"negative" ,"latent","guides",)
    CATEGORY = "RES4LYF/legacy/sampler_extensions"

    FUNCTION = "main"
    DEPRECATED = True

    def main(self, guide_weight_scheduler="constant", guide_weight_scheduler_bkg="constant", guide_end_step=10000, guide_bkg_end_step=30, guide_weight_scale=1.0, guide_weight_bkg_scale=1.0, guide=None, guide_bkg=None, guide_weight=1.0, guide_weight_bkg=1.0,
             guide_mode="epsilon", guide_weights=None, guide_weights_bkg=None, guide_mask_bkg=None,
             model=None, positive_inpaint=None, positive_bkg=None, negative=None, latent_image=None, mask=None,
             ):
        """Assemble the inpainting model, conditionings, latent and guides.

        The ``guide`` and ``guide_bkg`` arguments are ignored: both are always
        derived from ``latent_image``.

        Returns:
            ``(model, positive, negative, latent, guides)``; ``positive`` is
            None when neither positive conditioning was supplied.

        Raises:
            ValueError: ``latent_image`` was not supplied.
        """
        if latent_image is None:
            # The latent is declared optional but is required below; fail with
            # a readable message instead of a TypeError on None['samples'].
            raise ValueError("ClownInpaint requires a latent_image input.")

        default_dtype = torch.float64
        max_steps = 10000

        guide = latent_image
        guide_bkg = {'samples': latent_image['samples'].clone()}

        denoise, denoise_bkg = guide_weight_scale, guide_weight_bkg_scale

        # The former "fall back to epsilon when guide_bkg is missing" check was
        # unreachable (guide_bkg is always assigned just above) and was removed.

        if guide_weight_scheduler == "constant":
            # Constant schedule: guide_weight up to guide_end_step, then zero padding.
            guide_weights = initialize_or_scale(None, guide_weight, guide_end_step).to(default_dtype)
            guide_weights = F.pad(guide_weights, (0, max_steps), value=0.0)

        if guide_weight_scheduler_bkg == "constant":
            guide_weights_bkg = initialize_or_scale(None, guide_weight_bkg, guide_bkg_end_step).to(default_dtype)
            guide_weights_bkg = F.pad(guide_weights_bkg, (0, max_steps), value=0.0)

        guides = (guide_mode, guide_weight, guide_weight_bkg, guide_weights, guide_weights_bkg, guide, guide_bkg, mask, guide_mask_bkg,
                  guide_weight_scheduler, guide_weight_scheduler_bkg, guide_end_step, guide_bkg_end_step, denoise, denoise_bkg)

        latent = {'samples': torch.zeros_like(latent_image['samples'])}

        if positive_inpaint is None and positive_bkg is None:
            positive = None
        else:
            if positive_bkg is None:
                # All-zero placeholder for the background region.
                # NOTE(review): shapes presumably match the model's text/pooled embeddings -- confirm.
                positive_bkg = [[
                    torch.zeros((1, 256, 4096)),
                    {'pooled_output': torch.zeros((1, 768))}
                    ]]
            # The inpaint prompt goes on the mask, the background prompt on the
            # inverted mask returned by the first call.
            cond_regional, mask_inv = FluxRegionalPrompt().main(cond=positive_inpaint, mask=mask)
            cond_regional, mask_inv_inv = FluxRegionalPrompt().main(cond=positive_bkg , cond_regional=cond_regional, mask=mask_inv)

            positive, = FluxRegionalConditioning().main(conditioning_regional=cond_regional, self_attn_floor=0.0)

        model, = ReFluxPatcher().main(model, enable=True)

        return (model, positive, negative, latent, guides, )
class ClownInpaintSimple:
    """Deprecated inpainting helper node with a reduced set of inputs.

    From one latent and a mask it produces a patched model, a regional
    positive conditioning, the unchanged negative conditioning, a zeroed
    latent of the same shape, and a ``guides`` tuple that uses the input
    latent both as the guide and (cloned) as the background guide.
    Background settings are not exposed as node inputs and keep the defaults
    of ``main``.
    """

    @classmethod
    def INPUT_TYPES(s):
        """Describe the node's required and optional inputs."""
        return {"required":
                    {
                     "guide_weight": ("FLOAT", {"default": 0.10, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide."}),
                     "guide_weight_scheduler": (["constant"] + get_res4lyf_scheduler_list(), {"default": "beta57"},),
                     "guide_end_step": ("INT", {"default": 15, "min": 1, "max": 10000}),
                    },
                "optional":
                    {
                     "model": ("MODEL", ),
                     "positive_inpaint": ("CONDITIONING", ),
                     "negative": ("CONDITIONING", ),
                     "latent_image": ("LATENT", ),
                     "mask": ("MASK", ),
                    }
                }

    RETURN_TYPES = ("MODEL","CONDITIONING","CONDITIONING","LATENT","GUIDES",)
    RETURN_NAMES = ("model","positive" ,"negative" ,"latent","guides",)
    CATEGORY = "RES4LYF/legacy/sampler_extensions"

    FUNCTION = "main"
    DEPRECATED = True

    def main(self, guide_weight_scheduler="constant", guide_weight_scheduler_bkg="constant", guide_end_step=10000, guide_bkg_end_step=30, guide_weight_scale=1.0, guide_weight_bkg_scale=1.0, guide=None, guide_bkg=None, guide_weight=1.0, guide_weight_bkg=1.0,
             guide_mode="epsilon", guide_weights=None, guide_weights_bkg=None, guide_mask_bkg=None,
             model=None, positive_inpaint=None, positive_bkg=None, negative=None, latent_image=None, mask=None,
             ):
        """Assemble the inpainting model, conditionings, latent and guides.

        The ``guide`` and ``guide_bkg`` arguments are ignored: both are always
        derived from ``latent_image``.

        Returns:
            ``(model, positive, negative, latent, guides)``; ``positive`` is
            None when neither positive conditioning was supplied.

        Raises:
            ValueError: ``latent_image`` was not supplied.
        """
        if latent_image is None:
            # The latent is declared optional but is required below; fail with
            # a readable message instead of a TypeError on None['samples'].
            raise ValueError("ClownInpaintSimple requires a latent_image input.")

        default_dtype = torch.float64
        max_steps = 10000

        guide = latent_image
        guide_bkg = {'samples': latent_image['samples'].clone()}

        denoise, denoise_bkg = guide_weight_scale, guide_weight_bkg_scale

        # The former "fall back to epsilon when guide_bkg is missing" check was
        # unreachable (guide_bkg is always assigned just above) and was removed.

        if guide_weight_scheduler == "constant":
            # Constant schedule: guide_weight up to guide_end_step, then zero padding.
            guide_weights = initialize_or_scale(None, guide_weight, guide_end_step).to(default_dtype)
            guide_weights = F.pad(guide_weights, (0, max_steps), value=0.0)

        if guide_weight_scheduler_bkg == "constant":
            guide_weights_bkg = initialize_or_scale(None, guide_weight_bkg, guide_bkg_end_step).to(default_dtype)
            guide_weights_bkg = F.pad(guide_weights_bkg, (0, max_steps), value=0.0)

        guides = (guide_mode, guide_weight, guide_weight_bkg, guide_weights, guide_weights_bkg, guide, guide_bkg, mask, guide_mask_bkg,
                  guide_weight_scheduler, guide_weight_scheduler_bkg, guide_end_step, guide_bkg_end_step, denoise, denoise_bkg)

        latent = {'samples': torch.zeros_like(latent_image['samples'])}

        if positive_inpaint is None and positive_bkg is None:
            positive = None
        else:
            if positive_bkg is None:
                # All-zero placeholder for the background region.
                # NOTE(review): shapes presumably match the model's text/pooled embeddings -- confirm.
                positive_bkg = [[
                    torch.zeros((1, 256, 4096)),
                    {'pooled_output': torch.zeros((1, 768))}
                    ]]
            # The inpaint prompt goes on the mask, the background prompt on the
            # inverted mask returned by the first call.
            cond_regional, mask_inv = FluxRegionalPrompt().main(cond=positive_inpaint, mask=mask)
            cond_regional, mask_inv_inv = FluxRegionalPrompt().main(cond=positive_bkg , cond_regional=cond_regional, mask=mask_inv)

            positive, = FluxRegionalConditioning().main(conditioning_regional=cond_regional, self_attn_floor=1.0)

        model, = ReFluxPatcher().main(model, enable=True)

        return (model, positive, negative, latent, guides, )
class ClownsharKSamplerGuide:
    """Deprecated node that packs single-guide settings into a ``guides`` tuple.

    Only the foreground guide is exposed as node input; the background
    ("bkg") entries of the tuple keep the defaults of ``main``.
    """

    @classmethod
    def INPUT_TYPES(s):
        """Describe the node's required and optional inputs."""
        return {"required":
                    {"guide_mode": (GUIDE_MODE_NAMES, {"default": 'epsilon_projection', "tooltip": "Recommended: epsilon or mean/mean_std with sampler_mode = standard, and unsample/resample with sampler_mode = unsample/resample. Epsilon_dynamic_mean, etc. are only used with two latent inputs and a mask. Blend/hard_light/mean/mean_std etc. require low strengths, start with 0.01-0.02."}),
                     "guide_weight": ("FLOAT", {"default": 0.75, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide."}),
                     "guide_weight_scale": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step":0.01, "round": False, "tooltip": "Disables the guide for the next step when the denoised image is similar to the guide. Higher values will strengthen the effect."}),
                     "guide_weight_scheduler": (["constant"] + get_res4lyf_scheduler_list(), {"default": "beta57"},),
                     "guide_end_step": ("INT", {"default": 15, "min": 1, "max": 10000}),
                    },
                "optional":
                    {
                     "guide": ("LATENT", ),
                     "guide_mask": ("MASK", ),
                     "guide_weights": ("SIGMAS", ),
                    }
                }

    RETURN_TYPES = ("GUIDES",)
    RETURN_NAMES = ("guides",)
    CATEGORY = "RES4LYF/legacy/sampler_extensions"

    FUNCTION = "main"
    DEPRECATED = True

    def main(self, guide_weight_scheduler="constant", guide_weight_scheduler_bkg="constant", guide_end_step=30, guide_bkg_end_step=30, guide_weight_scale=1.0, guide_weight_bkg_scale=1.0, guide=None, guide_bkg=None, guide_weight=0.0, guide_weight_bkg=0.0,
             guide_mode="blend", guide_weights=None, guide_weights_bkg=None, guide_mask=None, guide_mask_bkg=None,
             ):
        """Build the ``guides`` tuple consumed by the sampler nodes.

        Returns:
            A 1-tuple holding ``guides``, a 15-tuple laid out as
            ``(guide_mode, guide_weight, guide_weight_bkg, guide_weights,
            guide_weights_bkg, guide, guide_bkg, guide_mask, guide_mask_bkg,
            guide_weight_scheduler, guide_weight_scheduler_bkg, guide_end_step,
            guide_bkg_end_step, denoise, denoise_bkg)``.
        """
        default_dtype = torch.float64

        max_steps = 10000

        denoise, denoise_bkg = guide_weight_scale, guide_weight_bkg_scale

        # "epsilon_*" modes other than epsilon_projection need a second latent.
        if guide_mode.startswith("epsilon_") and not guide_mode.startswith("epsilon_projection") and guide_bkg is None:
            print("Warning: need two latent inputs for guide_mode=",guide_mode," to work. Falling back to epsilon.")
            guide_mode = "epsilon"

        # `is None` rather than `== None`: guide_weights may be a tensor, and an
        # identity check never dispatches to the tensor's own __eq__.
        if guide_weight_scheduler == "constant" and guide_weights is None:
            # Constant schedule of 1.0 up to guide_end_step, then zero padding;
            # guide_weight itself travels separately in the tuple.
            guide_weights = initialize_or_scale(None, 1.0, guide_end_step).to(default_dtype)
            guide_weights = F.pad(guide_weights, (0, max_steps), value=0.0)

        if guide_weight_scheduler_bkg == "constant":
            # Background schedule is all zeros in this single-guide node.
            guide_weights_bkg = initialize_or_scale(None, 0.0, guide_bkg_end_step).to(default_dtype)
            guide_weights_bkg = F.pad(guide_weights_bkg, (0, max_steps), value=0.0)

        guides = (guide_mode, guide_weight, guide_weight_bkg, guide_weights, guide_weights_bkg, guide, guide_bkg, guide_mask, guide_mask_bkg,
                  guide_weight_scheduler, guide_weight_scheduler_bkg, guide_end_step, guide_bkg_end_step, denoise, denoise_bkg)
        return (guides, )
class ClownsharKSamplerGuides:
    """Deprecated legacy node that packs guide settings into a ``GUIDES`` tuple.

    The node performs no sampling itself: ``main`` validates the guide mode,
    fills in default per-step weight schedules where requested, and returns
    everything as one positional tuple.
    """

    @classmethod
    def INPUT_TYPES(s):
        return {"required":
                    {"guide_mode": (GUIDE_MODE_NAMES, {"default": 'epsilon_projection', "tooltip": "Recommended: epsilon or mean/mean_std with sampler_mode = standard, and unsample/resample with sampler_mode = unsample/resample. Epsilon_dynamic_mean, etc. are only used with two latent inputs and a mask. Blend/hard_light/mean/mean_std etc. require low strengths, start with 0.01-0.02."}),
                     "guide_weight": ("FLOAT", {"default": 0.75, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide."}),
                     "guide_weight_bkg": ("FLOAT", {"default": 0.75, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide_bkg."}),
                     "guide_weight_scale": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step":0.01, "round": False, "tooltip": "Disables the guide for the next step when the denoised image is similar to the guide. Higher values will strengthen the effect."}),
                     "guide_weight_bkg_scale": ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Disables the guide for the next step when the denoised image is similar to the guide. Higher values will strengthen the effect."}),
                     "guide_weight_scheduler": (["constant"] + get_res4lyf_scheduler_list(), {"default": "beta57"},),
                     "guide_weight_scheduler_bkg": (["constant"] + get_res4lyf_scheduler_list(), {"default": "constant"},),
                     "guide_end_step": ("INT", {"default": 15, "min": 1, "max": 10000}),
                     "guide_bkg_end_step": ("INT", {"default": 15, "min": 1, "max": 10000}),
                    },
                "optional":
                    {
                        "guide": ("LATENT", ),
                        "guide_bkg": ("LATENT", ),
                        "guide_mask": ("MASK", ),
                        "guide_mask_bkg": ("MASK", ),
                        "guide_weights": ("SIGMAS", ),
                        "guide_weights_bkg": ("SIGMAS", ),
                    }
               }

    RETURN_TYPES = ("GUIDES",)
    RETURN_NAMES = ("guides",)
    CATEGORY = "RES4LYF/legacy/sampler_extensions"
    FUNCTION = "main"
    DEPRECATED = True

    def main(self, guide_weight_scheduler="constant", guide_weight_scheduler_bkg="constant", guide_end_step=30, guide_bkg_end_step=30, guide_weight_scale=1.0, guide_weight_bkg_scale=1.0,
             guide=None, guide_bkg=None, guide_weight=0.0, guide_weight_bkg=0.0,
             guide_mode="blend", guide_weights=None, guide_weights_bkg=None, guide_mask=None, guide_mask_bkg=None,
             ):
        """Bundle the guide inputs into a single ``GUIDES`` tuple.

        When a weight scheduler is ``"constant"`` and no explicit weights were
        supplied, a schedule of ``1.0`` for ``guide_end_step`` (or
        ``guide_bkg_end_step``) steps is generated and zero-padded on the right
        by ``max_steps`` entries. Explicitly supplied weights are passed
        through untouched.

        Returns:
            A 1-tuple holding the 15-element guides tuple, in this order:
            (guide_mode, guide_weight, guide_weight_bkg, guide_weights,
            guide_weights_bkg, guide, guide_bkg, guide_mask, guide_mask_bkg,
            guide_weight_scheduler, guide_weight_scheduler_bkg,
            guide_end_step, guide_bkg_end_step, denoise, denoise_bkg).
        """
        default_dtype = torch.float64
        max_steps = 10000

        # The *_scale inputs travel through the tuple under the "denoise" names.
        denoise, denoise_bkg = guide_weight_scale, guide_weight_bkg_scale

        # Every "epsilon_*" mode other than "epsilon_projection*" needs a second
        # latent; without it, degrade to plain "epsilon" and tell the user.
        if guide_mode.startswith("epsilon_") and not guide_mode.startswith("epsilon_projection") and guide_bkg is None:
            print("Warning: need two latent inputs for guide_mode=",guide_mode," to work. Falling back to epsilon.")
            guide_mode = "epsilon"

        # Identity checks on purpose: `== None` would be routed through the
        # operand's own __eq__ (these inputs may be tensors).
        if guide_weight_scheduler == "constant" and guide_weights is None:
            guide_weights = initialize_or_scale(None, 1.0, guide_end_step).to(default_dtype)
            guide_weights = F.pad(guide_weights, (0, max_steps), value=0.0)

        if guide_weight_scheduler_bkg == "constant" and guide_weights_bkg is None:
            guide_weights_bkg = initialize_or_scale(None, 1.0, guide_bkg_end_step).to(default_dtype)
            guide_weights_bkg = F.pad(guide_weights_bkg, (0, max_steps), value=0.0)

        guides = (guide_mode, guide_weight, guide_weight_bkg, guide_weights, guide_weights_bkg, guide, guide_bkg, guide_mask, guide_mask_bkg,
                  guide_weight_scheduler, guide_weight_scheduler_bkg, guide_end_step, guide_bkg_end_step, denoise, denoise_bkg)
        return (guides, )
class ClownsharKSamplerOptions:
    """Deprecated legacy node that collects sampler settings into an ``OPTIONS`` dict."""

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "noise_init_stdev": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step":0.01, "round": False, }),
                "noise_init_mean": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step":0.01, "round": False, }),
                "noise_type_init": (NOISE_GENERATOR_NAMES, {"default": "gaussian"}),
                "noise_type_sde": (NOISE_GENERATOR_NAMES, {"default": "brownian"}),
                "noise_mode_sde": (["hard", "hard_var", "hard_sq", "soft", "softer", "exp"], {"default": 'hard', "tooltip": "How noise scales with the sigma schedule. Hard is the most aggressive, the others start strong and drop rapidly."}),
                "eta": ("FLOAT", {"default": 0.25, "min": -100.0, "max": 100.0, "step":0.01, "round": False}),
                "s_noise": ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step":0.01, "round": False}),
                "d_noise": ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step":0.01}),
                "alpha_init": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.1}),
                "k_init": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 2}),
                "alpha_sde": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.1}),
                "k_sde": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 2}),
                "noise_seed": ("INT", {"default": -1, "min": -1, "max": 0xffffffffffffffff, "tooltip": "Seed for the SDE noise that is added after each step if eta or eta_var are non-zero. If set to -1, it will use the increment the seed most recently used by the workflow."}),
                "c1": ("FLOAT", {"default": 0.0, "min": -1.0, "max": 10000.0, "step": 0.01}),
                "c2": ("FLOAT", {"default": 0.5, "min": -1.0, "max": 10000.0, "step": 0.01}),
                "c3": ("FLOAT", {"default": 1.0, "min": -1.0, "max": 10000.0, "step": 0.01}),
                "t_fn_formula": ("STRING", {"default": "", "multiline": True}),
                "sigma_fn_formula": ("STRING", {"default": "", "multiline": True}),
                #"unsampler_type": (['linear', 'exponential', 'constant'],),
            },
            "optional": {
                "options": ("OPTIONS",),
            }
        }

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    CATEGORY = "RES4LYF/legacy/sampler_extensions"
    FUNCTION = "main"
    DEPRECATED = True

    def main(self, noise_init_stdev, noise_init_mean, c1, c2, c3, eta, s_noise, d_noise, noise_type_init, noise_type_sde, noise_mode_sde, noise_seed,
             alpha_init, k_init, alpha_sde, k_sde, t_fn_formula=None, sigma_fn_formula=None, unsampler_type="linear",
             alphas=None, etas=None, s_noises=None, d_noises=None, c2s=None, c3s=None,
             options=None,
             ):
        """Return an options dict carrying this node's settings.

        Entries already present in ``options`` are kept unless this node
        writes the same key. ``noise_seed`` is stored under the key
        ``'noise_seed_sde'``. The parameters ``alphas``, ``etas``,
        ``s_noises``, ``d_noises``, ``c2s`` and ``c3s`` are accepted but not
        stored.

        Returns:
            A 1-tuple holding the resulting dict.
        """
        # Work on a shallow copy so the dict handed in by the upstream node is
        # not modified behind the back of anything else holding a reference.
        options = {} if options is None else dict(options)

        options.update({
            'noise_init_stdev': noise_init_stdev,
            'noise_init_mean': noise_init_mean,
            'noise_type_init': noise_type_init,
            'noise_type_sde': noise_type_sde,
            'noise_mode_sde': noise_mode_sde,
            'eta': eta,
            's_noise': s_noise,
            'd_noise': d_noise,
            'alpha_init': alpha_init,
            'k_init': k_init,
            'alpha_sde': alpha_sde,
            'k_sde': k_sde,
            'noise_seed_sde': noise_seed,
            'c1': c1,
            'c2': c2,
            'c3': c3,
            't_fn_formula': t_fn_formula,
            'sigma_fn_formula': sigma_fn_formula,
            'unsampler_type': unsampler_type,
        })

        return (options,)
class ClownOptions_FrameWeights:
    """Deprecated legacy node that stores frame weights in an ``OPTIONS`` dict."""

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "frame_weights": ("SIGMAS", ),
            },
            "optional": {
                "options": ("OPTIONS",),
            }
        }

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    CATEGORY = "RES4LYF/legacy/sampler_options"
    FUNCTION = "main"
    DEPRECATED = True

    def main(self, frame_weights, options=None,):
        """Return an options dict with ``'frame_weights_grp'`` set.

        The group is the pair ``(frame_weights, frame_weights)``: the same
        object is used for both slots.

        Returns:
            A 1-tuple holding the resulting dict.
        """
        # Copy first so the upstream node's dict is left untouched.
        options = {} if options is None else dict(options)
        options['frame_weights_grp'] = (frame_weights, frame_weights)
        return (options,)
def slice_cond(tile_h, tile_h_len, tile_w, tile_w_len, cond, area):
    """Restrict one conditioning entry's area and mask to a single tile.

    Args:
        tile_h, tile_w: Offset of the tile's first row / column.
        tile_h_len, tile_w_len: Height / width of the tile.
        cond: Conditioning entry; ``cond[1]`` is the options dict, which is
            modified in place.
        area: Pair ``(coords, mask)``. ``coords`` is ``(h_len, w_len, h, w)``
            or ``None``; ``mask`` is sliced with ``tiling.get_slice`` or is
            ``None``.

    Returns:
        ``(cond, ignore)``. ``ignore`` is ``True`` when the area does not
        intersect the tile, or when the sliced mask sums to zero and the
        entry already carries a ``'mask'`` key.
    """
    tile_h_end = tile_h + tile_h_len
    tile_w_end = tile_w + tile_w_len
    coords = area[0]  # (h_len, w_len, h, w)
    mask = area[1]

    if coords is not None:
        h_len, w_len, h, w = coords
        h_end = h + h_len
        w_end = w + w_len

        overlaps = h < tile_h_end and h_end > tile_h and w < tile_w_end and w_end > tile_w
        if not overlaps:
            return (cond, True)

        # Re-express the area relative to the tile's own origin.
        new_h = max(0, h - tile_h)
        new_w = max(0, w - tile_w)
        # The end is tile-relative too, so it is bounded by the tile *length*;
        # the absolute tile end would let the area spill past the tile
        # whenever the tile offset is non-zero.
        new_h_end = min(tile_h_len, h_end - tile_h)
        new_w_end = min(tile_w_len, w_end - tile_w)
        cond[1]['area'] = (new_h_end - new_h, new_w_end - new_w, new_h, new_w)

    if mask is not None:
        new_mask = tiling.get_slice(mask, tile_h, tile_h_len, tile_w, tile_w_len)
        if new_mask.sum().cpu() == 0.0 and 'mask' in cond[1]:
            return (cond, True)
        cond[1]['mask'] = new_mask

    return (cond, False)
def cnets_and_cnet_imgs(positive, negative, shape):
    """Gather the ControlNets used by the conditioning, plus resized hints.

    Args:
        positive, negative: Conditioning lists of ``(tensor, options)`` pairs.
        shape: Shape whose last two entries, times 8, give the hint size to
            compare against (presumably latent size scaled to pixels; verify
            against the caller).

    Returns:
        ``(cnets, cnet_imgs)``. ``cnet_imgs[i]`` is ``cnets[i]``'s
        ``cond_hint_original`` interpolated to the target size and moved to
        the CPU, or ``None`` when the hint already has that size.
    """
    # Control objects attached to any positive or negative entry.
    attached = [opts['control'] for (_, opts) in positive + negative if 'control' in opts]

    # Follow every "previous_controlnet" chain, then drop duplicates.
    chained = []
    for ctrl in attached:
        for link in recursion_to_list(ctrl, "previous_controlnet"):
            chained.append(link)
    unique = list(set(chained))

    # Keep only real ControlNet instances.
    cnets = [ctrl for ctrl in unique if isinstance(ctrl, comfy.controlnet.ControlNet)]

    cnet_imgs = []
    for net in cnets:
        size_matches = (net.cond_hint_original.shape[-2] == shape[-2] * 8
                        and net.cond_hint_original.shape[-1] == shape[-1] * 8)
        if size_matches:
            cnet_imgs.append(None)
        else:
            resized = torch.nn.functional.interpolate(
                net.cond_hint_original, (shape[-2] * 8, shape[-1] * 8), mode='nearest-exact')
            cnet_imgs.append(resized.to('cpu'))

    return cnets, cnet_imgs
def cascade_tiles(x, input_x, tile_h, tile_w, tile_h_len, tile_w_len):
    """Map a tile given on ``x``'s grid onto ``input_x``'s grid.

    Each value is multiplied by the matching extent of ``input_x`` (taken
    from its last two dimensions) and floor-divided by the matching extent
    of ``x``.

    Returns:
        ``(tile_h, tile_w, tile_h_len, tile_w_len)`` expressed on
        ``input_x``'s grid.
    """
    src_rows, src_cols = x.shape[-2], x.shape[-1]
    dst_rows, dst_cols = input_x.shape[-2], input_x.shape[-1]

    def rescale(value, dst_extent, src_extent):
        # Multiply before dividing so the floor is applied only once.
        return (dst_extent * value) // src_extent

    return (
        rescale(tile_h, dst_rows, src_rows),
        rescale(tile_w, dst_cols, src_cols),
        rescale(tile_h_len, dst_rows, src_rows),
        rescale(tile_w_len, dst_cols, src_cols),
    )
comfy.model_management.get_torch_device() steps = len(sigmas)-1 conds0 = \ {"positive": comfy.sampler_helpers.convert_cond(positive), "negative": comfy.sampler_helpers.convert_cond(negative)} conds = {} for k in conds0: conds[k] = list(map(lambda a: a.copy(), conds0[k])) modelPatches, inference_memory = comfy.sampler_helpers.get_additional_models(conds, model.model_dtype()) comfy.model_management.load_models_gpu([model] + modelPatches, model.memory_required(noise.shape) + inference_memory) if model.model.model_config.unet_config['stable_cascade_stage'] == 'up': compression = 1 guide_weight = 1.0 if guide_weight is None else guide_weight guide_type = 'residual' if guide_type is None else guide_type guide = guide['samples'] if guide is not None else None guide_weights = initialize_or_scale(guide_weights, guide_weight, 10000) patch = model.model_options.get("transformer_options", {}).get("patches_replace", {}).get("ultracascade", {}).get("main") #CHANGED HERE if patch is not None: patch.update(x_lr=guide, guide_weights=guide_weights, guide_type=guide_type) else: model = model.clone() model.model.diffusion_model.set_sigmas_prev(sigmas_prev=sigmas[:1]) model.model.diffusion_model.set_guide_weights(guide_weights=guide_weights) model.model.diffusion_model.set_guide_type(guide_type=guide_type) elif model.model.model_config.unet_config['stable_cascade_stage'] == 'c': compression = 1 elif model.model.model_config.unet_config['stable_cascade_stage'] == 'b': compression = 4 c_pos, c_neg = [], [] for t in positive: d_pos = t[1].copy() d_neg = t[1].copy() d_pos['stable_cascade_prior'] = guide['samples'] pooled_output = d_neg.get("pooled_output", None) if pooled_output is not None: d_neg["pooled_output"] = torch.zeros_like(pooled_output) c_pos.append([t[0], d_pos]) c_neg.append([torch.zeros_like(t[0]), d_neg]) positive = c_pos negative = c_neg effnet_samples = positive[0][1]['stable_cascade_prior'].clone() effnet_interpolated = 
nn.functional.interpolate(effnet_samples.clone().to(torch.float16).to(device), size=torch.Size((x.shape[-2] // 2, x.shape[-1] // 2,)), mode='bilinear', align_corners=True) effnet_full_map = model.model.diffusion_model.effnet_mapper(effnet_interpolated) else: compression = 8 #sd1.5, sdxl, sd3, flux, etc if image_cv is not None: #CLIP VISION LOAD clip_path = folder_paths.get_full_path("clip_vision", clip_name) clip_vision = comfy.clip_vision.load(clip_path) cnets, cnet_imgs = cnets_and_cnet_imgs (positive, negative, x.shape) T2Is, T2I_imgs = T2Is_and_T2I_imgs (positive, negative, x.shape) spatial_conds_pos, spatial_conds_neg = spatial_conds_posneg(positive, negative, x.shape, device) gligen_pos, gligen_neg = gligen_posneg (positive, negative) tile_width = min(x.shape[-1] * compression, tile_width) tile_height = min(x.shape[2] * compression, tile_height) if tiling_strategy != 'padded': if noise_mask is not None: x += sigmas[0] * noise_mask * model.model.process_latent_out(noise) else: x += sigmas[0] * model.model.process_latent_out(noise) if tiling_strategy == 'random' or tiling_strategy == 'random strict': tiles = tiling.get_tiles_and_masks_rgrid(steps, x.shape, tile_height, tile_width, torch.manual_seed(noise_seed), compression=compression) elif tiling_strategy == 'padded': tiles = tiling.get_tiles_and_masks_padded(steps, x.shape, tile_height, tile_width, compression=compression) else: tiles = tiling.get_tiles_and_masks_simple(steps, x.shape, tile_height, tile_width, compression=compression) total_steps = sum([num_steps for img_pass in tiles for steps_list in img_pass for _,_,_,_,num_steps,_ in steps_list]) current_step = [0] with tqdm(total=total_steps) as pbar_tqdm: pbar = comfy.utils.ProgressBar(total_steps) def callback(step, x0, x, total_steps, step_inc=1): current_step[0] += step_inc preview_bytes = None if preview == True: previewer = latent_preview.get_previewer(device, model.model.latent_format) preview_bytes = 
previewer.decode_latent_to_preview_image("JPEG", x0) pbar.update_absolute(current_step[0], preview=preview_bytes) pbar_tqdm.update(step_inc) if tiling_strategy == "random strict": x_next = x.clone() for img_pass in tiles: # img_pass is a set of non-intersecting tiles effnet_slices, effnet_map_slices, tiled_noise_list, tiled_latent_list, tiled_mask_list, tile_h_list, tile_w_list, tile_h_len_list, tile_w_len_list = [],[],[],[],[],[],[],[],[] for i in range(len(img_pass)): for iteration, (tile_h, tile_h_len, tile_w, tile_w_len, tile_steps, tile_mask) in enumerate(img_pass[i]): tiled_mask = None if noise_mask is not None: tiled_mask = tiling.get_slice(noise_mask, tile_h, tile_h_len, tile_w, tile_w_len).to(device) if tile_mask is not None: if tiled_mask is not None: tiled_mask *= tile_mask.to(device) else: tiled_mask = tile_mask.to(device) if tiling_strategy == 'padded' or tiling_strategy == 'random strict': tile_h, tile_h_len, tile_w, tile_w_len, tiled_mask = tiling.mask_at_boundary( tile_h, tile_h_len, tile_w, tile_w_len, tile_height, tile_width, x.shape[-2], x.shape[-1], tiled_mask, device, compression=compression) if tiled_mask is not None and tiled_mask.sum().cpu() == 0.0: continue tiled_latent = tiling.get_slice(x, tile_h, tile_h_len, tile_w, tile_w_len).to(device) if tiling_strategy == 'padded': tiled_noise = tiling.get_slice(noise, tile_h, tile_h_len, tile_w, tile_w_len).to(device) else: if tiled_mask is None or noise_mask is None: tiled_noise = torch.zeros_like(tiled_latent) else: tiled_noise = tiling.get_slice(noise, tile_h, tile_h_len, tile_w, tile_w_len).to(device) * (1 - tiled_mask) #TODO: all other condition based stuff like area sets and GLIGEN should also happen here #cnets for m, img in zip(cnets, cnet_imgs): slice_cnet(tile_h, tile_h_len, tile_w, tile_w_len, m, img) #T2I for m, img in zip(T2Is, T2I_imgs): slices_T2I(tile_h, tile_h_len, tile_w, tile_w_len, m, img) pos = copy.deepcopy(positive) neg = copy.deepcopy(negative) #cond areas pos = 
[slice_cond(tile_h, tile_h_len, tile_w, tile_w_len, c, area) for c, area in zip(pos, spatial_conds_pos)] pos = [c for c, ignore in pos if not ignore] neg = [slice_cond(tile_h, tile_h_len, tile_w, tile_w_len, c, area) for c, area in zip(neg, spatial_conds_neg)] neg = [c for c, ignore in neg if not ignore] #gligen for cond, gligen in zip(pos, gligen_pos): slice_gligen(tile_h, tile_h_len, tile_w, tile_w_len, cond, gligen) for cond, gligen in zip(neg, gligen_neg): slice_gligen(tile_h, tile_h_len, tile_w, tile_w_len, cond, gligen) start_step = i * tile_steps last_step = i * tile_steps + tile_steps if last_step is not None and last_step < (len(sigmas) - 1): sigmas = sigmas[:last_step + 1] if start_step is not None: if start_step < (len(sigmas) - 1): sigmas = sigmas[start_step:] else: if tiled_latent is not None: return tiled_latent else: return torch.zeros_like(noise) # SLICE, DICE, AND DENOISE if image_cv is not None: #slice and dice ClipVision for tiling image_cv = image_cv. permute(0,3,1,2) tile_h_cascade, tile_w_cascade, tile_h_len_cascade, tile_w_len_cascade = cascade_tiles(x, image_cv, tile_h, tile_w, tile_h_len, tile_w_len) image_slice = copy.deepcopy(image_cv) image_slice = tiling.get_slice(image_slice, tile_h_cascade, tile_h_len_cascade, tile_w_cascade, tile_w_len_cascade).to(device) image_slice = image_slice.permute(0,2,3,1) image_cv = image_cv. 
permute(0,2,3,1) cv_out_slice = clip_vision.encode_image(image_slice) pos = cv_cond(cv_out_slice, pos, strength, noise_augment) if model.model.model_config.unet_config['stable_cascade_stage'] == 'up': #slice and dice stage UP guide tile_h_cascade, tile_w_cascade, tile_h_len_cascade, tile_w_len_cascade = cascade_tiles(x, guide, tile_h, tile_w, tile_h_len, tile_w_len) guide_slice = copy.deepcopy(guide) guide_slice = tiling.get_slice(guide_slice.clone(), tile_h_cascade, tile_h_len_cascade, tile_w_cascade, tile_w_len_cascade).to(device) model.model.diffusion_model.set_x_lr(x_lr=guide_slice) tile_result = comfy.sample.sample_custom(model, tiled_noise, cfg, sampler, sigmas, pos, neg, tiled_latent, noise_mask=tiled_mask, callback=callback, disable_pbar=True, seed=noise_seed) elif model.model.model_config.unet_config['stable_cascade_stage'] == 'b': #slice and dice stage B conditioning tile_h_cascade, tile_w_cascade, tile_h_len_cascade, tile_w_len_cascade = cascade_tiles(x, effnet_samples.clone(), tile_h, tile_w, tile_h_len, tile_w_len) effnet_slice = tiling.get_slice(effnet_samples.clone(), tile_h_cascade, tile_h_len_cascade, tile_w_cascade, tile_w_len_cascade).to(device) effnet_slices.append(effnet_slice) tile_h_cascade, tile_w_cascade, tile_h_len_cascade, tile_w_len_cascade = cascade_tiles(x, effnet_full_map.clone(), tile_h, tile_w, tile_h_len, tile_w_len) effnet_map_slice = tiling.get_slice(effnet_full_map.clone(), tile_h_cascade, tile_h_len_cascade, tile_w_cascade, tile_w_len_cascade).to(device) effnet_map_slices.append(effnet_map_slice) else: # not stage UP or stage B, default tile_result = comfy.sample.sample_custom(model, tiled_noise, cfg, sampler, sigmas, pos, neg, tiled_latent, noise_mask=tiled_mask, callback=callback, disable_pbar=True, seed=noise_seed) if model.model.model_config.unet_config['stable_cascade_stage'] != 'b': tile_result = tile_result.cpu() if tiled_mask is not None: tiled_mask = tiled_mask.cpu() if tiling_strategy == "random strict": 
class UltraSharkSampler_Tiled:
    """Tiled sampler node for use with https://github.com/ClownsharkBatwing/UltraCascade.

    Prepares the noise tensor according to the node inputs and hands the
    actual tiled sampling off to ``sample_common``.
    """

    @classmethod
    def INPUT_TYPES(s):
        return {"required":
                    {
                    "add_noise": ("BOOLEAN", {"default": True}),
                    "noise_is_latent": ("BOOLEAN", {"default": False}),
                    "noise_type": (NOISE_GENERATOR_NAMES, ),
                    "alpha": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step":0.1, "round": 0.01}),
                    "k": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step":2.0, "round": 0.01}),
                    "noise_seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}),
                    "cfg": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 100.0}),
                    "guide_type": (['residual', 'weighted'], ),
                    "guide_weight": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step":0.01, "round": 0.01}),
                    "tile_width": ("INT", {"default": 1024, "min": 2, "max": MAX_RESOLUTION, "step": 1}),
                    "tile_height": ("INT", {"default": 1024, "min": 2, "max": MAX_RESOLUTION, "step": 1}),
                    "tiling_strategy": (["padded", "random", "random strict", 'simple'], ),
                    "max_tile_batch_size": ("INT", {"default": 64, "min": 1, "max": 256, "step": 1}),
                    "model": ("MODEL",),
                    "positive": ("CONDITIONING", ),
                    "negative": ("CONDITIONING", ),
                    "sampler": ("SAMPLER",),
                    "sigmas": ("SIGMAS",),
                    "latent_image": ("LATENT", ),
                    "clip_name": (folder_paths.get_filename_list("clip_vision"), {'default': "clip-vit-large-patch14.safetensors"}),
                    "strength": ("FLOAT", {"default": 1.0, "min": -10.0, "max": 10.0, "step": 0.01}),
                    "noise_augment": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01}),
                    },
                "optional":
                    {
                    "latent_noise": ("LATENT", ),
                    "guide": ("LATENT", ),
                    "guide_weights": ("SIGMAS",),
                    "image_cv": ("IMAGE",),
                    },
                }

    RETURN_TYPES = ("LATENT",)
    FUNCTION = "sample"
    CATEGORY = "RES4LYF/legacy/samplers/ultracascade"
    DESCRIPTION = "For use with UltraCascade."
    DEPRECATED = True

    def sample(self, model, noise_seed, add_noise, noise_is_latent, noise_type, alpha, k, tile_width, tile_height, tiling_strategy,
               cfg, positive, negative, latent_image, latent_noise=None, sampler=None, sigmas=None, guide=None,
               clip_name=None, strength=1.0, noise_augment=1.0, image_cv=None, max_tile_batch_size=3,
               guide_type='residual', guide_weight=1.0, guide_weights=None,
               ):
        """Build the noise tensor and run tiled sampling.

        Returns a 1-tuple holding a shallow copy of ``latent_image`` whose
        ``'samples'`` entry is replaced by the result of ``sample_common``.
        """
        x = latent_image["samples"].clone()
        torch.manual_seed(noise_seed)

        if not add_noise:
            noise = torch.zeros(x.size(), dtype=x.dtype, layout=x.layout, device="cpu")
        elif latent_noise is None:
            skip = latent_image["batch_index"] if "batch_index" in latent_image else None
            noise = prepare_noise(x, noise_seed, noise_type, skip, alpha, k)
        else:
            # Clone: the in-place ops below must not modify the tensor owned by
            # the caller's latent_noise dict.
            noise = latent_noise["samples"].clone()

        # NOTE(review): assumed to apply to every noise source above, not only
        # to a user-supplied latent_noise -- confirm against the original layout.
        if noise_is_latent:  # add noise and latent together and normalize --> noise
            noise += x.cpu()
            noise.sub_(noise.mean()).div_(noise.std())

        noise_mask = latent_image["noise_mask"].clone() if "noise_mask" in latent_image else None

        latent_out = latent_image.copy()
        latent_out['samples'] = sample_common(model, x=x, noise=noise, noise_mask=noise_mask, noise_seed=noise_seed,
                                              tile_width=tile_width, tile_height=tile_height, tiling_strategy=tiling_strategy,
                                              cfg=cfg, positive=positive, negative=negative, preview=True,
                                              sampler=sampler, sigmas=sigmas, clip_name=clip_name, strength=strength,
                                              noise_augment=noise_augment, image_cv=image_cv,
                                              max_tile_batch_size=max_tile_batch_size, guide=guide, guide_type=guide_type,
                                              guide_weight=guide_weight, guide_weights=guide_weights,
                                              )
        return (latent_out,)
class set_precision_sigmas:
    """Node that casts a sigma schedule to float16, float32 or float64.

    With ``set_default`` enabled the matching dtype is also installed as the
    global torch default dtype.
    """

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", ),
                "precision": (["16", "32", "64"], ),
                "set_default": ("BOOLEAN", {"default": False})
            },
        }

    RETURN_TYPES = ("SIGMAS",)
    RETURN_NAMES = ("passthrough",)
    CATEGORY = "RES4LYF/precision"

    FUNCTION = "main"

    def main(self, precision="32", sigmas=None, set_default=False):
        """Return ``sigmas`` converted to the requested precision.

        An unrecognised ``precision`` string leaves ``sigmas`` untouched.
        """
        dtype_by_bits = {
            "16": torch.float16,
            "32": torch.float32,
            "64": torch.float64,
        }
        target_dtype = dtype_by_bits.get(precision)
        if target_dtype is None:
            return (sigmas, )

        # Only an actual ``True`` switches the process-wide default dtype.
        if set_default is True:
            torch.set_default_dtype(target_dtype)
        return (sigmas.to(target_dtype), )
class sigmas_interpolate:
    """Node that resamples ``sigmas_0`` to the length of ``sigmas_1``.

    ``sigmas_1`` is passed through unchanged; ``mode`` selects the
    resampling strategy and ``order`` is the polynomial degree used by the
    "polynomial" mode.
    """

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas_0": ("SIGMAS", {"forceInput": True}),
                "sigmas_1": ("SIGMAS", {"forceInput": True}),
                "mode": (["linear", "nearest", "polynomial", "exponential", "power", "model"],),
                "order": ("INT", {"default": 8, "min": 1,"max": 64,"step": 1}),
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS","SIGMAS",)
    RETURN_NAMES = ("sigmas_0", "sigmas_1")
    CATEGORY = "RES4LYF/sigmas"

    def interpolate_sigma_schedule_poly(self, sigma_schedule, target_steps):
        """Fit a polynomial of degree ``self.order`` and evaluate it at ``target_steps`` points."""
        order = self.order
        sigma_schedule_np = sigma_schedule.cpu().numpy()

        # Original samples are assumed to be evenly spaced on [0, 1].
        original_steps = np.linspace(0, 1, len(sigma_schedule_np))
        coefficients = np.polyfit(original_steps, sigma_schedule_np, deg=order)

        target_steps_np = np.linspace(0, 1, target_steps)
        interpolated_sigma_np = np.polyval(coefficients, target_steps_np)

        return torch.tensor(interpolated_sigma_np, device=sigma_schedule.device, dtype=sigma_schedule.dtype)

    def interpolate_sigma_schedule_constrained(self, sigma_schedule, target_steps):
        """Resample with a cubic spline whose first derivative is zero at both ends."""
        sigma_schedule_np = sigma_schedule.cpu().numpy()

        original_steps = np.linspace(0, 1, len(sigma_schedule_np))
        target_steps_np = np.linspace(0, 1, target_steps)

        # bc_type (1, 0.0) clamps the first derivative to 0 at each boundary.
        cs = CubicSpline(original_steps, sigma_schedule_np, bc_type=((1, 0.0), (1, 0.0)))
        interpolated_sigma_np = cs(target_steps_np)

        return torch.tensor(interpolated_sigma_np, device=sigma_schedule.device, dtype=sigma_schedule.dtype)

    def interpolate_sigma_schedule_exp(self, sigma_schedule, target_steps):
        """Interpolate linearly in log space, then map back with ``exp``."""
        log_sigma_schedule = torch.log(sigma_schedule)

        interpolated_log_sigma = F.interpolate(
            log_sigma_schedule.unsqueeze(0).unsqueeze(0),  # add fake batch and channel dimensions
            size=target_steps,
            mode='linear',
            align_corners=True
        ).squeeze()

        return torch.exp(interpolated_log_sigma)

    def interpolate_sigma_schedule_power(self, sigma_schedule, target_steps):
        """Fit a power law ``y = a * x**b`` via log-log regression and evaluate it."""
        sigma_schedule_np = sigma_schedule.cpu().numpy()
        original_steps = np.linspace(1, len(sigma_schedule_np), len(sigma_schedule_np))

        # Power regression: a straight-line fit on log-log data.
        log_x = np.log(original_steps)
        log_y = np.log(sigma_schedule_np)
        coefficients = np.polyfit(log_x, log_y, deg=1)
        a = np.exp(coefficients[1])  # intercept, undoing the log transform
        b = coefficients[0]          # slope

        target_steps_np = np.linspace(1, len(sigma_schedule_np), target_steps)
        interpolated_sigma_np = a * (target_steps_np ** b)

        return torch.tensor(interpolated_sigma_np, device=sigma_schedule.device, dtype=sigma_schedule.dtype)

    def interpolate_sigma_schedule_linear(self, sigma_schedule, target_steps):
        """Resample with ``F.interpolate`` in 'linear' mode."""
        return F.interpolate(sigma_schedule.unsqueeze(0).unsqueeze(0), target_steps, mode='linear').squeeze(0).squeeze(0)

    def interpolate_sigma_schedule_nearest(self, sigma_schedule, target_steps):
        """Resample with ``F.interpolate`` in 'nearest' mode."""
        return F.interpolate(sigma_schedule.unsqueeze(0).unsqueeze(0), target_steps, mode='nearest').squeeze(0).squeeze(0)

    def interpolate_nearest_neighbor(self, sigma_schedule, target_steps):
        """Pick existing schedule entries via ``torch.searchsorted`` on evenly spaced positions."""
        original_steps = torch.linspace(0, 1, steps=len(sigma_schedule))
        target_steps = torch.linspace(0, 1, steps=target_steps)

        indices = torch.searchsorted(original_steps, target_steps)
        indices = torch.clamp(indices, 0, len(sigma_schedule) - 1)  # keep indices in range

        return sigma_schedule[indices]

    def main(self, sigmas_0, sigmas_1, mode, order):
        """Resample ``sigmas_0`` to ``len(sigmas_1)`` entries using ``mode``.

        Raises:
            ValueError: if ``mode`` is not one of the supported modes.
        """
        self.order = order

        interpolators = {
            "linear": self.interpolate_sigma_schedule_linear,
            "nearest": self.interpolate_nearest_neighbor,
            "polynomial": self.interpolate_sigma_schedule_poly,
            "exponential": self.interpolate_sigma_schedule_exp,
            "power": self.interpolate_sigma_schedule_power,
        }

        if mode == "model":
            # Module-level helper; looked up lazily so the other modes do not
            # depend on it.
            interpolate = interpolate_sigma_schedule_model
        elif mode in interpolators:
            interpolate = interpolators[mode]
        else:
            raise ValueError(f"Unknown interpolation mode: {mode!r}")

        sigmas_0 = interpolate(sigmas_0, len(sigmas_1))
        return (sigmas_0, sigmas_1,)
class sigmas_from_text:
    """Node that parses a sigma schedule from free-form text.

    Values may be separated by commas and/or any whitespace.
    """

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "text": ("STRING", {"default": "", "multiline": True}),
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    RETURN_NAMES = ("sigmas",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, text):
        """Return the numbers found in ``text`` as a float64 tensor.

        The tensor is placed on CUDA when it is available and on the CPU
        otherwise, so the node also works on machines without a GPU.

        Raises:
            ValueError: if a token cannot be parsed as a float.
        """
        values = [float(val) for val in text.replace(",", " ").split()]

        device = "cuda" if torch.cuda.is_available() else "cpu"
        # Build directly in float64: going through the default float32 first
        # would round every value before the upcast.
        sigmas = torch.tensor(values, dtype=torch.float64, device=device)
        return (sigmas,)
class sigmas_pad:
    """Node that appends a single value to the end of a sigma schedule."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "value": ("FLOAT", {"default": 0.0, "min": -10000,"max": 10000,"step": 0.01})
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas, value):
        """Return a new tensor equal to ``sigmas`` followed by ``value``."""
        # new_full inherits both dtype and device from `sigmas`, so the
        # concatenation also works when the schedule is not on the CPU.
        pad = sigmas.new_full((1,), value)
        return (torch.cat((sigmas, pad)),)
class sigmas_set_floor:
    """Node that replaces every sigma at or below ``floor`` with ``new_floor``."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "floor": ("FLOAT", {"default": 0.0291675, "min": -10000,"max": 10000,"step": 0.01}),
                "new_floor": ("FLOAT", {"default": 0.0291675, "min": -10000,"max": 10000,"step": 0.01})
            }
        }

    RETURN_TYPES = ("SIGMAS",)
    FUNCTION = "set_floor"

    CATEGORY = "RES4LYF/sigmas"

    def set_floor(self, sigmas, floor, new_floor):
        """Return a copy of ``sigmas`` with entries ``<= floor`` set to ``new_floor``.

        The input tensor is left untouched so that other consumers of the
        same schedule do not see the modification.
        """
        floored = sigmas.clone()
        floored[floored <= floor] = new_floor
        return (floored,)
class sigmas_mult:
    """Node that scales a sigma schedule by a constant and, optionally, by a second schedule."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "multiplier": ("FLOAT", {"default": 1, "min": -10000,"max": 10000,"step": 0.01})
            },
            "optional": {
                "sigmas2": ("SIGMAS", {"forceInput": False})
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas, multiplier, sigmas2=None):
        """Return ``sigmas * multiplier``, first multiplied by ``sigmas2`` when it is given."""
        # The element-wise product with sigmas2 comes first, then the scalar.
        product = sigmas if sigmas2 is None else sigmas * sigmas2
        return (product * multiplier,)
class sigmas_power:
    """Node that raises every entry of a sigma schedule to a fixed exponent."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "power": ("FLOAT", {"default": 1, "min": -100,"max": 100,"step": 0.01})
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas, power):
        """Return ``sigmas`` raised element-wise to ``power``."""
        raised = sigmas.pow(power)
        return (raised,)
class sigmas_math1:
    """Node that evaluates a user-supplied expression over sigma schedules.

    The expression ``f1`` may refer to the sliced input schedules ``a``,
    ``b``, ``c``, the constant tensors ``x``, ``y``, ``z``, the step index
    tensor ``s``, as well as ``np``, ``torch`` and every name in the numpy
    namespace.
    """

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "start": ("INT", {"default": 0, "min": 0,"max": 10000,"step": 1}),
                "stop": ("INT", {"default": 0, "min": 0,"max": 10000,"step": 1}),
                "trim": ("INT", {"default": 0, "min": -10000,"max": 0,"step": 1}),
                "x": ("FLOAT", {"default": 1, "min": -10000,"max": 10000,"step": 0.01}),
                "y": ("FLOAT", {"default": 1, "min": -10000,"max": 10000,"step": 0.01}),
                "z": ("FLOAT", {"default": 1, "min": -10000,"max": 10000,"step": 0.01}),
                "f1": ("STRING", {"default": "s", "multiline": True}),
                "rescale" : ("BOOLEAN", {"default": False}),
                "max1": ("FLOAT", {"default": 14.614642, "min": -10000,"max": 10000,"step": 0.01}),
                "min1": ("FLOAT", {"default": 0.0291675, "min": -10000,"max": 10000,"step": 0.01}),
            },
            "optional": {
                "a": ("SIGMAS", {"forceInput": False}),
                "b": ("SIGMAS", {"forceInput": False}),
                "c": ("SIGMAS", {"forceInput": False}),
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, start=0, stop=0, trim=0, a=None, b=None, c=None, x=1.0, y=1.0, z=1.0, f1="s", rescale=False, min1=1.0, max1=1.0):
        """Evaluate ``f1`` over the window ``[start, stop]`` and optionally rescale.

        ``stop == 0`` means "up to the shortest connected schedule"; ``trim``
        (<= 0) shortens the window from the end. When ``rescale`` is set the
        result is mapped linearly onto ``[min1, max1]``.
        """
        if stop == 0:
            t_lens = [len(tensor) for tensor in [a, b, c] if tensor is not None]
            stop = min(t_lens) if t_lens else 0
        else:
            stop = stop + 1  # make the user-facing stop index inclusive

        stop = stop + trim
        # Length of the window actually sliced below, so that s/x/y/z line up
        # with a[start:stop] even when start > 0.
        t_len = max(stop - start, 0)

        t_a = t_b = t_c = None
        if a is not None:
            t_a = a[start:stop]
        if b is not None:
            t_b = b[start:stop]
        if c is not None:
            t_c = c[start:stop]

        t_s = torch.arange(0.0, t_len)

        t_x = torch.full((t_len,), x)
        t_y = torch.full((t_len,), y)
        t_z = torch.full((t_len,), z)
        eval_namespace = {"__builtins__": None, "round": builtins.round, "np": np, "a": t_a, "b": t_b, "c": t_c, "x": t_x, "y": t_y, "z": t_z, "s": t_s, "torch": torch}
        eval_namespace.update(np.__dict__)

        # SECURITY NOTE: f1 is arbitrary text run through eval(). The
        # namespace exposes np and torch, and np.__dict__ normally carries its
        # own "__builtins__" entry, so the None guard above should not be
        # relied on as a sandbox. Never feed untrusted expressions here.
        s_out_1 = eval(f1, eval_namespace)

        if rescale:
            lo = min(s_out_1)
            hi = max(s_out_1)
            s_out_1 = ((s_out_1 - lo) * (max1 - min1)) / (hi - lo) + min1

        return (s_out_1,)
class sigmas_iteration_karras:
    """Scheduler node that builds an up-then-down sigma segment from two Karras schedules.

    Each call produces one segment and appends it to the optional incoming
    ``sigmas`` / ``momentums`` tensors, so several nodes can be chained.
    """

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "steps_up": ("INT", {"default": 30, "min": 0,"max": 10000,"step": 1}),
                "steps_down": ("INT", {"default": 30, "min": 0,"max": 10000,"step": 1}),
                "rho_up": ("FLOAT", {"default": 3, "min": -10000,"max": 10000,"step": 0.01}),
                "rho_down": ("FLOAT", {"default": 4, "min": -10000,"max": 10000,"step": 0.01}),
                "s_min_start": ("FLOAT", {"default":0.0291675, "min": -10000,"max": 10000,"step": 0.01}),
                "s_max": ("FLOAT", {"default": 2, "min": -10000,"max": 10000,"step": 0.01}),
                "s_min_end": ("FLOAT", {"default": 0.0291675, "min": -10000,"max": 10000,"step": 0.01}),
            },
            "optional": {
                "momentums": ("SIGMAS", {"forceInput": False}),
                "sigmas": ("SIGMAS", {"forceInput": False}),
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS","SIGMAS")
    RETURN_NAMES = ("momentums","sigmas")
    CATEGORY = "RES4LYF/schedulers"

    def main(self, steps_up, steps_down, rho_up, rho_down, s_min_start, s_max, s_min_end, sigmas=None, momentums=None):
        """Return ``(momentums, sigmas)`` extended by one new up/down segment.

        The momentum segment equals the sigma segment except that its
        "down" half is negated.
        """
        # Drop the last entry of each schedule (presumably the trailing zero
        # appended by get_sigmas_karras -- confirm) and reverse the first one.
        ramp_up = torch.flip(get_sigmas_karras(steps_up, s_min_start, s_max, rho_up)[:-1], dims=[0])
        ramp_down = get_sigmas_karras(steps_down, s_min_end, s_max, rho_down)[:-1]

        segment_sigmas = torch.cat((ramp_up, ramp_down), dim=0)
        segment_momentums = torch.cat((ramp_up, -1*ramp_down), dim=0)

        sigmas = segment_sigmas if sigmas is None else torch.cat([sigmas, segment_sigmas])
        momentums = segment_momentums if momentums is None else torch.cat([momentums, segment_momentums])

        return (momentums,sigmas)
class tan_scheduler:
    """Scheduler node that shapes a sigma schedule with an arctangent curve.

    The curve ``((2/pi) * atan(-slope * (x - offset)) + 1) / 2`` is sampled at
    integer steps ``x``, normalised so that step 0 maps to ``start`` and step
    ``steps - 1`` maps to ``end``.
    """

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "steps": ("INT", {"default": 20, "min": 0, "max": 100000, "step": 1}),
                "offset": ("FLOAT", {"default": 20, "min": 0, "max": 100000, "step": 0.1}),
                "slope": ("FLOAT", {"default": 20, "min": -100000, "max": 100000, "step": 0.1}),
                "start": ("FLOAT", {"default": 20, "min": -100000, "max": 100000, "step": 0.1}),
                "end": ("FLOAT", {"default": 20, "min": -100000, "max": 100000, "step": 0.1}),
                "sgm": ("BOOLEAN", {"default": False}),
                "pad": ("BOOLEAN", {"default": False}),
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/schedulers"

    def main(self, steps, slope, offset, start, end, sgm, pad):
        """Compute the schedule.

        Args:
            steps: number of sigma values to produce (before optional padding).
            slope: steepness of the arctangent transition.
            offset: step index at which the arctangent is centred.
            start: value assigned to the first step.
            end: value assigned to step ``steps - 1``.
            sgm: one extra step is generated and then discarded.
            pad: append a trailing 0 to the schedule.

        Returns:
            A 1-tuple holding the schedule as a tensor.
        """
        def curve(x):
            return ((2 / pi) * atan(-slope * (x - offset)) + 1) / 2

        smax = curve(0)
        smin = curve(steps - 1)

        srange = smax - smin
        sscale = start - end

        # NOTE(review): the normalisation bounds above are taken before the
        # step count is bumped, and the extra step is dropped again below, so
        # as written `sgm` does not change the output -- confirm the intent.
        if sgm:
            steps += 1

        if srange == 0:
            # Flat curve (a single step, or slope == 0): the normalisation
            # would divide by zero, so every step takes the `start` value.
            sigmas = [float(start)] * steps
        else:
            sigmas = [(curve(x) - smin) * (1 / srange) * sscale + end for x in range(steps)]

        if sgm:
            sigmas = sigmas[:-1]
        if pad:
            sigmas = torch.tensor(sigmas + [0])
        else:
            sigmas = torch.tensor(sigmas)
        return (sigmas,)
class tan_scheduler_2stage_simple:
    """Two-stage arctangent scheduler driven by fractional pivots.

    ``pivot_1`` / ``pivot_2`` are multiplied by the (internally extended) step
    count to obtain step indices, and the split between the two stages is
    placed at their average. The first stage runs from ``start`` to
    ``middle``, the second from ``middle`` to ``end``.
    """

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        def real(default):
            return ("FLOAT", {"default": default, "min": -100000, "max": 100000, "step": 0.01})

        return {
            "required": {
                "steps": ("INT", {"default": 40, "min": 0, "max": 100000, "step": 1}),
                "pivot_1": real(1),
                "pivot_2": real(1),
                "slope_1": real(1),
                "slope_2": real(1),
                "start": real(1.0),
                "middle": real(0.5),
                "end": real(0.0),
                "pad": ("BOOLEAN", {"default": False}),
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    RETURN_NAMES = ("sigmas",)
    CATEGORY = "RES4LYF/schedulers"

    def get_tan_sigmas(self, steps, slope, pivot, start, end):
        """Return ``steps`` arctangent-shaped values as a Python list.

        The raw curve is rescaled so that its value at step 0 maps to
        ``start`` and its value at step ``steps - 1`` maps to ``end``.
        """
        def raw(x):
            return ((2 / pi) * atan(-slope * (x - pivot)) + 1) / 2

        smax = raw(0)
        smin = raw(steps - 1)
        srange = smax - smin
        sscale = start - end

        values = []
        for x in range(steps):
            values.append((raw(x) - smin) * (1 / srange) * sscale + end)
        return values

    def main(self, steps, start, middle, end, pivot_1, pivot_2, slope_1, slope_2, pad):
        """Build both stages and join them into a single tensor.

        Returns:
            A 1-tuple holding the combined schedule.
        """
        total = steps + 2

        # Convert the fractional pivots into step indices; the stage split
        # sits at their average.
        split_at = int((total * pivot_1 + total * pivot_2) / 2)
        first_pivot = int(total * pivot_1)
        second_pivot = int(total * pivot_2)

        # Slopes are divided by total/40.
        first_slope = slope_1 / (total / 40)
        second_slope = slope_2 / (total / 40)

        second_len = total - split_at
        first_len = total - second_len

        head = self.get_tan_sigmas(first_len, first_slope, first_pivot, start, middle)
        tail = self.get_tan_sigmas(second_len, second_slope, second_pivot - first_len, middle, end)

        # The final value of the first stage is dropped before joining.
        head = head[:-1]
        if pad:
            tail = tail + [0]

        return (torch.tensor(head + tail),)
def get_sigmas(model, scheduler, steps, denoise, lq_inflection_percent=0.5):
    """Compute a sigma schedule for ``model`` (adapted from ComfyUI).

    Args:
        model: object exposing ``model.model_sampling`` or
            ``inner_model.inner_model.model_sampling``.
        scheduler: scheduler name. ``"beta57"`` and ``"linear_quadratic"``
            are handled here; anything else is passed to
            ``comfy.samplers.calculate_sigmas``.
        steps: number of steps to keep in the returned schedule.
        denoise: when below 1.0 the schedule is computed for
            ``int(steps / denoise)`` steps and only the last ``steps + 1``
            values are returned; when <= 0 an empty tensor is returned.
        lq_inflection_percent: fraction of the total steps used as
            ``linear_steps`` for the ``"linear_quadratic"`` scheduler.

    Returns:
        A tensor of sigmas (empty when ``denoise <= 0``).

    Raises:
        AttributeError: if no ``model_sampling`` object can be located.
    """
    total_steps = steps
    if denoise < 1.0:
        if denoise <= 0.0:
            # Return a bare tensor like every other path; callers such as the
            # scheduler nodes wrap the result in a tuple themselves.
            return torch.FloatTensor([])
        total_steps = int(steps / denoise)

    if hasattr(model, "model"):
        model_sampling = model.model.model_sampling
    elif hasattr(model, "inner_model"):
        model_sampling = model.inner_model.inner_model.model_sampling
    else:
        raise AttributeError(
            "get_sigmas: model has neither a 'model' nor an 'inner_model' attribute"
        )

    if scheduler == "beta57":
        sigmas = comfy.samplers.beta_scheduler(model_sampling, total_steps, alpha=0.5, beta=0.7)
    elif scheduler == "linear_quadratic":
        linear_steps = int(total_steps * lq_inflection_percent)
        sigmas = comfy.samplers.linear_quadratic_schedule(
            model_sampling, total_steps, threshold_noise=0.025, linear_steps=linear_steps
        )
    else:
        sigmas = comfy.samplers.calculate_sigmas(model_sampling, scheduler, total_steps).cpu()

    # Keep only the tail of the schedule (relevant when denoise < 1).
    sigmas = sigmas[-(steps + 1):]
    return sigmas
def get_tiles_and_masks_padded(steps, latent_shape, tile_height, tile_width, compression=4):
    """Build four passes of tiles (with blend masks) over a latent.

    The passes are: an unshifted grid, a grid shifted by half a tile
    vertically, one shifted by half a tile horizontally, and one shifted in
    both directions.

    Args:
        steps: value stored in every tile tuple.
        latent_shape: shape of the latent; index 0 is used as the batch size
            and the last two entries as height and width.
        tile_height: tile height before division by ``compression``.
        tile_width: tile width before division by ``compression``.
        compression: divisor converting the tile size to latent units.

    Returns:
        A list of four passes. Each pass is a one-element list holding a list
        of ``(h, h_len, w, w_len, steps, mask)`` tuples.
    """
    batch_size = latent_shape[0]
    latent_size_h = latent_shape[-2]
    latent_size_w = latent_shape[-1]

    tile_size_h = int(tile_height // compression)  # CHANGED FROM 8
    tile_size_w = int(tile_width // compression)   # CHANGED FROM 8
    # Round down to a multiple of 4 so the quarter-tile borders below are whole.
    tile_size_h = int((tile_size_h // 4) * 4)  # MIGHT BE A PROBLEM WITH STAGE C?
    tile_size_w = int((tile_size_w // 4) * 4)

    # 3x3 grid of region masks: a quarter-tile border on each side around a
    # half-tile centre. masks[1][1] is the centre region.
    mask_h = [0, tile_size_h // 4, tile_size_h - tile_size_h // 4, tile_size_h]
    mask_w = [0, tile_size_w // 4, tile_size_w - tile_size_w // 4, tile_size_w]
    masks = [[] for _ in range(3)]
    for i in range(3):
        for j in range(3):
            mask = torch.zeros((batch_size, 1, tile_size_h, tile_size_w), dtype=torch.float32, device='cpu')
            mask[:, :, mask_h[i]:mask_h[i + 1], mask_w[j]:mask_w[j + 1]] = 1.0
            masks[i].append(mask)

    def create_mask(h_ind, w_ind, h_ind_max, w_ind_max, edge_h, edge_w, h_len, w_len):
        """Return the centre mask, widened to the border for tiles on an enabled edge."""
        mask = masks[1][1]
        on_boundary = h_ind == 0 or h_ind == h_ind_max or w_ind == 0 or w_ind == w_ind_max
        if on_boundary:
            mask = mask.clone()
            if h_ind == 0 and edge_h:
                mask += masks[0][1]
            if h_ind == h_ind_max and edge_h:
                mask += masks[2][1]
            if w_ind == 0 and edge_w:
                mask += masks[1][0]
            if w_ind == w_ind_max and edge_w:
                mask += masks[1][2]
            if h_ind == 0 and w_ind == 0 and edge_h and edge_w:
                mask += masks[0][0]
            if h_ind == 0 and w_ind == w_ind_max and edge_h and edge_w:
                mask += masks[0][2]
            if h_ind == h_ind_max and w_ind == 0 and edge_h and edge_w:
                mask += masks[2][0]
            if h_ind == h_ind_max and w_ind == w_ind_max and edge_h and edge_w:
                mask += masks[2][2]
        # Crop to the tile's actual extent (same view a narrow() from 0 gives).
        return mask[..., :h_len, :w_len]

    h = np.arange(0, latent_size_h, tile_size_h)
    h_shift = np.arange(tile_size_h // 2, latent_size_h - tile_size_h // 2, tile_size_h)
    w = np.arange(0, latent_size_w, tile_size_w)
    # The horizontal stop bound must use the tile *width*; using the height
    # here yields the wrong number of shifted columns for non-square tiles.
    w_shift = np.arange(tile_size_w // 2, latent_size_w - tile_size_w // 2, tile_size_w)

    def create_tile(hs, ws, edge_h, edge_w, i, j):
        top = int(hs[i])
        left = int(ws[j])
        h_len = min(tile_size_h, latent_size_h - top)
        w_len = min(tile_size_w, latent_size_w - left)
        mask = create_mask(i, j, len(hs) - 1, len(ws) - 1, edge_h, edge_w, h_len, w_len)
        return (top, h_len, left, w_len, steps, mask)

    def build_pass(hs, ws, edge_h, edge_w):
        return [[create_tile(hs, ws, edge_h, edge_w, i, j) for i in range(len(hs)) for j in range(len(ws))]]

    passes = [
        build_pass(h, w, True, True),
        build_pass(h_shift, w, False, True),
        build_pass(h, w_shift, True, False),
        build_pass(h_shift, w_shift, False, False),
    ]

    return passes
number of tiles tiles_h = [ calc_coords(latent_size_h, tile_size_h, jitter_h1), calc_coords(latent_size_h, tile_size_h, jitter_h2) ] tiles_w = [ calc_coords(latent_size_w, tile_size_w, jitter_w1), calc_coords(latent_size_w, tile_size_w, jitter_w2) ] tiles = [] if s % 2 == 0: for i, h in enumerate(tiles_h[0]): for w in tiles_w[i%2]: tiles.append((int(h[0]), int(h[1]), int(w[0]), int(w[1]), 1, None)) else: for i, w in enumerate(tiles_w[0]): for h in tiles_h[i%2]: tiles.append((int(h[0]), int(h[1]), int(w[0]), int(w[1]), 1, None)) tiles_all.append(tiles) return [tiles_all] ================================================ FILE: lightricks/model.py ================================================ import torch from torch import nn import torch.nn.functional as F import comfy.ldm.modules.attention import comfy.ldm.common_dit from einops import rearrange import math from typing import Dict, Optional, Tuple, List from .symmetric_patchifier import SymmetricPatchifier, latent_to_pixel_coords from ..helper import ExtraOptions def get_timestep_embedding( timesteps: torch.Tensor, embedding_dim: int, flip_sin_to_cos: bool = False, downscale_freq_shift: float = 1, scale: float = 1, max_period: int = 10000, ): """ This matches the implementation in Denoising Diffusion Probabilistic Models: Create sinusoidal timestep embeddings. Args timesteps (torch.Tensor): a 1-D Tensor of N indices, one per batch element. These may be fractional. embedding_dim (int): the dimension of the output. flip_sin_to_cos (bool): Whether the embedding order should be `cos, sin` (if True) or `sin, cos` (if False) downscale_freq_shift (float): Controls the delta between frequencies between dimensions scale (float): Scaling factor applied to the embeddings. max_period (int): Controls the maximum frequency of the embeddings Returns torch.Tensor: an [N x dim] Tensor of positional embeddings. 
class TimestepEmbedding(nn.Module):
    """Two-layer projection (Linear -> SiLU -> Linear) for timestep features.

    Args:
        in_channels: size of the incoming feature vector.
        time_embed_dim: hidden size of the first linear layer.
        act_fn: accepted for signature compatibility; the hidden activation
            is always SiLU.
        out_dim: output size; defaults to ``time_embed_dim``.
        post_act_fn: optional name of an activation applied to the output
            (``"swish"``, ``"silu"``, ``"mish"``, ``"gelu"`` or ``"relu"``).
        cond_proj_dim: when given, a bias-free linear layer projecting a
            conditioning tensor to ``in_channels`` is created.
        sample_proj_bias: whether the two linear layers use a bias.
        dtype, device: forwarded to the linear layers.
        operations: namespace providing the ``Linear`` factory.

    Raises:
        ValueError: if ``post_act_fn`` names an unsupported activation.
    """

    # Activations selectable through `post_act_fn`.
    _POST_ACTIVATIONS = {
        "swish": nn.SiLU,
        "silu": nn.SiLU,
        "mish": nn.Mish,
        "gelu": nn.GELU,
        "relu": nn.ReLU,
    }

    def __init__(
        self,
        in_channels: int,
        time_embed_dim: int,
        act_fn: str = "silu",
        out_dim: int = None,
        post_act_fn: Optional[str] = None,
        cond_proj_dim=None,
        sample_proj_bias=True,
        dtype=None, device=None, operations=None,
    ):
        super().__init__()

        self.linear_1 = operations.Linear(in_channels, time_embed_dim, sample_proj_bias, dtype=dtype, device=device)

        if cond_proj_dim is not None:
            self.cond_proj = operations.Linear(cond_proj_dim, in_channels, bias=False, dtype=dtype, device=device)
        else:
            self.cond_proj = None

        self.act = nn.SiLU()

        time_embed_dim_out = out_dim if out_dim is not None else time_embed_dim
        self.linear_2 = operations.Linear(time_embed_dim, time_embed_dim_out, sample_proj_bias, dtype=dtype, device=device)

        # Always define `post_act`: forward() reads it unconditionally, so
        # leaving it unset for a non-None `post_act_fn` raised AttributeError.
        if post_act_fn is None:
            self.post_act = None
        else:
            try:
                activation_cls = self._POST_ACTIVATIONS[str(post_act_fn).lower()]
            except KeyError:
                raise ValueError(f"Unsupported activation function: {post_act_fn}") from None
            self.post_act = activation_cls()

    def forward(self, sample, condition=None):
        """Project ``sample``; ``condition`` (if given) is projected and added first."""
        if condition is not None:
            sample = sample + self.cond_proj(condition)
        sample = self.linear_1(sample)

        if self.act is not None:
            sample = self.act(sample)

        sample = self.linear_2(sample)

        if self.post_act is not None:
            sample = self.post_act(sample)
        return sample
class PixArtAlphaCombinedTimestepSizeEmbeddings(nn.Module):
    """
    Timestep embedder for PixArt-Alpha.

    Timesteps are first expanded by a ``Timesteps`` projection configured for
    256 channels and then passed through a ``TimestepEmbedding`` producing
    ``embedding_dim`` features.

    Reference:
    https://github.com/PixArt-alpha/PixArt-alpha/blob/0f55e922376d8b797edd44d25d0e7464b260dcab/diffusion/model/nets/PixArtMS.py#L164C9-L168C29
    """

    def __init__(self, embedding_dim, size_emb_dim, use_additional_conditions: bool = False, dtype=None, device=None, operations=None):
        super().__init__()

        projection_channels = 256

        self.outdim = size_emb_dim
        self.time_proj = Timesteps(
            num_channels=projection_channels,
            flip_sin_to_cos=True,
            downscale_freq_shift=0,
        )
        self.timestep_embedder = TimestepEmbedding(
            in_channels=projection_channels,
            time_embed_dim=embedding_dim,
            dtype=dtype,
            device=device,
            operations=operations,
        )

    def forward(self, timestep, resolution, aspect_ratio, batch_size, hidden_dtype):
        """Embed ``timestep``; ``resolution``, ``aspect_ratio`` and ``batch_size`` are not used."""
        projected = self.time_proj(timestep).to(dtype=hidden_dtype)
        return self.timestep_embedder(projected)  # (N, D)
class PixArtAlphaTextProjection(nn.Module):
    """
    Caption-embedding projector: Linear -> activation -> Linear.

    ``act_fn`` selects the activation: ``"gelu_tanh"`` (tanh-approximated
    GELU) or ``"silu"``; any other value raises ``ValueError``.
    ``out_features`` defaults to ``hidden_size``.

    Adapted from https://github.com/PixArt-alpha/PixArt-alpha/blob/master/diffusion/model/nets/PixArt_blocks.py
    """

    def __init__(self, in_features, hidden_size, out_features=None, act_fn="gelu_tanh", dtype=None, device=None, operations=None):
        super().__init__()
        projected_size = hidden_size if out_features is None else out_features

        self.linear_1 = operations.Linear(in_features=in_features, out_features=hidden_size, bias=True, dtype=dtype, device=device)

        # Map each supported activation name to a factory for it.
        activation_factories = {
            "gelu_tanh": lambda: nn.GELU(approximate="tanh"),
            "silu": nn.SiLU,
        }
        if act_fn not in activation_factories:
            raise ValueError(f"Unknown activation function: {act_fn}")
        self.act_1 = activation_factories[act_fn]()

        self.linear_2 = operations.Linear(in_features=hidden_size, out_features=projected_size, bias=True, dtype=dtype, device=device)

    def forward(self, caption):
        """Apply the two linear layers with the activation in between."""
        return self.linear_2(self.act_1(self.linear_1(caption)))
class CrossAttention(nn.Module):
    """Multi-head attention with normalised queries/keys and optional rotary embedding.

    When ``context_dim`` is omitted the key/value projections take
    ``query_dim`` inputs; when ``context`` is omitted in ``forward`` the
    module attends over ``x`` itself.
    """

    def __init__(self, query_dim, context_dim=None, heads=8, dim_head=64, dropout=0., attn_precision=None, dtype=None, device=None, operations=None):
        super().__init__()
        inner_dim = dim_head * heads
        if context_dim is None:
            context_dim = query_dim
        factory_kwargs = {"dtype": dtype, "device": device}

        self.attn_precision = attn_precision
        self.heads = heads
        self.dim_head = dim_head

        self.q_norm = operations.RMSNorm(inner_dim, **factory_kwargs)
        self.k_norm = operations.RMSNorm(inner_dim, **factory_kwargs)

        self.to_q = operations.Linear(query_dim, inner_dim, bias=True, **factory_kwargs)
        self.to_k = operations.Linear(context_dim, inner_dim, bias=True, **factory_kwargs)
        self.to_v = operations.Linear(context_dim, inner_dim, bias=True, **factory_kwargs)

        output_proj = operations.Linear(inner_dim, query_dim, **factory_kwargs)
        self.to_out = nn.Sequential(output_proj, nn.Dropout(dropout))

    def forward(self, x, context=None, mask=None, pe=None):
        """Attend from ``x`` to ``context`` (or to ``x`` when no context is given).

        ``pe`` is passed to ``apply_rotary_emb`` for both queries and keys;
        ``mask`` selects the masked attention kernel.
        """
        if context is None:
            context = x

        q = self.to_q(x)
        k = self.to_k(context)
        v = self.to_v(context)

        q = self.q_norm(q)
        k = self.k_norm(k)

        if pe is not None:
            q = apply_rotary_emb(q, pe)
            k = apply_rotary_emb(k, pe)

        attention = comfy.ldm.modules.attention
        if mask is not None:
            out = attention.optimized_attention_masked(q, k, v, self.heads, mask, attn_precision=self.attn_precision)
        else:
            out = attention.optimized_attention(q, k, v, self.heads, attn_precision=self.attn_precision)
        return self.to_out(out)
def get_fractional_positions(indices_grid, max_pos):
    """Divide each of the first three coordinate channels by its maximum.

    Channel ``i`` of ``indices_grid`` (indexed along dimension 1) is divided
    by ``max_pos[i]`` for ``i`` in 0..2, and the three results are stacked
    along a new trailing dimension.
    """
    scaled_axes = []
    for axis in range(3):
        scaled_axes.append(indices_grid[:, axis] / max_pos[axis])
    return torch.stack(scaled_axes, dim=-1)
cos_freq.to(out_dtype), sin_freq.to(out_dtype) class ReLTXVModel(torch.nn.Module): def __init__(self, in_channels=128, cross_attention_dim=2048, attention_head_dim=64, num_attention_heads=32, caption_channels=4096, num_layers=28, positional_embedding_theta=10000.0, positional_embedding_max_pos=[20, 2048, 2048], causal_temporal_positioning=False, vae_scale_factors=(8, 32, 32), dtype=None, device=None, operations=None, **kwargs): super().__init__() self.generator = None self.vae_scale_factors = vae_scale_factors self.dtype = dtype self.out_channels = in_channels self.inner_dim = num_attention_heads * attention_head_dim self.causal_temporal_positioning = causal_temporal_positioning self.patchify_proj = operations.Linear(in_channels, self.inner_dim, bias=True, dtype=dtype, device=device) self.adaln_single = AdaLayerNormSingle( self.inner_dim, use_additional_conditions=False, dtype=dtype, device=device, operations=operations ) # self.adaln_single.linear = operations.Linear(self.inner_dim, 4 * self.inner_dim, bias=True, dtype=dtype, device=device) self.caption_projection = PixArtAlphaTextProjection( in_features=caption_channels, hidden_size=self.inner_dim, dtype=dtype, device=device, operations=operations ) self.transformer_blocks = nn.ModuleList( [ BasicTransformerBlock( self.inner_dim, num_attention_heads, attention_head_dim, context_dim=cross_attention_dim, # attn_precision=attn_precision, dtype=dtype, device=device, operations=operations ) for d in range(num_layers) ] ) self.scale_shift_table = nn.Parameter(torch.empty(2, self.inner_dim, dtype=dtype, device=device)) self.norm_out = operations.LayerNorm(self.inner_dim, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device) self.proj_out = operations.Linear(self.inner_dim, self.out_channels, dtype=dtype, device=device) self.patchifier = SymmetricPatchifier(1) def forward(self, x, timestep, context, attention_mask, frame_rate=25, transformer_options={}, keyframe_idxs=None, **kwargs): patches_replace = 
transformer_options.get("patches_replace", {}) SIGMA = timestep[0].unsqueeze(0) #/ 1000 EO = transformer_options.get("ExtraOptions", ExtraOptions("")) y0_style_pos = transformer_options.get("y0_style_pos") y0_style_neg = transformer_options.get("y0_style_neg") y0_style_pos_weight = transformer_options.get("y0_style_pos_weight", 0.0) y0_style_pos_synweight = transformer_options.get("y0_style_pos_synweight", 0.0) y0_style_pos_synweight *= y0_style_pos_weight y0_style_neg_weight = transformer_options.get("y0_style_neg_weight", 0.0) y0_style_neg_synweight = transformer_options.get("y0_style_neg_synweight", 0.0) y0_style_neg_synweight *= y0_style_neg_weight x_orig = x.clone() orig_shape = list(x.shape) x, latent_coords = self.patchifier.patchify(x) pixel_coords = latent_to_pixel_coords( latent_coords=latent_coords, scale_factors=self.vae_scale_factors, causal_fix=self.causal_temporal_positioning, ) if keyframe_idxs is not None: pixel_coords[:, :, -keyframe_idxs.shape[2]:] = keyframe_idxs fractional_coords = pixel_coords.to(torch.float32) fractional_coords[:, 0] = fractional_coords[:, 0] * (1.0 / frame_rate) x = self.patchify_proj(x) timestep = timestep * 1000.0 if attention_mask is not None and not torch.is_floating_point(attention_mask): attention_mask = (attention_mask - 1).to(x.dtype).reshape((attention_mask.shape[0], 1, -1, attention_mask.shape[-1])) * torch.finfo(x.dtype).max pe = precompute_freqs_cis(fractional_coords, dim=self.inner_dim, out_dtype=x.dtype) batch_size = x.shape[0] timestep, embedded_timestep = self.adaln_single( timestep.flatten(), {"resolution": None, "aspect_ratio": None}, batch_size=batch_size, hidden_dtype=x.dtype, ) # Second dimension is 1 or number of tokens (if timestep_per_token) timestep = timestep.view(batch_size, -1, timestep.shape[-1]) embedded_timestep = embedded_timestep.view( batch_size, -1, embedded_timestep.shape[-1] ) # 2. 
Blocks if self.caption_projection is not None: batch_size = x.shape[0] context = self.caption_projection(context) context = context.view( batch_size, -1, x.shape[-1] ) blocks_replace = patches_replace.get("dit", {}) for i, block in enumerate(self.transformer_blocks): if ("double_block", i) in blocks_replace: def block_wrap(args): out = {} out["img"] = block(args["img"], context=args["txt"], attention_mask=args["attention_mask"], timestep=args["vec"], pe=args["pe"]) return out out = blocks_replace[("double_block", i)]({"img": x, "txt": context, "attention_mask": attention_mask, "vec": timestep, "pe": pe}, {"original_block": block_wrap}) x = out["img"] else: x = block( x, context=context, attention_mask=attention_mask, timestep=timestep, pe=pe ) # 3. Output scale_shift_values = ( self.scale_shift_table[None, None].to(device=x.device, dtype=x.dtype) + embedded_timestep[:, :, None] ) shift, scale = scale_shift_values[:, :, 0], scale_shift_values[:, :, 1] x = self.norm_out(x) # Modulation x = x * (1 + scale) + shift x = self.proj_out(x) x = self.patchifier.unpatchify( latents=x, output_height=orig_shape[3], output_width=orig_shape[4], output_num_frames=orig_shape[2], out_channels=orig_shape[1] // math.prod(self.patchifier.patch_size), ) eps = x dtype = eps.dtype if self.style_dtype is None else self.style_dtype pinv_dtype = torch.float32 if dtype != torch.float64 else dtype W_inv = None #if eps.shape[0] == 2 or (eps.shape[0] == 1): #: and not UNCOND): if y0_style_pos is not None and y0_style_pos_weight != 0.0: y0_style_pos = y0_style_pos.to(torch.float32) x = x_orig.clone().to(torch.float32) eps = eps.to(torch.float32) eps_orig = eps.clone() sigma = SIGMA #t_orig[0].to(torch.float32) / 1000 denoised = x - sigma * eps img, img_latent_coords = self.patchifier.patchify(denoised) img_y0_adain, img_y0_adain_latent_coords = self.patchifier.patchify(y0_style_pos) W = self.patchify_proj.weight.data.to(torch.float32) # shape [2560, 64] b = self.patchify_proj.bias 
.data.to(torch.float32) # shape [2560] denoised_embed = F.linear(img .to(W), W, b).to(img) y0_adain_embed = F.linear(img_y0_adain.to(W), W, b).to(img_y0_adain) if transformer_options['y0_style_method'] == "AdaIN": denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed) for adain_iter in range(EO("style_iter", 0)): denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed) denoised_embed = (denoised_embed - b) @ torch.linalg.pinv(W.to(pinv_dtype)).T.to(dtype) denoised_embed = F.linear(denoised_embed.to(W), W, b).to(img) denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed) elif transformer_options['y0_style_method'] == "WCT": if self.y0_adain_embed is None or self.y0_adain_embed.shape != y0_adain_embed.shape or torch.norm(self.y0_adain_embed - y0_adain_embed) > 0: self.y0_adain_embed = y0_adain_embed f_s = y0_adain_embed[0].clone() self.mu_s = f_s.mean(dim=0, keepdim=True) f_s_centered = f_s - self.mu_s cov = (f_s_centered.T.double() @ f_s_centered.double()) / (f_s_centered.size(0) - 1) S_eig, U_eig = torch.linalg.eigh(cov + 1e-5 * torch.eye(cov.size(0), dtype=cov.dtype, device=cov.device)) S_eig_sqrt = S_eig.clamp(min=0).sqrt() # eigenvalues -> singular values whiten = U_eig @ torch.diag(S_eig_sqrt) @ U_eig.T self.y0_color = whiten.to(f_s_centered) for wct_i in range(eps.shape[0]): f_c = denoised_embed[wct_i].clone() mu_c = f_c.mean(dim=0, keepdim=True) f_c_centered = f_c - mu_c cov = (f_c_centered.T.double() @ f_c_centered.double()) / (f_c_centered.size(0) - 1) S_eig, U_eig = torch.linalg.eigh(cov + 1e-5 * torch.eye(cov.size(0), dtype=cov.dtype, device=cov.device)) inv_sqrt_eig = S_eig.clamp(min=0).rsqrt() whiten = U_eig @ torch.diag(inv_sqrt_eig) @ U_eig.T whiten = whiten.to(f_c_centered) f_c_whitened = f_c_centered @ whiten.T f_cs = f_c_whitened @ self.y0_color.T + self.mu_s denoised_embed[wct_i] = f_cs denoised_approx = (denoised_embed - b.to(denoised_embed)) @ torch.linalg.pinv(W).T.to(denoised_embed) denoised_approx = 
denoised_approx.to(eps) denoised_approx = self.patchifier.unpatchify( latents=denoised_approx, output_height=orig_shape[3], output_width=orig_shape[4], output_num_frames=orig_shape[2], out_channels=orig_shape[1] // math.prod(self.patchifier.patch_size), ) eps = (x - denoised_approx) / sigma #UNCOND = transformer_options['cond_or_uncond'][cond_iter] == 1 if eps.shape[0] == 1 and transformer_options['cond_or_uncond'][0] == 1: eps[0] = eps_orig[0] + y0_style_pos_synweight * (eps[0] - eps_orig[0]) #if eps.shape[0] == 2: # eps[1] = eps_orig[1] + y0_style_neg_synweight * (eps[1] - eps_orig[1]) else: #if not UNCOND: if eps.shape[0] == 2: eps[1] = eps_orig[1] + y0_style_pos_weight * (eps[1] - eps_orig[1]) eps[0] = eps_orig[0] + y0_style_pos_synweight * (eps[0] - eps_orig[0]) else: eps[0] = eps_orig[0] + y0_style_pos_weight * (eps[0] - eps_orig[0]) eps = eps.float() #if eps.shape[0] == 2 or (eps.shape[0] == 1): # and UNCOND): if y0_style_neg is not None and y0_style_neg_weight != 0.0: y0_style_neg = y0_style_neg.to(torch.float32) x = x_orig.clone().to(torch.float32) eps = eps.to(torch.float32) eps_orig = eps.clone() sigma = SIGMA #t_orig[0].to(torch.float32) / 1000 denoised = x - sigma * eps img, img_latent_coords = self.patchifier.patchify(denoised) img_y0_adain, img_y0_adain_latent_coords = self.patchifier.patchify(y0_style_neg) W = self.patchify_proj.weight.data.to(torch.float32) # shape [2560, 64] b = self.patchify_proj.bias .data.to(torch.float32) # shape [2560] denoised_embed = F.linear(img .to(W), W, b).to(img) y0_adain_embed = F.linear(img_y0_adain.to(W), W, b).to(img_y0_adain) if transformer_options['y0_style_method'] == "AdaIN": denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed) for adain_iter in range(EO("style_iter", 0)): denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed) denoised_embed = (denoised_embed - b) @ torch.linalg.pinv(W.to(pinv_dtype)).T.to(dtype) denoised_embed = F.linear(denoised_embed.to(W), W, b).to(img) 
denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed) elif transformer_options['y0_style_method'] == "WCT": if self.y0_adain_embed is None or self.y0_adain_embed.shape != y0_adain_embed.shape or torch.norm(self.y0_adain_embed - y0_adain_embed) > 0: self.y0_adain_embed = y0_adain_embed f_s = y0_adain_embed[0].clone() self.mu_s = f_s.mean(dim=0, keepdim=True) f_s_centered = f_s - self.mu_s cov = (f_s_centered.T.double() @ f_s_centered.double()) / (f_s_centered.size(0) - 1) S_eig, U_eig = torch.linalg.eigh(cov + 1e-5 * torch.eye(cov.size(0), dtype=cov.dtype, device=cov.device)) S_eig_sqrt = S_eig.clamp(min=0).sqrt() # eigenvalues -> singular values whiten = U_eig @ torch.diag(S_eig_sqrt) @ U_eig.T self.y0_color = whiten.to(f_s_centered) for wct_i in range(eps.shape[0]): f_c = denoised_embed[wct_i].clone() mu_c = f_c.mean(dim=0, keepdim=True) f_c_centered = f_c - mu_c cov = (f_c_centered.T.double() @ f_c_centered.double()) / (f_c_centered.size(0) - 1) S_eig, U_eig = torch.linalg.eigh(cov + 1e-5 * torch.eye(cov.size(0), dtype=cov.dtype, device=cov.device)) inv_sqrt_eig = S_eig.clamp(min=0).rsqrt() whiten = U_eig @ torch.diag(inv_sqrt_eig) @ U_eig.T whiten = whiten.to(f_c_centered) f_c_whitened = f_c_centered @ whiten.T f_cs = f_c_whitened @ self.y0_color.T + self.mu_s denoised_embed[wct_i] = f_cs denoised_approx = (denoised_embed - b.to(denoised_embed)) @ torch.linalg.pinv(W).T.to(denoised_embed) denoised_approx = denoised_approx.to(eps) #denoised_approx = rearrange(denoised_approx, "b (h w) (c ph pw) -> b c (h ph) (w pw)", h=h_len, w=w_len, ph=2, pw=2)[:,:,:h,:w] #denoised_approx = self.unpatchify(denoised_approx, (h + 1) // self.patch_size, (w + 1) // self.patch_size)[:,:,:h,:w] denoised_approx = self.patchifier.unpatchify( latents=denoised_approx, output_height=orig_shape[3], output_width=orig_shape[4], output_num_frames=orig_shape[2], out_channels=orig_shape[1] // math.prod(self.patchifier.patch_size), ) if eps.shape[0] == 1 and not 
def adain_seq_inplace(content: torch.Tensor, style: torch.Tensor, eps: float = 1e-7) -> torch.Tensor:
    """Adaptive instance normalisation along dim 1, written into ``content``.

    ``content`` is standardised with its own mean / std taken over dim 1 and
    then rescaled and shifted with the mean / std of ``style`` over the same
    dim.  ``eps`` is added to both standard deviations.

    The input tensor ``content`` is mutated and returned (no copy is made);
    ``style`` is left untouched.
    """
    # All statistics are read before ``content`` is modified.
    content_mean = content.mean(1, keepdim=True)
    content_std = content.std(1, keepdim=True)
    content_std.add_(eps)  # ``std`` returned a fresh tensor, so this is safe

    style_mean = style.mean(1, keepdim=True)
    style_std = style.std(1, keepdim=True)
    style_std.add_(eps)

    # Same operation order as the out-of-place variant below.
    content.sub_(content_mean)
    content.div_(content_std)
    content.mul_(style_std)
    content.add_(style_mean)
    return content


def adain_seq(content: torch.Tensor, style: torch.Tensor, eps: float = 1e-7) -> torch.Tensor:
    """Out-of-place counterpart of ``adain_seq_inplace``.

    Returns a new tensor holding ``content`` standardised over dim 1 and
    re-coloured with the dim-1 mean / std of ``style``; neither input is
    modified.
    """
    standardised = (content - content.mean(1, keepdim=True)) / (content.std(1, keepdim=True) + eps)
    return standardised * (style.std(1, keepdim=True) + eps) + style.mean(1, keepdim=True)
class Patchifier(ABC):
    """Abstract base for turning latent tensors into token sequences and back.

    The patch size is stored as ``(1, patch_size, patch_size)``: the temporal
    patch extent is always 1, height and width share ``patch_size``.
    """

    def __init__(self, patch_size: int):
        super().__init__()
        self._patch_size = (1, patch_size, patch_size)

    @abstractmethod
    def patchify(self, latents: Tensor) -> Tuple[Tensor, Tensor]:
        """Convert ``latents`` to tokens.

        The declared contract matches how the method is implemented and
        called in this package: a single ``latents`` argument, and a
        ``(tokens, latent_coords)`` pair as the result.
        """
        pass

    @abstractmethod
    def unpatchify(
        self,
        latents: Tensor,
        output_height: int,
        output_width: int,
        output_num_frames: int,
        out_channels: int,
    ) -> Tensor:
        """Inverse of ``patchify``: rebuild a latent tensor from tokens."""
        pass

    @property
    def patch_size(self):
        """The ``(1, patch_size, patch_size)`` tuple built in ``__init__``."""
        return self._patch_size

    def get_latent_coords(
        self, latent_num_frames, latent_height, latent_width, batch_size, device
    ):
        """
        Return a tensor of shape [batch_size, 3, num_patches] containing the
        top-left corner latent coordinates of each latent patch.
        The tensor is repeated for each batch element.
        """
        extents = (latent_num_frames, latent_height, latent_width)
        # One axis per (frame, height, width), stepping by the patch extent.
        axes = [
            torch.arange(0, extent, step, device=device)
            for extent, step in zip(extents, self._patch_size)
        ]
        grid = torch.stack(torch.meshgrid(*axes, indexing="ij"), dim=0)  # [3, f, h, w]
        batched = grid.unsqueeze(0).repeat(batch_size, 1, 1, 1, 1)  # [b, 3, f, h, w]
        # Collapse (f, h, w) into one patch axis, frame-major.
        return batched.flatten(start_dim=2)
class Encoder(nn.Module):
    r"""
    The `Encoder` layer of a variational autoencoder that encodes its input into a latent representation.

    Args:
        dims (`int` or `Tuple[int, int]`, *optional*, defaults to 3):
            The number of dimensions to use in convolutions.
        in_channels (`int`, *optional*, defaults to 3):
            The number of input channels.
        out_channels (`int`, *optional*, defaults to 3):
            The number of output (latent) channels.
        blocks (`List[Tuple[str, int | dict]]`, *optional*, defaults to `[("res_x", 1)]`):
            The blocks to use. Each block is a tuple of the block name and either the
            number of layers or a dict of block parameters.
        base_channels (`int`, *optional*, defaults to 128):
            The number of output channels for the first convolutional layer.
        norm_num_groups (`int`, *optional*, defaults to 32):
            The number of groups for normalization.
        patch_size (`int`, *optional*, defaults to 1):
            The patch size to use. Should be a power of 2.
        norm_layer (`str`, *optional*, defaults to `group_norm`):
            The normalization layer to use: `group_norm`, `pixel_norm` or `layer_norm`.
        latent_log_var (`str`, *optional*, defaults to `per_channel`):
            The number of channels for the log variance. Can be either `per_channel`,
            `uniform`, `constant` or `none`.
        spatial_padding_mode (`str`, *optional*, defaults to `zeros`):
            Forwarded unchanged to every convolution / block built here.

    Raises:
        ValueError: for an unknown block name, `norm_layer` or `latent_log_var`.
    """

    def __init__(
        self,
        dims: Union[int, Tuple[int, int]] = 3,
        in_channels: int = 3,
        out_channels: int = 3,
        blocks: List[Tuple[str, int | dict]] = [("res_x", 1)],
        base_channels: int = 128,
        norm_num_groups: int = 32,
        patch_size: Union[int, Tuple[int]] = 1,
        norm_layer: str = "group_norm",  # group_norm, pixel_norm, layer_norm
        latent_log_var: str = "per_channel",
        spatial_padding_mode: str = "zeros",
    ):
        super().__init__()
        self.patch_size = patch_size
        self.norm_layer = norm_layer
        self.latent_channels = out_channels
        self.latent_log_var = latent_log_var
        self.blocks_desc = blocks

        # Spatial patchification in forward() multiplies the channel count.
        in_channels = in_channels * patch_size**2
        output_channel = base_channels

        self.conv_in = make_conv_nd(
            dims=dims,
            in_channels=in_channels,
            out_channels=output_channel,
            kernel_size=3,
            stride=1,
            padding=1,
            causal=True,
            spatial_padding_mode=spatial_padding_mode,
        )

        # Plain strided-conv down-samplers; only "compress_all_x_y" widens channels.
        conv_strides = {
            "compress_time": (2, 1, 1),
            "compress_space": (1, 2, 2),
            "compress_all": (2, 2, 2),
            "compress_all_x_y": (2, 2, 2),
        }
        # Space-to-depth down-samplers with a residual path; all widen channels.
        residual_strides = {
            "compress_all_res": (2, 2, 2),
            "compress_space_res": (1, 2, 2),
            "compress_time_res": (2, 1, 1),
        }

        self.down_blocks = nn.ModuleList([])

        for block_name, block_params in blocks:
            input_channel = output_channel
            if isinstance(block_params, int):
                block_params = {"num_layers": block_params}

            if block_name == "res_x":
                block = UNetMidBlock3D(
                    dims=dims,
                    in_channels=input_channel,
                    num_layers=block_params["num_layers"],
                    resnet_eps=1e-6,
                    resnet_groups=norm_num_groups,
                    norm_layer=norm_layer,
                    spatial_padding_mode=spatial_padding_mode,
                )
            elif block_name == "res_x_y":
                output_channel = block_params.get("multiplier", 2) * output_channel
                block = ResnetBlock3D(
                    dims=dims,
                    in_channels=input_channel,
                    out_channels=output_channel,
                    eps=1e-6,
                    groups=norm_num_groups,
                    norm_layer=norm_layer,
                    spatial_padding_mode=spatial_padding_mode,
                )
            elif block_name in conv_strides:
                if block_name == "compress_all_x_y":
                    output_channel = block_params.get("multiplier", 2) * output_channel
                block = make_conv_nd(
                    dims=dims,
                    in_channels=input_channel,
                    out_channels=output_channel,
                    kernel_size=3,
                    stride=conv_strides[block_name],
                    causal=True,
                    spatial_padding_mode=spatial_padding_mode,
                )
            elif block_name in residual_strides:
                output_channel = block_params.get("multiplier", 2) * output_channel
                block = SpaceToDepthDownsample(
                    dims=dims,
                    in_channels=input_channel,
                    out_channels=output_channel,
                    stride=residual_strides[block_name],
                    spatial_padding_mode=spatial_padding_mode,
                )
            else:
                raise ValueError(f"unknown block: {block_name}")

            self.down_blocks.append(block)

        # out
        if norm_layer == "group_norm":
            self.conv_norm_out = nn.GroupNorm(
                num_channels=output_channel, num_groups=norm_num_groups, eps=1e-6
            )
        elif norm_layer == "pixel_norm":
            self.conv_norm_out = PixelNorm()
        elif norm_layer == "layer_norm":
            self.conv_norm_out = LayerNorm(output_channel, eps=1e-6)
        else:
            # Previously fell through silently and failed later in forward()
            # with an AttributeError on ``conv_norm_out``.
            raise ValueError(f"Invalid norm_layer: {norm_layer}")

        self.conv_act = nn.SiLU()

        conv_out_channels = out_channels
        if latent_log_var == "per_channel":
            conv_out_channels *= 2
        elif latent_log_var in ("uniform", "constant"):
            conv_out_channels += 1
        elif latent_log_var != "none":
            raise ValueError(f"Invalid latent_log_var: {latent_log_var}")
        self.conv_out = make_conv_nd(
            dims,
            output_channel,
            conv_out_channels,
            3,
            padding=1,
            causal=True,
            spatial_padding_mode=spatial_padding_mode,
        )

        self.gradient_checkpointing = False

    def forward(self, sample: torch.FloatTensor) -> torch.FloatTensor:
        r"""The forward method of the `Encoder` class.

        Patchifies ``sample``, runs it through ``conv_in``, every down block,
        the output norm / activation / conv, and finally reshapes the
        log-variance channels according to ``latent_log_var``.

        Raises:
            ValueError: in `uniform` mode when the conv output is neither 4-D nor 5-D.
        """
        sample = patchify(sample, patch_size_hw=self.patch_size, patch_size_t=1)
        sample = self.conv_in(sample)

        # The block and its input must be handed to checkpoint() together.
        # The earlier ``partial(checkpoint, use_reentrant=False)(block)(sample)``
        # form invoked ``block()`` with no arguments when checkpointing was on.
        use_checkpoint = self.gradient_checkpointing and self.training

        for down_block in self.down_blocks:
            if use_checkpoint:
                sample = torch.utils.checkpoint.checkpoint(
                    down_block, sample, use_reentrant=False
                )
            else:
                sample = down_block(sample)

        sample = self.conv_norm_out(sample)
        sample = self.conv_act(sample)
        sample = self.conv_out(sample)

        if self.latent_log_var == "uniform":
            # Broadcast the single trailing channel by repeating it.
            last_channel = sample[:, -1:, ...]
            num_dims = sample.dim()

            if num_dims == 4:
                # For shape (B, C, H, W)
                repeated_last_channel = last_channel.repeat(
                    1, sample.shape[1] - 2, 1, 1
                )
                sample = torch.cat([sample, repeated_last_channel], dim=1)
            elif num_dims == 5:
                # For shape (B, C, F, H, W)
                repeated_last_channel = last_channel.repeat(
                    1, sample.shape[1] - 2, 1, 1, 1
                )
                sample = torch.cat([sample, repeated_last_channel], dim=1)
            else:
                raise ValueError(f"Invalid input shape: {sample.shape}")
        elif self.latent_log_var == "constant":
            # Drop the extra channel and append a same-shaped constant block.
            sample = sample[:, :-1, ...]
            approx_ln_0 = (
                -30
            )  # this is the minimal clamp value in DiagonalGaussianDistribution objects
            sample = torch.cat(
                [sample, torch.ones_like(sample, device=sample.device) * approx_ln_0],
                dim=1,
            )

        return sample
""" def __init__( self, dims, in_channels: int = 3, out_channels: int = 3, blocks: List[Tuple[str, int | dict]] = [("res_x", 1)], base_channels: int = 128, layers_per_block: int = 2, norm_num_groups: int = 32, patch_size: int = 1, norm_layer: str = "group_norm", causal: bool = True, timestep_conditioning: bool = False, spatial_padding_mode: str = "zeros", ): super().__init__() self.patch_size = patch_size self.layers_per_block = layers_per_block out_channels = out_channels * patch_size**2 self.causal = causal self.blocks_desc = blocks # Compute output channel to be product of all channel-multiplier blocks output_channel = base_channels for block_name, block_params in list(reversed(blocks)): block_params = block_params if isinstance(block_params, dict) else {} if block_name == "res_x_y": output_channel = output_channel * block_params.get("multiplier", 2) if block_name == "compress_all": output_channel = output_channel * block_params.get("multiplier", 1) self.conv_in = make_conv_nd( dims, in_channels, output_channel, kernel_size=3, stride=1, padding=1, causal=True, spatial_padding_mode=spatial_padding_mode, ) self.up_blocks = nn.ModuleList([]) for block_name, block_params in list(reversed(blocks)): input_channel = output_channel if isinstance(block_params, int): block_params = {"num_layers": block_params} if block_name == "res_x": block = UNetMidBlock3D( dims=dims, in_channels=input_channel, num_layers=block_params["num_layers"], resnet_eps=1e-6, resnet_groups=norm_num_groups, norm_layer=norm_layer, inject_noise=block_params.get("inject_noise", False), timestep_conditioning=timestep_conditioning, spatial_padding_mode=spatial_padding_mode, ) elif block_name == "attn_res_x": block = UNetMidBlock3D( dims=dims, in_channels=input_channel, num_layers=block_params["num_layers"], resnet_groups=norm_num_groups, norm_layer=norm_layer, inject_noise=block_params.get("inject_noise", False), timestep_conditioning=timestep_conditioning, 
attention_head_dim=block_params["attention_head_dim"], spatial_padding_mode=spatial_padding_mode, ) elif block_name == "res_x_y": output_channel = output_channel // block_params.get("multiplier", 2) block = ResnetBlock3D( dims=dims, in_channels=input_channel, out_channels=output_channel, eps=1e-6, groups=norm_num_groups, norm_layer=norm_layer, inject_noise=block_params.get("inject_noise", False), timestep_conditioning=False, spatial_padding_mode=spatial_padding_mode, ) elif block_name == "compress_time": block = DepthToSpaceUpsample( dims=dims, in_channels=input_channel, stride=(2, 1, 1), spatial_padding_mode=spatial_padding_mode, ) elif block_name == "compress_space": block = DepthToSpaceUpsample( dims=dims, in_channels=input_channel, stride=(1, 2, 2), spatial_padding_mode=spatial_padding_mode, ) elif block_name == "compress_all": output_channel = output_channel // block_params.get("multiplier", 1) block = DepthToSpaceUpsample( dims=dims, in_channels=input_channel, stride=(2, 2, 2), residual=block_params.get("residual", False), out_channels_reduction_factor=block_params.get("multiplier", 1), spatial_padding_mode=spatial_padding_mode, ) else: raise ValueError(f"unknown layer: {block_name}") self.up_blocks.append(block) if norm_layer == "group_norm": self.conv_norm_out = nn.GroupNorm( num_channels=output_channel, num_groups=norm_num_groups, eps=1e-6 ) elif norm_layer == "pixel_norm": self.conv_norm_out = PixelNorm() elif norm_layer == "layer_norm": self.conv_norm_out = LayerNorm(output_channel, eps=1e-6) self.conv_act = nn.SiLU() self.conv_out = make_conv_nd( dims, output_channel, out_channels, 3, padding=1, causal=True, spatial_padding_mode=spatial_padding_mode, ) self.gradient_checkpointing = False self.timestep_conditioning = timestep_conditioning if timestep_conditioning: self.timestep_scale_multiplier = nn.Parameter( torch.tensor(1000.0, dtype=torch.float32) ) self.last_time_embedder = PixArtAlphaCombinedTimestepSizeEmbeddings( output_channel * 2, 0, 
class UNetMidBlock3D(nn.Module):
    """
    A 3D UNet mid-block [`UNetMidBlock3D`]: a stack of residual blocks that all
    keep the channel count at `in_channels`.

    Args:
        dims (`int` or `Tuple[int, int]`): Passed through to every `ResnetBlock3D`.
        in_channels (`int`): The number of input (and output) channels.
        dropout (`float`, *optional*, defaults to 0.0): The dropout rate.
        num_layers (`int`, *optional*, defaults to 1): The number of residual blocks.
        resnet_eps (`float`, *optional*, defaults to 1e-6): The epsilon value for the resnet blocks.
        resnet_groups (`int`, *optional*, defaults to 32):
            The number of groups for the resnet blocks' group normalization.
            `None` is replaced by `min(in_channels // 4, 32)`.
        norm_layer (`str`, *optional*, defaults to `group_norm`):
            The normalization layer name handed to the resnet blocks.
        inject_noise (`bool`, *optional*, defaults to `False`):
            Whether to inject noise into the hidden states.
        timestep_conditioning (`bool`, *optional*, defaults to `False`):
            Whether to condition the hidden states on the timestep.
        spatial_padding_mode (`str`, *optional*, defaults to `zeros`):
            Passed through to every `ResnetBlock3D`.

    Returns:
        `torch.FloatTensor`: The output of the last residual block.
    """

    def __init__(
        self,
        dims: Union[int, Tuple[int, int]],
        in_channels: int,
        dropout: float = 0.0,
        num_layers: int = 1,
        resnet_eps: float = 1e-6,
        resnet_groups: int = 32,
        norm_layer: str = "group_norm",
        inject_noise: bool = False,
        timestep_conditioning: bool = False,
        spatial_padding_mode: str = "zeros",
    ):
        super().__init__()
        if resnet_groups is None:
            resnet_groups = min(in_channels // 4, 32)

        self.timestep_conditioning = timestep_conditioning

        if timestep_conditioning:
            # Embedding width is four times the channel count.
            self.time_embedder = PixArtAlphaCombinedTimestepSizeEmbeddings(
                in_channels * 4, 0, operations=ops,
            )

        stack = []
        for _ in range(num_layers):
            stack.append(
                ResnetBlock3D(
                    dims=dims,
                    in_channels=in_channels,
                    out_channels=in_channels,
                    eps=resnet_eps,
                    groups=resnet_groups,
                    dropout=dropout,
                    norm_layer=norm_layer,
                    inject_noise=inject_noise,
                    timestep_conditioning=timestep_conditioning,
                    spatial_padding_mode=spatial_padding_mode,
                )
            )
        self.res_blocks = nn.ModuleList(stack)

    def forward(
        self,
        hidden_states: torch.FloatTensor,
        causal: bool = True,
        timestep: Optional[torch.Tensor] = None,
    ) -> torch.FloatTensor:
        """Run ``hidden_states`` through every residual block in order.

        With ``timestep_conditioning`` enabled, ``timestep`` is required; it is
        flattened, embedded once, reshaped to ``(batch, embed_dim, 1, 1, 1)``
        and handed to each block.  Otherwise the blocks receive ``timestep=None``.
        """
        if not self.timestep_conditioning:
            embedded = None
        else:
            assert (
                timestep is not None
            ), "should pass timestep with timestep_conditioning=True"
            n_batch = hidden_states.shape[0]
            embedded = self.time_embedder(
                timestep=timestep.flatten(),
                resolution=None,
                aspect_ratio=None,
                batch_size=n_batch,
                hidden_dtype=hidden_states.dtype,
            )
            embedded = embedded.view(n_batch, embedded.shape[-1], 1, 1, 1)

        out = hidden_states
        for res_block in self.res_blocks:
            out = res_block(out, causal=causal, timestep=embedded)
        return out
class LayerNorm(nn.Module):
    """Layer normalisation over the channel axis of a ``(b, c, d, h, w)`` tensor.

    The wrapped ``ops.LayerNorm`` is applied to the last axis, so the input is
    moved to channels-last for the call and moved back afterwards.
    """

    def __init__(self, dim, eps, elementwise_affine=True) -> None:
        super().__init__()
        self.norm = ops.LayerNorm(dim, eps=eps, elementwise_affine=elementwise_affine)

    def forward(self, x):
        """Normalise ``x`` and return it in the original ``b c d h w`` layout."""
        channels_last = rearrange(x, "b c d h w -> b d h w c")
        normalized = self.norm(channels_last)
        return rearrange(normalized, "b d h w c -> b c d h w")
""" def __init__( self, dims: Union[int, Tuple[int, int]], in_channels: int, out_channels: Optional[int] = None, dropout: float = 0.0, groups: int = 32, eps: float = 1e-6, norm_layer: str = "group_norm", inject_noise: bool = False, timestep_conditioning: bool = False, spatial_padding_mode: str = "zeros", ): super().__init__() self.in_channels = in_channels out_channels = in_channels if out_channels is None else out_channels self.out_channels = out_channels self.inject_noise = inject_noise if norm_layer == "group_norm": self.norm1 = nn.GroupNorm( num_groups=groups, num_channels=in_channels, eps=eps, affine=True ) elif norm_layer == "pixel_norm": self.norm1 = PixelNorm() elif norm_layer == "layer_norm": self.norm1 = LayerNorm(in_channels, eps=eps, elementwise_affine=True) self.non_linearity = nn.SiLU() self.conv1 = make_conv_nd( dims, in_channels, out_channels, kernel_size=3, stride=1, padding=1, causal=True, spatial_padding_mode=spatial_padding_mode, ) if inject_noise: self.per_channel_scale1 = nn.Parameter(torch.zeros((in_channels, 1, 1))) if norm_layer == "group_norm": self.norm2 = nn.GroupNorm( num_groups=groups, num_channels=out_channels, eps=eps, affine=True ) elif norm_layer == "pixel_norm": self.norm2 = PixelNorm() elif norm_layer == "layer_norm": self.norm2 = LayerNorm(out_channels, eps=eps, elementwise_affine=True) self.dropout = torch.nn.Dropout(dropout) self.conv2 = make_conv_nd( dims, out_channels, out_channels, kernel_size=3, stride=1, padding=1, causal=True, spatial_padding_mode=spatial_padding_mode, ) if inject_noise: self.per_channel_scale2 = nn.Parameter(torch.zeros((in_channels, 1, 1))) self.conv_shortcut = ( make_linear_nd( dims=dims, in_channels=in_channels, out_channels=out_channels ) if in_channels != out_channels else nn.Identity() ) self.norm3 = ( LayerNorm(in_channels, eps=eps, elementwise_affine=True) if in_channels != out_channels else nn.Identity() ) self.timestep_conditioning = timestep_conditioning if timestep_conditioning: 
self.scale_shift_table = nn.Parameter( torch.randn(4, in_channels) / in_channels**0.5 ) def _feed_spatial_noise( self, hidden_states: torch.FloatTensor, per_channel_scale: torch.FloatTensor ) -> torch.FloatTensor: spatial_shape = hidden_states.shape[-2:] device = hidden_states.device dtype = hidden_states.dtype # similar to the "explicit noise inputs" method in style-gan spatial_noise = torch.randn(spatial_shape, device=device, dtype=dtype)[None] scaled_noise = (spatial_noise * per_channel_scale)[None, :, None, ...] hidden_states = hidden_states + scaled_noise return hidden_states def forward( self, input_tensor: torch.FloatTensor, causal: bool = True, timestep: Optional[torch.Tensor] = None, ) -> torch.FloatTensor: hidden_states = input_tensor batch_size = hidden_states.shape[0] hidden_states = self.norm1(hidden_states) if self.timestep_conditioning: assert ( timestep is not None ), "should pass timestep with timestep_conditioning=True" ada_values = self.scale_shift_table[ None, ..., None, None, None ].to(device=hidden_states.device, dtype=hidden_states.dtype) + timestep.reshape( batch_size, 4, -1, timestep.shape[-3], timestep.shape[-2], timestep.shape[-1], ) shift1, scale1, shift2, scale2 = ada_values.unbind(dim=1) hidden_states = hidden_states * (1 + scale1) + shift1 hidden_states = self.non_linearity(hidden_states) hidden_states = self.conv1(hidden_states, causal=causal) if self.inject_noise: hidden_states = self._feed_spatial_noise( hidden_states, self.per_channel_scale1.to(device=hidden_states.device, dtype=hidden_states.dtype) ) hidden_states = self.norm2(hidden_states) if self.timestep_conditioning: hidden_states = hidden_states * (1 + scale2) + shift2 hidden_states = self.non_linearity(hidden_states) hidden_states = self.dropout(hidden_states) hidden_states = self.conv2(hidden_states, causal=causal) if self.inject_noise: hidden_states = self._feed_spatial_noise( hidden_states, self.per_channel_scale2.to(device=hidden_states.device, 
def unpatchify(x, patch_size_hw, patch_size_t=1):
    """Unfold patch channels back into the spatial (and temporal) axes.

    Args:
        x: A 4-D ``(b, c, h, w)`` or 5-D ``(b, c, f, h, w)`` tensor whose
            channel axis holds the folded patch elements.
        patch_size_hw: Patch edge length used for both height and width.
        patch_size_t: Patch length along the frame axis (5-D input only).

    Returns:
        ``x`` unchanged when both patch sizes are 1, otherwise the unfolded
        tensor.

    Raises:
        ValueError: If patching is requested and ``x`` is neither 4-D nor 5-D.
            This mirrors ``patchify``; previously such inputs were returned
            unmodified, which hid shape errors from the caller.
    """
    if patch_size_hw == 1 and patch_size_t == 1:
        return x

    if x.dim() == 4:
        x = rearrange(
            x, "b (c r q) h w -> b c (h q) (w r)", q=patch_size_hw, r=patch_size_hw
        )
    elif x.dim() == 5:
        x = rearrange(
            x,
            "b (c p r q) f h w -> b c (f p) (h q) (w r)",
            p=patch_size_t,
            q=patch_size_hw,
            r=patch_size_hw,
        )
    else:
        raise ValueError(f"Invalid input shape: {x.shape}")

    return x
) self.encoder = Encoder( dims=config["dims"], in_channels=config.get("in_channels", 3), out_channels=config["latent_channels"], blocks=config.get("encoder_blocks", config.get("encoder_blocks", config.get("blocks"))), patch_size=config.get("patch_size", 1), latent_log_var=latent_log_var, norm_layer=config.get("norm_layer", "group_norm"), spatial_padding_mode=config.get("spatial_padding_mode", "zeros"), ) self.decoder = Decoder( dims=config["dims"], in_channels=config["latent_channels"], out_channels=config.get("out_channels", 3), blocks=config.get("decoder_blocks", config.get("decoder_blocks", config.get("blocks"))), patch_size=config.get("patch_size", 1), norm_layer=config.get("norm_layer", "group_norm"), causal=config.get("causal_decoder", False), timestep_conditioning=self.timestep_conditioning, spatial_padding_mode=config.get("spatial_padding_mode", "zeros"), ) self.per_channel_statistics = processor() def guess_config(self, version): if version == 0: config = { "_class_name": "CausalVideoAutoencoder", "dims": 3, "in_channels": 3, "out_channels": 3, "latent_channels": 128, "blocks": [ ["res_x", 4], ["compress_all", 1], ["res_x_y", 1], ["res_x", 3], ["compress_all", 1], ["res_x_y", 1], ["res_x", 3], ["compress_all", 1], ["res_x", 3], ["res_x", 4], ], "scaling_factor": 1.0, "norm_layer": "pixel_norm", "patch_size": 4, "latent_log_var": "uniform", "use_quant_conv": False, "causal_decoder": False, } elif version == 1: config = { "_class_name": "CausalVideoAutoencoder", "dims": 3, "in_channels": 3, "out_channels": 3, "latent_channels": 128, "decoder_blocks": [ ["res_x", {"num_layers": 5, "inject_noise": True}], ["compress_all", {"residual": True, "multiplier": 2}], ["res_x", {"num_layers": 6, "inject_noise": True}], ["compress_all", {"residual": True, "multiplier": 2}], ["res_x", {"num_layers": 7, "inject_noise": True}], ["compress_all", {"residual": True, "multiplier": 2}], ["res_x", {"num_layers": 8, "inject_noise": False}] ], "encoder_blocks": [ ["res_x", 
{"num_layers": 4}], ["compress_all", {}], ["res_x_y", 1], ["res_x", {"num_layers": 3}], ["compress_all", {}], ["res_x_y", 1], ["res_x", {"num_layers": 3}], ["compress_all", {}], ["res_x", {"num_layers": 3}], ["res_x", {"num_layers": 4}] ], "scaling_factor": 1.0, "norm_layer": "pixel_norm", "patch_size": 4, "latent_log_var": "uniform", "use_quant_conv": False, "causal_decoder": False, "timestep_conditioning": True, } else: config = { "_class_name": "CausalVideoAutoencoder", "dims": 3, "in_channels": 3, "out_channels": 3, "latent_channels": 128, "encoder_blocks": [ ["res_x", {"num_layers": 4}], ["compress_space_res", {"multiplier": 2}], ["res_x", {"num_layers": 6}], ["compress_time_res", {"multiplier": 2}], ["res_x", {"num_layers": 6}], ["compress_all_res", {"multiplier": 2}], ["res_x", {"num_layers": 2}], ["compress_all_res", {"multiplier": 2}], ["res_x", {"num_layers": 2}] ], "decoder_blocks": [ ["res_x", {"num_layers": 5, "inject_noise": False}], ["compress_all", {"residual": True, "multiplier": 2}], ["res_x", {"num_layers": 5, "inject_noise": False}], ["compress_all", {"residual": True, "multiplier": 2}], ["res_x", {"num_layers": 5, "inject_noise": False}], ["compress_all", {"residual": True, "multiplier": 2}], ["res_x", {"num_layers": 5, "inject_noise": False}] ], "scaling_factor": 1.0, "norm_layer": "pixel_norm", "patch_size": 4, "latent_log_var": "uniform", "use_quant_conv": False, "causal_decoder": False, "timestep_conditioning": True } return config def encode(self, x): frames_count = x.shape[2] if ((frames_count - 1) % 8) != 0: raise ValueError("Invalid number of frames: Encode input must have 1 + 8 * x frames (e.g., 1, 9, 17, ...). 
def make_conv_nd(
    dims: Union[int, Tuple[int, int]],
    in_channels: int,
    out_channels: int,
    kernel_size: int,
    stride=1,
    padding=0,
    dilation=1,
    groups=1,
    bias=True,
    causal=False,
    spatial_padding_mode="zeros",
    temporal_padding_mode="zeros",
):
    """Build the convolution layer matching ``dims``.

    Args:
        dims: ``2`` for ``ops.Conv2d``, ``3`` for ``ops.Conv3d`` (or
            ``CausalConv3d`` when ``causal`` is true), ``(2, 1)`` for the
            factorised ``DualConv3d``.
        in_channels, out_channels, kernel_size, stride, padding, dilation,
        groups, bias: Forwarded to the selected layer.
        causal: Only consulted for ``dims == 3``; selects ``CausalConv3d``.
        spatial_padding_mode: Padding mode handed to the layer.
        temporal_padding_mode: Only used for validation; it must equal
            ``spatial_padding_mode`` unless ``causal`` is true.

    Returns:
        The constructed convolution module.

    Raises:
        NotImplementedError: If the two padding modes differ and ``causal`` is
            false.
        ValueError: If ``dims`` is not one of the supported values.
    """
    if not (spatial_padding_mode == temporal_padding_mode or causal):
        raise NotImplementedError("spatial and temporal padding modes must be equal")

    if dims == 2:
        return ops.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            bias=bias,
            padding_mode=spatial_padding_mode,
        )
    elif dims == 3:
        if causal:
            return CausalConv3d(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=kernel_size,
                stride=stride,
                padding=padding,
                dilation=dilation,
                groups=groups,
                bias=bias,
                spatial_padding_mode=spatial_padding_mode,
            )
        return ops.Conv3d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            bias=bias,
            padding_mode=spatial_padding_mode,
        )
    elif dims == (2, 1):
        # dilation and groups used to be dropped here, so non-default values
        # were silently ignored for the factorised convolution.
        return DualConv3d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            bias=bias,
            padding_mode=spatial_padding_mode,
        )
    else:
        raise ValueError(f"unsupported dimensions: {dims}")
class DualConv3d(nn.Module):
    """3D convolution factorised into a spatial and a temporal convolution.

    The first convolution uses a ``(1, kh, kw)`` kernel (``weight1``/``bias1``)
    and the second a ``(kt, 1, 1)`` kernel (``weight2``/``bias2``).  The forward
    pass can run either as two ``conv3d`` calls or as a ``conv2d`` followed by
    a ``conv1d``; both paths compute the same result.

    Args:
        in_channels: Channels of the input tensor.
        out_channels: Channels of the output tensor.
        kernel_size: Int or ``(kt, kh, kw)``; ``(1, 1, 1)`` is rejected.
        stride, padding, dilation: Int or 3-tuple in ``(t, h, w)`` order.
        groups: Group count passed to both convolutions.
        bias: Whether both convolutions have a bias.
        padding_mode: ``"zeros"`` uses the convolution's own zero padding; any
            other value is passed to ``F.pad`` as ``mode``.

    Raises:
        ValueError: If ``kernel_size`` is ``(1, 1, 1)``.
    """

    def __init__(
        self,
        in_channels,
        out_channels,
        kernel_size,
        stride: Union[int, Tuple[int, int, int]] = 1,
        padding: Union[int, Tuple[int, int, int]] = 0,
        dilation: Union[int, Tuple[int, int, int]] = 1,
        groups=1,
        bias=True,
        padding_mode="zeros",
    ):
        super().__init__()

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.padding_mode = padding_mode

        # Ensure kernel_size, stride, padding, and dilation are tuples of length 3
        if isinstance(kernel_size, int):
            kernel_size = (kernel_size, kernel_size, kernel_size)
        if kernel_size == (1, 1, 1):
            raise ValueError(
                "kernel_size must be greater than 1. Use make_linear_nd instead."
            )
        if isinstance(stride, int):
            stride = (stride, stride, stride)
        if isinstance(padding, int):
            padding = (padding, padding, padding)
        if isinstance(dilation, int):
            dilation = (dilation, dilation, dilation)

        # Set parameters for convolutions
        self.groups = groups
        self.bias = bias

        # Channel count between the two convolutions: the larger of in/out.
        intermediate_channels = (
            out_channels if in_channels < out_channels else in_channels
        )

        # Spatial convolution: kernel depth 1, acts on (h, w) only.
        self.weight1 = nn.Parameter(
            torch.Tensor(
                intermediate_channels,
                in_channels // groups,
                1,
                kernel_size[1],
                kernel_size[2],
            )
        )
        self.stride1 = (1, stride[1], stride[2])
        self.padding1 = (0, padding[1], padding[2])
        self.dilation1 = (1, dilation[1], dilation[2])
        if bias:
            self.bias1 = nn.Parameter(torch.Tensor(intermediate_channels))
        else:
            self.register_parameter("bias1", None)

        # Temporal convolution: 1x1 spatially, acts on the depth axis only.
        self.weight2 = nn.Parameter(
            torch.Tensor(
                out_channels, intermediate_channels // groups, kernel_size[0], 1, 1
            )
        )
        self.stride2 = (stride[0], 1, 1)
        self.padding2 = (padding[0], 0, 0)
        self.dilation2 = (dilation[0], 1, 1)
        if bias:
            self.bias2 = nn.Parameter(torch.Tensor(out_channels))
        else:
            self.register_parameter("bias2", None)

        # Initialize weights and biases
        self.reset_parameters()

    def reset_parameters(self):
        """Initialise both kernels (Kaiming uniform) and both biases."""
        nn.init.kaiming_uniform_(self.weight1, a=math.sqrt(5))
        nn.init.kaiming_uniform_(self.weight2, a=math.sqrt(5))
        if self.bias:
            fan_in1, _ = nn.init._calculate_fan_in_and_fan_out(self.weight1)
            bound1 = 1 / math.sqrt(fan_in1)
            nn.init.uniform_(self.bias1, -bound1, bound1)
            fan_in2, _ = nn.init._calculate_fan_in_and_fan_out(self.weight2)
            bound2 = 1 / math.sqrt(fan_in2)
            nn.init.uniform_(self.bias2, -bound2, bound2)

    def _padded_conv(self, conv_fn, x, weight, bias, stride, padding, dilation):
        """Apply ``conv_fn`` while honouring ``self.padding_mode``.

        The functional convolutions only zero-pad and take no ``padding_mode``
        argument, so any other mode is applied up front with ``F.pad`` and the
        convolution is then run without padding.
        """
        if self.padding_mode != "zeros" and any(padding):
            # F.pad wants (left, right) pairs starting from the LAST dimension.
            pad = [amount for size in reversed(padding) for amount in (size, size)]
            x = F.pad(x, pad, mode=self.padding_mode)
            padding = (0,) * len(padding)
        return conv_fn(x, weight, bias, stride, padding, dilation, self.groups)

    def forward(self, x, use_conv3d=False, skip_time_conv=False):
        """Convolve ``x`` of shape ``(b, c, d, h, w)``.

        Args:
            x: Input tensor.
            use_conv3d: Use the two-``conv3d`` path instead of conv2d + conv1d.
            skip_time_conv: Return right after the spatial convolution.
        """
        if use_conv3d:
            return self.forward_with_3d(x=x, skip_time_conv=skip_time_conv)
        else:
            return self.forward_with_2d(x=x, skip_time_conv=skip_time_conv)

    def forward_with_3d(self, x, skip_time_conv):
        """Run both stages as ``conv3d`` calls."""
        # First (spatial) convolution
        x = self._padded_conv(
            F.conv3d,
            x,
            self.weight1,
            self.bias1,
            self.stride1,
            self.padding1,
            self.dilation1,
        )

        if skip_time_conv:
            return x

        # Second (temporal) convolution
        x = self._padded_conv(
            F.conv3d,
            x,
            self.weight2,
            self.bias2,
            self.stride2,
            self.padding2,
            self.dilation2,
        )

        return x

    def forward_with_2d(self, x, skip_time_conv):
        """Run the spatial stage as ``conv2d`` and the temporal as ``conv1d``."""
        b, _, d, _, _ = x.shape

        # "b c d h w -> (b d) c h w": every frame becomes its own 2D sample.
        x = x.permute(0, 2, 1, 3, 4)
        x = x.reshape(b * d, x.shape[2], x.shape[3], x.shape[4])

        # Squeeze the depth dimension out of weight1 since it's 1
        weight1 = self.weight1.squeeze(2)
        x = self._padded_conv(
            F.conv2d,
            x,
            weight1,
            self.bias1,
            self.stride1[1:],
            self.padding1[1:],
            self.dilation1[1:],
        )

        _, c, h, w = x.shape

        if skip_time_conv:
            # "(b d) c h w -> b c d h w"
            return x.reshape(b, d, c, h, w).permute(0, 2, 1, 3, 4)

        # "(b d) c h w -> (b h w) c d": every pixel becomes a 1D sequence
        # over the depth axis.
        x = x.reshape(b, d, c, h, w).permute(0, 3, 4, 2, 1).reshape(b * h * w, c, d)

        # Reshape weight2 to match the expected dimensions for conv1d
        weight2 = self.weight2.squeeze(-1).squeeze(-1)
        x = self._padded_conv(
            F.conv1d,
            x,
            weight2,
            self.bias2,
            self.stride2[:1],
            self.padding2[:1],
            self.dilation2[:1],
        )

        # "(b h w) c d -> b c d h w"
        return x.reshape(b, h, w, x.shape[1], x.shape[2]).permute(0, 3, 4, 1, 2)

    @property
    def weight(self):
        """The temporal kernel (``weight2``)."""
        return self.weight2
enc[k] dec = comfy.utils.load_torch_file(folder_paths.get_full_path_or_raise("vae_approx", decoder)) for k in dec: sd[f"taesd_decoder.{k}"] = dec[k] # VAE scale and shift mapping vae_params = { "taesd": (0.18215, 0.0), "taesdxl": (0.13025, 0.0), "taesd3": (1.5305, 0.0609), "taef1": (0.3611, 0.1159) } if name in vae_params: scale, shift = vae_params[name] sd["vae_scale"] = torch.tensor(scale) sd["vae_shift"] = torch.tensor(shift) return sd @staticmethod def guess_clip_type(model): import comfy.model_base as mb type_map = [ (mb.SDXLRefiner, "sdxl"), (mb.SDXL, "sdxl"), (mb.SD15_instructpix2pix, "stable_diffusion"), (mb.SDXL_instructpix2pix, "sdxl"), (mb.StableCascade_C, "stable_cascade"), (mb.StableCascade_B, "stable_cascade"), (mb.Flux, "flux"), (mb.LTXV, "ltxv"), (mb.HunyuanDiT, "hunyuan_dit"), (mb.HunyuanVideo, "hunyuan_video"), (mb.HunyuanVideoI2V, "hunyuan_video"), (mb.HunyuanVideoSkyreelsI2V, "hunyuan_video"), (mb.PixArt, "pixart"), (mb.CosmosVideo, "cosmos"), (mb.Lumina2, "lumina2"), (mb.WAN21, "wan"), (mb.WAN21_Vace, "wan"), (mb.WAN21_Camera, "wan"), (mb.HiDream, "hidream"), (mb.Chroma, "chroma"), (mb.ACEStep, "ace"), (mb.SD3, "sd3"), (mb.GenmoMochi, "mochi"), ] for cls, clip_type in type_map: if isinstance(model, cls): return clip_type.upper() # fallback known_types = { "stable_diffusion", "stable_cascade", "sd3", "stable_audio", "hunyuan_dit", "flux", "mochi", "ltxv", "hunyuan_video", "pixart", "cosmos", "lumina2", "wan", "hidream", "chroma", "ace" } class_name = model.__class__.__name__.lower() for t in known_types: if t in class_name: return t.upper() default_clip_type = "stable_diffusion" return default_clip_type.upper() @staticmethod def get_model_files(): return [f for f in folder_paths.get_filename_list("checkpoints") + folder_paths.get_filename_list("diffusion_models") if f.endswith((".ckpt", ".safetensors", ".sft", ".pt"))] @staticmethod def get_weight_options(): return ["default", "fp8_e4m3fn", "fp8_e4m3fn_fast", "fp8_e5m2"] @staticmethod def 
get_clip_options(): return [".use_ckpt_clip"] + folder_paths.get_filename_list("text_encoders") @staticmethod def vae_list(): vaes = folder_paths.get_filename_list("vae") approx_vaes = folder_paths.get_filename_list("vae_approx") sdxl_taesd_enc = False sdxl_taesd_dec = False sd1_taesd_enc = False sd1_taesd_dec = False sd3_taesd_enc = False sd3_taesd_dec = False f1_taesd_enc = False f1_taesd_dec = False for v in approx_vaes: if v.startswith("taesd_decoder."): sd1_taesd_dec = True elif v.startswith("taesd_encoder."): sd1_taesd_enc = True elif v.startswith("taesdxl_decoder."): sdxl_taesd_dec = True elif v.startswith("taesdxl_encoder."): sdxl_taesd_enc = True elif v.startswith("taesd3_decoder."): sd3_taesd_dec = True elif v.startswith("taesd3_encoder."): sd3_taesd_enc = True elif v.startswith("taef1_encoder."): f1_taesd_enc = True elif v.startswith("taef1_decoder."): f1_taesd_dec = True if sd1_taesd_dec and sd1_taesd_enc: vaes.append("taesd") if sdxl_taesd_dec and sdxl_taesd_enc: vaes.append("taesdxl") if sd3_taesd_dec and sd3_taesd_enc: vaes.append("taesd3") if f1_taesd_dec and f1_taesd_enc: vaes.append("taef1") return vaes def process_weight_dtype(self, weight_dtype): model_options = {} if weight_dtype == "fp8_e4m3fn": model_options["dtype"] = torch.float8_e4m3fn elif weight_dtype == "fp8_e4m3fn_fast": model_options["dtype"] = torch.float8_e4m3fn model_options["fp8_optimizations"] = True elif weight_dtype == "fp8_e5m2": model_options["dtype"] = torch.float8_e5m2 return model_options def load_checkpoint(self, model_name, output_vae, output_clip, model_options): try: ckpt_path = folder_paths.get_full_path_or_raise("checkpoints", model_name) out = None try: out = comfy.sd.load_checkpoint_guess_config( ckpt_path, output_vae=output_vae, output_clip=output_clip, embedding_directory=folder_paths.get_folder_paths("embeddings"), model_options=model_options ) except RuntimeError as e: if "ERROR: Could not detect model type of:" in str(e): error_msg = "" if output_vae is True: 
def load_clipvision(ckpt_path):
    """Load a CLIP vision model from ``ckpt_path``.

    This delegates to ``comfy.clip_vision.load``.  The earlier version first
    read the whole checkpoint with ``load_torch_file`` into a local that was
    never used, and that redundant read is gone.

    Args:
        ckpt_path: Path of the CLIP vision checkpoint.

    Returns:
        Whatever ``comfy.clip_vision.load`` returns for that path.
    """
    return comfy.clip_vision.load(ckpt_path)
class SD35Loader(BaseModelLoader):
    """Loader node that returns a model, a CLIP loaded as SD3, and a VAE."""

    @classmethod
    def INPUT_TYPES(cls):
        """Describe the node's required inputs and their selectable values."""
        required = {}
        required["model_name"] = (cls.get_model_files(),)
        required["weight_dtype"] = (cls.get_weight_options(),)
        required["clip_name1"] = (cls.get_clip_options(),)
        required["clip_name2_opt"] = ([".none"] + folder_paths.get_filename_list("text_encoders"),)
        required["clip_name3_opt"] = ([".none"] + folder_paths.get_filename_list("text_encoders"),)
        required["vae_name"] = (
            [".use_ckpt_vae"]
            + folder_paths.get_filename_list("vae")
            + ["taesd", "taesdxl", "taesd3", "taef1"],
        )
        return {"required": required}

    RETURN_TYPES = ("MODEL", "CLIP", "VAE")
    RETURN_NAMES = ("model", "clip", "vae")
    FUNCTION = "main"
    CATEGORY = "RES4LYF/loaders"

    def main(self, model_name, weight_dtype, clip_name1, clip_name2_opt, clip_name3_opt, vae_name):
        """Load the checkpoint, then resolve the CLIP and VAE selections.

        Raises:
            ValueError: If ``.use_ckpt_clip`` is combined with another text
                encoder, or the checkpoint carries no CLIP when one was
                requested from it.
        """
        model_options = self.process_weight_dtype(weight_dtype)

        # Fixed seeds are set before anything is loaded.
        torch.manual_seed(42)
        torch.cuda.manual_seed_all(42)

        clip_from_ckpt = clip_name1 == ".use_ckpt_clip"
        extra_clip_names = [
            name for name in (clip_name2_opt, clip_name3_opt) if name != ".none"
        ]
        if clip_from_ckpt and extra_clip_names:
            raise ValueError("Cannot specify both \".use_ckpt_clip\" and another clip")

        vae_from_ckpt = vae_name == ".use_ckpt_vae"
        ckpt_out = self.load_checkpoint(model_name, vae_from_ckpt, clip_from_ckpt, model_options)

        if clip_from_ckpt:
            clip = ckpt_out[1]
            if clip is None:
                raise ValueError("Model does not have a clip")
        else:
            clip_paths = [
                folder_paths.get_full_path_or_raise("text_encoders", name)
                for name in (clip_name1, *extra_clip_names)
            ]
            clip = comfy.sd.load_clip(
                clip_paths,
                embedding_directory=folder_paths.get_folder_paths("embeddings"),
                clip_type=comfy.sd.CLIPType.SD3,
            )

        vae = self.load_vae(vae_name, ckpt_out)
        return (ckpt_out[0], clip, vae)
and (clip_name2_opt != ".none" or clip_name3_opt != ".none" or clip_name4_opt != ".none"): raise ValueError("Cannot specify both \".use_ckpt_clip\" and another clip") output_vae = vae_name == ".use_ckpt_vae" output_clip = clip_name1_opt == ".use_ckpt_clip" ckpt_out = self.load_checkpoint(model_name, output_vae, output_clip, model_options) if clip_name1_opt == ".use_ckpt_clip": if ckpt_out[1] is None: raise ValueError("Model does not have a clip") clip = ckpt_out[1] elif clip_name1_opt == ".none": clip = None else: clip_paths = [folder_paths.get_full_path_or_raise("text_encoders", clip_name1_opt)] for clip_name in [clip_name2_opt, clip_name3_opt, clip_name4_opt]: if clip_name != ".none": clip_paths.append(folder_paths.get_full_path_or_raise("text_encoders", clip_name)) if "auto" in clip_type and ckpt_out[0].model is not None: sdCLIPType = getattr(comfy.sd.CLIPType, self.guess_clip_type(ckpt_out[0].model), comfy.sd.CLIPType.STABLE_DIFFUSION) else: sdCLIPType = getattr(comfy.sd.CLIPType, clip_type.upper(), comfy.sd.CLIPType.STABLE_DIFFUSION) clip = comfy.sd.load_clip(clip_paths, embedding_directory=folder_paths.get_folder_paths("embeddings"), clip_type=sdCLIPType) vae = self.load_vae(vae_name, ckpt_out) return (ckpt_out[0], clip, vae) from .style_transfer import Retrojector import torch.nn as nn class LayerPatcher: @classmethod def INPUT_TYPES(s): return {"required": { "model": ("MODEL",), "embedder": (s.get_model_patches(),), "gates": (s.get_model_patches(),), "last_layer": (s.get_model_patches(),), "dtype": (["bfloat16", "float16", "float32", "float64"], {"default": "float64"}), #"retrojector": (s.get_model_patches(),), }} RETURN_TYPES = ("MODEL",) RETURN_NAMES = ("model",) FUNCTION = "main" CATEGORY = "RES4LYF/patchers" @staticmethod def get_model_patches(): return [f for f in folder_paths.get_filename_list("diffusion_models") if f.endswith((".safetensors", ".sft"))] def main(self, model, embedder, gates, last_layer, retrojector=None, dtype="float64"): dtype = 
def set_nested_attr(model, key, value, dtype):
    """Copy ``value`` into the tensor addressed by a dotted ``key`` under ``model``.

    Every component of ``key`` except the last is walked from ``model``:
    all-digit components are used as integer indices, anything else as an
    attribute name. The last component is always looked up as an attribute,
    and ``value`` is moved to that attribute's device, cast to ``dtype`` and
    copied in place into its ``.data``.
    """
    *path, leaf = key.split(".")

    owner = model
    for name in path:
        owner = owner[int(name)] if name.isdigit() else getattr(owner, name)

    target = getattr(owner, leaf)
    target.data.copy_(value.to(target.device, dtype=dtype))
from comfy.ldm.flux.layers import SingleStreamBlock, DoubleStreamBlock from .flux.model import ReFlux from .flux.layers import SingleStreamBlock as ReSingleStreamBlock, DoubleStreamBlock as ReDoubleStreamBlock from comfy.ldm.flux.model import Flux from comfy.ldm.flux.layers import SingleStreamBlock, DoubleStreamBlock from comfy.ldm.hidream.model import HiDreamImageTransformer2DModel from comfy.ldm.hidream.model import HiDreamImageBlock, HiDreamImageSingleTransformerBlock, HiDreamImageTransformerBlock, HiDreamAttention from .hidream.model import HDModel from .hidream.model import HDBlock, HDBlockDouble, HDBlockSingle, HDAttention, HDMoEGate, HDMOEFeedForwardSwiGLU, HDFeedForwardSwiGLU, HDLastLayer from comfy.ldm.modules.diffusionmodules.mmdit import OpenAISignatureMMDITWrapper, JointBlock from .sd35.mmdit import ReOpenAISignatureMMDITWrapper, ReJointBlock from comfy.ldm.aura.mmdit import MMDiT, DiTBlock, MMDiTBlock, SingleAttention, DoubleAttention from .aura.mmdit import ReMMDiT, ReDiTBlock, ReMMDiTBlock, ReSingleAttention, ReDoubleAttention from comfy.ldm.wan.model import WanAttentionBlock, WanI2VCrossAttention, WanModel, WanSelfAttention, WanT2VCrossAttention from .wan.model import ReWanAttentionBlock, ReWanI2VCrossAttention, ReWanModel, ReWanRawSelfAttention, ReWanSelfAttention, ReWanSlidingSelfAttention, ReWanT2VSlidingCrossAttention, ReWanT2VCrossAttention, ReWanT2VRawCrossAttention from comfy.ldm.chroma.model import Chroma from comfy.ldm.chroma.layers import SingleStreamBlock as ChromaSingleStreamBlock, DoubleStreamBlock as ChromaDoubleStreamBlock from .chroma.model import ReChroma from .chroma.layers import ReChromaSingleStreamBlock, ReChromaDoubleStreamBlock from comfy.ldm.lightricks.model import LTXVModel #from comfy.ldm.chroma.layers import SingleStreamBlock as ChromaSingleStreamBlock, DoubleStreamBlock as ChromaDoubleStreamBlock from .lightricks.model import ReLTXVModel #from .chroma.layers import ReChromaSingleStreamBlock, ReChromaDoubleStreamBlock from 
comfy.ldm.modules.diffusionmodules.openaimodel import UNetModel, ResBlock from comfy.ldm.modules.attention import SpatialTransformer, BasicTransformerBlock, CrossAttention from .sd.openaimodel import ReUNetModel, ReResBlock from .sd.attention import ReBasicTransformerBlock, ReCrossAttention, ReSpatialTransformer from .latents import get_orthogonal, get_cosine_similarity from .style_transfer import StyleWCT, WaveletStyleWCT, Retrojector, StyleMMDiT_Model from .res4lyf import RESplain from .helper import parse_range_string from comfy.model_sampling import * class PRED: TYPE_VP = {CONST} TYPE_VE = {EPS} TYPE_VPRED = {V_PREDICTION, EDM} TYPE_X0 = {X0, IMG_TO_IMG} TYPE_ALL = TYPE_VP | TYPE_VE | TYPE_VPRED | TYPE_X0 @classmethod def get_type(cls, model_sampling): bases = type(model_sampling).__mro__ return next((v_type for v_type in bases if v_type in cls.TYPE_ALL), None) def time_snr_shift_exponential(alpha, t): return math.exp(alpha) / (math.exp(alpha) + (1 / t - 1) ** 1.0) def time_snr_shift_linear(alpha, t): if alpha == 1.0: return t return alpha * t / (1 + (alpha - 1) * t) COMPILE_MODES = ["default", "max-autotune", "max-autotune-no-cudagraphs", "reduce-overhead"] class TorchCompileModels: def __init__(self): self._compiled = False @classmethod def INPUT_TYPES(s): return {"required": { "model" : ("MODEL",), "backend" : (["inductor", "cudagraphs"],), "fullgraph" : ("BOOLEAN", {"default": False, "tooltip": "Enable full graph mode"}), "mode" : (COMPILE_MODES, {"default": "default"}), "dynamic" : ("BOOLEAN", {"default": False, "tooltip": "Enable dynamic mode"}), "dynamo_cache_size_limit" : ("INT", {"default": 64, "min": 0, "max": 1024, "step": 1, "tooltip": "torch._dynamo.config.cache_size_limit"}), "triton_max_block_x" : ("INT", {"default": 0, "min": 0, "max": 4294967296, "step": 1}) }} RETURN_TYPES = ("MODEL",) RETURN_NAMES = ("model",) FUNCTION = "main" CATEGORY = "RES4LYF/model_patches" def main(self, model, backend = "inductor", mode = "default", fullgraph = False, 
dynamic = False, dynamo_cache_size_limit = 64, triton_max_block_x = 0, ): m = model.clone() diffusion_model = m.get_model_object("diffusion_model") torch._dynamo.config.cache_size_limit = dynamo_cache_size_limit if triton_max_block_x > 0: import os os.environ["TRITON_MAX_BLOCK_X"] = "4096" if not self._compiled: try: if hasattr(diffusion_model, "double_blocks"): for i, block in enumerate(diffusion_model.double_blocks): m.add_object_patch(f"diffusion_model.double_blocks.{i}", torch.compile(block, mode=mode, dynamic=dynamic, fullgraph=fullgraph, backend=backend)) self._compiled = True if hasattr(diffusion_model, "single_blocks"): for i, block in enumerate(diffusion_model.single_blocks): m.add_object_patch(f"diffusion_model.single_blocks.{i}", torch.compile(block, mode=mode, dynamic=dynamic, fullgraph=fullgraph, backend=backend)) self._compiled = True if hasattr(diffusion_model, "double_layers"): for i, block in enumerate(diffusion_model.double_layers): m.add_object_patch(f"diffusion_model.double_layers.{i}", torch.compile(block, mode=mode, dynamic=dynamic, fullgraph=fullgraph, backend=backend)) self._compiled = True if hasattr(diffusion_model, "single_layers"): for i, block in enumerate(diffusion_model.single_layers): m.add_object_patch(f"diffusion_model.single_layers.{i}", torch.compile(block, mode=mode, dynamic=dynamic, fullgraph=fullgraph, backend=backend)) self._compiled = True if hasattr(diffusion_model, "double_stream_blocks"): for i, block in enumerate(diffusion_model.double_stream_blocks): m.add_object_patch(f"diffusion_model.double_stream_blocks.{i}", torch.compile(block, mode=mode, dynamic=dynamic, fullgraph=fullgraph, backend=backend)) self._compiled = True if hasattr(diffusion_model, "single_stream_blocks"): for i, block in enumerate(diffusion_model.single_stream_blocks): m.add_object_patch(f"diffusion_model.single_stream_blocks.{i}", torch.compile(block, mode=mode, dynamic=dynamic, fullgraph=fullgraph, backend=backend)) self._compiled = True if 
hasattr(diffusion_model, "joint_blocks"): for i, block in enumerate(diffusion_model.joint_blocks): m.add_object_patch(f"diffusion_model.joint_blocks.{i}", torch.compile(block, mode=mode, dynamic=dynamic, fullgraph=fullgraph, backend=backend)) self._compiled = True if hasattr(diffusion_model, "blocks"): for i, block in enumerate(diffusion_model.blocks): m.add_object_patch(f"diffusion_model.blocks.{i}", torch.compile(block, mode=mode, dynamic=dynamic, fullgraph=fullgraph, backend=backend)) self._compiled = True if self._compiled == False: raise RuntimeError("Model not compiled. Verify that this is a Flux, SD3.5, HiDream, WAN, or Aura model!") compile_settings = { "backend": backend, "mode": mode, "fullgraph": fullgraph, "dynamic": dynamic, } setattr(m.model, "compile_settings", compile_settings) except: raise RuntimeError("Failed to compile model. Verify that this is a Flux, SD3.5, HiDream, WAN, or Aura model!") return (m, ) class ReWanPatcherAdvanced: def __init__(self): self.sliding_window_size = 0 self.sliding_window_self_attn = "false" @classmethod def INPUT_TYPES(s): return { "required": { "model" : ("MODEL",), #"self_attn_blocks" : ("STRING", {"default": "0,1,2,3,4,5,6,7,8,9,", "multiline": True}), "self_attn_blocks" : ("STRING", {"default": "all", "multiline": True}), "cross_attn_blocks" : ("STRING", {"default": "all", "multiline": True}), "enable" : ("BOOLEAN", {"default": True}), "sliding_window_self_attn" : (['false', 'standard', 'circular'], {"default": "false"}), "sliding_window_frames" : ("INT", {"default": 60, "min": 4, "max": 0xffffffffffffffff, "step": 4, "tooltip": "How many real frames each frame sees. 
Divide frames by 4 to get real frames."}), } } RETURN_TYPES = ("MODEL",) RETURN_NAMES = ("model",) CATEGORY = "RES4LYF/model_patches" FUNCTION = "main" def main(self, model, self_attn_blocks, cross_attn_blocks, sliding_window_self_attn="false", sliding_window_frames=60, style_dtype="float32", enable=True, force=False): style_dtype = getattr(torch, style_dtype) if style_dtype != "default" else None model.model.diffusion_model.style_dtype = style_dtype model.model.diffusion_model.proj_weights = None model.model.diffusion_model.y0_adain_embed = None sliding_window_size = sliding_window_frames // 4 self_attn_blocks = parse_range_string(self_attn_blocks) cross_attn_blocks = parse_range_string(cross_attn_blocks) T2V = type(model.model.model_config) is comfy.supported_models.WAN21_T2V if (enable or force) and model.model.diffusion_model.__class__ == WanModel: m = model.clone() m.model.diffusion_model.__class__ = ReWanModel m.model.diffusion_model.threshold_inv = False for i, block in enumerate(m.model.diffusion_model.blocks): block.__class__ = ReWanAttentionBlock if i in self_attn_blocks: if sliding_window_self_attn != "false": block.self_attn.__class__ = ReWanSlidingSelfAttention block.self_attn.winderz = sliding_window_size block.self_attn.winderz_type = sliding_window_self_attn else: block.self_attn.__class__ = ReWanSelfAttention block.self_attn.winderz_type = "false" else: block.self_attn.__class__ = ReWanRawSelfAttention if i in cross_attn_blocks: if T2V: if False: #sliding_window_self_attn != "false": block.cross_attn.__class__ = ReWanT2VSlidingCrossAttention block.cross_attn.winderz = sliding_window_size block.cross_attn.winderz_type = sliding_window_self_attn else: block.cross_attn.__class__ = ReWanT2VCrossAttention else: block.cross_attn.__class__ = ReWanI2VCrossAttention block.idx = i block.self_attn.idx = i block.cross_attn.idx = i # 40 total blocks (i == 39) elif enable and (sliding_window_self_attn != self.sliding_window_self_attn or sliding_window_size != 
class ReFluxPatcherAdvanced:
    """Node that switches a Flux model between stock and ``Re*`` classes.

    Enabling reassigns ``__class__`` on the diffusion model and on each of
    its double/single stream blocks; disabling assigns the stock Flux
    classes back. The block-range strings choose which blocks get the
    mask-aware class; the remaining blocks get the ``NoMask`` variant.
    """

    RETURN_TYPES = ("MODEL",)
    RETURN_NAMES = ("model",)
    CATEGORY     = "RES4LYF/model_patches"
    FUNCTION     = "main"

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "model"               : ("MODEL",),
                "doublestream_blocks" : ("STRING",  {"default": "all", "multiline": True}),
                "singlestream_blocks" : ("STRING",  {"default": "all", "multiline": True}),
                "style_dtype"         : (["default", "bfloat16", "float16", "float32", "float64"], {"default": "float64"}),
                "enable"              : ("BOOLEAN", {"default": True}),
            }
        }

    def main(self, model, doublestream_blocks, singlestream_blocks, style_dtype, enable=True, force=False):
        """Patch or unpatch ``model`` and return it in a one-element tuple.

        Raises:
            ValueError: if the diffusion model is neither ``Flux`` nor
                ``ReFlux`` and no patch/unpatch branch applied.
        """
        masked_double = parse_range_string(doublestream_blocks)
        masked_single = parse_range_string(singlestream_blocks)

        if style_dtype == "default":
            style_dtype = None
        else:
            style_dtype = getattr(torch, style_dtype)

        # Style-transfer state is reset on the incoming model's diffusion
        # model on every call, before any class check is made.
        dm = model.model.diffusion_model
        dm.style_dtype    = style_dtype
        dm.proj_weights   = None
        dm.y0_adain_embed = None
        dm.adain_pw_cache = None
        dm.StyleWCT       = StyleWCT()
        dm.Retrojector    = Retrojector(dm.img_in, pinv_dtype=style_dtype, dtype=style_dtype)

        current_cls = dm.__class__

        if current_cls == Flux and (enable or force):
            m = model.clone()
            patched = m.model.diffusion_model
            patched.__class__     = ReFlux
            patched.threshold_inv = False

            for i, block in enumerate(patched.double_blocks):
                block.__class__ = ReDoubleStreamBlock if i in masked_double else ReDoubleStreamBlockNoMask
                block.idx       = i

            for i, block in enumerate(patched.single_blocks):
                block.__class__ = ReSingleStreamBlock if i in masked_single else ReSingleStreamBlockNoMask
                block.idx       = i

            return (m,)

        if current_cls == ReFlux and not enable:
            m = model.clone()
            restored = m.model.diffusion_model
            restored.__class__ = Flux

            for i, block in enumerate(restored.double_blocks):
                block.__class__ = DoubleStreamBlock
                block.idx       = i

            for i, block in enumerate(restored.single_blocks):
                block.__class__ = SingleStreamBlock
                block.idx       = i

            return (m,)

        if current_cls not in {ReFlux, Flux}:
            raise ValueError("This node is for enabling regional conditioning for Flux only!")

        # Already in the requested state: hand the model back untouched.
        return (model,)
class ReChromaPatcherAdvanced:
    """Node that switches a Chroma model between stock and ``ReChroma*`` classes.

    Enabling reassigns ``__class__`` on the diffusion model and on each of
    its double/single stream blocks; disabling assigns the stock Chroma
    classes back. The block-range strings choose which blocks get the
    mask-aware class; the remaining blocks get the ``NoMask`` variant.
    """

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "model"               : ("MODEL",),
                "doublestream_blocks" : ("STRING",  {"default": "all", "multiline": True}),
                "singlestream_blocks" : ("STRING",  {"default": "all", "multiline": True}),
                "style_dtype"         : (["default", "bfloat16", "float16", "float32", "float64"], {"default": "float64"}),
                "enable"              : ("BOOLEAN", {"default": True}),
            }
        }

    RETURN_TYPES = ("MODEL",)
    RETURN_NAMES = ("model",)
    CATEGORY     = "RES4LYF/model_patches"
    FUNCTION     = "main"

    def main(self, model, doublestream_blocks, singlestream_blocks, style_dtype, enable=True, force=False):
        """Patch or unpatch ``model`` and return it in a one-element tuple.

        Args:
            model: model wrapper exposing ``model.diffusion_model`` and ``clone()``.
            doublestream_blocks: range string (parsed by ``parse_range_string``)
                selecting the double-stream blocks that get the mask-aware class.
            singlestream_blocks: same, for single-stream blocks.
            style_dtype: name of a ``torch`` dtype, or ``"default"`` for ``None``.
            enable: patch when True, unpatch when False.
            force: treated like ``enable`` for the patching branch.

        Raises:
            ValueError: if the diffusion model is neither ``Chroma`` nor
                ``ReChroma`` and no patch/unpatch branch applied.
        """
        doublestream_blocks = parse_range_string(doublestream_blocks)
        singlestream_blocks = parse_range_string(singlestream_blocks)

        style_dtype = getattr(torch, style_dtype) if style_dtype != "default" else None

        # Style-transfer state is reset on every call, before any class check.
        model.model.diffusion_model.style_dtype    = style_dtype
        model.model.diffusion_model.proj_weights   = None
        model.model.diffusion_model.y0_adain_embed = None
        model.model.diffusion_model.StyleWCT       = StyleWCT()
        model.model.diffusion_model.Retrojector    = Retrojector(model.model.diffusion_model.img_in, pinv_dtype=style_dtype, dtype=style_dtype)

        if (enable or force) and model.model.diffusion_model.__class__ == Chroma:
            m = model.clone()
            m.model.diffusion_model.__class__     = ReChroma
            m.model.diffusion_model.threshold_inv = False

            for i, block in enumerate(m.model.diffusion_model.double_blocks):
                if i in doublestream_blocks:
                    block.__class__ = ReChromaDoubleStreamBlock
                else:
                    block.__class__ = ReChromaDoubleStreamBlockNoMask
                block.idx = i

            for i, block in enumerate(m.model.diffusion_model.single_blocks):
                if i in singlestream_blocks:
                    block.__class__ = ReChromaSingleStreamBlock
                else:
                    block.__class__ = ReChromaSingleStreamBlockNoMask
                block.idx = i

        elif not enable and model.model.diffusion_model.__class__ == ReChroma:
            m = model.clone()
            m.model.diffusion_model.__class__ = Chroma

            # Fix: restore the Chroma block classes. The bare
            # DoubleStreamBlock / SingleStreamBlock names in this module are
            # the Flux implementations, so unpatching used to leave a Chroma
            # model built from Flux blocks.
            for i, block in enumerate(m.model.diffusion_model.double_blocks):
                block.__class__ = ChromaDoubleStreamBlock
                block.idx       = i

            for i, block in enumerate(m.model.diffusion_model.single_blocks):
                block.__class__ = ChromaSingleStreamBlock
                block.idx       = i

        elif model.model.diffusion_model.__class__ not in {ReChroma, Chroma}:
            raise ValueError("This node is for enabling regional conditioning for Chroma only!")
        else:
            # Already in the requested state: hand the model back untouched.
            m = model

        return (m,)
class ReSDPatcherAdvanced:
    """Node that switches a UNet model between stock and ``Re*`` classes.

    Enabling reassigns ``__class__`` on the diffusion model and on every
    ResBlock / SpatialTransformer (plus the transformer blocks and their two
    attention modules) found in the input, middle and output stages;
    disabling assigns the stock classes back.
    """

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "model"               : ("MODEL",),
                "doublestream_blocks" : ("STRING",  {"default": "all", "multiline": True}),
                "singlestream_blocks" : ("STRING",  {"default": "all", "multiline": True}),
                "style_dtype"         : (["default", "bfloat16", "float16", "float32", "float64"], {"default": "float64"}),
                "enable"              : ("BOOLEAN", {"default": True}),
            }
        }

    RETURN_TYPES = ("MODEL",)
    RETURN_NAMES = ("model",)
    CATEGORY     = "RES4LYF/model_patches"
    FUNCTION     = "main"
    #EXPERIMENTAL = True

    @staticmethod
    def _iter_unet_layers(unet):
        """Yield each layer of the input, middle and output stages, in that order."""
        for stage in unet.input_blocks:
            yield from stage
        yield from unet.middle_block
        for stage in unet.output_blocks:
            yield from stage

    @classmethod
    def _retype_unet_layers(cls, unet, res_pair, transformer_pair, block_cls, attn_cls):
        """Reassign ``__class__`` on matching layers of ``unet``.

        ``res_pair`` and ``transformer_pair`` are ``(match, replacement)``
        class pairs for residual and spatial-transformer layers. Each
        transformer block inside a matched spatial transformer becomes
        ``block_cls`` and its ``attn1``/``attn2`` become ``attn_cls``.
        """
        res_from, res_to = res_pair
        st_from,  st_to  = transformer_pair

        for layer in cls._iter_unet_layers(unet):
            if isinstance(layer, res_from):
                layer.__class__ = res_to
            if isinstance(layer, st_from):
                layer.__class__ = st_to
                for tblock in layer.transformer_blocks:
                    tblock.__class__       = block_cls
                    tblock.attn1.__class__ = attn_cls
                    tblock.attn2.__class__ = attn_cls

    def main(self, model, doublestream_blocks, singlestream_blocks, style_dtype, enable=True, force=False):
        """Patch or unpatch ``model`` and return it in a one-element tuple.

        Args:
            model: model wrapper exposing ``model.diffusion_model`` and ``clone()``.
            doublestream_blocks, singlestream_blocks: range strings; they are
                parsed but do not select anything in this node.
            style_dtype: name of a ``torch`` dtype, or ``"default"`` for ``None``.
            enable: patch when True, unpatch when False.
            force: treated like ``enable`` for the patching branch.

        Raises:
            ValueError: if the diffusion model is neither ``UNetModel`` nor
                ``ReUNetModel`` and no patch/unpatch branch applied.
        """
        # Parsed only so that malformed range strings are still rejected as
        # before; the results are not used for UNet models.
        doublestream_blocks = parse_range_string(doublestream_blocks)
        singlestream_blocks = parse_range_string(singlestream_blocks)

        style_dtype = getattr(torch, style_dtype) if style_dtype != "default" else None

        # Style-transfer state is reset on every call, before any class check.
        model.model.diffusion_model.style_dtype    = style_dtype
        model.model.diffusion_model.proj_weights   = None
        model.model.diffusion_model.y0_adain_embed = None
        model.model.diffusion_model.StyleWCT       = StyleWCT()
        model.model.diffusion_model.Retrojector    = Retrojector(model.model.diffusion_model.input_blocks[0][0], pinv_dtype=style_dtype, dtype=style_dtype, patch_size=1)

        if (enable or force) and model.model.diffusion_model.__class__ == UNetModel:
            m = model.clone()
            unet = m.model.diffusion_model
            unet.__class__     = ReUNetModel
            unet.threshold_inv = False

            self._retype_unet_layers(
                unet,
                res_pair         = (ResBlock, ReResBlock),
                transformer_pair = (SpatialTransformer, ReSpatialTransformer),
                block_cls        = ReBasicTransformerBlock,
                attn_cls         = ReCrossAttention,
            )

        elif not enable and model.model.diffusion_model.__class__ == ReUNetModel:
            m = model.clone()
            unet = m.model.diffusion_model
            unet.__class__ = UNetModel

            # Fix: the output stage used to be indexed as ``output_blocks[i[j]]``
            # (subscripting an int), so unpatching always raised TypeError.
            # All three stages now share one traversal.
            self._retype_unet_layers(
                unet,
                res_pair         = (ReResBlock, ResBlock),
                transformer_pair = (ReSpatialTransformer, SpatialTransformer),
                block_cls        = BasicTransformerBlock,
                attn_cls         = CrossAttention,
            )

        elif model.model.diffusion_model.__class__ not in {ReUNetModel, UNetModel}:
            raise ValueError("This node is for enabling regional conditioning for SD1.5 and SDXL only!")
        else:
            # Already in the requested state: hand the model back untouched.
            m = model

        return (m,)
parse_range_string(double_stream_blocks) single_stream_blocks = parse_range_string(single_stream_blocks) style_dtype = getattr(torch, style_dtype) if style_dtype != "default" else None model.model.diffusion_model.style_dtype = style_dtype model.model.diffusion_model.proj_weights = None model.model.diffusion_model.y0_adain_embed = None model.model.diffusion_model.StyleWCT = StyleWCT() model.model.diffusion_model.WaveletStyleWCT = WaveletStyleWCT() model.model.diffusion_model.Retrojector = Retrojector(model.model.diffusion_model.x_embedder.proj, pinv_dtype=style_dtype, dtype=style_dtype) #model.model.diffusion_model.Endojector = Retrojector(model.model.diffusion_model.final_layer.linear, pinv_dtype=style_dtype, dtype=style_dtype, ENDO=True) #model.model.diffusion_model.Style = StyleMMDiT_HiDream() #model.model.diffusion_model.Style.Retrojector = Retrojector(model.model.diffusion_model.x_embedder.proj, pinv_dtype=style_dtype, dtype=style_dtype) sort_buffer = {} if (enable or force) and model.model.diffusion_model.__class__ == HiDreamImageTransformer2DModel: m = model.clone() m.model.diffusion_model.__class__ = HDModel m.model.diffusion_model.threshold_inv = False m.model.diffusion_model.final_layer.__class__ = HDLastLayer m.model.diffusion_model.final_layer.linear.weight.data = m.model.diffusion_model.final_layer.linear.weight.data.to(torch.bfloat16) m.model.diffusion_model.final_layer.linear.bias.data = m.model.diffusion_model.final_layer.linear.bias.data.to(torch.bfloat16) for i, block in enumerate(m.model.diffusion_model.double_stream_blocks): block.__class__ = HDBlock if i in double_stream_blocks: block.block.__class__ = HDBlockDouble else: block.block.__class__ = HDBlockDoubleNoMask block.block.attn1.__class__ = HDAttention block.block.ff_i.__class__ = HDMOEFeedForwardSwiGLU block.block.ff_i.shared_experts.__class__ = HDFeedForwardSwiGLU for j in range(len(block.block.ff_i.experts)): block.block.ff_i.experts[j].__class__ = HDFeedForwardSwiGLU 
block.block.ff_i.gate.__class__ = HDMoEGate block.block.ff_t.__class__ = HDFeedForwardSwiGLU block.block.attn1.single_stream = False block.block.attn1.double_stream = True block.block.sort_buffer = sort_buffer block.block.attn1.sort_buffer = sort_buffer block.idx = i block.block.idx = i block.block.attn1.idx = i for i, block in enumerate(m.model.diffusion_model.single_stream_blocks): block.__class__ = HDBlock if i in single_stream_blocks: block.block.__class__ = HDBlockSingle else: block.block.__class__ = HDBlockSingleNoMask block.block.attn1.__class__ = HDAttention block.block.ff_i.__class__ = HDMOEFeedForwardSwiGLU block.block.ff_i.shared_experts.__class__ = HDFeedForwardSwiGLU for j in range(len(block.block.ff_i.experts)): block.block.ff_i.experts[j].__class__ = HDFeedForwardSwiGLU block.block.ff_i.gate.__class__ = HDMoEGate block.block.attn1.single_stream = True block.block.attn1.double_stream = False block.block.sort_buffer = sort_buffer block.block.attn1.sort_buffer = sort_buffer block.idx = i block.block.idx = i block.block.attn1.idx = i elif not enable and model.model.diffusion_model.__class__ == HDModel: m = model.clone() m.model.diffusion_model.__class__ = HiDreamImageTransformer2DModel for i, block in enumerate(m.model.diffusion_model.double_stream_blocks): if i in double_stream_blocks: block.__class__ = HiDreamImageBlock block.block.__class__ = HiDreamImageTransformerBlock block.block.attn1.__class__ = HiDreamAttention block.idx = i for i, block in enumerate(m.model.diffusion_model.single_stream_blocks): if i in single_stream_blocks: block.__class__ = HiDreamImageBlock block.block.__class__ = HiDreamImageSingleTransformerBlock block.block.attn1.__class__ = HiDreamAttention block.idx = i #elif model.model.diffusion_model.__class__ != HDModel and model.model.diffusion_model.__class__ != HiDreamImageTransformer2DModel: elif model.model.diffusion_model.__class__ not in {HDModel, HiDreamImageTransformer2DModel}: raise ValueError("This node is for enabling 
class ReSD35PatcherAdvanced:
    """Node that swaps an SD3.5 diffusion model between its stock and regional ("Re") classes.

    Patching is done by reassigning `__class__` on the diffusion model and on
    each joint block. Blocks whose index is listed in `joint_blocks` become
    `ReJointBlock`; all others become `ReJointBlockNoMask`, whose forward
    always passes `mask=None` to the parent implementation.
    """

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "model"        : ("MODEL",),
                "joint_blocks" : ("STRING", {"default": "all", "multiline": True}),
                "style_dtype"  : (["default", "bfloat16", "float16", "float32", "float64"], {"default": "float64"}),
                "enable"       : ("BOOLEAN", {"default": True}),
            }
        }

    RETURN_TYPES = ("MODEL",)
    RETURN_NAMES = ("model",)
    CATEGORY     = "RES4LYF/model_patches"
    FUNCTION     = "main"

    def main(self, model, joint_blocks, style_dtype, enable=True, force=False):
        """Patch or unpatch `model` and return it as a one-element tuple.

        Args:
            model: model patcher whose `model.diffusion_model` is inspected and re-classed.
            joint_blocks: range string handed to `parse_range_string`; the result is
                used for `i in joint_blocks` membership tests on block indices.
            style_dtype: name of a torch dtype, or "default" for None.
            enable: patch when True, unpatch when False.
            force: patch even when `enable` is False (only if the model is still unpatched).

        Raises:
            ValueError: if the diffusion model is neither the stock nor the regional SD3.5 wrapper.
        """
        style_dtype = getattr(torch, style_dtype) if style_dtype != "default" else None

        # These attributes are written to the incoming model's diffusion model
        # (before any clone is made), regardless of which branch is taken below.
        diffusion_model = model.model.diffusion_model
        diffusion_model.style_dtype    = style_dtype
        diffusion_model.proj_weights   = None
        diffusion_model.y0_adain_embed = None
        diffusion_model.StyleWCT       = StyleWCT()
        diffusion_model.Retrojector    = Retrojector(diffusion_model.x_embedder.proj, pinv_dtype=style_dtype, dtype=style_dtype)

        joint_blocks = parse_range_string(joint_blocks)

        if (enable or force) and model.model.diffusion_model.__class__ == OpenAISignatureMMDITWrapper:
            m = model.clone()
            m.model.diffusion_model.__class__     = ReOpenAISignatureMMDITWrapper
            m.model.diffusion_model.threshold_inv = False

            for i, block in enumerate(m.model.diffusion_model.joint_blocks):
                if i in joint_blocks:
                    block.__class__ = ReJointBlock
                else:
                    # Previously this branch was the bare expression `ReJointBlockNoMask`,
                    # which did nothing and left unselected blocks as stock JointBlock.
                    # NOTE(review): confirm ReJointBlockNoMask.forward accepts the arguments
                    # the regional wrapper passes to its joint blocks.
                    block.__class__ = ReJointBlockNoMask
                block.idx = i

        elif not enable and model.model.diffusion_model.__class__ == ReOpenAISignatureMMDITWrapper:
            m = model.clone()
            m.model.diffusion_model.__class__ = OpenAISignatureMMDITWrapper

            for i, block in enumerate(m.model.diffusion_model.joint_blocks):
                block.__class__ = JointBlock
                block.idx = i

        elif model.model.diffusion_model.__class__ not in {ReOpenAISignatureMMDITWrapper, OpenAISignatureMMDITWrapper}:
            raise ValueError("This node is for enabling regional conditioning for SD3.5 only!")

        else:
            # Nothing to change (already patched while enabled, or already stock while
            # disabled): pass the input through, as the HiDream patcher does.
            m = model

        return (m,)
getattr(torch, style_dtype) if style_dtype != "default" else None model.model.diffusion_model.style_dtype = style_dtype model.model.diffusion_model.proj_weights = None model.model.diffusion_model.y0_adain_embed = None model.model.diffusion_model.StyleWCT = StyleWCT() model.model.diffusion_model.Retrojector = Retrojector(model.model.diffusion_model.init_x_linear, pinv_dtype=style_dtype, dtype=style_dtype) if (enable or force) and model.model.diffusion_model.__class__ == MMDiT: m = model.clone() m.model.diffusion_model.__class__ = ReMMDiT m.model.diffusion_model.threshold_inv = False for i, block in enumerate(m.model.diffusion_model.double_layers): block.__class__ = ReMMDiTBlock if i in doublelayer_blocks: block.attn.__class__ = ReDoubleAttention else: block.attn.__class__ = ReDoubleAttentionNoMask block.idx = i for i, block in enumerate(m.model.diffusion_model.single_layers): block.__class__ = ReDiTBlock if i in singlelayer_blocks: block.attn.__class__ = ReSingleAttention else: block.attn.__class__ = ReSingleAttentionNoMask block.idx = i elif not enable and model.model.diffusion_model.__class__ == ReMMDiT: m = model.clone() m.model.diffusion_model.__class__ = MMDiT for i, block in enumerate(m.model.diffusion_model.double_layers): block.__class__ = MMDiTBlock block.attn.__class__ = DoubleAttention block.idx = i for i, block in enumerate(m.model.diffusion_model.single_layers): block.__class__ = DiTBlock block.attn.__class__ = SingleAttention block.idx = i elif model.model.diffusion_model.__class__ not in {ReMMDiT, MMDiT}: raise ValueError("This node is for enabling regional conditioning for AuraFlow only!") m = model return (m,) class ReAuraPatcher(ReAuraPatcherAdvanced): @classmethod def INPUT_TYPES(cls): return { "required": { "model" : ("MODEL",), "style_dtype" : (["default", "bfloat16", "float16", "float32", "float64"], {"default": "float64"}), "enable" : ("BOOLEAN", {"default": True}), } } def main(self, model, style_dtype="float32", enable=True, force=False): 
class FluxGuidanceDisable:
    """Node that toggles Flux's `guidance_embed` flag and can zero the `y` input of the forward pass.

    NOTE: `zero_clip_L` is implemented by replacing `Flux.forward` on the class
    with a method bound to this model's diffusion model, so the patch is
    process-wide rather than scoped to the returned model.
    """

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "model":       ("MODEL",),
                "disable":     ("BOOLEAN", {"default": True}),
                "zero_clip_L": ("BOOLEAN", {"default": True}),
            }
        }

    RETURN_TYPES = ("MODEL",)
    RETURN_NAMES = ("model",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/model_patches"

    # Captured once at class-definition time so the patch can be undone later.
    original_forward = Flux.forward

    @staticmethod
    def new_forward(self, x, timestep, context, y, guidance, control=None, transformer_options=None, **kwargs):
        """Call the original Flux forward with `y` replaced by zeros of the same shape/dtype."""
        # A fresh dict per call instead of a shared mutable default.
        if transformer_options is None:
            transformer_options = {}
        y = torch.zeros_like(y)
        return FluxGuidanceDisable.original_forward(self, x, timestep, context, y, guidance, control, transformer_options, **kwargs)

    def main(self, model, disable=True, zero_clip_L=True):
        """Return a clone of `model` with guidance embedding toggled.

        Args:
            model: model patcher to clone.
            disable: when True, `params.guidance_embed` is set to False; otherwise True.
            zero_clip_L: when True, install the zero-`y` forward on `Flux`; when False,
                restore the forward captured in `original_forward`.
        """
        m = model.clone()
        m.model.diffusion_model.params.guidance_embed = not disable

        if zero_clip_L:
            Flux.forward = types.MethodType(FluxGuidanceDisable.new_forward, m.model.diffusion_model)
        else:
            # Without this branch a previously installed patch stayed active for the rest
            # of the process after the option was switched off. This mirrors the restore
            # done by FluxOrthoCFGPatcher; note it also reverts that node's patch.
            Flux.forward = FluxGuidanceDisable.original_forward

        return (m,)
comfy.supported_models.AuraFlow): self.multiplier = 1 timesteps = 1000 sampling_base = comfy.model_sampling.ModelSamplingDiscreteFlow sampling_type = comfy.model_sampling.CONST elif isinstance(m.model.model_config, comfy.supported_models.SD3): self.multiplier = 1000 timesteps = 1000 sampling_base = comfy.model_sampling.ModelSamplingDiscreteFlow sampling_type = comfy.model_sampling.CONST elif isinstance(m.model.model_config, comfy.supported_models.HiDream): self.multiplier = 1000 timesteps = 1000 sampling_base = comfy.model_sampling.ModelSamplingDiscreteFlow sampling_type = comfy.model_sampling.CONST elif isinstance(m.model.model_config, comfy.supported_models.HunyuanVideo): self.multiplier = 1000 timesteps = 1000 sampling_base = comfy.model_sampling.ModelSamplingDiscreteFlow sampling_type = comfy.model_sampling.CONST if isinstance(m.model.model_config, comfy.supported_models.WAN21_T2V) or isinstance(m.model.model_config, comfy.supported_models.WAN21_I2V): self.multiplier = 1000 timesteps = 1000 sampling_base = comfy.model_sampling.ModelSamplingDiscreteFlow sampling_type = comfy.model_sampling.CONST elif isinstance(m.model.model_config, comfy.supported_models.CosmosT2V) or isinstance(m.model.model_config, comfy.supported_models.CosmosI2V): self.multiplier = 1 timesteps = 1000 sampling_base = comfy.model_sampling.ModelSamplingContinuousEDM sampling_type = comfy.model_sampling.CONST elif isinstance(m.model.model_config, comfy.supported_models.LTXV): self.multiplier = 1000 # incorrect? 
class ModelSamplingAdvancedResolution:
    # this is used to set the "shift" using either exponential scaling (default for SD3.5M and Flux) or linear scaling (default for SD3.5L and SD3 2B beta)
    """Node that derives a sigma "shift" from the latent size and installs a matching sigma table.

    The shift is interpolated linearly between `base_shift` and `max_shift`
    from the latent's height * width, then a new model-sampling object is
    built for the detected model family and its `sigmas` buffer is replaced.
    """

    @classmethod
    def INPUT_TYPES(s):
        return {"required": {
                    "model":        ("MODEL",),
                    "scaling":      (["exponential", "linear"], {"default": 'exponential'}),
                    "max_shift":    ("FLOAT", {"default": 1.35, "min": -100.0, "max": 100.0, "step":0.01, "round": False}),
                    "base_shift":   ("FLOAT", {"default": 0.85, "min": -100.0, "max": 100.0, "step":0.01, "round": False}),
                    "latent_image": ("LATENT",),
                    }
               }

    RETURN_TYPES = ("MODEL",)
    RETURN_NAMES = ("model",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/model_shift"

    def sigma_exponential(self, timestep):
        """Exponentially shifted sigma for `timestep`, using the shift and multiplier stored by `main`."""
        return time_snr_shift_exponential(self.timestep_shift, timestep / self.multiplier)

    def sigma_linear(self, timestep):
        """Linearly shifted sigma for `timestep`, using the shift and multiplier stored by `main`."""
        return time_snr_shift_linear(self.timestep_shift, timestep / self.multiplier)

    def main(self, model, scaling, max_shift, base_shift, latent_image):
        """Return a clone of `model` whose model_sampling uses a resolution-dependent shift.

        Raises:
            ValueError: if the model family is not recognised, or `scaling` is not
                "exponential" or "linear".
        """
        m = model.clone()

        height, width = latent_image['samples'].shape[-2:]

        # Linear interpolation of the shift between the anchor points x1 and x2.
        # NOTE(review): height/width are read from the latent tensor, yet the product is
        # divided by 8*8*2*2 -- confirm whether pixel dimensions were intended here.
        x1 = 256
        x2 = 4096
        mm = (max_shift - base_shift) / (x2 - x1)
        b = base_shift - mm * x1
        shift = (1 * width * height / (8 * 8 * 2 * 2)) * mm + b

        self.timestep_shift = shift
        self.multiplier     = 1000
        timesteps           = 1000
        sampling_base       = None   # stays None when no model family below matches

        if isinstance(m.model.model_config, comfy.supported_models.Flux) or isinstance(m.model.model_config, comfy.supported_models.FluxSchnell) or isinstance(m.model.model_config, comfy.supported_models.Chroma):
            self.multiplier = 1
            timesteps       = 10000
            sampling_base   = comfy.model_sampling.ModelSamplingFlux
            sampling_type   = comfy.model_sampling.CONST

        elif isinstance(m.model.model_config, comfy.supported_models.AuraFlow):
            self.multiplier = 1
            timesteps       = 1000
            sampling_base   = comfy.model_sampling.ModelSamplingDiscreteFlow
            sampling_type   = comfy.model_sampling.CONST

        elif isinstance(m.model.model_config, comfy.supported_models.SD3):
            self.multiplier = 1000
            timesteps       = 1000
            sampling_base   = comfy.model_sampling.ModelSamplingDiscreteFlow
            sampling_type   = comfy.model_sampling.CONST

        elif isinstance(m.model.model_config, comfy.supported_models.HiDream):
            self.multiplier = 1000
            timesteps       = 1000
            sampling_base   = comfy.model_sampling.ModelSamplingDiscreteFlow
            sampling_type   = comfy.model_sampling.CONST

        elif isinstance(m.model.model_config, comfy.supported_models.HunyuanVideo):
            self.multiplier = 1000
            timesteps       = 1000
            sampling_base   = comfy.model_sampling.ModelSamplingDiscreteFlow
            sampling_type   = comfy.model_sampling.CONST

        # Second, independent chain (kept as a separate `if`, as in the original).
        if isinstance(m.model.model_config, comfy.supported_models.WAN21_T2V) or isinstance(m.model.model_config, comfy.supported_models.WAN21_I2V):
            self.multiplier = 1000
            timesteps       = 1000
            sampling_base   = comfy.model_sampling.ModelSamplingDiscreteFlow
            sampling_type   = comfy.model_sampling.CONST

        elif isinstance(m.model.model_config, comfy.supported_models.CosmosT2V) or isinstance(m.model.model_config, comfy.supported_models.CosmosI2V):
            self.multiplier = 1
            timesteps       = 1000
            sampling_base   = comfy.model_sampling.ModelSamplingContinuousEDM
            sampling_type   = comfy.model_sampling.CONST

        elif isinstance(m.model.model_config, comfy.supported_models.LTXV):
            self.multiplier = 1000
            timesteps       = 1000
            sampling_base   = comfy.model_sampling.ModelSamplingFlux
            sampling_type   = comfy.model_sampling.CONST

        # Same guard ModelSamplingAdvanced uses; previously an unsupported model hit an
        # unbound-local error on the class statement below.
        if sampling_base is None:
            raise ValueError("Model not supported by ModelSamplingAdvancedResolution")

        class ModelSamplingAdvanced(sampling_base, sampling_type):
            pass

        m.object_patches['model_sampling'] = m.model.model_sampling = ModelSamplingAdvanced(m.model.model_config)

        # Written straight into __dict__, bypassing normal attribute assignment.
        m.model.model_sampling.__dict__['shift']      = self.timestep_shift
        m.model.model_sampling.__dict__['multiplier'] = self.multiplier

        s_range = torch.arange(1, timesteps + 1, 1).to(torch.float64)
        if scaling == "exponential":
            ts = self.sigma_exponential((s_range / timesteps) * self.multiplier)
        elif scaling == "linear":
            ts = self.sigma_linear((s_range / timesteps) * self.multiplier)
        else:
            raise ValueError(f"Unknown scaling mode: {scaling!r}")

        m.model.model_sampling.register_buffer('sigmas', ts)
        m.object_patches['model_sampling'].sigmas = m.model.model_sampling.sigmas

        return (m,)
def save(self, model, filename_prefix, prompt=None, extra_pnginfo=None): save_checkpoint( model, clip = None, vae = None, filename_prefix = filename_prefix, output_dir = self.output_dir, prompt = prompt, extra_pnginfo = extra_pnginfo, ) return {} def save_checkpoint( model, clip = None, vae = None, clip_vision = None, filename_prefix = None, output_dir = None, prompt = None, extra_pnginfo = None, ): full_output_folder, filename, counter, subfolder, filename_prefix = folder_paths.get_save_image_path(filename_prefix, output_dir) prompt_info = "" if prompt is not None: prompt_info = json.dumps(prompt) metadata = {} enable_modelspec = True if isinstance(model.model, comfy.model_base.SDXL): if isinstance(model.model, comfy.model_base.SDXL_instructpix2pix): metadata["modelspec.architecture"] = "stable-diffusion-xl-v1-edit" else: metadata["modelspec.architecture"] = "stable-diffusion-xl-v1-base" elif isinstance(model.model, comfy.model_base.SDXLRefiner): metadata["modelspec.architecture"] = "stable-diffusion-xl-v1-refiner" elif isinstance(model.model, comfy.model_base.SVD_img2vid): metadata["modelspec.architecture"] = "stable-video-diffusion-img2vid-v1" elif isinstance(model.model, comfy.model_base.SD3): metadata["modelspec.architecture"] = "stable-diffusion-v3-medium" #TODO: other SD3 variants else: enable_modelspec = False if enable_modelspec: metadata["modelspec.sai_model_spec"] = "1.0.0" metadata["modelspec.implementation"] = "sgm" metadata["modelspec.title"] = "{} {}".format(filename, counter) #TODO: # "stable-diffusion-v1", "stable-diffusion-v1-inpainting", "stable-diffusion-v2-512", # "stable-diffusion-v2-768-v", "stable-diffusion-v2-unclip-l", "stable-diffusion-v2-unclip-h", # "v2-inpainting" extra_keys = {} model_sampling = model.get_model_object("model_sampling") if isinstance(model_sampling, comfy.model_sampling.ModelSamplingContinuousEDM): if isinstance(model_sampling, comfy.model_sampling.V_PREDICTION): extra_keys["edm_vpred.sigma_max"] = 
# Code adapted from https://github.com/kijai/ComfyUI-KJNodes
class TorchCompileModelFluxAdvanced:
    """Node that wraps selected Flux double/single blocks with `torch.compile` via object patches.

    NOTE: `_compiled` is per node instance; once it is True, later calls to
    `main` return a plain clone without adding compile patches.
    """

    def __init__(self):
        self._compiled = False

    @classmethod
    def INPUT_TYPES(s):
        return {"required": {
                    "model":         ("MODEL",),
                    "backend":       (["inductor", "cudagraphs"],),
                    "fullgraph":     ("BOOLEAN", {"default": False, "tooltip": "Enable full graph mode"}),
                    "mode":          (["default", "max-autotune", "max-autotune-no-cudagraphs", "reduce-overhead"], {"default": "default"}),
                    "double_blocks": ("STRING", {"default": "0-18", "multiline": True}),
                    "single_blocks": ("STRING", {"default": "0-37", "multiline": True}),
                    "dynamic":       ("BOOLEAN", {"default": False, "tooltip": "Enable dynamic mode"}),
                }}

    RETURN_TYPES = ("MODEL",)
    RETURN_NAMES = ("model",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/model_patches"

    def parse_blocks(self, blocks_str):
        """Parse a block selection such as "0-3, 7" into a list of ints.

        Entries are separated by commas or newlines (the widget is multiline).
        "a-b" denotes the inclusive range a..b. Blank entries, e.g. from a
        trailing comma or an empty string, are ignored instead of raising.

        Raises:
            ValueError: if an entry is not an integer or an "a-b" pair of integers.
        """
        blocks = []
        for part in blocks_str.replace("\n", ",").split(","):
            part = part.strip()
            if not part:
                continue
            if '-' in part:
                start, end = map(int, part.split('-'))
                blocks.extend(range(start, end + 1))
            else:
                blocks.append(int(part))
        return blocks

    def main(self,
            model,
            backend       = "inductor",
            mode          = "default",
            fullgraph     = False,
            single_blocks = "0-37",
            double_blocks = "0-18",
            dynamic       = False,
            ):
        """Return a clone of `model` with the selected blocks replaced by compiled wrappers.

        Raises:
            ValueError: if a block selection string cannot be parsed.
            RuntimeError: if patching fails; the underlying error is attached as the cause.
        """
        # Sets give constant-time membership tests inside the loops below.
        single_block_set = set(self.parse_blocks(single_blocks))
        double_block_set = set(self.parse_blocks(double_blocks))

        m = model.clone()
        diffusion_model = m.get_model_object("diffusion_model")

        if not self._compiled:
            try:
                for i, block in enumerate(diffusion_model.double_blocks):
                    if i in double_block_set:
                        m.add_object_patch(f"diffusion_model.double_blocks.{i}", torch.compile(block, mode=mode, dynamic=dynamic, fullgraph=fullgraph, backend=backend))
                for i, block in enumerate(diffusion_model.single_blocks):
                    if i in single_block_set:
                        m.add_object_patch(f"diffusion_model.single_blocks.{i}", torch.compile(block, mode=mode, dynamic=dynamic, fullgraph=fullgraph, backend=backend))
                self._compiled = True
                compile_settings = {
                    "backend":   backend,
                    "mode":      mode,
                    "fullgraph": fullgraph,
                    "dynamic":   dynamic,
                }
                setattr(m.model, "compile_settings", compile_settings)
            except Exception as err:
                # Chain the cause so the real failure stays visible; a bare `except:`
                # would also have converted KeyboardInterrupt/SystemExit.
                raise RuntimeError("Failed to compile model. Verify that this is a Flux model!") from err

        return (m, )
fullgraph=fullgraph, backend=backend)) for i, block in enumerate(diffusion_model.single_layers): m.add_object_patch(f"diffusion_model.single_layers.{i}", torch.compile(block, mode=mode, dynamic=dynamic, fullgraph=fullgraph, backend=backend)) self._compiled = True compile_settings = { "backend": backend, "mode": mode, "fullgraph": fullgraph, "dynamic": dynamic, } setattr(m.model, "compile_settings", compile_settings) except: raise RuntimeError("Failed to compile model. Verify that this is an AuraFlow model!") return (m, ) class TorchCompileModelSD35: def __init__(self): self._compiled = False @classmethod def INPUT_TYPES(s): return {"required": { "model": ("MODEL",), "backend": (["inductor", "cudagraphs"],), "fullgraph": ("BOOLEAN", {"default": False, "tooltip": "Enable full graph mode"}), "mode": (COMPILE_MODES , {"default": "default"}), "dynamic": ("BOOLEAN", {"default": False, "tooltip": "Enable dynamic mode"}), "dynamo_cache_size_limit": ("INT", {"default": 64, "min": 0, "max": 1024, "step": 1, "tooltip": "torch._dynamo.config.cache_size_limit"}), }} RETURN_TYPES = ("MODEL",) RETURN_NAMES = ("model",) FUNCTION = "main" CATEGORY = "RES4LYF/model_patches" def main(self, model, backend = "inductor", mode = "default", fullgraph = False, dynamic = False, dynamo_cache_size_limit = 64, ): m = model.clone() diffusion_model = m.get_model_object("diffusion_model") torch._dynamo.config.cache_size_limit = dynamo_cache_size_limit if not self._compiled: try: for i, block in enumerate(diffusion_model.joint_blocks): m.add_object_patch(f"diffusion_model.joint_blocks.{i}", torch.compile(block, mode=mode, dynamic=dynamic, fullgraph=fullgraph, backend=backend)) self._compiled = True compile_settings = { "backend" : backend, "mode" : mode, "fullgraph": fullgraph, "dynamic" : dynamic, } setattr(m.model, "compile_settings", compile_settings) except: raise RuntimeError("Failed to compile model. 
Verify that this is a SD3.5 model!") return (m, ) class ClownpileModelWanVideo: def __init__(self): self._compiled = False @classmethod def INPUT_TYPES(s): return { "required": { "model" : ("MODEL",), "backend" : (["inductor","cudagraphs"], {"default" : "inductor"}), "fullgraph" : ("BOOLEAN", {"default" : False, "tooltip" : "Enable full graph mode"}), "mode" : (COMPILE_MODES, {"default": "default"}), "dynamic" : ("BOOLEAN", {"default" : False, "tooltip" : "Enable dynamic mode"}), "dynamo_cache_size_limit" : ("INT", {"default" : 64, "min" : 0, "max": 1024, "step": 1, "tooltip": "torch._dynamo.config.cache_size_limit"}), #"compile_self_attn_blocks" : ("INT", {"default" : 0, "min" : 0, "max": 100, "step" : 1, "tooltip": "Maximum blocks to compile. These use huge amounts of VRAM with large attention masks."}), "skip_self_attn_blocks" : ("STRING", {"default" : "0,1,2,3,4,5,6,7,8,9,", "multiline": True, "tooltip": "For WAN only: select self-attn blocks to disable. Due to the size of the self-attn masks, VRAM required to compile blocks using regional WAN is excessive. 
List any blocks selected in the ReWanPatcher node."}), "compile_transformer_blocks": ("BOOLEAN", {"default" : True, "tooltip" : "Compile all transformer blocks"}), "force_recompile" : ("BOOLEAN", {"default": False, "tooltip": "Force recompile."}), }, } RETURN_TYPES = ("MODEL",) FUNCTION = "patch" CATEGORY = "RES4LYF/model" EXPERIMENTAL = True def patch(self, model, backend, fullgraph, mode, dynamic, dynamo_cache_size_limit, skip_self_attn_blocks, compile_transformer_blocks, force_recompile): m = model.clone() diffusion_model = m.get_model_object("diffusion_model") torch._dynamo.config.cache_size_limit = dynamo_cache_size_limit skip_self_attn_blocks = parse_range_string(skip_self_attn_blocks) if force_recompile: self._compiled = False if not self._compiled: try: if compile_transformer_blocks: for i, block in enumerate(diffusion_model.blocks): #if i % 2 == 1: if i not in skip_self_attn_blocks: compiled_block = torch.compile(block, fullgraph=fullgraph, dynamic=dynamic, backend=backend, mode=mode) m.add_object_patch(f"diffusion_model.blocks.{i}", compiled_block) #block.self_attn = torch.compile(block.self_attn, fullgraph=fullgraph, dynamic=dynamic, backend=backend, mode=mode) #block.cross_attn = torch.compile(block.cross_attn, fullgraph=fullgraph, dynamic=dynamic, backend=backend, mode=mode) #if i < compile_self_attn_blocks: # block.self_attn = torch.compile(block.self_attn, fullgraph=fullgraph, dynamic=dynamic, backend=backend, mode=mode) # #compiled_block = torch.compile(block, fullgraph=fullgraph, dynamic=dynamic, backend=backend, mode=mode) # #m.add_object_patch(f"diffusion_model.blocks.{i}", compiled_block) #block.cross_attn = torch.compile(block.cross_attn, fullgraph=fullgraph, dynamic=dynamic, backend=backend, mode=mode) self._compiled = True compile_settings = { "backend": backend, "mode": mode, "fullgraph": fullgraph, "dynamic": dynamic, } setattr(m.model, "compile_settings", compile_settings) except: raise RuntimeError("Failed to compile model. 
class LatentNoised:
    """ComfyUI node that adds noise to a latent, optionally restricted by a mask.

    The noise is either generated with ``prepare_noise`` or taken from the
    optional ``latent_noise`` input.  Neither input latent is modified: the
    result is returned in a shallow copy of ``latent_image`` whose
    ``"samples"`` entry is a new tensor.
    """

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "add_noise":       ("BOOLEAN", {"default": True}),
                "noise_is_latent": ("BOOLEAN", {"default": False}),
                "noise_type":      (NOISE_GENERATOR_NAMES, ),
                "alpha":           ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step":0.1, "round": 0.01}),
                "k":               ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step":2.0, "round": 0.01}),
                "noise_seed":      ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}),
                "latent_image":    ("LATENT", ),
                "noise_strength":  ("FLOAT", {"default": 1.0, "min": -20.0, "max": 20.0, "step": 0.01, "round": 0.01}),
                "normalize":       (["false", "true"], {"default": "false"}),
            },
            "optional": {
                "latent_noise":    ("LATENT", ),
                "mask":            ("MASK", ),
            }
        }

    RETURN_TYPES = ("LATENT",)
    RETURN_NAMES = ("latent_noised",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/noise"

    @staticmethod
    def _fit_mask(mask, batch, channels, height, width):
        """Resize ``mask`` to ``(batch, channels, height, width)``.

        The mask is flattened to ``(-1, 1, H, W)``, bilinearly resized to
        ``height`` x ``width``, tiled or truncated along dim 0 to ``batch``
        entries, and broadcast over ``channels``.
        """
        mask = F.interpolate(
            mask.reshape((-1, 1, mask.shape[-2], mask.shape[-1])),
            size=(height, width),
            mode="bilinear",
        )
        if mask.shape[0] < batch:
            # Tile whole copies until at least `batch` entries exist, then trim.
            mask = mask.repeat((batch - 1) // mask.shape[0] + 1, 1, 1, 1)[:batch]
        elif mask.shape[0] > batch:
            mask = mask[:batch]
        return mask.expand((-1, channels, -1, -1))

    def main(self,
            add_noise,
            noise_is_latent,
            noise_type,
            noise_seed,
            alpha,
            k,
            latent_image,
            noise_strength,
            normalize,
            latent_noise = None,
            mask         = None
            ):
        """Return ``latent_image`` with noise added to its samples.

        Args:
            add_noise: When false, the base noise is all zeros.
            noise_is_latent: When true, the samples are added to the noise and
                the sum is shifted to zero mean and scaled to unit std before
                ``noise_strength`` is applied.
            noise_type, alpha, k: Forwarded to ``prepare_noise`` when no
                ``latent_noise`` is supplied.
            noise_seed: Seeds torch's global RNG and ``prepare_noise``.
            latent_image: Latent dict; its ``"samples"`` tensor is the base.
            noise_strength: Multiplier applied to the final noise.
            normalize: ``"true"`` rescales the noise by the samples' std and
                shifts it by the samples' mean.
            latent_noise: Optional latent dict whose ``"samples"`` is used as
                the noise instead of generating it.
            mask: Optional mask; the noise is multiplied by it after resizing.

        Returns:
            One-element tuple holding the noised latent dict.
        """
        latent_out = latent_image.copy()
        samples    = latent_out["samples"].clone()

        torch.manual_seed(noise_seed)

        if not add_noise:
            noise = torch.zeros(samples.size(), dtype=samples.dtype, layout=samples.layout, device="cpu")
        elif latent_noise is None:
            batch_inds = latent_out["batch_index"] if "batch_index" in latent_out else None
            noise = prepare_noise(samples, noise_seed, noise_type, batch_inds, alpha, k)
        else:
            noise = latent_noise["samples"]

        if normalize == "true":
            noise = noise * samples.std() + samples.mean()

        if noise_is_latent:
            # Out-of-place on purpose: `noise` may still be the tensor owned by
            # the `latent_noise` input, which must not be overwritten.
            noise = noise + samples.cpu()
            noise = noise - noise.mean()
            noise = noise / noise.std()

        noise = noise * noise_strength

        if mask is not None:
            if samples.ndim == 5:
                b, c, t, h, w = samples.shape
                fitted = self._fit_mask(mask, b, c, h, w)
                # Repeat the spatial mask along the third axis of the samples.
                fitted = fitted.unsqueeze(2).expand(-1, -1, t, -1, -1)
            else:
                fitted = self._fit_mask(mask, samples.shape[0], samples.shape[1], samples.shape[2], samples.shape[3])
            # The mask multiplies `noise`, so it has to live on noise's device.
            noise = fitted.to(noise.device) * noise

        latent_out["samples"] = samples.cpu() + noise
        return (latent_out,)
class LatentUpscaleBy_state_info:
    """ComfyUI node that rescales a latent by a factor via ``apply_to_state_info_tensors``.

    The per-tensor resize is delegated to ``comfy.utils.common_upscale``.
    """
    upscale_methods = ["nearest-exact", "bilinear", "area", "bicubic", "bislerp"]

    @classmethod
    def INPUT_TYPES(s):
        return {"required": { "samples": ("LATENT",), "upscale_method": (s.upscale_methods,),
                              "scale_by": ("FLOAT", {"default": 1.5, "min": 0.01, "max": 8.0, "step": 0.01}),}}
    RETURN_TYPES = ("LATENT",)
    FUNCTION = "op"

    CATEGORY = "latent"

    # Must be a staticmethod: it takes no `self`, yet op() reaches it through
    # the instance. As a plain function the resulting bound method would
    # receive the instance as `tensor` plus one argument too many.
    @staticmethod
    def _upscale_tensor(tensor, upscale_method, scale_by):
        """Resize the last two dims of ``tensor`` by ``scale_by`` (rounded)."""
        width  = round(tensor.shape[-1] * scale_by)
        height = round(tensor.shape[-2] * scale_by)
        tensor = comfy.utils.common_upscale(tensor, width, height, upscale_method, "disabled")
        return tensor

    def op(self, samples, upscale_method, scale_by):
        """Apply ``_upscale_tensor`` to ``samples`` and return the result in a 1-tuple.

        ``apply_to_state_info_tensors`` receives the latent, the shape of its
        ``"samples"`` tensor, the resize callback and the callback's arguments.
        """
        ref_shape = samples["samples"].shape
        samples_out = apply_to_state_info_tensors(samples, ref_shape, self._upscale_tensor, upscale_method, scale_by)
        return (samples_out,)
class latent_transfer_state_info:
    """ComfyUI node that copies ``state_info`` from one latent onto another."""
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "latent_to":   ("LATENT", ),
                "latent_from": ("LATENT", ),
            },
        }

    RETURN_TYPES = ("LATENT",)
    RETURN_NAMES = ("latent",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/latents"

    def main(self, latent_to, latent_from):
        """Return a copy of ``latent_to`` carrying ``latent_from``'s state info.

        Args:
            latent_to: Latent dict that supplies every key except ``state_info``.
            latent_from: Latent dict whose ``state_info`` is deep-copied. A
                missing ``state_info`` is treated as an empty dict.

        Returns:
            One-element tuple with the new latent dict. Neither input dict is
            modified.
        """
        # Shallow copy so the caller's dict does not gain or lose keys.
        latent_out = latent_to.copy()
        latent_out['state_info'] = copy.deepcopy(latent_from.get('state_info', {}))
        return (latent_out,)
class latent_batch:
    """ComfyUI node that repeats a single latent ``batch_size`` times."""
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "latent":     ("LATENT", ),
                "batch_size": ("INT", {"default": 0, "min": -10000, "max": 10000}),
            },
        }

    RETURN_TYPES = ("LATENT",)
    RETURN_NAMES = ("latent_batch",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/latents"

    def main(self, latent, batch_size):
        """Build a batch whose every entry is the input latent.

        Args:
            latent: Latent dict; ``latent["samples"]`` must be 4-D and its
                first dimension must be 1 for the per-entry assignment to
                broadcast.
            batch_size: Number of copies in the output batch.

        Returns:
            One-element tuple with ``{"samples": tensor}`` where the tensor has
            shape ``(batch_size, c, h, w)`` and the input's dtype and device.
        """
        latent = latent["samples"]
        b, c, h, w = latent.shape
        # Use the input's own channel count and dtype rather than assuming a
        # 4-channel default-float latent.
        batch_latents = torch.zeros([batch_size, c, h, w], dtype=latent.dtype, device=latent.device)
        for i in range(batch_size):
            batch_latents[i] = latent
        return ({"samples": batch_latents}, )
class Frame_Select_Latent_Raw:
    """ComfyUI node that extracts one frame from a latent's ``raw_x`` state tensor."""
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                # main() reads frames['state_info']['raw_x'], which only a
                # LATENT dict can provide, so the socket is typed LATENT.
                "frames": ("LATENT",),
                "select": ("INT",  {"default": 0, "min": 0, "max": 10000}),
            },
            "optional": {
            },
        }

    RETURN_TYPES = ("LATENT",)
    RETURN_NAMES = ("latent",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/latents"

    def main(self, frames=None, select=0):
        """Return frame ``select`` of ``frames['state_info']['raw_x']``.

        The tensor is indexed along its third axis and that axis is restored
        with length 1, so a 5-D input yields a 5-D output. The result is a
        clone and is returned as a bare tensor inside a 1-tuple (not wrapped
        in a latent dict).
        """
        raw_x = frames['state_info']['raw_x']
        frame = raw_x[:, :, select, :, :].clone().unsqueeze(dim=2)
        return (frame,)
class Frames_Concat_Masks:
    """ComfyUI node that joins two to ten masks along dim -3 into one mask."""
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls):
        # frames_0 and frames_1 are mandatory; frames_2 .. frames_9 are optional.
        required = {f"frames_{i}": ("MASK",) for i in range(0, 2)}
        optional = {f"frames_{i}": ("MASK",) for i in range(2, 10)}
        return {
            "required": required,
            "optional": optional,
        }

    RETURN_TYPES = ("MASK",)
    RETURN_NAMES = ("temporal_mask",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/masks"

    def main(self, frames_0, frames_1, frames_2=None, frames_3=None, frames_4=None, frames_5=None, frames_6=None, frames_7=None, frames_8=None, frames_9=None):
        """Concatenate the supplied masks in argument order.

        ``frames_0`` and ``frames_1`` are always joined; each later input is
        appended only when it is not ``None``. A 3-D result gets a leading
        dimension of size 1 before it is returned in a 1-tuple.
        """
        joined = torch.cat((frames_0, frames_1), dim=-3).clone()

        extras = (frames_2, frames_3, frames_4, frames_5, frames_6, frames_7, frames_8, frames_9)
        for extra in extras:
            if extra is None:
                continue
            joined = torch.cat((joined, extra), dim=-3).clone()

        if joined.ndim == 3:
            joined.unsqueeze_(0)

        return (joined,)
class Frames_Masks_ZeroOut:
    """ComfyUI node that overwrites one frame of a temporal mask with a constant."""
    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "temporal_mask":  ("MASK",),
                "zero_out_frame": ("INT", {"default": 0, "min": 0, "max": 10000, "step": 1}),
            },
            "optional": {
            },
        }

    RETURN_TYPES = ("MASK",)
    RETURN_NAMES = ("temporal_mask",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/masks"

    def main(self, temporal_mask, zero_out_frame):
        """Return a copy of ``temporal_mask`` with one frame filled with 1.0.

        Args:
            temporal_mask: Mask tensor; the frame axis is dim -3.
            zero_out_frame: Index of the frame to overwrite. An index past the
                end selects an empty slice, so nothing is written.

        Returns:
            One-element tuple with the edited mask. The input tensor is left
            untouched.
        """
        # Work on a clone so the tensor held by the upstream node is not edited.
        temporal_mask = temporal_mask.clone()
        # NOTE(review): the node is named "ZeroOut" but writes 1.0 — presumably
        # this follows the package's mask convention; confirm before changing.
        temporal_mask[..., zero_out_frame:zero_out_frame+1, :, :] = 1.0
        return (temporal_mask,)
("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.001}), "magnitude_luminosity": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.001}), "magnitude_cyan_red": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.001}), "magnitude_lime_purple": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.001}), "magnitude_pattern_structure": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.001}), "latent_0_normal": ("BOOLEAN", {"default": True}), "latent_1_normal": ("BOOLEAN", {"default": True}), "latent_out_normal": ("BOOLEAN", {"default": True}), "latent_0_stdize": ("BOOLEAN", {"default": True}), "latent_1_stdize": ("BOOLEAN", {"default": True}), "latent_out_stdize": ("BOOLEAN", {"default": True}), "latent_0_meancenter": ("BOOLEAN", {"default": True}), "latent_1_meancenter": ("BOOLEAN", {"default": True}), "latent_out_meancenter": ("BOOLEAN", {"default": True}), }, "optional": { "phase_mix_powers": ("SIGMAS", ), "magnitude_mix_powers": ("SIGMAS", ), "phase_luminositys": ("SIGMAS", ), "phase_cyan_reds": ("SIGMAS", ), "phase_lime_purples": ("SIGMAS", ), "phase_pattern_structures": ("SIGMAS", ), "magnitude_luminositys": ("SIGMAS", ), "magnitude_cyan_reds": ("SIGMAS", ), "magnitude_lime_purples": ("SIGMAS", ), "magnitude_pattern_structures": ("SIGMAS", ), } } RETURN_TYPES = ("LATENT",) RETURN_NAMES = ("latent",) FUNCTION = "main" CATEGORY = "RES4LYF/latents" @staticmethod def latent_repeat(latent, batch_size): b, c, h, w = latent.shape batch_latents = torch.zeros((batch_size, c, h, w), dtype=latent.dtype, layout=latent.layout, device=latent.device) for i in range(batch_size): batch_latents[i] = latent return batch_latents @staticmethod def mix_latent_phase_magnitude(latent_0, latent_1, power_phase, power_magnitude, phase_luminosity, phase_cyan_red, phase_lime_purple, phase_pattern_structure, magnitude_luminosity, magnitude_cyan_red, magnitude_lime_purple, 
magnitude_pattern_structure, ): dtype = torch.promote_types(latent_0.dtype, latent_1.dtype) # big accuracy problems with fp32 FFT! let's avoid that latent_0 = latent_0.double() latent_1 = latent_1.double() latent_0_fft = torch.fft.fft2(latent_0) latent_1_fft = torch.fft.fft2(latent_1) latent_0_phase = torch.angle(latent_0_fft) latent_1_phase = torch.angle(latent_1_fft) latent_0_magnitude = torch.abs(latent_0_fft) latent_1_magnitude = torch.abs(latent_1_fft) # DC corruption...? handle separately?? #dc_index = (0, 0) #dc_0 = latent_0_fft[:, :, dc_index[0], dc_index[1]] #dc_1 = latent_1_fft[:, :, dc_index[0], dc_index[1]] #mixed_dc = dc_0 * 0.5 + dc_1 * 0.5 #mixed_dc = dc_0 * (1 - phase_weight) + dc_1 * phase_weight # create complex FFT using a weighted mix of phases chan_weights_phase = [w for w in [phase_luminosity, phase_cyan_red, phase_lime_purple, phase_pattern_structure ]] chan_weights_magnitude = [w for w in [magnitude_luminosity, magnitude_cyan_red, magnitude_lime_purple, magnitude_pattern_structure]] mixed_phase = torch.zeros_like(latent_0, dtype=latent_0.dtype, layout=latent_0.layout, device=latent_0.device) mixed_magnitude = torch.zeros_like(latent_0, dtype=latent_0.dtype, layout=latent_0.layout, device=latent_0.device) for i in range(4): mixed_phase[:, i] = ( (latent_0_phase[:,i] * (1-chan_weights_phase[i])) ** power_phase + (latent_1_phase[:,i] * chan_weights_phase[i]) ** power_phase) ** (1/power_phase) mixed_magnitude[:, i] = ( (latent_0_magnitude[:,i] * (1-chan_weights_magnitude[i])) ** power_magnitude + (latent_1_magnitude[:,i] * chan_weights_magnitude[i]) ** power_magnitude) ** (1/power_magnitude) new_fft = mixed_magnitude * torch.exp(1j * mixed_phase) #new_fft[:, :, dc_index[0], dc_index[1]] = mixed_dc # inverse FFT to convert back to spatial domain mixed_phase_magnitude = torch.fft.ifft2(new_fft).real return mixed_phase_magnitude.to(dtype) def main(self, #batch_size, latent_1_repeat, latent_0_batch, latent_1_batch, latent_0_normal, latent_1_normal, 
latent_out_normal, latent_0_stdize, latent_1_stdize, latent_out_stdize, latent_0_meancenter, latent_1_meancenter, latent_out_meancenter, phase_mix_power, magnitude_mix_power, phase_luminosity, phase_cyan_red, phase_lime_purple, phase_pattern_structure, magnitude_luminosity, magnitude_cyan_red, magnitude_lime_purple, magnitude_pattern_structure, phase_mix_powers = None, magnitude_mix_powers = None, phase_luminositys = None, phase_cyan_reds = None, phase_lime_purples = None, phase_pattern_structures = None, magnitude_luminositys = None, magnitude_cyan_reds = None, magnitude_lime_purples = None, magnitude_pattern_structures = None ): latent_0_batch = latent_0_batch["samples"].double() latent_1_batch = latent_1_batch["samples"].double().to(latent_0_batch.device) #if batch_size == 0: batch_size = latent_0_batch.shape[0] if latent_1_batch.shape[0] == 1: latent_1_batch = self.latent_repeat(latent_1_batch, batch_size) magnitude_mix_powers = initialize_or_scale(magnitude_mix_powers, magnitude_mix_power, batch_size) phase_mix_powers = initialize_or_scale(phase_mix_powers, phase_mix_power, batch_size) phase_luminositys = initialize_or_scale(phase_luminositys, phase_luminosity, batch_size) phase_cyan_reds = initialize_or_scale(phase_cyan_reds, phase_cyan_red, batch_size) phase_lime_purples = initialize_or_scale(phase_lime_purples, phase_lime_purple, batch_size) phase_pattern_structures = initialize_or_scale(phase_pattern_structures, phase_pattern_structure, batch_size) magnitude_luminositys = initialize_or_scale(magnitude_luminositys, magnitude_luminosity, batch_size) magnitude_cyan_reds = initialize_or_scale(magnitude_cyan_reds, magnitude_cyan_red, batch_size) magnitude_lime_purples = initialize_or_scale(magnitude_lime_purples, magnitude_lime_purple, batch_size) magnitude_pattern_structures = initialize_or_scale(magnitude_pattern_structures, magnitude_pattern_structure, batch_size) mixed_phase_magnitude_batch = torch.zeros(latent_0_batch.shape, device=latent_0_batch.device) 
if latent_0_normal == True: latent_0_batch = latent_normalize_channels(latent_0_batch) if latent_1_normal == True: latent_1_batch = latent_normalize_channels(latent_1_batch) if latent_0_meancenter == True: latent_0_batch = latent_meancenter_channels(latent_0_batch) if latent_1_meancenter == True: latent_1_batch = latent_meancenter_channels(latent_1_batch) if latent_0_stdize == True: latent_0_batch = latent_stdize_channels(latent_0_batch) if latent_1_stdize == True: latent_1_batch = latent_stdize_channels(latent_1_batch) for i in range(batch_size): mixed_phase_magnitude = self.mix_latent_phase_magnitude(latent_0_batch[i:i+1], latent_1_batch[i:i+1], phase_mix_powers[i] .item(), magnitude_mix_powers[i] .item(), phase_luminositys[i] .item(), phase_cyan_reds[i] .item(), phase_lime_purples[i] .item(), phase_pattern_structures[i] .item(), magnitude_luminositys[i] .item(), magnitude_cyan_reds[i] .item(), magnitude_lime_purples[i] .item(), magnitude_pattern_structures[i].item() ) if latent_out_normal == True: mixed_phase_magnitude = latent_normalize_channels(mixed_phase_magnitude) if latent_out_stdize == True: mixed_phase_magnitude = latent_stdize_channels(mixed_phase_magnitude) if latent_out_meancenter == True: mixed_phase_magnitude = latent_meancenter_channels(mixed_phase_magnitude) mixed_phase_magnitude_batch[i, :, :, :] = mixed_phase_magnitude return ({"samples": mixed_phase_magnitude_batch}, ) class LatentPhaseMagnitudeMultiply: @classmethod def INPUT_TYPES(cls): return { "required": { "latent_0_batch": ("LATENT",), "phase_luminosity": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.001}), "phase_cyan_red": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.001}), "phase_lime_purple": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.001}), "phase_pattern_structure": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.001}), "magnitude_luminosity": ("FLOAT", {"default": 1.0, "min": -10000.0, 
"max": 10000.0, "step": 0.001}), "magnitude_cyan_red": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.001}), "magnitude_lime_purple": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.001}), "magnitude_pattern_structure": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.001}), "latent_0_normal": ("BOOLEAN", {"default": False}), "latent_out_normal": ("BOOLEAN", {"default": False}), }, "optional": { "phase_luminositys": ("SIGMAS", ), "phase_cyan_reds": ("SIGMAS", ), "phase_lime_purples": ("SIGMAS", ), "phase_pattern_structures": ("SIGMAS", ), "magnitude_luminositys": ("SIGMAS", ), "magnitude_cyan_reds": ("SIGMAS", ), "magnitude_lime_purples": ("SIGMAS", ), "magnitude_pattern_structures": ("SIGMAS", ), } } RETURN_TYPES = ("LATENT",) FUNCTION = "main" CATEGORY = "RES4LYF/latents" @staticmethod def latent_repeat(latent, batch_size): b, c, h, w = latent.shape batch_latents = torch.zeros((batch_size, c, h, w), dtype=latent.dtype, layout=latent.layout, device=latent.device) for i in range(batch_size): batch_latents[i] = latent return batch_latents @staticmethod def mix_latent_phase_magnitude(latent_0, phase_luminosity, phase_cyan_red, phase_lime_purple, phase_pattern_structure, magnitude_luminosity, magnitude_cyan_red, magnitude_lime_purple, magnitude_pattern_structure ): dtype = latent_0.dtype # avoid big accuracy problems with fp32 FFT! 
latent_0 = latent_0.double() latent_0_fft = torch.fft.fft2(latent_0) latent_0_phase = torch.angle(latent_0_fft) latent_0_magnitude = torch.abs (latent_0_fft) # create new complex FFT using weighted mix of phases chan_weights_phase = [w for w in [phase_luminosity, phase_cyan_red, phase_lime_purple, phase_pattern_structure ]] chan_weights_magnitude = [w for w in [magnitude_luminosity, magnitude_cyan_red, magnitude_lime_purple, magnitude_pattern_structure]] mixed_phase = torch.zeros_like(latent_0, dtype=latent_0.dtype, layout=latent_0.layout, device=latent_0.device) mixed_magnitude = torch.zeros_like(latent_0, dtype=latent_0.dtype, layout=latent_0.layout, device=latent_0.device) for i in range(4): mixed_phase[:, i] = latent_0_phase[:,i] * chan_weights_phase[i] mixed_magnitude[:, i] = latent_0_magnitude[:,i] * chan_weights_magnitude[i] new_fft = mixed_magnitude * torch.exp(1j * mixed_phase) # inverse FFT to convert back to spatial domain mixed_phase_magnitude = torch.fft.ifft2(new_fft).real return mixed_phase_magnitude.to(dtype) def main(self, latent_0_batch, latent_0_normal, latent_out_normal, phase_luminosity, phase_cyan_red, phase_lime_purple, phase_pattern_structure, magnitude_luminosity, magnitude_cyan_red, magnitude_lime_purple, magnitude_pattern_structure, phase_luminositys=None, phase_cyan_reds=None, phase_lime_purples=None, phase_pattern_structures=None, magnitude_luminositys=None, magnitude_cyan_reds=None, magnitude_lime_purples=None, magnitude_pattern_structures=None ): latent_0_batch = latent_0_batch["samples"].double() batch_size = latent_0_batch.shape[0] phase_luminositys = initialize_or_scale(phase_luminositys, phase_luminosity, batch_size) phase_cyan_reds = initialize_or_scale(phase_cyan_reds, phase_cyan_red, batch_size) phase_lime_purples = initialize_or_scale(phase_lime_purples, phase_lime_purple, batch_size) phase_pattern_structures = initialize_or_scale(phase_pattern_structures, phase_pattern_structure, batch_size) magnitude_luminositys = 
class LatentPhaseMagnitudeOffset:
    """Node that adds constant offsets to the FFT phase and magnitude of latents.

    Every batch item is moved to the frequency domain with a 2-D FFT, a
    per-channel constant is added to the phase and to the magnitude of the
    first four channels, and the real part of the inverse FFT is returned.
    """

    @classmethod
    def INPUT_TYPES(cls):
        channels = ("luminosity", "cyan_red", "lime_purple", "pattern_structure")
        names = [f"{kind}_{channel}" for kind in ("phase", "magnitude") for channel in channels]

        required = {"latent_0_batch": ("LATENT",)}
        for name in names:
            required[name] = ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.001})
        required["latent_0_normal"] = ("BOOLEAN", {"default": False})
        required["latent_out_normal"] = ("BOOLEAN", {"default": False})

        # Every scalar input has an optional per-batch-item schedule named "<scalar>s".
        optional = {f"{name}s": ("SIGMAS",) for name in names}
        return {"required": required, "optional": optional}

    RETURN_TYPES = ("LATENT",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/latents"

    @staticmethod
    def latent_repeat(latent, batch_size):
        """Return a ``(batch_size, c, h, w)`` tensor with ``latent`` written into every slot."""
        _, c, h, w = latent.shape
        batch_latents = torch.zeros((batch_size, c, h, w), dtype=latent.dtype, layout=latent.layout, device=latent.device)
        for i in range(batch_size):
            batch_latents[i] = latent
        return batch_latents

    @staticmethod
    def mix_latent_phase_magnitude(latent_0,
                                   phase_luminosity, phase_cyan_red, phase_lime_purple, phase_pattern_structure,
                                   magnitude_luminosity, magnitude_cyan_red, magnitude_lime_purple, magnitude_pattern_structure):
        """Offset the spectrum of ``latent_0`` channel by channel.

        Args:
            latent_0: tensor indexed as ``[:, channel]`` whose last two axes are
                transformed by ``torch.fft.fft2``.
            phase_*: value added to the phase of channels 0..3.
            magnitude_*: value added to the magnitude of channels 0..3.

        Returns:
            The real part of the inverse FFT, cast back to ``latent_0.dtype``.
            Channels beyond the fourth are returned with an untouched spectrum
            (previously they were silently zeroed); inputs with fewer than four
            channels use only the leading offsets instead of raising IndexError.
        """
        dtype = latent_0.dtype
        # float64 avoids the large accuracy problems seen with fp32 FFTs
        spectrum = torch.fft.fft2(latent_0.double())

        phase = torch.angle(spectrum)
        magnitude = torch.abs(spectrum)

        phase_offsets = (phase_luminosity, phase_cyan_red, phase_lime_purple, phase_pattern_structure)
        magnitude_offsets = (magnitude_luminosity, magnitude_cyan_red, magnitude_lime_purple, magnitude_pattern_structure)

        for i in range(min(len(phase_offsets), phase.shape[1])):
            phase[:, i] += phase_offsets[i]
            magnitude[:, i] += magnitude_offsets[i]

        # rebuild the complex spectrum and go back to the spatial domain
        new_fft = magnitude * torch.exp(1j * phase)
        return torch.fft.ifft2(new_fft).real.to(dtype)

    def main(self, latent_0_batch, latent_0_normal, latent_out_normal,
             phase_luminosity, phase_cyan_red, phase_lime_purple, phase_pattern_structure,
             magnitude_luminosity, magnitude_cyan_red, magnitude_lime_purple, magnitude_pattern_structure,
             phase_luminositys=None, phase_cyan_reds=None, phase_lime_purples=None, phase_pattern_structures=None,
             magnitude_luminositys=None, magnitude_cyan_reds=None, magnitude_lime_purples=None, magnitude_pattern_structures=None):
        """Apply :meth:`mix_latent_phase_magnitude` to every item of the batch.

        Each scalar is combined with its optional schedule through
        ``initialize_or_scale`` to obtain one value per batch item.
        Returns a one-tuple holding ``{"samples": tensor}``.
        """
        samples = latent_0_batch["samples"].double()
        batch_size = samples.shape[0]

        scalars = (phase_luminosity, phase_cyan_red, phase_lime_purple, phase_pattern_structure,
                   magnitude_luminosity, magnitude_cyan_red, magnitude_lime_purple, magnitude_pattern_structure)
        schedules = (phase_luminositys, phase_cyan_reds, phase_lime_purples, phase_pattern_structures,
                     magnitude_luminositys, magnitude_cyan_reds, magnitude_lime_purples, magnitude_pattern_structures)
        per_item = [initialize_or_scale(schedule, scalar, batch_size)
                    for schedule, scalar in zip(schedules, scalars)]

        # default dtype on purpose: the result buffer has always been created this way
        mixed_batch = torch.zeros(samples.shape, device=samples.device)

        # NOTE(review): `latent_normalize_channels` must resolve to a tensor -> tensor
        # helper here; a node class with the same name is defined in this module, so
        # confirm which object the name is bound to at call time.
        if latent_0_normal:
            samples = latent_normalize_channels(samples)

        for i in range(batch_size):
            mixed = self.mix_latent_phase_magnitude(samples[i:i + 1], *(values[i].item() for values in per_item))
            if latent_out_normal:
                mixed = latent_normalize_channels(mixed)
            mixed_batch[i:i + 1] = mixed

        return ({"samples": mixed_batch},)
class LatentPhaseMagnitudePower:
    """Node that raises the FFT phase and magnitude of latents to per-channel powers.

    Every batch item is moved to the frequency domain with a 2-D FFT, the phase
    and the magnitude of the first four channels are raised to the given
    exponents, and the real part of the inverse FFT is returned.
    """

    @classmethod
    def INPUT_TYPES(cls):
        channels = ("luminosity", "cyan_red", "lime_purple", "pattern_structure")
        names = [f"{kind}_{channel}" for kind in ("phase", "magnitude") for channel in channels]

        required = {"latent_0_batch": ("LATENT",)}
        for name in names:
            required[name] = ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.001})
        required["latent_0_normal"] = ("BOOLEAN", {"default": False})
        required["latent_out_normal"] = ("BOOLEAN", {"default": False})

        # Every scalar input has an optional per-batch-item schedule named "<scalar>s".
        optional = {f"{name}s": ("SIGMAS",) for name in names}
        return {"required": required, "optional": optional}

    RETURN_TYPES = ("LATENT",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/latents"

    @staticmethod
    def latent_repeat(latent, batch_size):
        """Return a ``(batch_size, c, h, w)`` tensor with ``latent`` written into every slot."""
        _, c, h, w = latent.shape
        batch_latents = torch.zeros((batch_size, c, h, w), dtype=latent.dtype, layout=latent.layout, device=latent.device)
        for i in range(batch_size):
            batch_latents[i] = latent
        return batch_latents

    @staticmethod
    def _signed_power(base, exponent):
        """Compute ``base ** exponent`` without producing NaN for negative bases.

        Real-valued ``pow`` is undefined for a negative base with a fractional
        exponent. Only where that happens, the odd extension
        ``sign(base) * |base| ** exponent`` is used instead; every element that
        already had a defined result keeps it unchanged.
        """
        powered = base ** exponent
        undefined = torch.isnan(powered) & ~torch.isnan(base)
        if undefined.any():
            odd_extension = torch.sign(base) * base.abs() ** exponent
            powered = torch.where(undefined, odd_extension, powered)
        return powered

    @staticmethod
    def mix_latent_phase_magnitude(latent_0,
                                   phase_luminosity, phase_cyan_red, phase_lime_purple, phase_pattern_structure,
                                   magnitude_luminosity, magnitude_cyan_red, magnitude_lime_purple, magnitude_pattern_structure):
        """Raise the spectrum of ``latent_0`` to per-channel powers.

        Args:
            latent_0: tensor indexed as ``[:, channel]`` whose last two axes are
                transformed by ``torch.fft.fft2``.
            phase_*: exponent applied to the phase of channels 0..3.
            magnitude_*: exponent applied to the magnitude of channels 0..3.

        Returns:
            The real part of the inverse FFT, cast back to ``latent_0.dtype``.
            Channels beyond the fourth are returned with an untouched spectrum
            (previously they were silently zeroed); inputs with fewer than four
            channels use only the leading exponents instead of raising IndexError.
        """
        dtype = latent_0.dtype
        # float64 avoids the large accuracy problems seen with fp32 FFTs
        spectrum = torch.fft.fft2(latent_0.double())

        phase = torch.angle(spectrum)
        magnitude = torch.abs(spectrum)

        phase_exponents = (phase_luminosity, phase_cyan_red, phase_lime_purple, phase_pattern_structure)
        magnitude_exponents = (magnitude_luminosity, magnitude_cyan_red, magnitude_lime_purple, magnitude_pattern_structure)

        for i in range(min(len(phase_exponents), phase.shape[1])):
            # torch.angle yields negative values, so a fractional exponent needs the NaN-safe power
            phase[:, i] = LatentPhaseMagnitudePower._signed_power(phase[:, i], phase_exponents[i])
            # torch.abs yields non-negative values, plain pow is fine here
            magnitude[:, i] = magnitude[:, i] ** magnitude_exponents[i]

        # rebuild the complex spectrum and go back to the spatial domain
        new_fft = magnitude * torch.exp(1j * phase)
        return torch.fft.ifft2(new_fft).real.to(dtype)

    def main(self, latent_0_batch, latent_0_normal, latent_out_normal,
             phase_luminosity, phase_cyan_red, phase_lime_purple, phase_pattern_structure,
             magnitude_luminosity, magnitude_cyan_red, magnitude_lime_purple, magnitude_pattern_structure,
             phase_luminositys=None, phase_cyan_reds=None, phase_lime_purples=None, phase_pattern_structures=None,
             magnitude_luminositys=None, magnitude_cyan_reds=None, magnitude_lime_purples=None, magnitude_pattern_structures=None):
        """Apply :meth:`mix_latent_phase_magnitude` to every item of the batch.

        Each scalar is combined with its optional schedule through
        ``initialize_or_scale`` to obtain one value per batch item.
        Returns a one-tuple holding ``{"samples": tensor}``.
        """
        samples = latent_0_batch["samples"].double()
        batch_size = samples.shape[0]

        scalars = (phase_luminosity, phase_cyan_red, phase_lime_purple, phase_pattern_structure,
                   magnitude_luminosity, magnitude_cyan_red, magnitude_lime_purple, magnitude_pattern_structure)
        schedules = (phase_luminositys, phase_cyan_reds, phase_lime_purples, phase_pattern_structures,
                     magnitude_luminositys, magnitude_cyan_reds, magnitude_lime_purples, magnitude_pattern_structures)
        per_item = [initialize_or_scale(schedule, scalar, batch_size)
                    for schedule, scalar in zip(schedules, scalars)]

        # default dtype on purpose: the result buffer has always been created this way
        mixed_batch = torch.zeros(samples.shape, device=samples.device)

        # NOTE(review): `latent_normalize_channels` must resolve to a tensor -> tensor
        # helper here; a node class with the same name is defined in this module, so
        # confirm which object the name is bound to at call time.
        if latent_0_normal:
            samples = latent_normalize_channels(samples)

        for i in range(batch_size):
            mixed = self.mix_latent_phase_magnitude(samples[i:i + 1], *(values[i].item() for values in per_item))
            if latent_out_normal:
                mixed = latent_normalize_channels(mixed)
            mixed_batch[i:i + 1] = mixed

        return ({"samples": mixed_batch},)
class StableCascade_StageC_VAEEncode_Exact:
    """Node that resizes an image to an exact latent size and VAE-encodes it."""

    def __init__(self, device="cpu"):
        self.device = device

    @classmethod
    def INPUT_TYPES(cls):
        required = {
            "image": ("IMAGE",),
            "vae": ("VAE", ),
        }
        # width/height are multiplied by vae.downscale_ratio in generate()
        for side in ("width", "height"):
            required[side] = ("INT", {"default": 24, "min": 1, "max": 1024, "step": 1})
        return {"required": required}

    RETURN_TYPES = ("LATENT",)
    RETURN_NAMES = ("stage_c",)
    FUNCTION = "generate"
    CATEGORY = "RES4LYF/vae"

    def generate(self, image, vae, width, height):
        """Resize ``image`` with lanczos/center and encode its first three channels.

        The target pixel size is ``width`` x ``height`` multiplied by
        ``vae.downscale_ratio`` (the original author's note gives 32 for it).
        Returns a one-tuple holding ``{"samples": encoded}``.
        """
        ratio = vae.downscale_ratio
        target_w = width * ratio
        target_h = height * ratio

        # channels-last -> channels-first for the resize, e.g. 1,1024,1024,3 -> 1,3,1024,1024
        channels_first = image.movedim(-1, 1)
        resized = comfy.utils.common_upscale(channels_first, target_w, target_h, "lanczos", "center")
        channels_last = resized.movedim(1, -1)

        # keep only the first three channels (presumably drops an alpha channel)
        encoded = vae.encode(channels_last[:, :, :, :3])
        return ({"samples": encoded},)
class StableCascade_StageC_VAEEncode_Exact_Tiled:
    """Node that VAE-encodes an image tile by tile through ``tiled_scale_multidim``."""

    def __init__(self, device="cpu"):
        self.device = device

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "image": ("IMAGE",),
                "vae": ("VAE", ),
                "tile_size": ("INT", {"default": 512, "min": 320, "max": 4096, "step": 64}),
                "overlap": ("INT", {"default": 16, "min": 8, "max": 128, "step": 8}),
            }
        }

    RETURN_TYPES = ("LATENT",)
    RETURN_NAMES = ("stage_c",)
    FUNCTION = "generate"
    CATEGORY = "RES4LYF/vae"

    def generate(self, image, vae, tile_size, overlap):
        """Encode ``image`` in tiles of ``tile_size // 8`` and return ``({"samples": ...},)``."""
        # NOTE(review): the ratio is forwarded as an *upscale* factor although the
        # variable is a downscale ratio (original note: 32) — confirm whether
        # 1 / vae.downscale_ratio was intended.
        upscale_amount = vae.downscale_ratio
        image = image.movedim(-1, 1)  # bhwc -> bchw

        def encode_fn(img):
            return vae.encode(img.to(vae.device)).to("cpu")

        c_latent = tiled_scale_multidim(image, encode_fn,
                                        tile=(tile_size // 8, tile_size // 8),
                                        overlap=overlap,
                                        upscale_amount=upscale_amount,
                                        out_channels=16,
                                        output_device=self.device)
        return ({"samples": c_latent},)


@torch.inference_mode()
def tiled_scale_multidim(samples, function,
                         tile=(64, 64),
                         overlap=8,
                         upscale_amount=4,
                         out_channels=3,
                         output_device="cpu",
                         pbar=None):
    """Run ``function`` over overlapping tiles of ``samples`` and blend the results.

    Args:
        samples: tensor whose axes from index 2 on are tiled; axis 0 is iterated
            item by item.
        function: callable applied to each tile; its output is moved to
            ``output_device``.
        tile: tile size per tiled axis.
        overlap: overlap between neighbouring tiles, in input units.
        upscale_amount: factor between input and output sizes along tiled axes.
        out_channels: size of axis 1 of the output.
        output_device: device of the returned tensor.
        pbar: optional object whose ``update(1)`` is called once per tile.

    Returns:
        Tensor of shape ``[batch, out_channels, *round(size * upscale_amount)]``.
        Tile edges are feathered with a linear ramp and the accumulated result is
        divided by the accumulated ramp weights, so overlapping regions are a
        proper weighted average and image borders are not attenuated.

    Raises:
        ValueError: if any tile size is not larger than ``overlap`` (the tile
            step would be zero or negative).
    """
    if any(size <= overlap for size in tile):
        raise ValueError(f"every tile size must be larger than overlap ({overlap}), got tile={tuple(tile)}")

    dims = len(tile)
    out_spatial = [round(size * upscale_amount) for size in samples.shape[2:]]
    output = torch.zeros([samples.shape[0], out_channels] + out_spatial, device=output_device)

    feather = round(overlap * upscale_amount)
    origins_per_dim = [range(0, size, tile_size - overlap)
                       for size, tile_size in zip(samples.shape[2:], tile)]

    for b in range(samples.shape[0]):
        sample = samples[b:b + 1]
        accumulated = torch.zeros([1, out_channels] + out_spatial, device=output_device)
        weights = torch.zeros_like(accumulated)

        for origin in itertools.product(*origins_per_dim):
            s_in = sample
            out_pos = []
            for d in range(dims):
                pos = max(0, min(sample.shape[d + 2] - overlap, origin[d]))
                length = min(tile[d], sample.shape[d + 2] - pos)
                s_in = s_in.narrow(d + 2, pos, length)
                out_pos.append(round(pos * upscale_amount))

            ps = function(s_in).to(output_device)

            # linear ramp on both ends of every tiled axis
            mask = torch.ones_like(ps)
            for t in range(feather):
                ramp = (1.0 / feather) * (t + 1)
                for d in range(2, dims + 2):
                    mask.narrow(d, t, 1).mul_(ramp)
                    mask.narrow(d, mask.shape[d] - 1 - t, 1).mul_(ramp)

            target = accumulated
            target_weights = weights
            for d in range(dims):
                target = target.narrow(d + 2, out_pos[d], mask.shape[d + 2])
                target_weights = target_weights.narrow(d + 2, out_pos[d], mask.shape[d + 2])

            target.add_(ps * mask)
            target_weights.add_(mask)

            if pbar is not None:
                pbar.update(1)

        # normalise by the summed ramp weights; leave never-written positions at zero
        output[b:b + 1] = torch.where(weights > 0, accumulated / weights, accumulated)

    return output
class EmptyLatentImageCustom:
    """Node that creates an all-zero latent with selectable channels, scale and precision."""

    # pixel-to-latent divisor per mode; "cascade_c" takes its divisor from `compression`
    _MODE_RATIOS = {"sdxl": 8, "cascade_b": 4, "exact": 1}
    _PRECISIONS = {"fp16": torch.float16, "fp32": torch.float32, "fp64": torch.float64}

    def __init__(self):
        self.device = comfy.model_management.intermediate_device()

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "width": ("INT", {"default": 24, "min": 1, "max": MAX_RESOLUTION, "step": 1}),
                "height": ("INT", {"default": 24, "min": 1, "max": MAX_RESOLUTION, "step": 1}),
                "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096}),
                "channels": (['4', '16'], {"default": '4'}),
                # The default used to be 'default', which is not one of the choices.
                # 'exact' has the same divisor (1) that generate() falls back to for
                # an unrecognised mode, so the effective behaviour is unchanged.
                "mode": (['sdxl', 'cascade_b', 'cascade_c', 'exact'], {"default": 'exact'}),
                "compression": ("INT", {"default": 42, "min": 4, "max": 128, "step": 1}),
                "precision": (['fp16', 'fp32', 'fp64'], {"default": 'fp32'}),
            }
        }

    RETURN_TYPES = ("LATENT",)
    FUNCTION = "generate"
    CATEGORY = "RES4LYF/latents"

    def generate(self, width, height, batch_size, channels, mode, compression, precision):
        """Return ``({"samples": zeros},)`` of shape ``[batch, channels, height // r, width // r]``.

        ``r`` is 8 for "sdxl", 4 for "cascade_b", ``compression`` for
        "cascade_c" and 1 for "exact" or any unrecognised mode. An unrecognised
        ``precision`` falls back to float32.
        """
        if mode == "cascade_c":
            ratio = compression
        else:
            ratio = self._MODE_RATIOS.get(mode, 1)
        dtype = self._PRECISIONS.get(precision, torch.float32)

        latent = torch.zeros([batch_size, int(channels), height // ratio, width // ratio],
                             dtype=dtype, device=self.device)
        return ({"samples": latent},)


class EmptyLatentImage64:
    """Node that creates an all-zero 4-channel float64 latent at 1/8 of the pixel size."""

    def __init__(self):
        self.device = comfy.model_management.intermediate_device()

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "width": ("INT", {"default": 1024, "min": 16, "max": MAX_RESOLUTION, "step": 8}),
                "height": ("INT", {"default": 1024, "min": 16, "max": MAX_RESOLUTION, "step": 8}),
                "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096}),
            }
        }

    RETURN_TYPES = ("LATENT",)
    RETURN_NAMES = ("latent",)
    FUNCTION = "generate"
    CATEGORY = "RES4LYF/latents"

    def generate(self, width, height, batch_size=1):
        """Return ``({"samples": zeros},)`` of shape ``[batch_size, 4, height // 8, width // 8]``."""
        latent = torch.zeros([batch_size, 4, height // 8, width // 8], dtype=torch.float64, device=self.device)
        return ({"samples": latent},)
class LatentNoiseBatch_perlin:
    """Node that fills a 4-channel latent batch with noise derived from Perlin noise."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}),
                "width": ("INT", {"default": 1024, "min": 8, "max": MAX_RESOLUTION, "step": 8}),
                "height": ("INT", {"default": 1024, "min": 8, "max": MAX_RESOLUTION, "step": 8}),
                "batch_size": ("INT", {"default": 1, "min": 1, "max": 256}),
                "detail_level": ("FLOAT", {"default": 0, "min": -1, "max": 1.0, "step": 0.1}),
            },
            "optional": {
                "details": ("SIGMAS", ),
            }
        }

    RETURN_TYPES = ("LATENT",)
    RETURN_NAMES = ("latent",)
    FUNCTION = "create_noisy_latents_perlin"
    CATEGORY = "RES4LYF/noise"

    # found at https://gist.github.com/vadimkantorov/ac1b097753f217c5c11bc2ff396e0a57
    # which was ported from https://github.com/pvigier/perlin-numpy/blob/master/perlin2d.py
    def rand_perlin_2d(self, shape, res, fade=lambda t: 6*t**5 - 15*t**4 + 10*t**3):
        """Return one 2-D Perlin noise field of size ``shape`` with ``res`` grid cells.

        Random gradient angles are drawn with ``torch.rand`` (global RNG).
        """
        delta = (res[0] / shape[0], res[1] / shape[1])
        d = (shape[0] // res[0], shape[1] // res[1])

        # indexing="ij" is the behaviour the call relied on implicitly; stating it
        # keeps the result identical and avoids the deprecation warning
        rows, cols = torch.meshgrid(torch.arange(0, res[0], delta[0]),
                                    torch.arange(0, res[1], delta[1]),
                                    indexing="ij")
        grid = torch.stack((rows, cols), dim=-1) % 1

        angles = 2 * math.pi * torch.rand(res[0] + 1, res[1] + 1)
        gradients = torch.stack((torch.cos(angles), torch.sin(angles)), dim=-1)

        def tile_grads(slice1, slice2):
            corner = gradients[slice1[0]:slice1[1], slice2[0]:slice2[1]]
            return corner.repeat_interleave(d[0], 0).repeat_interleave(d[1], 1)

        def dot(grad, shift):
            offsets = torch.stack((grid[:shape[0], :shape[1], 0] + shift[0],
                                   grid[:shape[0], :shape[1], 1] + shift[1]), dim=-1)
            return (offsets * grad[:shape[0], :shape[1]]).sum(dim=-1)

        n00 = dot(tile_grads([0, -1], [0, -1]), [0, 0])
        n10 = dot(tile_grads([1, None], [0, -1]), [-1, 0])
        n01 = dot(tile_grads([0, -1], [1, None]), [0, -1])
        n11 = dot(tile_grads([1, None], [1, None]), [-1, -1])

        t = fade(grid[:shape[0], :shape[1]])
        return math.sqrt(2) * torch.lerp(torch.lerp(n00, n10, t[..., 0]),
                                         torch.lerp(n01, n11, t[..., 0]),
                                         t[..., 1])

    def rand_perlin_2d_octaves(self, shape, res, octaves=1, persistence=0.5):
        """Sum ``octaves`` Perlin fields and hash the result into the range [0, 1)."""
        noise = torch.zeros(shape)
        frequency = 1
        amplitude = 1
        for _ in range(octaves):
            noise += amplitude * self.rand_perlin_2d(shape, (frequency * res[0], frequency * res[1]))
            frequency *= 2
            amplitude *= persistence
        # scramble the smooth field: scale up, then keep the remainder modulo 11
        noise = torch.remainder(torch.abs(noise) * 1000000, 11) / 11
        return noise

    def scale_tensor(self, x):
        """Rescale ``x`` linearly so its minimum maps to 0 and its maximum to 1.

        A constant tensor (maximum equals minimum) yields zeros instead of the
        NaNs a 0/0 division would produce.
        """
        min_value = x.min()
        span = x.max() - min_value
        if span == 0:
            return torch.zeros_like(x, dtype=torch.result_type(x, 1.0))
        return (x - min_value) / span

    def create_noisy_latents_perlin(self, seed, width, height, batch_size, detail_level, details=None):
        """Build a ``[batch_size, 4, height // 8, width // 8]`` float32 CPU noise latent.

        The global torch RNG is seeded with ``seed``. ``details`` (one entry per
        batch item, scaled by ``detail_level``) adjusts the gain of each item;
        without it ``detail_level`` is used for every item. Values are clamped
        to [-5, 5]. Returns a one-tuple holding ``{"samples": noise}``.
        """
        if details is None:
            details = torch.full((10000,), detail_level)
        else:
            details = detail_level * details

        torch.manual_seed(seed)
        latent_shape = (height // 8, width // 8)
        noise = torch.zeros((batch_size, 4) + latent_shape, dtype=torch.float32, device="cpu")

        for i in range(batch_size):
            gain = 1 + details[i] / 10
            for j in range(4):
                noise_values = self.rand_perlin_2d_octaves(latent_shape, (1, 1), 1, 1)
                # map the uniform values through the inverse normal CDF
                result = gain * torch.erfinv(2 * noise_values - 1) * (2 ** 0.5)
                noise[i, j, :, :] = torch.clamp(result, -5, 5)

        return ({"samples": noise},)
class LatentNoiseBatch_gaussian_channels:
    """Node that adds gaussian noise with per-channel mean shifts to a latent, once per step."""

    @classmethod
    def INPUT_TYPES(cls):
        def mean_spec():
            return ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.001})

        return {
            "required": {
                "latent": ("LATENT",),
                "mean": mean_spec(),
                "mean_luminosity": mean_spec(),
                "mean_cyan_red": mean_spec(),
                "mean_lime_purple": mean_spec(),
                "mean_pattern_structure": mean_spec(),
                "std": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.001}),
                "steps": ("INT", {"default": 0, "min": -10000, "max": 10000}),
                "seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}),
            },
            "optional": {
                "means": ("SIGMAS", ),
                "mean_luminositys": ("SIGMAS", ),
                "mean_cyan_reds": ("SIGMAS", ),
                "mean_lime_purples": ("SIGMAS", ),
                "mean_pattern_structures": ("SIGMAS", ),
                "stds": ("SIGMAS", ),
            }
        }

    RETURN_TYPES = ("LATENT",)
    RETURN_NAMES = ("latent",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/noise"

    @staticmethod
    def gaussian_noise_channels(x, mean_luminosity=-0.1, mean_cyan_red=0.0, mean_lime_purple=0.0, mean_pattern_structure=0.0):
        """Shift the first four channels of ``x`` by the given means.

        ``x`` has its leading axis squeezed, channels 0..3 are offset, and the
        result is returned with a leading axis of size 1 re-added.
        """
        x = x.squeeze(0)
        shifts = (mean_luminosity, mean_cyan_red, mean_lime_purple, mean_pattern_structure)
        shifted = [x[i:i + 1] + shift for i, shift in enumerate(shifts)]
        return torch.unsqueeze(torch.cat(shifted), 0)

    def main(self, latent, steps, seed,
             mean, mean_luminosity, mean_cyan_red, mean_lime_purple, mean_pattern_structure, std,
             means=None, mean_luminositys=None, mean_cyan_reds=None, mean_lime_purples=None,
             mean_pattern_structures=None, stds=None):
        """Return ``steps`` noisy copies of the latent as ``({"samples": ...},)``.

        With ``steps == 0`` the number of steps is the length of the first
        schedule that was supplied (``means`` first, as before).

        Raises:
            ValueError: if ``steps == 0`` and no schedule was supplied
                (previously this failed with ``len(None)``).
        """
        if steps == 0:
            supplied = [schedule for schedule in (means, mean_luminositys, mean_cyan_reds,
                                                  mean_lime_purples, mean_pattern_structures, stds)
                        if schedule is not None]
            if not supplied:
                raise ValueError("steps is 0 and no schedule input is connected; "
                                 "set steps or connect a SIGMAS schedule")
            steps = len(supplied[0])

        x = latent["samples"]
        b, c, h, w = x.shape

        noise_latents = torch.zeros([steps, 4, h, w], dtype=x.dtype, layout=x.layout, device=x.device)
        noise_sampler = NOISE_GENERATOR_CLASSES.get('gaussian')(x=x, seed=seed)

        means = initialize_or_scale(means, mean, steps)
        mean_luminositys = initialize_or_scale(mean_luminositys, mean_luminosity, steps)
        mean_cyan_reds = initialize_or_scale(mean_cyan_reds, mean_cyan_red, steps)
        mean_lime_purples = initialize_or_scale(mean_lime_purples, mean_lime_purple, steps)
        mean_pattern_structures = initialize_or_scale(mean_pattern_structures, mean_pattern_structure, steps)
        stds = initialize_or_scale(stds, std, steps)

        for i in range(steps):
            noise = noise_sampler(mean=means[i].item(), std=stds[i].item())
            noise = self.gaussian_noise_channels(noise,
                                                 mean_luminositys[i].item(),
                                                 mean_cyan_reds[i].item(),
                                                 mean_lime_purples[i].item(),
                                                 mean_pattern_structures[i].item())
            noise_latents[i] = x + noise

        return ({"samples": noise_latents},)
class LatentNoiseBatch_gaussian:
    """Node that produces a batch of gaussian noise latents, one per step."""

    @classmethod
    def INPUT_TYPES(cls):
        required = {
            "latent": ("LATENT",),
            "mean": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.001}),
            "std": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.001}),
            "steps": ("INT", {"default": 0, "min": -10000, "max": 10000}),
            "seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}),
        }
        optional = {
            "means": ("SIGMAS", ),
            "stds": ("SIGMAS", ),
            "steps_": ("SIGMAS", ),
        }
        return {"required": required, "optional": optional}

    RETURN_TYPES = ("LATENT",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/noise"

    def main(self, latent, mean, std, steps, seed, means=None, stds=None, steps_=None):
        """Sample ``steps`` noise tensors shaped like one item of the input latent.

        When ``steps_`` is given its length replaces ``steps``. Per-step mean
        and std come from ``initialize_or_scale``. Returns ``({"samples": ...},)``.
        """
        step_count = steps if steps_ is None else len(steps_)

        step_means = initialize_or_scale(means, mean, step_count)
        step_stds = initialize_or_scale(stds, std, step_count)

        reference = latent["samples"]
        _, channels, height, width = reference.shape

        batch = torch.zeros([step_count, channels, height, width],
                            dtype=reference.dtype,
                            layout=reference.layout,
                            device=reference.device)

        sampler_cls = NOISE_GENERATOR_CLASSES.get('gaussian')
        sampler = sampler_cls(x=reference, seed=seed)

        for index in range(step_count):
            batch[index] = sampler(mean=step_means[index].item(), std=step_stds[index].item())

        return ({"samples": batch},)
class LatentNoiseBatch_fractal:
    """Node that produces a batch of fractal noise latents, one per step."""

    @classmethod
    def INPUT_TYPES(cls):
        required = {
            "latent": ("LATENT",),
            "alpha": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.001}),
            "k_flip": ("BOOLEAN", {"default": False}),
            "steps": ("INT", {"default": 0, "min": -10000, "max": 10000}),
            "seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}),
        }
        optional = {
            "alphas": ("SIGMAS", ),
            "ks": ("SIGMAS", ),
            "steps_": ("SIGMAS", ),
        }
        return {"required": required, "optional": optional}

    RETURN_TYPES = ("LATENT",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/noise"

    def main(self, latent, alpha, k_flip, steps,
             seed=42,
             alphas=None,
             ks=None,
             sigmas_=None,
             steps_=None):
        """Sample ``steps`` fractal noise tensors shaped like one item of the input latent.

        When ``steps_`` is given its length replaces ``steps``. ``k_flip``
        selects a base ``k`` of -1 (true) or 1 (false) before it is combined
        with the optional ``ks`` schedule. ``sigmas_`` is accepted but not used.
        Returns ``({"samples": ...},)``.
        """
        step_count = steps if steps_ is None else len(steps_)

        step_alphas = initialize_or_scale(alphas, alpha, step_count)
        if k_flip:
            base_k = -1
        else:
            base_k = 1
        step_ks = initialize_or_scale(ks, base_k, step_count)

        reference = latent["samples"]
        _, channels, height, width = reference.shape

        batch = torch.zeros([step_count, channels, height, width],
                            dtype=reference.dtype,
                            layout=reference.layout,
                            device=reference.device)

        sampler_cls = NOISE_GENERATOR_CLASSES.get('fractal')
        sampler = sampler_cls(x=reference, seed=seed)

        for index in range(step_count):
            batch[index] = sampler(alpha=step_alphas[index].item(), k=step_ks[index].item(), scale=0.1)

        return ({"samples": batch},)
class LatentBatch_channels:
    """Node that offsets, scales or exponentiates the first four channels of each latent."""

    @classmethod
    def INPUT_TYPES(cls):
        def channel_spec():
            return ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.01})

        return {
            "required": {
                "latent": ("LATENT",),
                "mode": (["offset", "multiply", "power"],),
                "luminosity": channel_spec(),
                "cyan_red": channel_spec(),
                "lime_purple": channel_spec(),
                "pattern_structure": channel_spec(),
            },
            "optional": {
                "luminositys": ("SIGMAS", ),
                "cyan_reds": ("SIGMAS", ),
                "lime_purples": ("SIGMAS", ),
                "pattern_structures": ("SIGMAS", ),
            }
        }

    RETURN_TYPES = ("LATENT",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/latents"

    @staticmethod
    def _per_channel(x, values, op):
        """Apply ``op(channel, value)`` to the leading channels of ``x`` and re-add a batch axis."""
        return torch.unsqueeze(torch.cat([op(x[i:i + 1], value) for i, value in enumerate(values)]), 0)

    @staticmethod
    def latent_channels_multiply(x, luminosity=-0.1, cyan_red=0.0, lime_purple=0.0, pattern_structure=0.0):
        """Multiply channels 0..3 of ``x`` (indexed along axis 0) by the given factors."""
        return LatentBatch_channels._per_channel(
            x, (luminosity, cyan_red, lime_purple, pattern_structure), lambda chan, v: chan * v)

    @staticmethod
    def latent_channels_offset(x, luminosity=-0.1, cyan_red=0.0, lime_purple=0.0, pattern_structure=0.0):
        """Add the given offsets to channels 0..3 of ``x`` (indexed along axis 0)."""
        return LatentBatch_channels._per_channel(
            x, (luminosity, cyan_red, lime_purple, pattern_structure), lambda chan, v: chan + v)

    @staticmethod
    def latent_channels_power(x, luminosity=-0.1, cyan_red=0.0, lime_purple=0.0, pattern_structure=0.0):
        """Raise channels 0..3 of ``x`` (indexed along axis 0) to the given exponents."""
        return LatentBatch_channels._per_channel(
            x, (luminosity, cyan_red, lime_purple, pattern_structure), lambda chan, v: chan ** v)

    def main(self, latent, mode, luminosity, cyan_red, lime_purple, pattern_structure,
             luminositys=None, cyan_reds=None, lime_purples=None, pattern_structures=None):
        """Apply the operation selected by ``mode`` to every batch item.

        Each scalar is combined with its optional schedule through
        ``initialize_or_scale`` to obtain one value per batch item.
        Returns ``({"samples": ...},)``.

        Raises:
            ValueError: if ``mode`` is not "offset", "multiply" or "power"
                (previously the result variable was left unbound).
        """
        operations = {
            "offset": self.latent_channels_offset,
            "multiply": self.latent_channels_multiply,
            "power": self.latent_channels_power,
        }
        if mode not in operations:
            raise ValueError(f"unknown mode {mode!r}; expected one of {sorted(operations)}")
        operation = operations[mode]

        x = latent["samples"]
        b, c, h, w = x.shape

        noise_latents = torch.zeros([b, c, h, w], dtype=x.dtype, layout=x.layout, device=x.device)

        per_item = [
            initialize_or_scale(luminositys, luminosity, b),
            initialize_or_scale(cyan_reds, cyan_red, b),
            initialize_or_scale(lime_purples, lime_purple, b),
            initialize_or_scale(pattern_structures, pattern_structure, b),
        ]

        for i in range(b):
            noise_latents[i] = operation(x[i], *(values[i].item() for values in per_item))

        return ({"samples": noise_latents},)
class LatentBatch_channels_16:
    """Node that offsets, scales or exponentiates the first sixteen channels of each latent."""

    @classmethod
    def INPUT_TYPES(cls):
        required = {
            "latent": ("LATENT",),
            "mode": (["offset", "multiply", "power"],),
        }
        optional = {}
        # inputs are named chan_1 .. chan_16, each with an optional schedule chan_1s .. chan_16s
        for n in range(1, 17):
            required[f"chan_{n}"] = ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.01})
            optional[f"chan_{n}s"] = ("SIGMAS", )
        return {"required": required, "optional": optional}

    RETURN_TYPES = ("LATENT",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/latents"

    @staticmethod
    def _per_channel(x, values, op):
        """Apply ``op(channel, value)`` to the leading channels of ``x`` and re-add a batch axis."""
        return torch.unsqueeze(torch.cat([op(x[i:i + 1], value) for i, value in enumerate(values)]), 0)

    @staticmethod
    def latent_channels_multiply(x,
                                 chan_1=0.0, chan_2=0.0, chan_3=0.0, chan_4=0.0,
                                 chan_5=0.0, chan_6=0.0, chan_7=0.0, chan_8=0.0,
                                 chan_9=0.0, chan_10=0.0, chan_11=0.0, chan_12=0.0,
                                 chan_13=0.0, chan_14=0.0, chan_15=0.0, chan_16=0.0):
        """Multiply channels 0..15 of ``x`` (indexed along axis 0) by the given factors."""
        values = (chan_1, chan_2, chan_3, chan_4, chan_5, chan_6, chan_7, chan_8,
                  chan_9, chan_10, chan_11, chan_12, chan_13, chan_14, chan_15, chan_16)
        return LatentBatch_channels_16._per_channel(x, values, lambda chan, v: chan * v)

    @staticmethod
    def latent_channels_offset(x,
                               chan_1=0.0, chan_2=0.0, chan_3=0.0, chan_4=0.0,
                               chan_5=0.0, chan_6=0.0, chan_7=0.0, chan_8=0.0,
                               chan_9=0.0, chan_10=0.0, chan_11=0.0, chan_12=0.0,
                               chan_13=0.0, chan_14=0.0, chan_15=0.0, chan_16=0.0):
        """Add the given offsets to channels 0..15 of ``x`` (indexed along axis 0)."""
        values = (chan_1, chan_2, chan_3, chan_4, chan_5, chan_6, chan_7, chan_8,
                  chan_9, chan_10, chan_11, chan_12, chan_13, chan_14, chan_15, chan_16)
        return LatentBatch_channels_16._per_channel(x, values, lambda chan, v: chan + v)

    @staticmethod
    def latent_channels_power(x,
                              chan_1=0.0, chan_2=0.0, chan_3=0.0, chan_4=0.0,
                              chan_5=0.0, chan_6=0.0, chan_7=0.0, chan_8=0.0,
                              chan_9=0.0, chan_10=0.0, chan_11=0.0, chan_12=0.0,
                              chan_13=0.0, chan_14=0.0, chan_15=0.0, chan_16=0.0):
        """Raise channels 0..15 of ``x`` (indexed along axis 0) to the given exponents."""
        values = (chan_1, chan_2, chan_3, chan_4, chan_5, chan_6, chan_7, chan_8,
                  chan_9, chan_10, chan_11, chan_12, chan_13, chan_14, chan_15, chan_16)
        return LatentBatch_channels_16._per_channel(x, values, lambda chan, v: chan ** v)

    def main(self, latent, mode,
             chan_1, chan_2, chan_3, chan_4, chan_5, chan_6, chan_7, chan_8,
             chan_9, chan_10, chan_11, chan_12, chan_13, chan_14, chan_15, chan_16,
             chan_1s=None, chan_2s=None, chan_3s=None, chan_4s=None,
             chan_5s=None, chan_6s=None, chan_7s=None, chan_8s=None,
             chan_9s=None, chan_10s=None, chan_11s=None, chan_12s=None,
             chan_13s=None, chan_14s=None, chan_15s=None, chan_16s=None):
        """Apply the operation selected by ``mode`` to every batch item.

        Each scalar is combined with its optional schedule through
        ``initialize_or_scale`` to obtain one value per batch item.
        Returns ``({"samples": ...},)``.

        Raises:
            ValueError: if ``mode`` is not "offset", "multiply" or "power"
                (previously the result variable was left unbound).
        """
        operations = {
            "offset": self.latent_channels_offset,
            "multiply": self.latent_channels_multiply,
            "power": self.latent_channels_power,
        }
        if mode not in operations:
            raise ValueError(f"unknown mode {mode!r}; expected one of {sorted(operations)}")
        operation = operations[mode]

        x = latent["samples"]
        b, c, h, w = x.shape

        noise_latents = torch.zeros([b, c, h, w], dtype=x.dtype, layout=x.layout, device=x.device)

        scalars = (chan_1, chan_2, chan_3, chan_4, chan_5, chan_6, chan_7, chan_8,
                   chan_9, chan_10, chan_11, chan_12, chan_13, chan_14, chan_15, chan_16)
        schedules = (chan_1s, chan_2s, chan_3s, chan_4s, chan_5s, chan_6s, chan_7s, chan_8s,
                     chan_9s, chan_10s, chan_11s, chan_12s, chan_13s, chan_14s, chan_15s, chan_16s)
        per_item = [initialize_or_scale(schedule, scalar, b)
                    for schedule, scalar in zip(schedules, scalars)]

        for i in range(b):
            noise_latents[i] = operation(x[i], *(values[i].item() for values in per_item))

        return ({"samples": noise_latents},)
class latent_normalize_channels:
    """ComfyUI node that normalizes, centers, or standardizes a latent tensor.

    ``mode`` selects which statistics are used: ``"full"`` takes a single
    mean/std over the whole tensor, ``"channels"`` takes a separate mean/std
    for every (batch, channel) slice.
    """

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "latent": ("LATENT", ),
                "mode": (["full", "channels"],),
                "operation": (["normalize", "center", "standardize"],),
            },
        }

    RETURN_TYPES = ("LATENT",)
    RETURN_NAMES = ("passthrough",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/latents"

    def main(self, latent, mode, operation):
        """Return a copy of ``latent`` whose "samples" were transformed.

        Args:
            latent: dict holding the tensor under the "samples" key.
            mode: "full" or "channels"; any other value leaves the samples
                untouched.
            operation: "normalize" ((x - mean) / std), "center" (x - mean) or
                "standardize" (x / std); any other value leaves the samples
                untouched.

        Returns:
            A 1-tuple containing a shallow copy of ``latent`` with the new
            "samples" tensor. The input tensor is never modified.

        Raises:
            ValueError: in "channels" mode when the tensor has fewer than
                three dimensions (nothing to reduce over per channel).
        """
        x = latent["samples"]

        if mode == "full":
            mean, std = x.mean(), x.std()
        elif mode == "channels":
            if x.ndim < 3:
                raise ValueError(
                    "channels mode expects a tensor shaped (batch, channels, ...), "
                    f"got {x.ndim} dimension(s)"
                )
            # Reduce over everything after (batch, channel) so that latents
            # with extra trailing dimensions are handled as well as 4-D ones.
            reduce_dims = tuple(range(2, x.ndim))
            mean = x.mean(dim=reduce_dims, keepdim=True)
            std = x.std(dim=reduce_dims, keepdim=True)
        else:
            mean = std = None

        if mean is not None:
            # Out-of-place arithmetic: the original assigned into x[i, j],
            # which silently modified the caller's tensor.
            if operation == "normalize":
                x = (x - mean) / std
            elif operation == "center":
                x = x - mean
            elif operation == "standardize":
                x = x / std

        # Keep any additional entries of the latent dict alongside "samples".
        result = latent.copy()
        result["samples"] = x
        return (result,)
def main(self, model, latent_target, mask_target=None, latent_source=None, mask_source=None, extra_options=""):
    """Match per-channel statistics of ``latent_target`` to ``latent_source``.

    Behaviour is steered by flags looked up in ``extra_options``:
    ``disable_process_latent``, ``enable_std``, ``disable_mean``,
    ``disable_masks`` and the ``exclude_channels`` list.

    Only batch index 0 is matched; any further batch entries of the returned
    tensor stay zero. Channels listed in ``exclude_channels`` are copied from
    the target unchanged.

    Args:
        model: object exposing ``model.model.process_latent_in`` /
            ``process_latent_out``.
        latent_target: dict with the tensor to adjust under "samples".
        mask_target: optional mask; an all-ones mask is used when omitted.
        latent_source: dict with the reference tensor under "samples".
            Required (the default only exists so the optional inputs before
            and after it can be omitted by keyword callers).
        mask_source: optional mask; an all-ones mask is used when omitted.
        extra_options: option string parsed by the extra-options helpers.

    Returns:
        A 1-tuple with ``{"samples": matched}`` cast back to the target dtype.

    Raises:
        ValueError: if ``latent_source`` is not supplied.
    """
    if latent_source is None:
        raise ValueError("latent_source is required")

    dtype = latent_target['samples'].dtype

    # The option name has to be passed as a string. The previous code passed
    # the not-yet-assigned local, which raised UnboundLocalError on every call.
    exclude_channels = get_extra_options_list("exclude_channels", -1, extra_options)
    # NOTE(review): assumes the helper returns an int or a collection of ints
    # - confirm against helper.get_extra_options_list.
    if not isinstance(exclude_channels, (list, tuple, set, frozenset)):
        exclude_channels = [exclude_channels]

    # The flags do not change between channels, so look them up once.
    process_latent = not extra_options_flag("disable_process_latent", extra_options)
    use_std        = extra_options_flag("enable_std", extra_options)
    use_mean       = not extra_options_flag("disable_mean", extra_options)
    use_masks      = not extra_options_flag("disable_masks", extra_options)

    if process_latent:
        x_target = model.model.process_latent_in(latent_target['samples']).clone().to(torch.float64)
        x_source = model.model.process_latent_in(latent_source['samples']).clone().to(torch.float64)
    else:
        x_target = latent_target['samples'].clone()
        x_source = latent_source['samples'].clone()

    def _prepare_mask(mask):
        # Broadcast a mask over the channels and resize it to the target's
        # spatial size; a missing mask selects everything.
        if mask is None:
            return torch.ones_like(x_target)
        mask = mask.unsqueeze(1)
        mask = mask.repeat(1, x_target.shape[1], 1, 1)
        mask = F.interpolate(mask, size=(x_target.shape[2], x_target.shape[3]), mode='bilinear', align_corners=False)
        return mask.to(x_target.dtype).to(x_target.device)

    mask_target = _prepare_mask(mask_target)
    mask_source = _prepare_mask(mask_source)

    # Only positions where the (interpolated) mask is exactly 1 are matched.
    hard_mask_target    = (mask_target == 1) * mask_target
    x_target_masked     = x_target * hard_mask_target
    x_target_masked_inv = x_target - x_target_masked

    # Whole-tensor statistics, used by the "disable_masks" path.
    # NOTE(review): as in the original, a disabled mean is replaced by 1 (not
    # 0) and the source-wide mean/std are never neutralised - confirm intent.
    x_target_mean = x_target.mean() if use_mean else 1
    x_target_std  = x_target.std()  if use_std  else 1
    x_source_mean = x_source.mean()
    x_source_std  = x_source.std()

    x_matched = torch.zeros_like(x_target)
    for n in range(x_matched.shape[1]):
        if n in exclude_channels:
            x_matched[0][n] = x_target[0][n]
            continue

        if not use_masks:
            x_matched[0][n] = (x_target[0][n] - x_target_mean) / x_target_std
            x_matched[0][n] = (x_matched[0][n] * x_source_std) + x_source_mean
            continue

        target_values = x_target[0][n][mask_target[0][n] == 1]
        source_values = x_source[0][n][mask_source[0][n] == 1]

        target_values_mean = target_values.mean() if use_mean else 1
        source_values_mean = source_values.mean() if use_mean else 1
        target_values_std  = target_values.std()  if use_std  else 1
        source_values_std  = source_values.std()  if use_std  else 1

        x_matched[0][n] = (x_target_masked[0][n] - target_values_mean) / target_values_std
        x_matched[0][n] = (x_matched[0][n] * source_values_std) + source_values_mean
        # Unmasked area keeps the original target values.
        x_matched[0][n] = x_target_masked_inv[0][n] + x_matched[0][n] * hard_mask_target[0][n]

    if process_latent:
        x_matched = model.model.process_latent_out(x_matched).clone()

    return ({"samples": x_matched.to(dtype)}, )
class SetImageSizeWithScale:
    """Node that outputs a width/height pair plus the same pair scaled."""

    @classmethod
    def INPUT_TYPES(cls):
        required_inputs = {
            "width" : ("INT", {"default": 1024, "min": 1, "max": 10000}),
            "height": ("INT", {"default": 1024, "min": 1, "max": 10000}),
            "scale_by": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10000, "step":0.01}),
        }
        return {"required": required_inputs, "optional": {}}

    RETURN_TYPES = ("INT", "INT", "INT", "INT",)
    RETURN_NAMES = ("width","height","width_scaled","height_scaled",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/images"

    DESCRIPTION = "Generate a pair of integers for image sizes."

    def main(self, width, height, scale_by):
        """Return (width, height, int(width*scale_by), int(height*scale_by))."""
        # int() truncates toward zero, exactly as the scaled outputs always did.
        scaled_sides = tuple(int(side * scale_by) for side in (width, height))
        return (width, height) + scaled_sides
class TextShuffle:
    """Node that splits text on a separator and shuffles the pieces."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "text": ("STRING", {"forceInput": True}),
                "separator": ("STRING", {"default": " ", "multiline": False}),
                "seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}),
            },
            "optional": {
            }
        }

    RETURN_TYPES = ("STRING",)
    RETURN_NAMES = ("shuffled_text",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/text"

    def main(self, text, separator, seed, ):
        """Shuffle the ``separator``-delimited parts of ``text``.

        Args:
            text: the string to shuffle.
            separator: delimiter used to split and re-join; an empty
                separator shuffles the individual characters.
            seed: seed for the shuffle; ``None`` gives a non-deterministic
                order.

        Returns:
            A 1-tuple with the shuffled text.
        """
        # A private generator yields the same order random.seed(seed) +
        # random.shuffle() would, without reseeding the process-wide RNG that
        # other code may be relying on.
        rng = random.Random(seed)

        # str.split("") raises ValueError, so treat "" as "split every char".
        parts = text.split(separator) if separator else list(text)
        rng.shuffle(parts)

        return (separator.join(parts), )
class TextShuffleAndTruncate:
    """Node that shuffles text, then truncates it by word and token count."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "text": ("STRING", {"forceInput": True}),
                "separator": ("STRING", {"default": " ", "multiline": False}),
                "truncate_words_to": ("INT", {"default": 77, "min": 1, "max": 10000}),
                "truncate_tokens_to": ("INT", {"default": 77, "min": 1, "max": 10000}),
                "seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}),
            },
            "optional": {
                "clip": ("CLIP", ),
            }
        }

    RETURN_TYPES = ("STRING","STRING","STRING","STRING","STRING",)
    RETURN_NAMES = ("shuffled_text", "text_words", "text_clip_l", "text_clip_g", "text_t5",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/text"

    def main(self, text, separator, truncate_words_to, truncate_tokens_to, seed, clip=None):
        """Shuffle ``text`` and produce word- and token-truncated variants.

        Args:
            text: input string.
            separator: delimiter used for the shuffle; an empty separator
                shuffles individual characters.
            truncate_words_to: number of whitespace-separated words kept.
            truncate_tokens_to: token budget handed to ``truncate_tokens``.
            seed: seed for the shuffle; ``None`` gives a random order.
            clip: optional CLIP object. Without it no tokenizer is available,
                so the three token-truncated outputs are ``None``.

        Returns:
            (shuffled_text, text_words, text_clip_l, text_clip_g, text_t5)
        """
        # Private generator: same order as random.seed(seed) + random.shuffle,
        # but the process-wide RNG state is left alone.
        rng = random.Random(seed)

        # str.split("") raises ValueError, so treat "" as "split every char".
        parts = text.split(separator) if separator else list(text)
        rng.shuffle(parts)
        shuffled_text = separator.join(parts)

        truncated_text = " ".join(shuffled_text.split()[:truncate_words_to])

        # clip is an optional input; the original dereferenced clip.tokenizer
        # unconditionally and crashed with AttributeError when it was absent.
        if clip is None:
            return (shuffled_text, truncated_text, None, None, None,)

        t5_name = "t5xxl"
        if hasattr(clip.tokenizer, "clip_name") and clip.tokenizer.clip_name == "pile_t5xl":
            t5_name = "pile_t5xl"

        text_clip_l = truncate_tokens(truncated_text, truncate_tokens_to, clip, "l", 49407)
        text_clip_g = truncate_tokens(truncated_text, truncate_tokens_to, clip, "g", 49407)
        text_t5     = truncate_tokens(truncated_text, truncate_tokens_to, clip, t5_name, 1)

        return (shuffled_text, truncated_text, text_clip_l, text_clip_g, text_t5,)
class TextConcatenate:
    """Node that joins two optional strings with an optional separator."""

    @classmethod
    def INPUT_TYPES(cls):
        optional_inputs = {
            "text_1": ("STRING", {"multiline": False, "default": "", "forceInput": True}),
            "text_2": ("STRING", {"multiline": False, "default": "", "forceInput": True}),
            "separator": ("STRING", {"multiline": False, "default": ""}),
        }
        return {"required": {}, "optional": optional_inputs}

    RETURN_TYPES = ("STRING",)
    RETURN_NAMES = ("text",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/text"

    def main(self, text_1="", text_2="", separator=""):
        """Return a 1-tuple holding text_1, separator and text_2 joined in order."""
        pieces = (text_1, separator, text_2)
        return ("".join(pieces), )
class set_precision:
    """Node that casts a latent's samples to fp16/fp32/fp64."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "latent_image": ("LATENT", ),
                "precision": (["16", "32", "64"], ),
                "set_default": ("BOOLEAN", {"default": False})
            },
        }

    RETURN_TYPES = ("LATENT",)
    RETURN_NAMES = ("passthrough",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/precision"

    def main(self, precision="32", latent_image=None, set_default=False):
        """Cast ``latent_image["samples"]`` to the requested float width.

        Args:
            precision: "16", "32" or "64".
            latent_image: dict holding the tensor under "samples".
            set_default: when exactly ``True``, also make the chosen dtype
                torch's process-wide default dtype.

        Returns:
            A 1-tuple with a shallow copy of ``latent_image`` whose "samples"
            entry is the cast tensor; other entries are carried over.

        Raises:
            ValueError: if ``precision`` is not one of the supported values
                (the original failed later with an unbound local instead).
        """
        dtype_by_width = {
            "16": torch.float16,
            "32": torch.float32,
            "64": torch.float64,
        }
        if precision not in dtype_by_width:
            raise ValueError(f"Unsupported precision: {precision!r}")
        dtype = dtype_by_width[precision]

        # Cast first so a bad input cannot leave the global default changed.
        x = latent_image["samples"].to(dtype)

        if set_default is True:
            torch.set_default_dtype(dtype)

        result = latent_image.copy()
        result["samples"] = x
        return (result, )
class set_precision_advanced:
    """Node that sets global/cast precision and emits the latent in several dtypes."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "latent_image": ("LATENT", ),
                "global_precision": (["64", "32", "16"], ),
                "shark_precision": (["64", "32", "16"], ),
            },
        }

    RETURN_TYPES = ("LATENT","LATENT","LATENT","LATENT","LATENT",)
    RETURN_NAMES = ("passthrough", "latent_cast_to_global", "latent_16", "latent_32", "latent_64", )
    FUNCTION = "main"
    CATEGORY = "RES4LYF/precision"

    def main(self, global_precision="32", shark_precision="64", latent_image=None):
        """Apply the precision settings and return the latent in each dtype.

        Side effects: sets torch's default dtype from ``global_precision`` and
        calls ``precision_tool.set_cast_type`` with the name derived from
        ``shark_precision``.

        Returns:
            Five latent dicts: the untouched samples, the samples cast to the
            global dtype, and fp16 / fp32 / fp64 casts. Each is a shallow copy
            of ``latent_image`` with only "samples" replaced.
        """
        dtype_map = {
            "16": torch.float16,
            "32": torch.float32,
            "64": torch.float64
        }
        precision_map = {
            "16": 'fp16',
            "32": 'fp32',
            "64": 'fp64'
        }

        target_dtype = dtype_map[global_precision]
        torch.set_default_dtype(target_dtype)
        precision_tool.set_cast_type(precision_map[shark_precision])

        samples = latent_image["samples"]

        def _with_samples(tensor):
            # Build a fresh dict per output. The original wrote the cast
            # tensor back into the caller's latent_image dict.
            out = latent_image.copy()
            out["samples"] = tensor
            return out

        # Tensor.to() returns the tensor itself when the dtype already
        # matches, so no explicit dtype comparison is needed.
        return (
            _with_samples(samples),
            _with_samples(samples.to(target_dtype)),
            _with_samples(samples.to(torch.float16)),
            _with_samples(samples.to(torch.float32)),
            _with_samples(samples.to(torch.float64)),
        )
def calculate_sigmas_RES4LYF(model_sampling, scheduler_name, steps):
    """Sigma calculation that adds the "beta57" scheduler.

    "beta57" is computed with comfy's beta scheduler using alpha=0.5 and
    beta=0.7; every other scheduler name is delegated to the saved original
    ``calculate_sigmas``.
    """
    if scheduler_name != "beta57":
        return original_calculate_sigmas(model_sampling, scheduler_name, steps)
    return comfy.samplers.beta_scheduler(model_sampling, steps, alpha=0.5, beta=0.7)
def get_ext_dir(subpath=None, mkdir=False):
    """Return the absolute path of this module's directory, or of ``subpath`` under it.

    When ``mkdir`` is true and the resulting path does not exist yet, it is
    created (including parents) before being returned.
    """
    module_dir = os.path.dirname(__file__)
    joined = module_dir if subpath is None else os.path.join(module_dir, subpath)
    resolved = os.path.abspath(joined)

    if mkdir and not os.path.exists(resolved):
        os.makedirs(resolved)
    return resolved
def link_js(src, dst):
    """Link ``dst`` to the directory ``src``.

    On Windows a directory junction is attempted first; if that fails, or on
    any other platform, a symbolic link is created.

    Returns:
        True if a junction or symlink was created, False if linking failed
        (the failure is logged with its traceback).
    """
    src = os.path.abspath(src)
    dst = os.path.abspath(dst)

    if os.name == "nt":
        try:
            import _winapi
            _winapi.CreateJunction(src, dst)
            return True
        except Exception:
            # Best effort: fall through to a regular symlink. Catching
            # Exception instead of a bare except lets KeyboardInterrupt and
            # SystemExit propagate.
            pass

    try:
        os.symlink(src, dst)
        return True
    except Exception:
        # Local import: the surrounding module does not import logging.
        import logging
        logging.exception("Failed to link %s -> %s", dst, src)
        return False
async def download_to_file(url, destination, update_callback=None, is_ext_subpath=True, session=None):
    """Download ``url`` into the file ``destination``.

    Args:
        url: address to fetch.
        destination: output path; resolved through ``get_ext_dir`` when
            ``is_ext_subpath`` is true, otherwise used as given.
        update_callback: forwarded to ``download``.
        is_ext_subpath: whether ``destination`` is resolved via ``get_ext_dir``.
        session: forwarded to ``download``.
    """
    if is_ext_subpath:
        destination = get_ext_dir(destination)
    with open(destination, mode='wb') as f:
        # download() is a coroutine function: without the await it was only
        # created, never run, and the file was closed while still empty.
        await download(url, f, update_callback, session)
def is_inside_dir(root_dir, check_path):
    """Return True when ``check_path`` is ``root_dir`` itself or lies beneath it.

    Args:
        root_dir: Directory acting as the containment boundary.
        check_path: Path to test. A relative path is interpreted relative to
            ``root_dir``; an absolute path is used as given.

    Returns:
        bool: True if the normalized ``check_path`` is inside ``root_dir``.

    The comparison is purely lexical (``os.path.abspath``); symlinks are not
    resolved.
    """
    root_dir = os.path.abspath(root_dir)
    if not os.path.isabs(check_path):
        check_path = os.path.join(root_dir, check_path)
    # Normalize in every case: ``commonpath`` does not collapse ``..`` segments,
    # so an absolute path such as ``<root>/../elsewhere`` would otherwise be
    # reported as being inside ``root_dir``.
    check_path = os.path.abspath(check_path)
    try:
        return os.path.commonpath([check_path, root_dir]) == root_dir
    except ValueError:
        # Raised when the paths cannot share a prefix at all (e.g. different
        # drives on Windows); such a path is by definition outside the root.
        return False
def get_data_from_step(x: Tensor, x_next: Tensor, sigma: Tensor, sigma_next: Tensor) -> Tensor:
    """Solve a single step for the noise-free component shared by both states.

    If ``x = d + sigma * e`` and ``x_next = d + sigma_next * e`` for the same
    ``d`` and ``e``, the value returned is ``d``.

    Args:
        x: State at noise level ``sigma``.
        x_next: State at noise level ``sigma_next``.
        sigma: Noise level belonging to ``x``.
        sigma_next: Noise level belonging to ``x_next``.

    Returns:
        ``(sigma_next * x - sigma * x_next) / (sigma_next - sigma)``. No guard is
        applied for ``sigma_next == sigma`` (division by zero).
    """
    weighted_difference = sigma_next * x - sigma * x_next
    step_size = sigma_next - sigma
    return weighted_difference / step_size
ExtraOptions(extra_options) self.reorder_tableau_indices : list[int] = self.EO("reorder_tableau_indices", [-1]) self.LINEAR_ANCHOR_X_0 : float = noise_anchor self.tile_sizes : Optional[List[Tuple[int,int]]] = None self.tile_cnt : int = 0 self.latent_compression_ratio : int = 8 @staticmethod def is_exponential(rk_type:str) -> bool: if rk_type.startswith(( "res", "dpmpp", "ddim", "pec", "etdrk", "lawson", "abnorsett", )): return True else: return False @staticmethod def create(model, rk_type : str, noise_anchor : float = 1.0, noise_boost_normalize : bool = True, model_device : str = 'cuda', work_device : str = 'cpu', dtype : torch.dtype = torch.float64, extra_options : str = "" ) -> "Union[RK_Method_Exponential, RK_Method_Linear]": if RK_Method_Beta.is_exponential(rk_type): return RK_Method_Exponential(model, rk_type, noise_anchor, noise_boost_normalize, model_device, work_device, dtype, extra_options) else: return RK_Method_Linear (model, rk_type, noise_anchor, noise_boost_normalize, model_device, work_device, dtype, extra_options) def __call__(self): raise NotImplementedError("This method got clownsharked!") def model_epsilon(self, x:Tensor, sigma:Tensor, **extra_args) -> Tuple[Tensor, Tensor]: s_in = x.new_ones([x.shape[0]]) denoised = self.model(x, sigma * s_in, **extra_args) denoised = self.calc_cfg_channelwise(denoised) eps = (x - denoised) / (sigma * s_in).view(x.shape[0], 1, 1, 1) #return x0 ###################################THIS WORKS ONLY WITH THE MODEL SAMPLING PATCH return eps, denoised def model_denoised(self, x:Tensor, sigma:Tensor, **extra_args) -> Tensor: s_in = x.new_ones([x.shape[0]]) control_tiles = None y0_style_pos = self.extra_args['model_options']['transformer_options'].get("y0_style_pos") y0_style_neg = self.extra_args['model_options']['transformer_options'].get("y0_style_neg") y0_style_pos_tile, sy0_style_neg_tiles = None, None if self.EO("tile_model_calls"): tile_h = self.EO("tile_h", 128) tile_w = self.EO("tile_w", 128) denoised_tiles = [] 
tiles, orig_shape, grid, strides = tile_latent(x, tile_size=(tile_h,tile_w)) for i in range(tiles.shape[0]): tile = tiles[i].unsqueeze(0) denoised_tile = self.model(tile, sigma * s_in, **extra_args) denoised_tiles.append(denoised_tile) denoised_tiles = torch.cat(denoised_tiles, dim=0) denoised = untile_latent(denoised_tiles, orig_shape, grid, strides) elif self.tile_sizes is not None: tile_h_full = self.tile_sizes[self.tile_cnt % len(self.tile_sizes)][0] tile_w_full = self.tile_sizes[self.tile_cnt % len(self.tile_sizes)][1] if tile_h_full == -1: tile_h = x.shape[-2] tile_h_full = tile_h * self.latent_compression_ratio else: tile_h = tile_h_full // self.latent_compression_ratio if tile_w_full == -1: tile_w = x.shape[-1] tile_w_full = tile_w * self.latent_compression_ratio else: tile_w = tile_w_full // self.latent_compression_ratio #tile_h = tile_h_full // self.latent_compression_ratio #tile_w = tile_w_full // self.latent_compression_ratio self.tile_cnt += 1 #if len(self.tile_sizes) == 1 and self.tile_cnt % 2 == 1: # tile_h, tile_w = tile_w, tile_h # tile_h_full, tile_w_full = tile_w_full, tile_h_full if (self.tile_cnt // len(self.tile_sizes)) % 2 == 1 and self.EO("tiles_autorotate"): tile_h, tile_w = tile_w, tile_h tile_h_full, tile_w_full = tile_w_full, tile_h_full xt_negative = self.model.inner_model.conds.get('xt_negative', self.model.inner_model.conds.get('negative')) negative_control = xt_negative[0].get('control') if negative_control is not None and hasattr(negative_control, 'cond_hint_original'): negative_cond_hint_init = negative_control.cond_hint.clone() if negative_control.cond_hint is not None else None xt_positive = self.model.inner_model.conds.get('xt_positive', self.model.inner_model.conds.get('positive')) positive_control = xt_positive[0].get('control') if positive_control is not None and hasattr(positive_control, 'cond_hint_original'): positive_cond_hint_init = positive_control.cond_hint.clone() if positive_control.cond_hint is not None else None if 
positive_control.cond_hint_original.shape[-1] != x.shape[-2] * self.latent_compression_ratio or positive_control.cond_hint_original.shape[-2] != x.shape[-1] * self.latent_compression_ratio: positive_control_pretile = comfy.utils.bislerp(positive_control.cond_hint_original.clone().to(torch.float16).to('cuda'), x.shape[-1] * self.latent_compression_ratio, x.shape[-2] * self.latent_compression_ratio) positive_control.cond_hint_original = positive_control_pretile.to(positive_control.cond_hint_original) positive_control_pretile = positive_control.cond_hint_original.clone().to(torch.float16).to('cuda') control_tiles, control_orig_shape, control_grid, control_strides = tile_latent(positive_control_pretile, tile_size=(tile_h_full,tile_w_full)) control_tiles = control_tiles denoised_tiles = [] tiles, orig_shape, grid, strides = tile_latent(x, tile_size=(tile_h,tile_w)) if y0_style_pos is not None: y0_style_pos_tiles, _, _, _ = tile_latent(y0_style_pos, tile_size=(tile_h,tile_w)) if y0_style_neg is not None: y0_style_neg_tiles, _, _, _ = tile_latent(y0_style_neg, tile_size=(tile_h,tile_w)) for i in range(tiles.shape[0]): tile = tiles[i].unsqueeze(0) if control_tiles is not None: positive_control.cond_hint = control_tiles[i].unsqueeze(0).to(positive_control.cond_hint) if negative_control is not None: negative_control.cond_hint = control_tiles[i].unsqueeze(0).to(positive_control.cond_hint) if y0_style_pos is not None: self.extra_args['model_options']['transformer_options']['y0_style_pos'] = y0_style_pos_tiles[i].unsqueeze(0) if y0_style_neg is not None: self.extra_args['model_options']['transformer_options']['y0_style_neg'] = y0_style_neg_tiles[i].unsqueeze(0) denoised_tile = self.model(tile, sigma * s_in, **extra_args) denoised_tiles.append(denoised_tile) denoised_tiles = torch.cat(denoised_tiles, dim=0) denoised = untile_latent(denoised_tiles, orig_shape, grid, strides) else: denoised = self.model(x, sigma * s_in, **extra_args) if control_tiles is not None: 
def update_transformer_options(self,
        transformer_options : Optional[dict] = None,
        ):
    """Merge ``transformer_options`` into ``extra_args['model_options']['transformer_options']``.

    Missing intermediate dictionaries are created on demand. Existing keys are
    overwritten by the values in ``transformer_options``.

    Args:
        transformer_options: Mapping to merge. ``None`` (the default) is a
            no-op; previously it reached ``dict.update(None)`` and raised
            ``TypeError``.
    """
    # Same guard the __call__ methods of the subclasses apply before merging.
    if transformer_options is None:
        return

    model_options = self.extra_args.setdefault("model_options", {})
    model_options.setdefault("transformer_options", {}).update(transformer_options)
    return
def u_k_einsum(self, row:int, k_prev:Tensor) -> Tensor:
    """Weighted sum of previous-step entries using row ``row`` of ``self.U``.

    Args:
        row: Row of ``self.U`` supplying the weights.
        k_prev: Stack of earlier values; only the first ``self.cols`` entries
            along dim 0 are used. May be None.

    Returns:
        ``sum_i U[row, i] * k_prev[i]``, or the integer ``0`` when either
        ``self.U`` or ``k_prev`` is None.
    """
    if self.U is None or k_prev is None:
        return 0

    weights = self.U[row]
    history = k_prev[:self.cols]
    return torch.einsum('i, i... -> ...', weights, history)
def zum_tableau(self, k:Tensor, k_prev:Tensor=None,) -> Tensor:
    """Apply the whole ``A`` (and, when available, ``U``) matrix in one contraction.

    Args:
        k: Stack of current values; the first ``self.cols`` entries along dim 0
            are contracted with the columns of ``self.A``.
        k_prev: Optional stack of earlier values, contracted with ``self.U``.

    Returns:
        Tensor whose entry ``i`` is ``sum_j A[i, j] * k[j]``, plus
        ``sum_j U[i, j] * k_prev[j]`` when both ``self.U`` and ``k_prev`` are
        present.
    """
    stage_terms = torch.einsum('ij, j... -> i...', self.A, k[:self.cols])

    if self.U is None or k_prev is None:
        history_terms = 0
    else:
        history_terms = torch.einsum('ij, j... -> i...', self.U, k_prev[:self.cols])

    return stage_terms + history_terms
h).item() ci = [c1,c2] φ = Phi(h, ci, analytic_solution=True) b2 = φ(2)/c2 b1 = φ(1) - b2 eps_2 = denoised_[1] - x_0 eps_1 = denoised_[0] - x_0 h_a_k_sum = h * (b1 * eps_1 + b2 * eps_2) x = torch.exp(-h) * x_0 + h_a_k_sum denoised = x_0 + (sigma / (sigma - sigma_down)) * h_a_k_sum return x, denoised @staticmethod def calculate_res_3m_step( x_0 : Tensor, denoised_ : Tensor, sigma_down : Tensor, sigmas : Tensor, step : int, ) -> Tuple[Tensor, Tensor]: if denoised_[3].sum() == 0: return None, None sigma = sigmas[step] sigma_prev = sigmas[step-1] sigma_prev2 = sigmas[step-2] h = -torch.log(sigma_down/sigma) h_prev = -torch.log(sigma/sigma_prev) h_prev2 = -torch.log(sigma/sigma_prev2) c1 = 0 c2 = (-h_prev / h).item() c3 = (-h_prev2 / h).item() ci = [c1,c2,c3] φ = Phi(h, ci, analytic_solution=True) gamma = (3*(c3**3) - 2*c3) / (c2*(2 - 3*c2)) b3 = (1 / (gamma * c2 + c3)) * φ(2, -h) b2 = gamma * b3 b1 = φ(1, -h) - b2 - b3 eps_3 = denoised_[2] - x_0 eps_2 = denoised_[1] - x_0 eps_1 = denoised_[0] - x_0 h_a_k_sum = h * (b1 * eps_1 + b2 * eps_2 + b3 * eps_3) x = torch.exp(-h) * x_0 + h_a_k_sum denoised = x_0 + (sigma / (sigma - sigma_down)) * h_a_k_sum return x, denoised def swap_rk_type_at_step_or_threshold(self, x_0 : Tensor, data_prev_ : Tensor, NS, sigmas : Tensor, step : Tensor, rk_swap_step : int, rk_swap_threshold : float, rk_swap_type : str, rk_swap_print : bool, ) -> str: if rk_swap_type == "": if self.EXPONENTIAL: rk_swap_type = "res_3m" else: rk_swap_type = "deis_3m" if step > rk_swap_step and self.rk_type != rk_swap_type: RESplain("Switching rk_type to:", rk_swap_type) self.rk_type = rk_swap_type if RK_Method_Beta.is_exponential(rk_swap_type): self.__class__ = RK_Method_Exponential else: self.__class__ = RK_Method_Linear if rk_swap_type in get_implicit_sampler_name_list(nameOnly=True): self.IMPLICIT = True self.row_offset = 0 NS.row_offset = 0 else: self.IMPLICIT = False self.row_offset = 1 NS.row_offset = 1 NS.h_fn = self.h_fn NS.t_fn = self.t_fn NS.sigma_fn = 
def bong_iter(self,
        x_0       : Tensor,
        x_        : Tensor,
        eps_      : Tensor,
        eps_prev_ : Tensor,
        data_     : Tensor,
        sigma     : Tensor,
        s_        : Tensor,
        row       : int,
        row_offset: int,
        h         : Tensor,
        step      : int,
        ) -> Tuple[Tensor, Tensor, Tensor]:
    """Iteratively re-derive ``x_0``, the earlier rows of ``x_`` and their epsilons.

    Starting from the already computed state ``x_[row+row_offset]``, the loop
    repeatedly (a) back-solves ``x_0`` from that state, (b) rebuilds every
    earlier row ``x_[rr]`` from the new ``x_0`` and (c) recomputes ``eps_[rr]``
    with ``self.get_epsilon``. The loop count is fixed at 100.

    Behaviour is tuned through ``self.EO`` keys: ``bong_start_step`` /
    ``bong_stop_step`` (active step window), ``bong_iter_max_row_full``,
    ``bong_iter_lock_x_0_ch_means``, ``bong_iter_lock_x_row_ch_means``,
    ``zonkytar`` and ``bong_strength``.

    Args:
        x_0: State at the start of the step.
        x_: Per-row states; rows below ``row+row_offset`` are overwritten in place.
        eps_: Per-row epsilons; rows below ``row+row_offset`` are overwritten in place.
        eps_prev_: Previous-step epsilons, forwarded to ``self.zum``.
        data_: Per-row denoised values, forwarded to ``self.get_epsilon``.
        sigma: Noise level belonging to ``x_0``.
        s_: Per-row noise levels.
        row: Row that was just computed.
        row_offset: Offset added to ``row`` to address ``x_`` / the tableau.
        h: Step size multiplying the tableau sums.
        step: Current step index, compared against the start/stop window.

    Returns:
        ``(x_0, x_, eps_)``. Inputs are returned untouched when ``step`` is
        outside the configured window; nothing is iterated when
        ``row >= bong_iter_max_row`` or ``self.multistep_stages != 0``.
    """
    # Dimensions averaged over by the optional mean-locking below.
    # NOTE(review): norm_dim stays unbound for any rank other than 4 or 5; that only
    # matters when one of the "lock ... ch_means" options is enabled.
    if x_0.ndim == 4:
        norm_dim = (-2,-1)
    elif x_0.ndim == 5:
        norm_dim = (-4,-2,-1)

    # Outside the configured step window: hand everything back unchanged.
    if self.EO("bong_start_step", 0) > step or step > self.EO("bong_stop_step", 10000):
        return x_0, x_, eps_

    bong_iter_max_row = self.rows - row_offset
    if self.EO("bong_iter_max_row_full"):
        bong_iter_max_row = self.rows

    # Remember the means before iterating so they can be restored each pass.
    if self.EO("bong_iter_lock_x_0_ch_means"):
        x_0_ch_means = x_0.mean(dim=norm_dim, keepdim=True)

    if self.EO("bong_iter_lock_x_row_ch_means"):
        x_row_means = []
        for rr in range(row+row_offset):
            x_row_mean = x_[rr].mean(dim=norm_dim, keepdim=True)
            x_row_means.append(x_row_mean)

    if row < bong_iter_max_row and self.multistep_stages == 0:
        bong_strength = self.EO("bong_strength", 1.0)

        # Snapshots are only needed when the result is blended with the originals.
        if bong_strength != 1.0:
            x_0_tmp = x_0.clone()
            x_tmp_ = x_.clone()
            eps_tmp_ = eps_.clone()

        for i in range(100):
            # Back-solve x_0 from the current row's state.
            x_0 = x_[row+row_offset] - h * self.zum(row+row_offset, eps_, eps_prev_)

            if self.EO("bong_iter_lock_x_0_ch_means"):
                x_0 = x_0 - x_0.mean(dim=norm_dim, keepdim=True) + x_0_ch_means

            # Rebuild every earlier row from the updated x_0 (written in place).
            for rr in range(row+row_offset):
                x_[rr] = x_0 + h * self.zum(rr, eps_, eps_prev_)

            if self.EO("bong_iter_lock_x_row_ch_means"):
                for rr in range(row+row_offset):
                    x_[rr] = x_[rr] - x_[rr].mean(dim=norm_dim, keepdim=True) + x_row_means[rr]

            # Refresh the epsilons so the next pass uses consistent values.
            for rr in range(row+row_offset):
                if self.EO("zonkytar"):
                    #eps_[rr] = self.get_unsample_epsilon(x_[rr], x_0, data_[rr], sigma, s_[rr])
                    # Same call as the default branch but with x_[rr] and x_0 swapped.
                    eps_[rr] = self.get_epsilon(x_[rr], x_0, data_[rr], sigma, s_[rr])
                else:
                    eps_[rr] = self.get_epsilon(x_0, x_[rr], data_[rr], sigma, s_[rr])

        # Linear blend between the pre-iteration snapshots and the iterated result.
        # The in-place row writes above have already touched the tensors passed in;
        # the blended x_ / eps_ are new tensors.
        if bong_strength != 1.0:
            x_0 = x_0_tmp + bong_strength * (x_0 - x_0_tmp)
            x_ = x_tmp_ + bong_strength * (x_ - x_tmp_)
            eps_ = eps_tmp_ + bong_strength * (eps_ - eps_tmp_)

    return x_0, x_, eps_
newton_name == "pre": start = row elif newton_name == "post": start = row + 1 if newton_iter_anchor >= 0: eps_anchor = eps_[newton_iter_anchor].clone() if newton_iter_anchor_x_all: x_orig_ = x_.clone() for n_iter in range(newton_iter): for r in range(start, stop): if newton_iter_anchor >= 0: eps_[newton_iter_anchor] = eps_anchor.clone() if newton_iter_anchor_x_all: x_ = x_orig_.clone() x_tmp, eps_tmp = x_[r].clone(), eps_[r].clone() seq_start, seq_stop = r, r+1 if newton_iter_sequence == "double": seq_start, seq_stop = start, stop for r_ in range(seq_start, seq_stop): x_[r_] = x_0 + h * self.zum(r_, eps_, eps_prev_) for r_ in range(seq_start, seq_stop): if newton_iter_type == "from_data": data_[r_] = get_data_from_step(x_0, x_[r_], sigma, s_[r_]) eps_ [r_] = self.get_epsilon(x_0, x_[r_], data_[r_], sigma, s_[r_]) elif newton_iter_type == "from_step": eps_ [r_] = get_epsilon_from_step(x_0, x_[r_], sigma, s_[r_]) elif newton_iter_type == "from_alt": eps_ [r_] = x_0/sigma - x_[r_]/s_[r_] elif newton_iter_type == "from_epsilon": eps_ [r_] = self.get_epsilon(x_0, x_[r_], data_[r_], sigma, s_[r_]) if self.EO(newton_iter_name + "_opt"): opt_timing, opt_type, opt_subtype = self.EO(newton_iter_name+"_opt", [str]) opt_start, opt_stop = 0, self.rows+row_b_offset if opt_timing == "early": opt_stop = row + 1 elif opt_timing == "late": opt_start = row + 1 for r2 in range(opt_start, opt_stop): if r_ != r2: if opt_subtype == "a": eps_a = eps_[r2] eps_b = eps_[r_] elif opt_subtype == "b": eps_a = eps_[r_] eps_b = eps_[r2] if opt_type == "ortho": eps_ [r_] = get_orthogonal(eps_a, eps_b) elif opt_type == "collin": eps_ [r_] = get_collinear (eps_a, eps_b) elif opt_type == "proj": eps_ [r_] = get_collinear (eps_a, eps_b) + get_orthogonal(eps_b, eps_a) x_ [r_] = x_tmp + newton_iter_mixing_rate * (x_ [r_] - x_tmp) eps_[r_] = eps_tmp + newton_iter_mixing_rate * (eps_[r_] - eps_tmp) if newton_iter_sequence == "double": break return x_, eps_ class RK_Method_Exponential(RK_Method_Beta): def 
def get_guide_epsilon(self,
        x_0           : Tensor,
        x             : Tensor,
        y             : Tensor,
        sigma         : Tensor,
        sigma_cur     : Tensor,
        sigma_down    : Optional[Tensor] = None,
        epsilon_scale : Optional[Tensor] = None,
        ) -> Tensor:
    """Compute the epsilon that pulls the state toward the guide ``y``.

    Args:
        x_0: State at the start of the step; only used when the
            ``manually_anchor_unsampler`` extra option is enabled.
        x: Current (sub-step) state.
        y: Guide target.
        sigma: Noise level at the start of the step.
        sigma_cur: Noise level of ``x``; replaced by ``epsilon_scale`` when that
            is given.
        sigma_down: Noise level the step moves to. ``None`` (the default) is
            treated like ``sigma_down <= sigma``; previously it raised
            ``TypeError`` in the comparison.
        epsilon_scale: Optional override for ``sigma_cur``.

    Returns:
        The guide epsilon. When ``sigma_down > sigma`` (noise level increasing,
        presumably the unsampling direction -- confirm) the difference to ``y``
        is scaled by ``sigma / (sigma_max - sigma)``; otherwise it is the plain
        difference ``y - x``.
    """
    sigma_cur = epsilon_scale if epsilon_scale is not None else sigma_cur

    # Evaluate the direction once, guarding the documented None default.
    noise_increasing = sigma_down is not None and bool(sigma_down > sigma)

    if noise_increasing:
        eps_unmoored = (sigma_cur/(self.sigma_max - sigma_cur)) * (x - y)
    else:
        eps_unmoored = y - x

    if not self.EO("manually_anchor_unsampler"):
        return eps_unmoored

    if noise_increasing:
        eps_anchored = (sigma /(self.sigma_max - sigma)) * (x_0 - y)
    else:
        eps_anchored = y - x_0

    # Interpolate between the x-based and the x_0-based estimate.
    return eps_unmoored + self.LINEAR_ANCHOR_X_0 * (eps_anchored - eps_unmoored)
def get_guide_epsilon(self,
        x_0           : Tensor,
        x             : Tensor,
        y             : Tensor,
        sigma         : Tensor,
        sigma_cur     : Tensor,
        sigma_down    : Optional[Tensor] = None,
        epsilon_scale : Optional[Tensor] = None,
        ) -> Tensor:
    """Compute the epsilon that pulls the state toward the guide ``y``.

    Args:
        x_0: Not used by this implementation.
        x: Current (sub-step) state.
        y: Guide target.
        sigma: Noise level at the start of the step.
        sigma_cur: Noise level of ``x``; supplies the divisor unless
            ``epsilon_scale`` is given.
        sigma_down: Noise level the step moves to. ``None`` (the default) is
            handled like ``sigma_down <= sigma``. The original code compared
            before checking for None, so its None branch could never run.
        epsilon_scale: Optional divisor overriding the one derived from
            ``sigma_cur``.

    Returns:
        ``(y - x) / ratio`` when ``sigma_down > sigma``, with
        ``ratio = sigma_max - sigma_cur``; otherwise ``(x - y) / ratio`` with
        ``ratio = sigma_cur``. ``epsilon_scale`` replaces ``ratio`` when given.
    """
    # Decide the direction once, with the None default checked first.
    noise_increasing = sigma_down is not None and bool(sigma_down > sigma)

    if epsilon_scale is not None:
        sigma_ratio = epsilon_scale
    elif noise_increasing:
        sigma_ratio = self.sigma_max - sigma_cur.clone()
    else:
        sigma_ratio = sigma_cur.clone()

    if noise_increasing:
        return (y - x) / sigma_ratio
    return (x - y) / sigma_ratio
class ClownOptions_SDE_Beta:
    """Node that records SDE noise settings in an ``OPTIONS`` dictionary."""

    @classmethod
    def INPUT_TYPES(cls):
        """Describe the node's required and optional inputs."""
        return {"required":
                    {
                    "noise_type_sde":         (NOISE_GENERATOR_NAMES_SIMPLE, {"default": "gaussian"}),
                    "noise_type_sde_substep": (NOISE_GENERATOR_NAMES_SIMPLE, {"default": "gaussian"}),
                    "noise_mode_sde":         (NOISE_MODE_NAMES,             {"default": 'hard', "tooltip": "How noise scales with the sigma schedule. Hard is the most aggressive, the others start strong and drop rapidly."}),
                    "noise_mode_sde_substep": (NOISE_MODE_NAMES,             {"default": 'hard', "tooltip": "How noise scales with the sigma schedule. Hard is the most aggressive, the others start strong and drop rapidly."}),
                    "eta":                    ("FLOAT",                      {"default": 0.5, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Calculated noise amount to be added, then removed, after each step."}),
                    "eta_substep":            ("FLOAT",                      {"default": 0.5, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Calculated noise amount to be added, then removed, after each step."}),
                    "seed":                   ("INT",                        {"default": -1, "min": -1, "max": 0xffffffffffffffff}),
                    },
                "optional":
                    {
                    "etas":                   ("SIGMAS", ),
                    "etas_substep":           ("SIGMAS", ),
                    "options":                ("OPTIONS", ),
                    }
                }

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_options"

    def main(self,
            noise_type_sde         = "gaussian",
            noise_type_sde_substep = "gaussian",
            noise_mode_sde         = "hard",
            noise_mode_sde_substep = "hard",
            eta                    = 0.5,
            eta_substep            = 0.5,
            seed : int             = -1,
            etas         : Optional[Tensor] = None,
            etas_substep : Optional[Tensor] = None,
            options                = None,
            ):
        """Return a one-tuple holding the options dict extended with the SDE settings.

        A mode or type of ``"none"`` is translated into the default mode/type
        (``"hard"`` / ``"gaussian"``) with the matching eta forced to ``0.0``.

        Args:
            noise_type_sde, noise_type_sde_substep: Noise generator names.
            noise_mode_sde, noise_mode_sde_substep: Noise scaling mode names.
            eta, eta_substep: Noise amounts for steps and substeps.
            seed: Stored under the key ``noise_seed_sde``.
            etas, etas_substep: Optional per-step schedules, stored as given.
            options: Existing options to extend; left unmodified.
        """
        # Work on a shallow copy: the incoming dict may be an upstream node's
        # output that other consumers also receive, so writing into it in place
        # would leak these settings into them.
        options = dict(options) if options is not None else {}

        if noise_mode_sde == "none":
            noise_mode_sde = "hard"
            eta = 0.0

        if noise_mode_sde_substep == "none":
            noise_mode_sde_substep = "hard"
            eta_substep = 0.0

        if noise_type_sde == "none":
            noise_type_sde = "gaussian"
            eta = 0.0

        if noise_type_sde_substep == "none":
            noise_type_sde_substep = "gaussian"
            eta_substep = 0.0

        options['noise_type_sde']         = noise_type_sde
        options['noise_type_sde_substep'] = noise_type_sde_substep
        options['noise_mode_sde']         = noise_mode_sde
        options['noise_mode_sde_substep'] = noise_mode_sde_substep
        options['eta']                    = eta
        options['eta_substep']            = eta_substep
        options['noise_seed_sde']         = seed

        options['etas']                   = etas
        options['etas_substep']           = etas_substep

        return (options,)
class ClownOptions_StepSize_Beta:
    """Options node that records step and substep overshoot settings."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "overshoot_mode": (NOISE_MODE_NAMES, {"default": 'hard', "tooltip": "How step size overshoot scales with the sigma schedule. Hard is the most aggressive, the others start strong and drop rapidly."}),
                "overshoot_mode_substep": (NOISE_MODE_NAMES, {"default": 'hard', "tooltip": "How substep size overshoot scales with the sigma schedule. Hard is the most aggressive, the others start strong and drop rapidly."}),
                "overshoot": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step": 0.01, "round": False, "tooltip": "Boost the size of each denoising step, then rescale to match the original. Has a softening effect."}),
                "overshoot_substep": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step": 0.01, "round": False, "tooltip": "Boost the size of each denoising substep, then rescale to match the original. Has a softening effect."}),
            },
            "optional": {
                "options": ("OPTIONS", ),
            },
        }

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/sampler_options"

    def main(self,
             overshoot_mode="hard",
             overshoot_mode_substep="hard",
             overshoot=0.0,
             overshoot_substep=0.0,
             options=None,
             ):
        """Return a 1-tuple holding a new options dict with the overshoot settings.

        The incoming ``options`` dict is shallow-copied, not modified.
        """
        options = dict(options) if options is not None else {}

        options['overshoot_mode'] = overshoot_mode
        options['overshoot_mode_substep'] = overshoot_mode_substep
        options['overshoot'] = overshoot
        options['overshoot_substep'] = overshoot_substep

        return (options, )


@dataclass
class DetailBoostOptions:
    # Plain container of detail-boost related scalars and their defaults.
    noise_scaling_weight: float = 0.0
    noise_boost_step: float = 0.0
    noise_boost_substep: float = 0.0
    noise_anchor: float = 1.0
    s_noise: float = 1.0
    s_noise_substep: float = 1.0
    d_noise: float = 1.0


# Choices offered by the "method" widget of ClownOptions_DetailBoost_Beta.
DETAIL_BOOST_METHODS = [
    'sampler',
    'sampler_normal',
    'sampler_substep',
    'sampler_substep_normal',
    'model',
    'model_alpha',
]
class ClownOptions_DetailBoost_Beta:
    """Options node that builds per-step noise-scaling weight and eta schedules."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "weight": ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step": 0.01, "round": False, "tooltip": "Set to positive values to create a sharper, grittier, more detailed image. Set to negative values to soften and deepen the colors."}),
                "method": (DETAIL_BOOST_METHODS, {"default": "model", "tooltip": "Determines whether the sampler or the model underestimates the noise level."}),
                "mode": (NOISE_MODE_NAMES, {"default": 'hard', "tooltip": "Changes the steps where the effect is greatest. Most affect early steps, sinusoidal affects middle steps."}),
                "eta": ("FLOAT", {"default": 0.5, "min": -100.0, "max": 100.0, "step": 0.01, "round": False, "tooltip": "The strength of the effect of the noise_scaling_mode. Linear ignores this parameter."}),
                "start_step": ("INT", {"default": 3, "min": 0, "max": MAX_STEPS}),
                "end_step": ("INT", {"default": 10, "min": -1, "max": MAX_STEPS}),
            },
            "optional": {
                "weights": ("SIGMAS", ),
                "etas": ("SIGMAS", ),
                "options": ("OPTIONS", ),
            },
        }

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/sampler_options"

    def main(self,
             weight: float = 0.0,
             method: str = "sampler",
             mode: str = "linear",
             eta: float = 0.5,
             start_step: int = 0,
             end_step: int = -1,
             noise_scaling_cycles: int = 1,
             noise_boost_step: float = 0.0,
             noise_boost_substep: float = 0.0,
             sampler_scaling_normalize: bool = False,
             weights: Optional[Tensor] = None,
             etas: Optional[Tensor] = None,
             options=None,
             ):
        """Return a 1-tuple holding a new options dict with the detail-boost settings.

        ``weights``/``etas`` default to constant schedules of ``weight``/``eta``.
        Both schedules are shifted right by ``start_step`` zeros, truncated at
        ``end_step`` (-1 means MAX_STEPS) and then zero-padded by MAX_STEPS.
        A ``method`` ending in "_normal" enables normalisation and is stored
        without that suffix. The incoming ``options`` dict is not modified.
        """
        options = dict(options) if options is not None else {}

        default_dtype = torch.float64
        # Prefer CUDA as before, but do not crash on machines that lack it.
        default_device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

        noise_scaling_type = method
        if noise_scaling_type.endswith("_normal"):
            sampler_scaling_normalize = True
            noise_scaling_type = noise_scaling_type[:-len("_normal")]

        noise_scaling_end_step = MAX_STEPS if end_step == -1 else end_step

        noise_scaling_weights = weights
        if noise_scaling_weights is None:
            noise_scaling_weights = initialize_or_scale(None, weight, MAX_STEPS)
        noise_scaling_etas = etas
        if noise_scaling_etas is None:
            noise_scaling_etas = initialize_or_scale(None, eta, MAX_STEPS)

        # Supplied schedules may live on another device/dtype; align them with
        # the zero prefix so the concatenation below cannot fail on a mismatch.
        noise_scaling_weights = noise_scaling_weights.to(default_dtype).to(default_device)
        noise_scaling_etas = noise_scaling_etas.to(default_dtype).to(default_device)

        noise_scaling_prepend = torch.zeros((start_step,), dtype=default_dtype, device=default_device)
        noise_scaling_weights = torch.cat((noise_scaling_prepend, noise_scaling_weights), dim=0)
        noise_scaling_etas = torch.cat((noise_scaling_prepend, noise_scaling_etas), dim=0)

        if noise_scaling_weights.shape[-1] > noise_scaling_end_step:
            noise_scaling_weights = noise_scaling_weights[:noise_scaling_end_step]
        if noise_scaling_etas.shape[-1] > noise_scaling_end_step:
            noise_scaling_etas = noise_scaling_etas[:noise_scaling_end_step]

        noise_scaling_weights = F.pad(noise_scaling_weights, (0, MAX_STEPS), value=0.0)
        noise_scaling_etas = F.pad(noise_scaling_etas, (0, MAX_STEPS), value=0.0)

        options['noise_scaling_weight'] = weight
        options['noise_scaling_type'] = noise_scaling_type
        options['noise_scaling_mode'] = mode
        options['noise_scaling_eta'] = eta
        options['noise_scaling_cycles'] = noise_scaling_cycles

        options['noise_scaling_weights'] = noise_scaling_weights
        options['noise_scaling_etas'] = noise_scaling_etas

        options['noise_boost_step'] = noise_boost_step
        options['noise_boost_substep'] = noise_boost_substep
        options['noise_boost_normalize'] = sampler_scaling_normalize

        return (options,)
class ClownOptions_SigmaScaling_Beta:
    """Options node that records s_noise, noise anchor and "lying" sigma settings."""

    @classmethod
    def INPUT_TYPES(cls):
        s_noise_tooltip = "Adds extra SDE noise. Values around 1.03-1.07 can lead to a moderate boost in detail and paint textures."
        return {
            "required": {
                "s_noise": ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step": 0.01, "tooltip": s_noise_tooltip}),
                "s_noise_substep": ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step": 0.01, "tooltip": s_noise_tooltip}),
                "noise_anchor_sde": ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step": 0.01, "round": False, "tooltip": "Typically set to between 1.0 and 0.0. Lower values cerate a grittier, more detailed image."}),
                "lying": ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step": 0.01, "tooltip": "Downscales the sigma schedule. Values around 0.98-0.95 can lead to a large boost in detail and paint textures."}),
                "lying_inv": ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step": 0.01, "tooltip": "Upscales the sigma schedule. Will soften the image and deepen colors. Use after d_noise to counteract desaturation."}),
                "lying_start_step": ("INT", {"default": 0, "min": 0, "max": MAX_STEPS}),
                "lying_inv_start_step": ("INT", {"default": 1, "min": 0, "max": MAX_STEPS}),
            },
            "optional": {
                "s_noises": ("SIGMAS", ),
                "s_noises_substep": ("SIGMAS", ),
                "options": ("OPTIONS", ),
            },
        }

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/sampler_options"

    def main(self,
             noise_anchor_sde: float = 1.0,
             s_noise: float = 1.0,
             s_noise_substep: float = 1.0,
             lying: float = 1.0,
             lying_start_step: int = 0,
             lying_inv: float = 1.0,
             lying_inv_start_step: int = 1,
             s_noises: Optional[Tensor] = None,
             s_noises_substep: Optional[Tensor] = None,
             options=None,
             ):
        """Return a 1-tuple holding a new options dict with the sigma-scaling settings.

        The ``lying*`` inputs are stored under the ``d_noise*`` keys. The
        incoming ``options`` dict is shallow-copied, not modified.
        """
        options = dict(options) if options is not None else {}

        options['noise_anchor'] = noise_anchor_sde
        options['s_noise'] = s_noise
        options['s_noise_substep'] = s_noise_substep
        options['d_noise'] = lying
        options['d_noise_start_step'] = lying_start_step
        options['d_noise_inv'] = lying_inv
        options['d_noise_inv_start_step'] = lying_inv_start_step

        options['s_noises'] = s_noises
        options['s_noises_substep'] = s_noises_substep

        return (options,)
class ClownOptions_Momentum_Beta:
    """Options node that records the momentum setting."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "momentum": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.01, "round": False, "tooltip": "Accelerate convergence with positive values when sampling, negative values when unsampling."}),
            },
            "optional": {
                "options": ("OPTIONS", ),
            },
        }

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/sampler_options"

    def main(self, momentum=0.0, options=None):
        """Return a 1-tuple holding a new options dict with ``momentum`` set.

        The incoming ``options`` dict is shallow-copied, not modified.
        """
        options = dict(options) if options is not None else {}
        options['momentum'] = momentum
        return (options,)


class ClownOptions_ImplicitSteps_Beta:
    """Options node that records implicit step/substep types and counts."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "implicit_type": (IMPLICIT_TYPE_NAMES, {"default": "bongmath"}),
                "implicit_type_substeps": (IMPLICIT_TYPE_NAMES, {"default": "bongmath"}),
                "implicit_steps": ("INT", {"default": 0, "min": 0, "max": 10000}),
                "implicit_substeps": ("INT", {"default": 0, "min": 0, "max": 10000}),
            },
            "optional": {
                "options": ("OPTIONS", ),
            },
        }

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/sampler_options"

    def main(self,
             implicit_type="bongmath",
             implicit_type_substeps="bongmath",
             implicit_steps=0,
             implicit_substeps=0,
             options=None,
             ):
        """Return a 1-tuple holding a new options dict with the implicit settings.

        The incoming ``options`` dict is shallow-copied, not modified.
        """
        options = dict(options) if options is not None else {}

        options['implicit_type'] = implicit_type
        options['implicit_type_substeps'] = implicit_type_substeps
        options['implicit_steps'] = implicit_steps
        options['implicit_substeps'] = implicit_substeps

        return (options,)
class ClownOptions_Cycles_Beta:
    """Options node that records cycle (rebound) and unsampling settings."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "cycles": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 10000, "step": 0.5, "round": 0.5}),
                "eta_decay_scale": ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step": 0.01, "tooltip": "Multiplies etas by this number after every cycle. May help drive convergence."}),
                "unsample_eta": ("FLOAT", {"default": 0.5, "min": -10000, "max": 10000, "step": 0.01}),
                "unsampler_override": (get_sampler_name_list(), {"default": "none"}),
                "unsample_cfg": ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step": 0.01}),
            },
            "optional": {
                "options": ("OPTIONS", ),
            },
        }

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/sampler_options"

    def main(self,
             cycles=0,
             unsample_eta=0.5,
             eta_decay_scale=1.0,
             unsample_cfg=1.0,
             unsampler_override="none",
             options=None,
             ):
        """Return a 1-tuple holding a new options dict with the cycle settings.

        ``cycles`` is stored as ``rebounds = int(cycles * 2)``. The incoming
        ``options`` dict is shallow-copied, not modified.
        """
        options = dict(options) if options is not None else {}

        options['rebounds'] = int(cycles * 2)
        options['unsample_eta'] = unsample_eta
        options['unsampler_name'] = unsampler_override
        options['eta_decay_scale'] = eta_decay_scale
        options['unsample_cfg'] = unsample_cfg

        return (options,)


class SharkOptions_StartStep_Beta:
    """Options node that records the step at which sampling starts."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "start_at_step": ("INT", {"default": 0, "min": -1, "max": 10000, "step": 1}),
            },
            "optional": {
                "options": ("OPTIONS", ),
            },
        }

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/sampler_options"

    def main(self, start_at_step=0, options=None):
        """Return a 1-tuple holding a new options dict with ``start_at_step`` set.

        The incoming ``options`` dict is shallow-copied, not modified.
        """
        options = dict(options) if options is not None else {}
        options['start_at_step'] = start_at_step
        return (options,)


class ClownOptions_Tile_Beta:
    """Options node that appends one (height, width) tile size to the options."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "tile_width": ("INT", {"default": 1024, "min": -1, "max": 10000, "step": 1}),
                "tile_height": ("INT", {"default": 1024, "min": -1, "max": 10000, "step": 1}),
            },
            "optional": {
                "options": ("OPTIONS", ),
            },
        }

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/sampler_options"

    def main(self, tile_height=1024, tile_width=1024, options=None):
        """Return a 1-tuple holding a new options dict with the tile size appended.

        Existing ``tile_sizes`` entries are kept, so chained nodes accumulate.
        Both the dict and the list are copied first: appending to the incoming
        list would grow it again on every call that receives the same object.
        """
        options = dict(options) if options is not None else {}

        tile_sizes = list(options.get('tile_sizes', []))
        tile_sizes.append((tile_height, tile_width))
        options['tile_sizes'] = tile_sizes

        return (options,)
class ClownOptions_Tile_Advanced_Beta:
    """Options node that sets the tile sizes from a free-form text field."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "tile_sizes": ("STRING", {"default": "1024,1024", "multiline": True}),
            },
            "optional": {
                "options": ("OPTIONS", ),
            },
        }

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/sampler_options"

    def main(self, tile_sizes="1024,1024", options=None):
        """Return a 1-tuple holding a new options dict with ``tile_sizes`` replaced.

        The text is parsed with ``parse_tile_sizes`` and the last two entries
        of every parsed tile are stored in swapped order. The incoming
        ``options`` dict is shallow-copied, not modified.
        """
        options = dict(options) if options is not None else {}

        tiles_height_width = parse_tile_sizes(tile_sizes)
        # swap height and width to be consistent... width, height
        # (previously iterated an undefined name, which raised NameError)
        options['tile_sizes'] = [(tile[-1], tile[-2]) for tile in tiles_height_width]

        return (options,)


class ClownOptions_ExtraOptions_Beta:
    """Options node that records the raw extra_options text."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "extra_options": ("STRING", {"default": "", "multiline": True}),
            },
            "optional": {
                "options": ("OPTIONS", ),
            },
        }

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/sampler_options"

    def main(self, extra_options="", options=None):
        """Return a 1-tuple holding a new options dict with ``extra_options`` set.

        The incoming ``options`` dict is shallow-copied, not modified.
        """
        options = dict(options) if options is not None else {}
        options['extra_options'] = extra_options
        return (options, )


class ClownOptions_Automation_Beta:
    """Options node that bundles optional per-step schedules under "automation"."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {},
            "optional": {
                "etas": ("SIGMAS", ),
                "etas_substep": ("SIGMAS", ),
                "s_noises": ("SIGMAS", ),
                "s_noises_substep": ("SIGMAS", ),
                "epsilon_scales": ("SIGMAS", ),
                "frame_weights": ("SIGMAS", ),
                "options": ("OPTIONS",),
            },
        }

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/sampler_options"

    def main(self,
             etas=None,
             etas_substep=None,
             s_noises=None,
             s_noises_substep=None,
             epsilon_scales=None,
             frame_weights=None,
             options=None,
             ):
        """Return a 1-tuple holding a new options dict with an "automation" entry.

        ``frame_weights`` is stored twice, as a 2-tuple under
        ``frame_weights_mgr``. The incoming ``options`` dict is shallow-copied,
        not modified.
        """
        options = dict(options) if options is not None else {}

        options["automation"] = {
            "etas": etas,
            "etas_substep": etas_substep,
            "s_noises": s_noises,
            "s_noises_substep": s_noises_substep,
            "epsilon_scales": epsilon_scales,
            "frame_weights_mgr": (frame_weights, frame_weights),
        }

        return (options, )
class SharkOptions_GuideCond_Beta:
    """Options node that records one positive/negative conditioning pair and cfg."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {},
            "optional": {
                "positive": ("CONDITIONING", ),
                "negative": ("CONDITIONING", ),
                "cfg": ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step": 0.01}),
                "options": ("OPTIONS",),
            },
        }

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/sampler_options"

    def main(self, positive=None, negative=None, cfg=1.0, options=None):
        """Return a 1-tuple holding a new options dict with a "flow_cond" entry.

        The incoming ``options`` dict is shallow-copied, not modified.
        """
        options = dict(options) if options is not None else {}

        options["flow_cond"] = {
            "yt_positive": positive,
            "yt_negative": negative,
            "yt_cfg": cfg,
        }

        return (options, )


class SharkOptions_GuideConds_Beta:
    """Options node that records masked and unmasked conditioning pairs and cfgs."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {},
            "optional": {
                "positive_masked": ("CONDITIONING", ),
                "positive_unmasked": ("CONDITIONING", ),
                "negative_masked": ("CONDITIONING", ),
                "negative_unmasked": ("CONDITIONING", ),
                "cfg_masked": ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step": 0.01}),
                "cfg_unmasked": ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step": 0.01}),
                "options": ("OPTIONS",),
            },
        }

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/sampler_options"

    def main(self,
             positive_masked=None,
             negative_masked=None,
             cfg_masked=1.0,
             positive_unmasked=None,
             negative_unmasked=None,
             cfg_unmasked=1.0,
             options=None,
             ):
        """Return a 1-tuple holding a new options dict with a "flow_cond" entry.

        The masked inputs go under the ``yt_*`` keys and the unmasked inputs
        under the ``yt_inv_*`` keys. The incoming ``options`` dict is
        shallow-copied, not modified.
        """
        options = dict(options) if options is not None else {}

        options["flow_cond"] = {
            "yt_positive": positive_masked,
            "yt_negative": negative_masked,
            "yt_cfg": cfg_masked,
            "yt_inv_positive": positive_unmasked,
            "yt_inv_negative": negative_unmasked,
            "yt_inv_cfg": cfg_unmasked,
        }

        return (options, )


class SharkOptions_Beta:
    """Options node that records initial-noise, denoise_alt and channelwise-cfg settings."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "noise_type_init": (NOISE_GENERATOR_NAMES_SIMPLE, {"default": "gaussian"}),
                "s_noise_init": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.01, "round": False, }),
                "denoise_alt": ("FLOAT", {"default": 1.0, "min": -10000, "max": 10000, "step": 0.01}),
                "channelwise_cfg": ("BOOLEAN", {"default": False}),
            },
            "optional": {
                "options": ("OPTIONS", ),
            },
        }

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/sampler_options"

    def main(self,
             noise_type_init="gaussian",
             s_noise_init=1.0,
             denoise_alt=1.0,
             channelwise_cfg=False,
             options=None,
             ):
        """Return a 1-tuple holding a new options dict with the init settings.

        ``s_noise_init`` is stored under ``noise_init_stdev``. The incoming
        ``options`` dict is shallow-copied, not modified.
        """
        options = dict(options) if options is not None else {}

        options['noise_type_init'] = noise_type_init
        options['noise_init_stdev'] = s_noise_init
        options['denoise_alt'] = denoise_alt
        options['channelwise_cfg'] = channelwise_cfg

        return (options,)


class SharkOptions_UltraCascade_Latent_Beta:
    """Options node that records the UltraCascade latent width and height."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "width": ("INT", {"default": 60, "min": 1, "max": MAX_RESOLUTION, "step": 1}),
                "height": ("INT", {"default": 36, "min": 1, "max": MAX_RESOLUTION, "step": 1}),
            },
            "optional": {
                "options": ("OPTIONS",),
            },
        }

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/sampler_options"

    def main(self, width: int = 60, height: int = 36, options=None):
        """Return a 1-tuple holding a new options dict with the latent size.

        The incoming ``options`` dict is shallow-copied, not modified.
        """
        options = dict(options) if options is not None else {}

        options['ultracascade_latent_width'] = width
        options['ultracascade_latent_height'] = height

        return (options,)
class ClownOptions_SwapSampler_Beta:
    """Options node that records when and to which sampler to swap."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "sampler_name": (get_sampler_name_list(), {"default": get_default_sampler_name()}),
                "swap_below_err": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step": 0.01, "round": False, "tooltip": "Swap samplers if the error per step falls below this threshold."}),
                "swap_at_step": ("INT", {"default": 30, "min": 1, "max": 10000}),
                "log_err_to_console": ("BOOLEAN", {"default": False}),
            },
            "optional": {
                "options": ("OPTIONS", ),
            },
        }

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/sampler_options"

    def main(self,
             sampler_name="res_3m",
             swap_below_err=0.0,
             swap_at_step=30,
             log_err_to_console=False,
             options=None,
             ):
        """Return a 1-tuple holding a new options dict with the swap settings.

        ``sampler_name`` is resolved through ``process_sampler_name``; the
        implicit name it returns is used unless it equals "use_explicit".
        The incoming ``options`` dict is shallow-copied, not modified.
        """
        sampler_name, implicit_sampler_name = process_sampler_name(sampler_name)
        if implicit_sampler_name != "use_explicit":
            sampler_name = implicit_sampler_name

        options = dict(options) if options is not None else {}

        options['rk_swap_type'] = sampler_name
        options['rk_swap_threshold'] = swap_below_err
        options['rk_swap_step'] = swap_at_step
        options['rk_swap_print'] = log_err_to_console

        return (options,)


class ClownOptions_SDE_Mask_Beta:
    """Options node that stores a mask rescaled into the [min, max] range."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "max": ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step": 0.01, "round": False, "tooltip": "Clamp the max value for the mask."}),
                "min": ("FLOAT", {"default": 0.0, "min": -100.0, "max": 100.0, "step": 0.01, "round": False, "tooltip": "Clamp the min value for the mask."}),
                "invert_mask": ("BOOLEAN", {"default": False}),
            },
            "optional": {
                "mask": ("MASK", ),
                "options": ("OPTIONS", ),
            },
        }

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/sampler_options"

    def main(self, max=1.0, min=0.0, invert_mask=False, mask=None, options=None):
        """Return a 1-tuple holding a deep copy of ``options`` with ``sde_mask`` set.

        The mask is optionally inverted (``1 - mask``) and then linearly
        rescaled so its smallest value maps to ``min`` and its largest to
        ``max``. When no mask is connected the options are returned without
        an ``sde_mask`` entry. A uniform mask cannot be rescaled (its range is
        zero, which previously produced NaN), so it is clamped to
        [``min``, ``max``] instead.
        """
        options = copy.deepcopy(options) if options is not None else {}

        # "mask" is an optional input; nothing to store when it is absent.
        if mask is None:
            return (options,)

        if invert_mask:
            mask = 1 - mask

        lowest, highest = mask.min(), mask.max()
        if highest == lowest:
            mask = mask.clamp(min=min, max=max)
        else:
            mask = ((mask - lowest) * (max - min)) / (highest - lowest) + min

        options['sde_mask'] = mask

        return (options,)
class ClownGuide_Mean_Beta:
    """Guide node that records the settings of a "mean" guide in a GUIDES dict."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "weight": ("FLOAT", {"default": 0.75, "min": -100.0, "max": 100.0, "step": 0.01, "round": False, "tooltip": "Set the strength of the guide."}),
                "cutoff": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01, "round": False, "tooltip": "Disables the guide for the next step when the denoised image is similar to the guide. Higher values will strengthen the effect."}),
                "weight_scheduler": (["constant"] + get_res4lyf_scheduler_list(), {"default": "beta57"},),
                "start_step": ("INT", {"default": 0, "min": 0, "max": 10000}),
                "end_step": ("INT", {"default": 15, "min": -1, "max": 10000}),
                "invert_mask": ("BOOLEAN", {"default": False}),
            },
            "optional": {
                "guide": ("LATENT", ),
                "mask": ("MASK", ),
                "weights": ("SIGMAS", ),
                "guides": ("GUIDES", ),
            },
        }

    RETURN_TYPES = ("GUIDES",)
    RETURN_NAMES = ("guides",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/sampler_extensions"

    def main(self,
             weight_scheduler="constant",
             start_step=0,
             end_step=30,
             cutoff=1.0,
             guide=None,
             weight=0.0,
             channelwise_mode=False,
             projection_mode=False,
             weights=None,
             mask=None,
             invert_mask=False,
             guides=None,
             ):
        """Return a 1-tuple holding a deep copy of ``guides`` with the ``*_mean`` keys set.

        With the "constant" scheduler any supplied ``weights`` are replaced by
        a constant schedule of ``weight`` over ``end_step`` steps, zero-padded
        by MAX_STEPS. ``end_step == -1`` means MAX_STEPS.
        """
        default_dtype = torch.float64

        # NOTE(review): invert_mask, channelwise_mode and projection_mode are
        # accepted but never read; a supplied mask is always inverted here.
        # Confirm that this is intended.
        mask = 1 - mask if mask is not None else None

        if end_step == -1:
            end_step = MAX_STEPS

        if guide is not None:
            # Prefer the raw latent stored in state_info when it is present.
            raw_x = guide.get('state_info', {}).get('raw_x', None)
            source = raw_x if raw_x is not None else guide['samples']
            guide = {'samples': source.clone()}

        if weight_scheduler == "constant":  # and weights == None:
            weights = initialize_or_scale(None, weight, end_step).to(default_dtype)
            weights = F.pad(weights, (0, MAX_STEPS), value=0.0)

        guides = copy.deepcopy(guides) if guides is not None else {}

        guides['weight_mean'] = weight
        guides['weights_mean'] = weights
        guides['guide_mean'] = guide
        guides['mask_mean'] = mask

        guides['weight_scheduler_mean'] = weight_scheduler
        guides['start_step_mean'] = start_step
        guides['end_step_mean'] = end_step

        guides['cutoff_mean'] = cutoff

        return (guides, )


class ClownGuide_Style_Beta:
    """Guide node that records style-transfer guide settings in a GUIDES dict."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "apply_to": (["positive", "negative"], {"default": "positive", "tooltip": "When using CFG, decides whether to apply the guide to the positive or negative conditioning."}),
                "method": (["AdaIN", "WCT"], {"default": "WCT"}),
                "weight": ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step": 0.01, "round": False, "tooltip": "Set the strength of the guide by multiplying all other weights by this value."}),
                "synweight": ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step": 0.01, "round": False, "tooltip": "Set the relative strength of the guide on the opposite conditioning to what was selected: i.e., negative if positive in apply_to. Recommended to avoid CFG burn."}),
                "weight_scheduler": (["constant"] + get_res4lyf_scheduler_list(), {"default": "constant", "tooltip": "Selecting any scheduler except constant will cause the strength to gradually decay to zero. Try beta57 vs. linear quadratic."},),
                "start_step": ("INT", {"default": 0, "min": 0, "max": 10000}),
                "end_step": ("INT", {"default": -1, "min": -1, "max": 10000}),
                "invert_mask": ("BOOLEAN", {"default": False}),
            },
            "optional": {
                "guide": ("LATENT", ),
                "mask": ("MASK", ),
                "weights": ("SIGMAS", ),
                "guides": ("GUIDES", ),
            },
        }

    RETURN_TYPES = ("GUIDES",)
    RETURN_NAMES = ("guides",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/sampler_extensions"
    # The sentences are separate literals; each needs its own trailing space,
    # otherwise they run together in the displayed text.
    DESCRIPTION = "Transfer some visual aspects of style from a guide (reference) image. If nothing about style is specified in the prompt, it may just transfer the lighting and color scheme. " + \
                  "If using CFG results in burn, or a very dark/bright image in the preview followed by a bad output, try duplicating and chaining this node, so that the guide may be applied to both positive and negative conditioning. " + \
                  "Currently supported models: SD1.5, SDXL, Stable Cascade, SD3.5, AuraFlow, Flux, HiDream, WAN, and LTXV."

    def main(self,
             apply_to="all",
             method="WCT",
             weight=1.0,
             synweight=1.0,
             weight_scheduler="constant",
             start_step=0,
             end_step=15,
             invert_mask=False,
             guide=None,
             mask=None,
             weights=None,
             guides=None,
             ):
        """Return a 1-tuple holding a deep copy of ``guides`` with the style keys set.

        ``apply_to`` selects which key family is written: "positive" writes
        the ``*_style_pos`` keys, "negative" the ``*_style_neg`` keys and
        "all" both. With the "constant" scheduler any supplied ``weights`` are
        replaced by ``start_step`` zeros followed by a constant schedule of
        ``weight`` over ``end_step`` steps, zero-padded by MAX_STEPS.
        ``end_step == -1`` means MAX_STEPS.
        """
        default_dtype = torch.float64

        # NOTE(review): invert_mask is accepted but never read; a supplied
        # mask is always inverted here. Confirm that this is intended.
        mask = 1 - mask if mask is not None else None

        if end_step == -1:
            end_step = MAX_STEPS

        if guide is not None:
            # Prefer the raw latent stored in state_info when it is present.
            raw_x = guide.get('state_info', {}).get('raw_x', None)
            source = raw_x if raw_x is not None else guide['samples']
            guide = {'samples': source.clone()}

        if weight_scheduler == "constant":  # and weights == None:
            weights = initialize_or_scale(None, weight, end_step).to(default_dtype)
            prepend = torch.zeros(start_step).to(weights)
            weights = torch.cat([prepend, weights])
            weights = F.pad(weights, (0, MAX_STEPS), value=0.0)

        guides = copy.deepcopy(guides) if guides is not None else {}

        guides['style_method'] = method

        targets = []
        if apply_to in {"positive", "all"}:
            targets.append("pos")
        if apply_to in {"negative", "all"}:
            targets.append("neg")

        for suffix in targets:
            guides[f'weight_style_{suffix}'] = weight
            guides[f'weights_style_{suffix}'] = weights

            guides[f'synweight_style_{suffix}'] = synweight

            guides[f'guide_style_{suffix}'] = guide
            guides[f'mask_style_{suffix}'] = mask

            guides[f'weight_scheduler_style_{suffix}'] = weight_scheduler
            guides[f'start_step_style_{suffix}'] = start_step
            guides[f'end_step_style_{suffix}'] = end_step

        return (guides, )
class ClownGuide_AdaIN_MMDiT_Beta:
    """Guide node that records per-block AdaIN guide settings in a GUIDES dict."""

    # Length of the per-block weight tables built in main().
    _BLOCK_SLOTS = 100

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "weight": ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step": 0.01, "round": False, "tooltip": "Set the strength of the guide by multiplying all other weights by this value."}),
                "weight_scheduler": (["constant"] + get_res4lyf_scheduler_list(), {"default": "constant"},),
                "double_blocks": ("STRING", {"default": "", "multiline": True}),
                "double_weights": ("STRING", {"default": "", "multiline": True}),
                "single_blocks": ("STRING", {"default": "20", "multiline": True}),
                "single_weights": ("STRING", {"default": "0.5", "multiline": True}),
                "start_step": ("INT", {"default": 0, "min": 0, "max": 10000}),
                "end_step": ("INT", {"default": 15, "min": -1, "max": 10000}),
                "invert_mask": ("BOOLEAN", {"default": False}),
            },
            "optional": {
                "guide": ("LATENT", ),
                "mask": ("MASK", ),
                "weights": ("SIGMAS", ),
                "guides": ("GUIDES", ),
            },
        }

    RETURN_TYPES = ("GUIDES",)
    RETURN_NAMES = ("guides",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/sampler_extensions"

    @staticmethod
    def _expand_block_weights(blocks, weights):
        """Turn one blocks/weights string pair into (block list, weight list).

        The weights string is parsed with ``parse_range_string``; an empty
        result becomes [0.0], a single value is repeated 100 times, and the
        values are converted to float when the first one is an int. For
        ``blocks == "all"`` the blocks are 0..99 and the weights are returned
        as parsed. Otherwise the blocks string is parsed too and the weights
        are scattered into a 100-entry zero-filled table indexed by block.
        """
        slots = ClownGuide_AdaIN_MMDiT_Beta._BLOCK_SLOTS

        weights = parse_range_string(weights)
        if len(weights) == 0:
            weights.append(0.0)
        if len(weights) == 1:
            weights = weights * slots
        if isinstance(weights[0], int):
            weights = [float(val) for val in weights]

        if blocks == "all":
            return list(range(slots)), weights

        blocks = parse_range_string(blocks)
        expanded = [0.0] * slots
        # zip stops at the shorter list: surplus blocks keep a weight of 0.0.
        for block, block_weight in zip(blocks, weights):
            expanded[block] = block_weight
        return blocks, expanded

    def main(self,
             weight=1.0,
             weight_scheduler="constant",
             double_weights="0.1",
             single_weights="0.0",
             double_blocks="all",
             single_blocks="all",
             start_step=0,
             end_step=15,
             invert_mask=False,
             guide=None,
             mask=None,
             weights=None,
             guides=None,
             ):
        """Return a 1-tuple holding a deep copy of ``guides`` with the AdaIN keys set.

        With the "constant" scheduler any supplied ``weights`` are replaced by
        ``start_step`` zeros followed by a constant schedule of ``weight``
        over ``end_step`` steps, zero-padded by MAX_STEPS. ``end_step == -1``
        means MAX_STEPS.
        """
        default_dtype = torch.float64

        # NOTE(review): invert_mask is accepted but never read; a supplied
        # mask is always inverted here. Confirm that this is intended.
        mask = 1 - mask if mask is not None else None

        double_blocks, double_weights = self._expand_block_weights(double_blocks, double_weights)
        single_blocks, single_weights = self._expand_block_weights(single_blocks, single_weights)

        if end_step == -1:
            end_step = MAX_STEPS

        if guide is not None:
            # Prefer the raw latent stored in state_info when it is present.
            raw_x = guide.get('state_info', {}).get('raw_x', None)
            source = raw_x if raw_x is not None else guide['samples']
            guide = {'samples': source.clone()}

        if weight_scheduler == "constant":  # and weights == None:
            weights = initialize_or_scale(None, weight, end_step).to(default_dtype)
            prepend = torch.zeros(start_step).to(weights)
            weights = torch.cat([prepend, weights])
            weights = F.pad(weights, (0, MAX_STEPS), value=0.0)

        guides = copy.deepcopy(guides) if guides is not None else {}

        guides['weight_adain'] = weight
        guides['weights_adain'] = weights

        guides['blocks_adain_mmdit'] = {
            "double_weights": double_weights,
            "single_weights": single_weights,
            "double_blocks": double_blocks,
            "single_blocks": single_blocks,
        }

        guides['guide_adain'] = guide
        guides['mask_adain'] = mask

        guides['weight_scheduler_adain'] = weight_scheduler
        guides['start_step_adain'] = start_step
        guides['end_step_adain'] = end_step

        return (guides, )
class ClownGuide_AttnInj_MMDiT_Beta:
    """Guide node that records per-block attention-injection settings in a GUIDES dict."""

    # Length of the per-block weight tables built in main().
    _BLOCK_SLOTS = 100

    @classmethod
    def INPUT_TYPES(cls):
        def strength(default):
            # Every q/k/v widget shares this spec; only the default differs.
            return ("FLOAT", {"default": default, "min": -100.0, "max": 100.0, "step": 0.01, "round": False, "tooltip": "Set relative injection strength."})

        return {
            "required": {
                "weight": ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step": 0.01, "round": False, "tooltip": "Set the strength of the guide by multiplying all other weights by this value."}),
                "weight_scheduler": (["constant"] + get_res4lyf_scheduler_list(), {"default": "constant"},),
                "double_blocks": ("STRING", {"default": "0,1,3", "multiline": True}),
                "double_weights": ("STRING", {"default": "1.0", "multiline": True}),
                "single_blocks": ("STRING", {"default": "20", "multiline": True}),
                "single_weights": ("STRING", {"default": "0.5", "multiline": True}),

                "img_q": strength(0.0),
                "img_k": strength(0.0),
                "img_v": strength(1.0),

                "txt_q": strength(0.0),
                "txt_k": strength(0.0),
                "txt_v": strength(0.0),

                "img_q_norm": strength(0.0),
                "img_k_norm": strength(0.0),
                "img_v_norm": strength(0.0),

                "txt_q_norm": strength(0.0),
                "txt_k_norm": strength(0.0),
                "txt_v_norm": strength(0.0),

                "start_step": ("INT", {"default": 0, "min": 0, "max": 10000}),
                "end_step": ("INT", {"default": 15, "min": -1, "max": 10000}),
                "invert_mask": ("BOOLEAN", {"default": False}),
            },
            "optional": {
                "guide": ("LATENT", ),
                "mask": ("MASK", ),
                "weights": ("SIGMAS", ),
                "guides": ("GUIDES", ),
            },
        }

    RETURN_TYPES = ("GUIDES",)
    RETURN_NAMES = ("guides",)
    FUNCTION = "main"
    CATEGORY = "RES4LYF/sampler_extensions"

    @staticmethod
    def _expand_block_weights(blocks, weights):
        """Turn one blocks/weights string pair into (block list, weight list).

        The weights string is parsed with ``parse_range_string``; an empty
        result becomes [0.0], a single value is repeated 100 times, and the
        values are converted to float when the first one is an int. For
        ``blocks == "all"`` the blocks are 0..99 and the weights are returned
        as parsed. Otherwise the blocks string is parsed too and the weights
        are scattered into a 100-entry zero-filled table indexed by block.
        """
        slots = ClownGuide_AttnInj_MMDiT_Beta._BLOCK_SLOTS

        weights = parse_range_string(weights)
        if len(weights) == 0:
            weights.append(0.0)
        if len(weights) == 1:
            weights = weights * slots
        if isinstance(weights[0], int):
            weights = [float(val) for val in weights]

        if blocks == "all":
            return list(range(slots)), weights

        blocks = parse_range_string(blocks)
        expanded = [0.0] * slots
        # zip stops at the shorter list: surplus blocks keep a weight of 0.0.
        for block, block_weight in zip(blocks, weights):
            expanded[block] = block_weight
        return blocks, expanded

    def main(self,
             weight=1.0,
             weight_scheduler="constant",
             double_weights="0.1",
             single_weights="0.0",
             double_blocks="all",
             single_blocks="all",
             img_q=0.0,
             img_k=0.0,
             img_v=0.0,
             txt_q=0.0,
             txt_k=0.0,
             txt_v=0.0,
             img_q_norm=0.0,
             img_k_norm=0.0,
             img_v_norm=0.0,
             txt_q_norm=0.0,
             txt_k_norm=0.0,
             txt_v_norm=0.0,
             start_step=0,
             end_step=15,
             invert_mask=False,
             guide=None,
             mask=None,
             weights=None,
             guides=None,
             ):
        """Return a 1-tuple holding a deep copy of ``guides`` with the attninj keys set.

        With the "constant" scheduler any supplied ``weights`` are replaced by
        ``start_step`` zeros followed by a constant schedule of ``weight``
        over ``end_step`` steps, zero-padded by MAX_STEPS. ``end_step == -1``
        means MAX_STEPS.
        """
        default_dtype = torch.float64

        # NOTE(review): invert_mask is accepted but never read; a supplied
        # mask is always inverted here. Confirm that this is intended.
        mask = 1 - mask if mask is not None else None

        double_blocks, double_weights = self._expand_block_weights(double_blocks, double_weights)
        single_blocks, single_weights = self._expand_block_weights(single_blocks, single_weights)

        if end_step == -1:
            end_step = MAX_STEPS

        if guide is not None:
            # Prefer the raw latent stored in state_info when it is present.
            raw_x = guide.get('state_info', {}).get('raw_x', None)
            source = raw_x if raw_x is not None else guide['samples']
            guide = {'samples': source.clone()}

        if weight_scheduler == "constant":  # and weights == None:
            weights = initialize_or_scale(None, weight, end_step).to(default_dtype)
            prepend = torch.zeros(start_step).to(weights)
            weights = torch.cat([prepend, weights])
            weights = F.pad(weights, (0, MAX_STEPS), value=0.0)

        guides = copy.deepcopy(guides) if guides is not None else {}

        guides['weight_attninj'] = weight
        guides['weights_attninj'] = weights

        guides['blocks_attninj_mmdit'] = {
            "double_weights": double_weights,
            "single_weights": single_weights,
            "double_blocks": double_blocks,
            "single_blocks": single_blocks,
        }

        guides['blocks_attninj_qkv'] = {
            "img_q": img_q,
            "img_k": img_k,
            "img_v": img_v,
            "txt_q": txt_q,
            "txt_k": txt_k,
            "txt_v": txt_v,

            "img_q_norm": img_q_norm,
            "img_k_norm": img_k_norm,
            "img_v_norm": img_v_norm,
            "txt_q_norm": txt_q_norm,
            "txt_k_norm": txt_k_norm,
            "txt_v_norm": txt_v_norm,
        }

        guides['guide_attninj'] = guide
        guides['mask_attninj'] = mask

        guides['weight_scheduler_attninj'] = weight_scheduler
        guides['start_step_attninj'] = start_step
        guides['end_step_attninj'] = end_step

        return (guides, )
require low strengths, start with 0.01-0.02."}), "channelwise_mode": ("BOOLEAN", {"default": True}), "projection_mode": ("BOOLEAN", {"default": True}), "weight": ("FLOAT", {"default": 0.75, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide."}), "cutoff": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step":0.01, "round": False, "tooltip": "Disables the guide for the next step when the denoised image is similar to the guide. Higher values will strengthen the effect."}), "weight_scheduler": (["constant"] + get_res4lyf_scheduler_list(), {"default": "beta57"},), "start_step": ("INT", {"default": 0, "min": 0, "max": 10000}), "end_step": ("INT", {"default": 15, "min": -1, "max": 10000}), "invert_mask": ("BOOLEAN", {"default": False}), }, "optional": { "guide": ("LATENT", ), "mask": ("MASK", ), "weights": ("SIGMAS", ), } } RETURN_TYPES = ("GUIDES",) RETURN_NAMES = ("guides",) FUNCTION = "main" CATEGORY = "RES4LYF/sampler_extensions" def main(self, weight_scheduler = "constant", weight_scheduler_unmasked = "constant", start_step = 0, start_step_unmasked = 0, end_step = 30, end_step_unmasked = 30, cutoff = 1.0, cutoff_unmasked = 1.0, guide = None, guide_unmasked = None, weight = 0.0, weight_unmasked = 0.0, guide_mode = "epsilon", channelwise_mode = False, projection_mode = False, weights = None, weights_unmasked = None, mask = None, unmask = None, invert_mask = False, ): CG = ClownGuides_Beta() mask = 1-mask if mask is not None else None if end_step == -1: end_step = MAX_STEPS if guide is not None: raw_x = guide.get('state_info', {}).get('raw_x', None) if False: # raw_x is not None: guide = {'samples': guide['state_info']['raw_x'].clone()} else: guide = {'samples': guide['samples'].clone()} if guide_unmasked is not None: raw_x = guide_unmasked.get('state_info', {}).get('raw_x', None) if False: #raw_x is not None: guide_unmasked = {'samples': guide_unmasked['state_info']['raw_x'].clone()} else: guide_unmasked = 
class ClownGuides_Beta:
    """ComfyUI node that packs a masked and an unmasked guide into one GUIDES dict."""

    @classmethod
    def INPUT_TYPES(cls):
        return {"required":
                    {
                    "guide_mode":                (GUIDE_MODE_NAMES_BETA_SIMPLE, {"default": 'epsilon', "tooltip": "Recommended: epsilon or mean/mean_std with sampler_mode = standard, and unsample/resample with sampler_mode = unsample/resample. Epsilon_dynamic_mean, etc. are only used with two latent inputs and a mask. Blend/hard_light/mean/mean_std etc. require low strengths, start with 0.01-0.02."}),
                    "channelwise_mode":          ("BOOLEAN", {"default": True}),
                    "projection_mode":           ("BOOLEAN", {"default": True}),
                    "weight_masked":             ("FLOAT",   {"default": 0.75, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide."}),
                    "weight_unmasked":           ("FLOAT",   {"default": 0.75, "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Set the strength of the guide_bkg."}),
                    "cutoff_masked":             ("FLOAT",   {"default": 1.0,  "min": 0.0,    "max": 1.0,   "step":0.01, "round": False, "tooltip": "Disables the guide for the next step when the denoised image is similar to the guide. Higher values will strengthen the effect."}),
                    "cutoff_unmasked":           ("FLOAT",   {"default": 1.0,  "min": -100.0, "max": 100.0, "step":0.01, "round": False, "tooltip": "Disables the guide for the next step when the denoised image is similar to the guide. Higher values will strengthen the effect."}),
                    "weight_scheduler_masked":   (["constant"] + get_res4lyf_scheduler_list(), {"default": "beta57"},),
                    "weight_scheduler_unmasked": (["constant"] + get_res4lyf_scheduler_list(), {"default": "constant"},),
                    "start_step_masked":         ("INT",     {"default": 0,    "min":  0,     "max": 10000}),
                    "start_step_unmasked":       ("INT",     {"default": 0,    "min":  0,     "max": 10000}),
                    "end_step_masked":           ("INT",     {"default": 15,   "min": -1,     "max": 10000}),
                    "end_step_unmasked":         ("INT",     {"default": 15,   "min": -1,     "max": 10000}),
                    "invert_mask":               ("BOOLEAN", {"default": False}),
                    },
                "optional":
                    {
                    "guide_masked":     ("LATENT", ),
                    "guide_unmasked":   ("LATENT", ),
                    "mask":             ("MASK", ),
                    "weights_masked":   ("SIGMAS", ),
                    "weights_unmasked": ("SIGMAS", ),
                    }
                }

    RETURN_TYPES = ("GUIDES",)
    RETURN_NAMES = ("guides",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_extensions"

    def main(self,
            weight_scheduler_masked   = "constant",
            weight_scheduler_unmasked = "constant",
            start_step_masked         = 0,
            start_step_unmasked       = 0,
            end_step_masked           = 30,
            end_step_unmasked         = 30,
            cutoff_masked             = 1.0,
            cutoff_unmasked           = 1.0,
            guide_masked              = None,
            guide_unmasked            = None,
            weight_masked             = 0.0,
            weight_unmasked           = 0.0,

            guide_mode                = "epsilon",
            channelwise_mode          = False,
            projection_mode           = False,
            weights_masked            = None,
            weights_unmasked          = None,
            mask                      = None,
            unmask                    = None,
            invert_mask               = False,
            ):
        """Return a one-tuple holding the guides dict built from the inputs.

        A guide that is not connected has its settings reset to neutral values
        (constant scheduler, steps 0..30, cutoff 1.0, weight 0.0, no explicit
        weights). A connected guide is reduced to a dict holding a clone of its
        'samples' tensor. `guide_mode` receives the "_projection" and "_cw"
        suffixes requested by the two mode flags, except that "unsample_cw"
        and "resample_cw" collapse back to "unsample" and "resample".
        """
        default_dtype = torch.float64

        # -1 means "until the end of sampling".
        if end_step_masked   == -1:
            end_step_masked   = MAX_STEPS
        if end_step_unmasked == -1:
            end_step_unmasked = MAX_STEPS

        if guide_masked is None:
            weight_scheduler_masked = "constant"
            start_step_masked       = 0
            end_step_masked         = 30
            cutoff_masked           = 1.0
            weight_masked           = 0.0
            weights_masked          = None
        else:
            # Keep only a private copy of the latent; other keys are dropped.
            guide_masked = {'samples': guide_masked['samples'].clone()}

        if guide_unmasked is None:
            weight_scheduler_unmasked = "constant"
            start_step_unmasked       = 0
            end_step_unmasked         = 30
            cutoff_unmasked           = 1.0
            weight_unmasked           = 0.0
            weights_unmasked          = None
        else:
            guide_unmasked = {'samples': guide_unmasked['samples'].clone()}

        if invert_mask and mask is not None:
            mask = 1-mask

        if projection_mode:
            guide_mode = guide_mode + "_projection"

        if channelwise_mode:
            guide_mode = guide_mode + "_cw"

        if guide_mode == "unsample_cw":
            guide_mode = "unsample"
        if guide_mode == "resample_cw":
            guide_mode = "resample"

        # `is None` rather than `== None`: the weights may be tensors, for which
        # equality comparison is not an identity test.
        if weight_scheduler_masked == "constant" and weights_masked is None:
            weights_masked = initialize_or_scale(None, weight_masked, end_step_masked).to(default_dtype)
            weights_masked = F.pad(weights_masked, (0, MAX_STEPS), value=0.0)

        if weight_scheduler_unmasked == "constant" and weights_unmasked is None:
            weights_unmasked = initialize_or_scale(None, weight_unmasked, end_step_unmasked).to(default_dtype)
            weights_unmasked = F.pad(weights_unmasked, (0, MAX_STEPS), value=0.0)

        guides = {
            "guide_mode"                : guide_mode,
            "weight_masked"             : weight_masked,
            "weight_unmasked"           : weight_unmasked,
            "weights_masked"            : weights_masked,
            "weights_unmasked"          : weights_unmasked,
            "guide_masked"              : guide_masked,
            "guide_unmasked"            : guide_unmasked,
            "mask"                      : mask,
            "unmask"                    : unmask,

            "weight_scheduler_masked"   : weight_scheduler_masked,
            "weight_scheduler_unmasked" : weight_scheduler_unmasked,
            "start_step_masked"         : start_step_masked,
            "start_step_unmasked"       : start_step_unmasked,
            "end_step_masked"           : end_step_masked,
            "end_step_unmasked"         : end_step_unmasked,
            "cutoff_masked"             : cutoff_masked,
            "cutoff_unmasked"           : cutoff_unmasked
        }

        return (guides, )
Higher values will strengthen the effect."}), "weight_scheduler_A": (["constant"] + get_res4lyf_scheduler_list(), {"default": "beta57"},), "weight_scheduler_B": (["constant"] + get_res4lyf_scheduler_list(), {"default": "constant"},), "start_step_A": ("INT", {"default": 0, "min": 0, "max": 10000}), "start_step_B": ("INT", {"default": 0, "min": 0, "max": 10000}), "end_step_A": ("INT", {"default": 15, "min": -1, "max": 10000}), "end_step_B": ("INT", {"default": 15, "min": -1, "max": 10000}), "invert_masks": ("BOOLEAN", {"default": False}), }, "optional": { "guide_A": ("LATENT", ), "guide_B": ("LATENT", ), "mask_A": ("MASK", ), "mask_B": ("MASK", ), "weights_A": ("SIGMAS", ), "weights_B": ("SIGMAS", ), } } RETURN_TYPES = ("GUIDES",) RETURN_NAMES = ("guides",) FUNCTION = "main" CATEGORY = "RES4LYF/sampler_extensions" def main(self, weight_scheduler_A = "constant", weight_scheduler_B = "constant", start_step_A = 0, start_step_B = 0, end_step_A = 30, end_step_B = 30, cutoff_A = 1.0, cutoff_B = 1.0, guide_A = None, guide_B = None, weight_A = 0.0, weight_B = 0.0, guide_mode = "epsilon", channelwise_mode = False, projection_mode = False, weights_A = None, weights_B = None, mask_A = None, mask_B = None, invert_masks : bool = False, ): default_dtype = torch.float64 if end_step_A == -1: end_step_A = MAX_STEPS if end_step_B == -1: end_step_B = MAX_STEPS if guide_A is not None: raw_x = guide_A.get('state_info', {}).get('raw_x', None) if False: #raw_x is not None: guide_A = {'samples': guide_A['state_info']['raw_x'].clone()} else: guide_A = {'samples': guide_A['samples'].clone()} if guide_B is not None: raw_x = guide_B.get('state_info', {}).get('raw_x', None) if False: #raw_x is not None: guide_B = {'samples': guide_B['state_info']['raw_x'].clone()} else: guide_B = {'samples': guide_B['samples'].clone()} if guide_A is None: guide_A = guide_B guide_B = None mask_A = mask_B mask_B = None weight_B = 0.0 if guide_B is None: weight_B = 0.0 if mask_A is None and mask_B is not None: 
class ClownOptions_Combine:
    """ComfyUI node that merges an OPTIONS input with any extra keyword inputs."""

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_options"

    @classmethod
    def INPUT_TYPES(cls):
        # A single required socket; further inputs reach `main` through **kwargs.
        required_inputs = {"options": ("OPTIONS",)}
        return {"required": required_inputs}

    def main(self, options, **kwargs):
        """Return the dict form of an OptionsManager built from `options` and `kwargs`."""
        merged = OptionsManager(options, **kwargs).as_dict()
        return (merged,)
class SharkOptions_GuiderInput:
    """ComfyUI node that stores a guider under the "guider" key of an OPTIONS dict."""

    @classmethod
    def INPUT_TYPES(s):
        return {"required":
                    {"guider": ("GUIDER", ),
                    },
                "optional":
                    {"options": ("OPTIONS", ),
                    }
                }

    RETURN_TYPES = ("OPTIONS",)
    RETURN_NAMES = ("options",)
    FUNCTION     = "main"
    CATEGORY     = "RES4LYF/sampler_options"

    def main(self, guider, options=None):
        """Return a one-tuple with the options dict updated to contain `guider`.

        A dict guider is replaced by its 'samples' entry (None when absent),
        and a tensor guider is detached and moved to the CPU before storing.
        """
        # The manager is always constructed here, so no later None check is needed.
        options_mgr = OptionsManager(options if options is not None else {})

        if isinstance(guider, dict):
            guider = guider.get('samples', None)

        if isinstance(guider, torch.Tensor):
            guider = guider.detach().cpu()

        options_mgr.update("guider", guider)

        return (options_mgr.as_dict(), )
def Normalize(in_channels, dtype=None, device=None):
    """Build the GroupNorm layer used for `in_channels` channels.

    The layer always has 32 groups, eps 1e-6 and learnable affine parameters;
    `dtype` and `device` are forwarded to the constructor unchanged.
    """
    group_norm_kwargs = {
        "num_groups": 32,
        "num_channels": in_channels,
        "eps": 1e-6,
        "affine": True,
        "dtype": dtype,
        "device": device,
    }
    return torch.nn.GroupNorm(**group_norm_kwargs)
def attention_sub_quad(query, key, value, heads, mask=None, attn_precision=None, skip_reshape=False, skip_output_reshape=False):
    """Compute attention with `efficient_dot_product_attention`, chunking queries by free memory.

    Args:
        query, key, value: 4-D tensors when `skip_reshape` is set (presumably
            (batch, heads, tokens, dim_head) - TODO confirm against callers),
            otherwise 3-D tensors whose last dimension is heads * dim_head.
        heads: number of attention heads.
        mask: optional attention mask; it is expanded to one mask per
            (batch, head) pair before use.
        attn_precision: requested precision, resolved via `get_attn_precision`.
        skip_reshape: inputs already carry a separate heads dimension.
        skip_output_reshape: return the result with a separate heads dimension
            instead of flattening heads into the last dimension.

    Returns:
        The attention output cast back to the dtype of `query`.
    """
    attn_precision = get_attn_precision(attn_precision, query.dtype)

    if skip_reshape:
        b, _, _, dim_head = query.shape
    else:
        b, _, dim_head = query.shape
        # The last dimension packs all heads; recover the per-head width.
        dim_head //= heads

    # Fold batch and heads into one leading dimension. Note that key is laid
    # out as (b * heads, dim_head, tokens), unlike query and value.
    if skip_reshape:
        query = query.reshape(b * heads, -1, dim_head)
        value = value.reshape(b * heads, -1, dim_head)
        key = key.reshape(b * heads, -1, dim_head).movedim(1, 2)
    else:
        query = query.unsqueeze(3).reshape(b, -1, heads, dim_head).permute(0, 2, 1, 3).reshape(b * heads, -1, dim_head)
        value = value.unsqueeze(3).reshape(b, -1, heads, dim_head).permute(0, 2, 1, 3).reshape(b * heads, -1, dim_head)
        key = key.unsqueeze(3).reshape(b, -1, heads, dim_head).permute(0, 2, 3, 1).reshape(b * heads, dim_head, -1)

    dtype = query.dtype
    # Upcast only when float32 was requested and the inputs are not float32 already.
    upcast_attention = attn_precision == torch.float32 and query.dtype != torch.float32
    if upcast_attention:
        bytes_per_token = torch.finfo(torch.float32).bits//8
    else:
        bytes_per_token = torch.finfo(query.dtype).bits//8
    batch_x_heads, q_tokens, _ = query.shape
    _, _, k_tokens = key.shape

    mem_free_total, _ = model_management.get_free_memory(query.device, True)

    kv_chunk_size_min = None
    kv_chunk_size = None
    query_chunk_size = None

    # Take the largest query chunk size whose memory estimate still covers all
    # key tokens; in that case the keys/values are processed as a single chunk.
    for x in [4096, 2048, 1024, 512, 256]:
        count = mem_free_total / (batch_x_heads * bytes_per_token * x * 4.0)
        if count >= k_tokens:
            kv_chunk_size = k_tokens
            query_chunk_size = x
            break

    # No candidate fit: fall back to 512 and leave kv_chunk_size as None.
    if query_chunk_size is None:
        query_chunk_size = 512

    if mask is not None:
        # A 2-D mask has no batch dimension; treat it as batch size 1.
        if len(mask.shape) == 2:
            bs = 1
        else:
            bs = mask.shape[0]
        # Expand to every (batch, head) pair, then flatten to match query's leading dimension.
        mask = mask.reshape(bs, -1, mask.shape[-2], mask.shape[-1]).expand(b, heads, -1, -1).reshape(-1, mask.shape[-2], mask.shape[-1])

    hidden_states = efficient_dot_product_attention(
        query,
        key,
        value,
        query_chunk_size=query_chunk_size,
        kv_chunk_size=kv_chunk_size,
        kv_chunk_size_min=kv_chunk_size_min,
        use_checkpoint=False,
        upcast_attention=upcast_attention,
        mask=mask,
    )

    hidden_states = hidden_states.to(dtype)
    if skip_output_reshape:
        # Split the leading dimension back into (batch, heads).
        hidden_states = hidden_states.unflatten(0, (-1, heads))
    else:
        # Split into (batch, heads), then merge heads into the last dimension.
        hidden_states = hidden_states.unflatten(0, (-1, heads)).transpose(1,2).flatten(start_dim=2)
    return hidden_states
tensor size:{tensor_size/gb:0.1f}GB, cuda free:{mem_free_cuda/gb:0.1f}GB " # f"torch free:{mem_free_torch/gb:0.1f} total:{mem_free_total/gb:0.1f} steps:{steps}") if steps > 64: max_res = math.floor(math.sqrt(math.sqrt(mem_free_total / 2.5)) / 8) * 64 raise RuntimeError(f'Not enough memory, use lower resolution (max approx. {max_res}x{max_res}). ' f'Need: {mem_required/64/gb:0.1f}GB free, Have:{mem_free_total/gb:0.1f}GB free') if mask is not None: if len(mask.shape) == 2: bs = 1 else: bs = mask.shape[0] mask = mask.reshape(bs, -1, mask.shape[-2], mask.shape[-1]).expand(b, heads, -1, -1).reshape(-1, mask.shape[-2], mask.shape[-1]) # print("steps", steps, mem_required, mem_free_total, modifier, q.element_size(), tensor_size) first_op_done = False cleared_cache = False while True: try: slice_size = q.shape[1] // steps if (q.shape[1] % steps) == 0 else q.shape[1] for i in range(0, q.shape[1], slice_size): end = i + slice_size if upcast: with torch.autocast(enabled=False, device_type = 'cuda'): s1 = einsum('b i d, b j d -> b i j', q[:, i:end].float(), k.float()) * scale else: s1 = einsum('b i d, b j d -> b i j', q[:, i:end], k) * scale if mask is not None: if len(mask.shape) == 2: s1 += mask[i:end] else: if mask.shape[1] == 1: s1 += mask else: s1 += mask[:, i:end] s2 = s1.softmax(dim=-1).to(v.dtype) del s1 first_op_done = True r1[:, i:end] = einsum('b i j, b j d -> b i d', s2, v) del s2 break except model_management.OOM_EXCEPTION as e: if first_op_done == False: model_management.soft_empty_cache(True) if cleared_cache == False: cleared_cache = True logging.warning("out of memory error, emptying cache and trying again") continue steps *= 2 if steps > 64: raise e logging.warning("out of memory error, increasing steps and trying again {}".format(steps)) else: raise e del q, k, v if skip_output_reshape: r1 = ( r1.unsqueeze(0) .reshape(b, heads, -1, dim_head) ) else: r1 = ( r1.unsqueeze(0) .reshape(b, heads, -1, dim_head) .permute(0, 2, 1, 3) .reshape(b, -1, heads * dim_head) 
def attention_xformers(q, k, v, heads, mask=None, attn_precision=None, skip_reshape=False, skip_output_reshape=False):
    """Compute attention with xformers, falling back to `attention_pytorch` when xformers is unusable.

    Args:
        q, k, v: 4-D tensors with a separate heads dimension at index 1 when
            `skip_reshape` is set, otherwise 3-D tensors whose last dimension
            is heads * dim_head.
        heads: number of attention heads.
        mask: optional additive attention bias; batch and heads dimensions are
            added when missing.
        attn_precision: accepted for signature parity; not used here.
        skip_reshape: inputs already carry a separate heads dimension.
        skip_output_reshape: return the result with heads at index 1 instead
            of flattening heads into the last dimension.

    Returns:
        The attention output in the layout selected by `skip_output_reshape`.
    """
    b = q.shape[0]
    dim_head = q.shape[-1]
    # check to make sure xformers isn't broken
    disabled_xformers = False

    if BROKEN_XFORMERS:
        if b * heads > 65535:
            disabled_xformers = True

    if not disabled_xformers:
        if torch.jit.is_tracing() or torch.jit.is_scripting():
            disabled_xformers = True

    if disabled_xformers:
        # Forward skip_output_reshape as well, so the fallback returns the same
        # layout the caller would have received from the xformers path.
        return attention_pytorch(q, k, v, heads, mask, skip_reshape=skip_reshape, skip_output_reshape=skip_output_reshape)

    if skip_reshape:
        # b h k d -> b k h d
        q, k, v = map(
            lambda t: t.permute(0, 2, 1, 3),
            (q, k, v),
        )
    # actually do the reshaping
    else:
        dim_head //= heads
        q, k, v = map(
            lambda t: t.reshape(b, -1, heads, dim_head),
            (q, k, v),
        )

    if mask is not None:
        # add a singleton batch dimension
        if mask.ndim == 2:
            mask = mask.unsqueeze(0)
        # add a singleton heads dimension
        if mask.ndim == 3:
            mask = mask.unsqueeze(1)
        # pad to a multiple of 8
        pad = 8 - mask.shape[-1] % 8
        # the xformers docs says that it's allowed to have a mask of shape (1, Nq, Nk)
        # but when using separated heads, the shape has to be (B, H, Nq, Nk)
        # in flux, this matrix ends up being over 1GB
        # here, we create a mask with the same batch/head size as the input mask (potentially singleton or full)
        mask_out = torch.empty([mask.shape[0], mask.shape[1], q.shape[1], mask.shape[-1] + pad], dtype=q.dtype, device=q.device)

        mask_out[..., :mask.shape[-1]] = mask
        # The slice is a view into the padded buffer: the visible width is the
        # original one while the underlying allocation stays padded.
        # NOTE(review): presumably this satisfies an xformers alignment
        # requirement on the bias storage - confirm against the xformers docs.
        mask = mask_out[..., :mask.shape[-1]]
        mask = mask.expand(b, heads, -1, -1)

    out = xformers.ops.memory_efficient_attention(q, k, v, attn_bias=mask)

    if skip_output_reshape:
        # b k h d -> b h k d
        out = out.permute(0, 2, 1, 3)
    else:
        out = (
            out.reshape(b, -1, heads * dim_head)
        )

    return out
# Register flash attention as a torch custom op when torch.library supports it;
# otherwise install a stub that fails loudly when it is called.
try:
    @torch.library.custom_op("flash_attention::flash_attn", mutates_args=())
    def flash_attn_wrapper(q: torch.Tensor, k: torch.Tensor, v: torch.Tensor,
                           dropout_p: float = 0.0, causal: bool = False) -> torch.Tensor:
        """Forward the call to ``flash_attn_func`` with the same arguments."""
        return flash_attn_func(q, k, v, dropout_p=dropout_p, causal=causal)


    @flash_attn_wrapper.register_fake
    def flash_attn_fake(q, k, v, dropout_p=0.0, causal=False):
        """Fake implementation for the custom op: an uninitialised tensor shaped like ``q``."""
        # Output shape is the same as q
        return q.new_empty(q.shape)
except AttributeError as error:
    # Kept at module level so the stub below can report why registration failed.
    FLASH_ATTN_ERROR = error

    def flash_attn_wrapper(q: torch.Tensor, k: torch.Tensor, v: torch.Tensor,
                           dropout_p: float = 0.0, causal: bool = False) -> torch.Tensor:
        """Stub used when the custom op could not be defined; always raises."""
        # Raise explicitly rather than ``assert False``: assert statements are
        # removed under ``python -O``, which would make this stub return None.
        raise AssertionError(f"Could not define flash_attn_wrapper: {FLASH_ATTN_ERROR}")
class ReCrossAttention(nn.Module):
    """Attention layer whose intermediate tensors are routed through a style hook.

    ``forward`` calls ``style_block(tensor, tag)`` after the query, key and value
    projections and on the attention output, and continues with whatever the
    hook returns.
    """

    def __init__(self, query_dim, context_dim=None, heads=8, dim_head=64, dropout=0., attn_precision=None, dtype=None, device=None, operations=ops):
        """Build the q/k/v projections and the output projection.

        Args:
            query_dim: feature size of the query input (and of the output).
            context_dim: feature size of the context; falls back to ``query_dim``
                via ``default`` when not given.
            heads: number of attention heads.
            dim_head: feature size per head; the inner width is ``dim_head * heads``.
            dropout: dropout probability applied after the output projection.
            attn_precision: forwarded to ``optimized_attention`` in the unmasked path.
            dtype, device: forwarded to the ``operations.Linear`` constructors.
            operations: namespace that provides the ``Linear`` layer class.
        """
        super().__init__()
        inner_dim = dim_head * heads
        context_dim = default(context_dim, query_dim)
        self.attn_precision = attn_precision

        self.heads = heads
        self.dim_head = dim_head

        self.to_q = operations.Linear(query_dim, inner_dim, bias=False, dtype=dtype, device=device)
        self.to_k = operations.Linear(context_dim, inner_dim, bias=False, dtype=dtype, device=device)
        self.to_v = operations.Linear(context_dim, inner_dim, bias=False, dtype=dtype, device=device)

        self.to_out = nn.Sequential(operations.Linear(inner_dim, query_dim, dtype=dtype, device=device), nn.Dropout(dropout))

    @staticmethod
    def _no_style(tensor, tag):
        """Pass-through hook used when the caller supplies no ``style_block``."""
        return tensor

    def forward(self, x, context=None, value=None, mask=None, style_block=None):
        """Run attention of ``x`` over ``context``.

        Args:
            x: query input.
            context: key source; ``x`` itself is used when this is None.
            value: optional separate value source; ``context`` is used otherwise.
            mask: optional attention mask. When given, ``attention_pytorch`` is
                used instead of ``optimized_attention``.
            style_block: callable ``(tensor, tag) -> tensor``. ``None`` (the
                declared default) now means "no styling"; it used to raise
                ``TypeError`` on the first hook call.

        Returns:
            The output of ``to_out`` applied to the hooked attention result.
        """
        if style_block is None:
            style_block = self._no_style

        q = style_block(self.to_q(x), "q_proj")

        context = default(context, x)  # if context is None, return x
        k = style_block(self.to_k(context), "k_proj")

        if value is not None:
            v = self.to_v(value)
            del value
        else:
            v = self.to_v(context)
        v = style_block(v, "v_proj")

        if mask is None:
            out = optimized_attention(q, k, v, self.heads, attn_precision=self.attn_precision)
        else:
            # Masked attention always goes through the pytorch SDPA implementation.
            out = attention_pytorch(q, k, v, self.heads, mask=mask)

        out = style_block(out, "out")
        return self.to_out(out)
inner_dim is not None if inner_dim is None: inner_dim = dim self.is_res = inner_dim == dim self.attn_precision = attn_precision if self.ff_in: self.norm_in = operations.LayerNorm(dim, dtype=dtype, device=device) self.ff_in = FeedForward(dim, dim_out=inner_dim, dropout=dropout, glu=gated_ff, dtype=dtype, device=device, operations=operations) self.disable_self_attn = disable_self_attn self.attn1 = ReCrossAttention(query_dim=inner_dim, heads=n_heads, dim_head=d_head, dropout=dropout, context_dim=context_dim if self.disable_self_attn else None, attn_precision=self.attn_precision, dtype=dtype, device=device, operations=operations) # is a self-attention if not self.disable_self_attn self.ff = FeedForward(inner_dim, dim_out=dim, dropout=dropout, glu=gated_ff, dtype=dtype, device=device, operations=operations) if disable_temporal_crossattention: if switch_temporal_ca_to_sa: raise ValueError else: self.attn2 = None else: context_dim_attn2 = None if not switch_temporal_ca_to_sa: context_dim_attn2 = context_dim self.attn2 = ReCrossAttention(query_dim=inner_dim, context_dim=context_dim_attn2, heads=n_heads, dim_head=d_head, dropout=dropout, attn_precision=self.attn_precision, dtype=dtype, device=device, operations=operations) # is self-attn if context is none self.norm2 = operations.LayerNorm(inner_dim, dtype=dtype, device=device) self.norm1 = operations.LayerNorm(inner_dim, dtype=dtype, device=device) self.norm3 = operations.LayerNorm(inner_dim, dtype=dtype, device=device) self.n_heads = n_heads self.d_head = d_head self.switch_temporal_ca_to_sa = switch_temporal_ca_to_sa def forward(self, x, context=None, transformer_options={}, style_block=None): extra_options = {} block = transformer_options.get("block", None) block_index = transformer_options.get("block_index", 0) transformer_patches = {} transformer_patches_replace = {} self_mask = transformer_options.get('self_mask') cross_mask = transformer_options.get('cross_mask') if self_mask is not None and cross_mask is not None: 
if self_mask.shape[-2] == x.shape[-2]: pass elif self_mask.shape[-2] < x.shape[-2]: self_mask = transformer_options.get('self_mask_up') cross_mask = transformer_options.get('cross_mask_up') else: self_mask = transformer_options.get('self_mask_down') cross_mask = transformer_options.get('cross_mask_down') if self_mask.shape[-2] > x.shape[-2]: self_mask = transformer_options.get('self_mask_down2') cross_mask = transformer_options.get('cross_mask_down2') for k in transformer_options: if k == "patches": transformer_patches = transformer_options[k] elif k == "patches_replace": transformer_patches_replace = transformer_options[k] else: extra_options[k] = transformer_options[k] extra_options["n_heads"] = self.n_heads extra_options["dim_head"] = self.d_head extra_options["attn_precision"] = self.attn_precision if self.ff_in: # never true for sdxl? x_skip = x x = self.ff_in(self.norm_in(x)) if self.is_res: x += x_skip n = self.norm1(x) n = style_block(n, "norm1") if self.disable_self_attn: context_attn1 = context else: context_attn1 = None value_attn1 = None if "attn1_patch" in transformer_patches: patch = transformer_patches["attn1_patch"] if context_attn1 is None: context_attn1 = n value_attn1 = context_attn1 for p in patch: n, context_attn1, value_attn1 = p(n, context_attn1, value_attn1, extra_options) if block is not None: transformer_block = (block[0], block[1], block_index) else: transformer_block = None attn1_replace_patch = transformer_patches_replace.get("attn1", {}) block_attn1 = transformer_block if block_attn1 not in attn1_replace_patch: block_attn1 = block if block_attn1 in attn1_replace_patch: if context_attn1 is None: context_attn1 = n value_attn1 = n n = self.attn1.to_q(n) context_attn1 = self.attn1.to_k(context_attn1) value_attn1 = self.attn1.to_v(value_attn1) n = attn1_replace_patch[block_attn1](n, context_attn1, value_attn1, extra_options) n = self.attn1.to_out(n) else: n = self.attn1(n, context=context_attn1, value=value_attn1, mask=self_mask, 
class ReSpatialTransformer(nn.Module):
    """
    Transformer block for image-like data.
    First, project the input (aka embedding)
    and reshape to b, t, d.
    Then apply standard transformer action.
    Finally, reshape to image
    NEW: use_linear for more efficiency instead of the 1x1 convs

    After each stage ``forward`` passes the activations through
    ``style_block(tensor, tag)`` and continues with the returned tensor.
    """
    def __init__(self, in_channels, n_heads, d_head,
                 depth=1, dropout=0., context_dim=None,
                 disable_self_attn=False, use_linear=False,
                 use_checkpoint=True, attn_precision=None, dtype=None, device=None, operations=ops):
        """Build the norm, in/out projections and ``depth`` transformer blocks.

        Args:
            in_channels: channel count of the input feature map.
            n_heads, d_head: heads and per-head width; the inner width is their product.
            depth: number of ``ReBasicTransformerBlock`` layers.
            dropout: dropout probability forwarded to the blocks.
            context_dim: a single context width (repeated for every block), a
                list with one entry per block, or None.
            disable_self_attn: forwarded to every block.
            use_linear: use ``Linear`` projections instead of 1x1 ``Conv2d``.
            use_checkpoint: forwarded to the blocks as ``checkpoint``.
            attn_precision, dtype, device, operations: forwarded to the sub-layers.
        """
        super().__init__()
        # Normalise context_dim to one entry per block. None is expanded as well,
        # so the per-block lookup below no longer fails when no context width is given.
        if not isinstance(context_dim, list):
            context_dim = [context_dim] * depth
        self.in_channels = in_channels
        inner_dim = n_heads * d_head
        self.norm = operations.GroupNorm(num_groups=32, num_channels=in_channels, eps=1e-6, affine=True, dtype=dtype, device=device)
        if not use_linear:
            self.proj_in = operations.Conv2d(in_channels,
                                             inner_dim,
                                             kernel_size=1,
                                             stride=1,
                                             padding=0, dtype=dtype, device=device)
        else:
            self.proj_in = operations.Linear(in_channels, inner_dim, dtype=dtype, device=device)

        self.transformer_blocks = nn.ModuleList(
            [ReBasicTransformerBlock(inner_dim, n_heads, d_head, dropout=dropout, context_dim=context_dim[d],
                                     disable_self_attn=disable_self_attn, checkpoint=use_checkpoint, attn_precision=attn_precision, dtype=dtype, device=device, operations=operations)
             for d in range(depth)]
        )
        if not use_linear:
            self.proj_out = operations.Conv2d(inner_dim, in_channels,
                                              kernel_size=1,
                                              stride=1,
                                              padding=0, dtype=dtype, device=device)
        else:
            # proj_out consumes inner_dim features and must produce in_channels so
            # the residual add in forward() lines up. The arguments used to be
            # swapped, which only worked when in_channels == inner_dim (where the
            # weight shape is identical, so existing checkpoints still load).
            self.proj_out = operations.Linear(inner_dim, in_channels, dtype=dtype, device=device)
        self.use_linear = use_linear

    def forward(self, x, context=None, style_block=None, transformer_options=None):
        """Apply the transformer stack to a ``(b, c, h, w)`` feature map.

        Args:
            x: input feature map; unpacked as ``b, c, h, w``.
            context: conditioning for the blocks, either one value shared by all
                blocks or a list with one entry per block.
            style_block: hook object. It is called as ``style_block(tensor, tag)``
                and its ``TFMR`` attribute is handed to every transformer block,
                so callers are expected to supply it despite the None default.
            transformer_options: options dict shared with the blocks.
                ``"activations_shape"`` and ``"block_index"`` are written into it.
                A fresh dict is created when omitted, so no state is shared
                between calls.

        Returns:
            A tensor with the same layout as ``x`` (input plus transformer output).
        """
        if transformer_options is None:
            transformer_options = {}

        # note: if no context is given, cross-attention defaults to self-attention
        if not isinstance(context, list):
            context = [context] * len(self.transformer_blocks)

        _, _, h, w = x.shape
        transformer_options["activations_shape"] = list(x.shape)
        x_in = x

        x = self.norm(x)
        x = style_block(x, "spatial_norm_in")

        # Conv projection runs on the image layout, linear projection on tokens.
        if not self.use_linear:
            x = self.proj_in(x)
            x = style_block(x, "spatial_proj_in")

        x = x.movedim(1, 3).flatten(1, 2).contiguous()

        if self.use_linear:
            x = self.proj_in(x)
            x = style_block(x, "spatial_proj_in")

        for i, block in enumerate(self.transformer_blocks):
            transformer_options["block_index"] = i
            x = block(x, context=context[i], style_block=style_block.TFMR, transformer_options=transformer_options)
            x = style_block(x, "spatial_transformer_block")

        x = style_block(x, "spatial_transformer")

        if self.use_linear:
            x = self.proj_out(x)

        # Back from (b, h*w, c) tokens to a (b, c, h, w) feature map.
        x = x.reshape(x.shape[0], h, w, x.shape[-1]).movedim(3, 1).contiguous()

        if not self.use_linear:
            x = self.proj_out(x)

        x = style_block(x, "spatial_proj_out")

        x = x + x_in
        x = style_block(x, "spatial_res")
        return x
disable_temporal_crossattention=disable_temporal_crossattention, attn_precision=attn_precision, dtype=dtype, device=device, operations=operations ) for _ in range(self.depth) ] ) assert len(self.time_stack) == len(self.transformer_blocks) self.use_spatial_context = use_spatial_context self.in_channels = in_channels time_embed_dim = self.in_channels * 4 self.time_pos_embed = nn.Sequential( operations.Linear(self.in_channels, time_embed_dim, dtype=dtype, device=device), nn.SiLU(), operations.Linear(time_embed_dim, self.in_channels, dtype=dtype, device=device), ) self.time_mixer = AlphaBlender( alpha=merge_factor, merge_strategy=merge_strategy ) def forward( self, x: torch.Tensor, context: Optional[torch.Tensor] = None, time_context: Optional[torch.Tensor] = None, timesteps: Optional[int] = None, image_only_indicator: Optional[torch.Tensor] = None, transformer_options={} ) -> torch.Tensor: _, _, h, w = x.shape transformer_options["activations_shape"] = list(x.shape) x_in = x spatial_context = None if exists(context): spatial_context = context if self.use_spatial_context: assert ( context.ndim == 3 ), f"n dims of spatial context should be 3 but are {context.ndim}" if time_context is None: time_context = context time_context_first_timestep = time_context[::timesteps] time_context = repeat( time_context_first_timestep, "b ... -> (b n) ...", n=h * w ) elif time_context is not None and not self.use_spatial_context: time_context = repeat(time_context, "b ... 
-> (b n) ...", n=h * w) if time_context.ndim == 2: time_context = rearrange(time_context, "b c -> b 1 c") x = self.norm(x) if not self.use_linear: x = self.proj_in(x) x = rearrange(x, "b c h w -> b (h w) c") if self.use_linear: x = self.proj_in(x) num_frames = torch.arange(timesteps, device=x.device) num_frames = repeat(num_frames, "t -> b t", b=x.shape[0] // timesteps) num_frames = rearrange(num_frames, "b t -> (b t)") t_emb = timestep_embedding(num_frames, self.in_channels, repeat_only=False, max_period=self.max_time_embed_period).to(x.dtype) emb = self.time_pos_embed(t_emb) emb = emb[:, None, :] for it_, (block, mix_block) in enumerate( zip(self.transformer_blocks, self.time_stack) ): transformer_options["block_index"] = it_ x = block( x, context=spatial_context, transformer_options=transformer_options, ) x_mix = x x_mix = x_mix + emb B, S, C = x_mix.shape x_mix = rearrange(x_mix, "(b t) s c -> (b s) t c", t=timesteps) x_mix = mix_block(x_mix, context=time_context) #TODO: transformer_options x_mix = rearrange( x_mix, "(b s) t c -> (b t) s c", s=S, b=B // timesteps, c=C, t=timesteps ) x = self.time_mixer(x_spatial=x, x_temporal=x_mix, image_only_indicator=image_only_indicator) if self.use_linear: x = self.proj_out(x) x = rearrange(x, "b (h w) c -> b c h w", h=h, w=w) if not self.use_linear: x = self.proj_out(x) out = x + x_in return out ================================================ FILE: sd/openaimodel.py ================================================ from abc import abstractmethod import torch import torch as th import torch.nn as nn import torch.nn.functional as F from einops import rearrange import logging import copy from ..helper import ExtraOptions from comfy.ldm.modules.diffusionmodules.util import ( checkpoint, avg_pool_nd, timestep_embedding, AlphaBlender, ) from comfy.ldm.modules.attention import SpatialTransformer, SpatialVideoTransformer, default from .attention import ReSpatialTransformer, ReBasicTransformerBlock from comfy.ldm.util import 
#This is needed because accelerate makes a copy of transformer_options which breaks "transformer_index"
def forward_timestep_embed(ts, x, emb, context=None, transformer_options=None, output_shape=None, time_context=None, num_video_frames=None, image_only_indicator=None, style_block=None):
    """Run ``x`` through every layer in ``ts``, passing each layer type its extra inputs.

    Args:
        ts: iterable of layers to apply in order.
        x: input activations.
        emb: timestep embedding given to residual blocks.
        context: conditioning given to the transformer layers.
        transformer_options: options dict shared with the transformer layers.
            ``"transformer_index"`` is incremented in it after each transformer
            layer when that key is present. Defaults to a fresh dict per call,
            so keys written by the layers are not shared between calls that
            omit the argument.
        output_shape: forwarded to ``Upsample`` layers.
        time_context, num_video_frames, image_only_indicator: forwarded to the
            video layers and to patch handlers.
        style_block: hook object. ``style_block(tensor, tag)`` is applied after
            residual, spatial-transformer and resample layers, and its
            ``res_block`` / ``spatial_block`` attributes are passed into those
            layers. It is dereferenced whenever such a layer is in ``ts``.

    Returns:
        The activations after the last layer.
    """
    if transformer_options is None:
        transformer_options = {}

    for layer in ts:
        if isinstance(layer, VideoResBlock): # UNUSED
            x = layer(x, emb, num_video_frames, image_only_indicator)
        elif isinstance(layer, TimestepBlock): # ResBlock(TimestepBlock)
            x = layer(x, emb, style_block.res_block)
            x = style_block(x, "res")
        elif isinstance(layer, SpatialVideoTransformer): # UNUSED
            x = layer(x, context, time_context, num_video_frames, image_only_indicator, transformer_options)
            if "transformer_index" in transformer_options:
                transformer_options["transformer_index"] += 1
        elif isinstance(layer, ReSpatialTransformer): # USED
            x = layer(x, context, style_block.spatial_block, transformer_options,)
            x = style_block(x, "spatial")
            if "transformer_index" in transformer_options:
                transformer_options["transformer_index"] += 1
        elif isinstance(layer, Upsample):
            x = layer(x, output_shape=output_shape)
            x = style_block(x, "resample")
        elif isinstance(layer, Downsample):
            x = layer(x)
            x = style_block(x, "resample")
        else:
            # Any other layer: the first registered patch handler whose class
            # matches takes over; without a match the layer is called directly.
            handlers = ()
            if "patches" in transformer_options and "forward_timestep_embed_patch" in transformer_options["patches"]:
                handlers = transformer_options["patches"]["forward_timestep_embed_patch"]
            for class_type, handler in handlers:
                if isinstance(layer, class_type):
                    x = handler(layer, x, emb, context, transformer_options, output_shape, time_context, num_video_frames, image_only_indicator)
                    break
            else:
                x = layer(x)
    return x
""" def __init__( self, channels, emb_channels, dropout, out_channels=None, use_conv=False, use_scale_shift_norm=False, dims=2, use_checkpoint=False, up=False, down=False, kernel_size=3, exchange_temb_dims=False, skip_t_emb=False, dtype=None, device=None, operations=ops ): super().__init__() self.channels = channels self.emb_channels = emb_channels self.dropout = dropout self.out_channels = out_channels or channels self.use_conv = use_conv self.use_checkpoint = use_checkpoint self.use_scale_shift_norm = use_scale_shift_norm self.exchange_temb_dims = exchange_temb_dims if isinstance(kernel_size, list): padding = [k // 2 for k in kernel_size] else: padding = kernel_size // 2 self.in_layers = nn.Sequential( operations.GroupNorm(32, channels, dtype=dtype, device=device), nn.SiLU(), operations.conv_nd(dims, channels, self.out_channels, kernel_size, padding=padding, dtype=dtype, device=device), ) self.updown = up or down if up: self.h_upd = Upsample(channels, False, dims, dtype=dtype, device=device) self.x_upd = Upsample(channels, False, dims, dtype=dtype, device=device) elif down: self.h_upd = Downsample(channels, False, dims, dtype=dtype, device=device) self.x_upd = Downsample(channels, False, dims, dtype=dtype, device=device) else: self.h_upd = self.x_upd = nn.Identity() self.skip_t_emb = skip_t_emb if self.skip_t_emb: self.emb_layers = None self.exchange_temb_dims = False else: self.emb_layers = nn.Sequential( nn.SiLU(), operations.Linear( emb_channels, 2 * self.out_channels if use_scale_shift_norm else self.out_channels, dtype=dtype, device=device ), ) self.out_layers = nn.Sequential( operations.GroupNorm(32, self.out_channels, dtype=dtype, device=device), nn.SiLU(), nn.Dropout(p=dropout), operations.conv_nd(dims, self.out_channels, self.out_channels, kernel_size, padding=padding, dtype=dtype, device=device) , ) if self.out_channels == channels: self.skip_connection = nn.Identity() elif use_conv: self.skip_connection = operations.conv_nd( dims, channels, 
self.out_channels, kernel_size, padding=padding, dtype=dtype, device=device ) else: self.skip_connection = operations.conv_nd(dims, channels, self.out_channels, 1, dtype=dtype, device=device) def forward(self, x, emb, style_block=None): """ Apply the block to a Tensor, conditioned on a timestep embedding. :param x: an [N x C x ...] Tensor of features. :param emb: an [N x emb_channels] Tensor of timestep embeddings. :return: an [N x C x ...] Tensor of outputs. """ return checkpoint( self._forward, (x, emb, style_block), self.parameters(), self.use_checkpoint ) def _forward(self, x, emb, style_block=None): #if self.updown: # not used with sdxl? # in_rest, in_conv = self.in_layers[:-1], self.in_layers[-1] # h = in_rest(x) # h = self.h_upd(h) # x = self.x_upd(x) # h = in_conv(h) #else: # h = self.in_layers(x) h = self.in_layers[0](x) h = style_block(h, "in_norm") h = self.in_layers[1](h) h = style_block(h, "in_silu") h = self.in_layers[2](h) h = style_block(h, "in_conv") emb_out = None if not self.skip_t_emb: #emb_out = self.emb_layers(emb).type(h.dtype) emb_out = self.emb_layers[0](emb).type(h.dtype) emb_out = style_block(emb_out, "emb_silu") emb_out = self.emb_layers[1](emb_out) emb_out = style_block(emb_out, "emb_linear") while len(emb_out.shape) < len(h.shape): emb_out = emb_out[..., None] if self.use_scale_shift_norm: # not used with sdxl? 
def apply_control(h, control, name):
    """Add the next pending control residual for ``name`` onto ``h``.

    Args:
        h: activations to modify; updated in place with ``+=``.
        control: mapping from block name to a list of residuals, or None.
        name: key selecting which list to consume.

    Returns:
        ``h``, after the in-place addition when a residual was available.

    The last entry of ``control[name]`` is popped on every call that finds a
    non-empty list, including when that entry is None or cannot be added. A
    failed addition is logged as a warning and ``h`` is returned as it was.
    """
    if control is not None and name in control and len(control[name]) > 0:
        ctrl = control[name].pop()
        if ctrl is not None:
            try:
                h += ctrl
            except Exception:
                # Best effort: a residual that cannot be added is skipped.
                # Only ``Exception`` is caught so that KeyboardInterrupt and
                # SystemExit are no longer swallowed here.
                logging.warning("warning control could not be applied {} {}".format(h.shape, ctrl.shape))
    return h
:param num_heads_channels: if specified, ignore num_heads and instead use a fixed channel width per attention head. :param num_heads_upsample: works with num_heads to set a different number of heads for upsampling. Deprecated. :param use_scale_shift_norm: use a FiLM-like conditioning mechanism. :param resblock_updown: use residual blocks for up/downsampling. :param use_new_attention_order: use a different attention pattern for potentially increased efficiency. """ def __init__( self, image_size, in_channels, model_channels, out_channels, num_res_blocks, dropout = 0, channel_mult = (1, 2, 4, 8), conv_resample = True, dims = 2, num_classes = None, use_checkpoint = False, dtype = th.float32, num_heads = -1, num_head_channels = -1, num_heads_upsample = -1, use_scale_shift_norm = False, resblock_updown = False, use_new_attention_order = False, use_spatial_transformer = False, # custom transformer support transformer_depth = 1, # custom transformer support context_dim = None, # custom transformer support n_embed = None, # custom support for prediction of discrete ids into codebook of first stage vq model legacy = True, disable_self_attentions = None, num_attention_blocks = None, disable_middle_self_attn = False, use_linear_in_transformer = False, adm_in_channels = None, transformer_depth_middle = None, transformer_depth_output = None, use_temporal_resblock = False, use_temporal_attention = False, time_context_dim = None, extra_ff_mix_layer = False, use_spatial_context = False, merge_strategy = None, merge_factor = 0.0, video_kernel_size = None, disable_temporal_crossattention = False, max_ddpm_temb_period = 10000, attn_precision = None, device = None, operations = ops, ): super().__init__() if context_dim is not None: assert use_spatial_transformer, 'Fool!! You forgot to use the spatial transformer for your cross-attention conditioning...' 
# from omegaconf.listconfig import ListConfig # if type(context_dim) == ListConfig: # context_dim = list(context_dim) if num_heads_upsample == -1: num_heads_upsample = num_heads if num_heads == -1: assert num_head_channels != -1, 'Either num_heads or num_head_channels has to be set' if num_head_channels == -1: assert num_heads != -1, 'Either num_heads or num_head_channels has to be set' self.in_channels = in_channels self.model_channels = model_channels self.out_channels = out_channels if isinstance(num_res_blocks, int): self.num_res_blocks = len(channel_mult) * [num_res_blocks] else: if len(num_res_blocks) != len(channel_mult): raise ValueError("provide num_res_blocks either as an int (globally constant) or " "as a list/tuple (per-level) with the same length as channel_mult") self.num_res_blocks = num_res_blocks if disable_self_attentions is not None: # should be a list of booleans, indicating whether to disable self-attention in TransformerBlocks or not assert len(disable_self_attentions) == len(channel_mult) if num_attention_blocks is not None: assert len(num_attention_blocks) == len(self.num_res_blocks) transformer_depth = transformer_depth[:] transformer_depth_output = transformer_depth_output[:] self.dropout = dropout self.channel_mult = channel_mult self.conv_resample = conv_resample self.num_classes = num_classes self.use_checkpoint = use_checkpoint self.dtype = dtype self.num_heads = num_heads self.num_head_channels = num_head_channels self.num_heads_upsample = num_heads_upsample self.use_temporal_resblocks = use_temporal_resblock self.predict_codebook_ids = n_embed is not None self.default_num_video_frames = None time_embed_dim = model_channels * 4 self.time_embed = nn.Sequential( operations.Linear(model_channels, time_embed_dim, dtype=self.dtype, device=device), nn.SiLU(), operations.Linear(time_embed_dim, time_embed_dim, dtype=self.dtype, device=device), ) if self.num_classes is not None: if isinstance(self.num_classes, int): self.label_emb = 
nn.Embedding(num_classes, time_embed_dim, dtype=self.dtype, device=device) elif self.num_classes == "continuous": logging.debug("setting up linear c_adm embedding layer") self.label_emb = nn.Linear(1, time_embed_dim) elif self.num_classes == "sequential": assert adm_in_channels is not None self.label_emb = nn.Sequential( nn.Sequential( operations.Linear(adm_in_channels, time_embed_dim, dtype=self.dtype, device=device), nn.SiLU(), operations.Linear(time_embed_dim, time_embed_dim, dtype=self.dtype, device=device), ) ) else: raise ValueError() self.input_blocks = nn.ModuleList( [ TimestepEmbedSequential( operations.conv_nd(dims, in_channels, model_channels, 3, padding=1, dtype=self.dtype, device=device) ) ] ) self._feature_size = model_channels input_block_chans = [model_channels] ch = model_channels ds = 1 def get_attention_layer( ch, num_heads, dim_head, depth=1, context_dim=None, use_checkpoint=False, disable_self_attn=False, ): if use_temporal_attention: return SpatialVideoTransformer( ch, num_heads, dim_head, depth = depth, context_dim = context_dim, time_context_dim = time_context_dim, dropout = dropout, ff_in = extra_ff_mix_layer, use_spatial_context = use_spatial_context, merge_strategy = merge_strategy, merge_factor = merge_factor, checkpoint = use_checkpoint, use_linear = use_linear_in_transformer, disable_self_attn = disable_self_attn, disable_temporal_crossattention = disable_temporal_crossattention, max_time_embed_period = max_ddpm_temb_period, attn_precision = attn_precision, dtype=self.dtype, device=device, operations=operations, ) else: return SpatialTransformer( ch, num_heads, dim_head, depth=depth, context_dim=context_dim, disable_self_attn=disable_self_attn, use_linear=use_linear_in_transformer, use_checkpoint=use_checkpoint, attn_precision=attn_precision, dtype=self.dtype, device=device, operations=operations ) def get_resblock( merge_factor, merge_strategy, video_kernel_size, ch, time_embed_dim, dropout, out_channels, dims, use_checkpoint, 
use_scale_shift_norm, down = False, up = False, dtype = None, device = None, operations = ops ): if self.use_temporal_resblocks: return VideoResBlock( merge_factor = merge_factor, merge_strategy = merge_strategy, video_kernel_size = video_kernel_size, channels = ch, emb_channels = time_embed_dim, dropout = dropout, out_channels = out_channels, dims = dims, use_checkpoint = use_checkpoint, use_scale_shift_norm = use_scale_shift_norm, down = down, up = up, dtype=dtype, device=device, operations=operations, ) else: return ResBlock( channels = ch, emb_channels = time_embed_dim, dropout = dropout, out_channels = out_channels, use_checkpoint = use_checkpoint, dims = dims, use_scale_shift_norm = use_scale_shift_norm, down = down, up = up, dtype=dtype, device=device, operations=operations, ) for level, mult in enumerate(channel_mult): for nr in range(self.num_res_blocks[level]): layers = [ get_resblock( merge_factor = merge_factor, merge_strategy = merge_strategy, video_kernel_size = video_kernel_size, ch = ch, time_embed_dim = time_embed_dim, dropout = dropout, out_channels = mult * model_channels, dims = dims, use_checkpoint = use_checkpoint, use_scale_shift_norm = use_scale_shift_norm, dtype=self.dtype, device=device, operations=operations, ) ] ch = mult * model_channels num_transformers = transformer_depth.pop(0) if num_transformers > 0: if num_head_channels == -1: dim_head = ch // num_heads else: num_heads = ch // num_head_channels dim_head = num_head_channels if legacy: #num_heads = 1 dim_head = ch // num_heads if use_spatial_transformer else num_head_channels if exists(disable_self_attentions): disabled_sa = disable_self_attentions[level] else: disabled_sa = False if not exists(num_attention_blocks) or nr < num_attention_blocks[level]: layers.append(get_attention_layer( ch, num_heads, dim_head, depth=num_transformers, context_dim=context_dim, disable_self_attn=disabled_sa, use_checkpoint=use_checkpoint) ) self.input_blocks.append(TimestepEmbedSequential(*layers)) 
self._feature_size += ch input_block_chans.append(ch) if level != len(channel_mult) - 1: out_ch = ch self.input_blocks.append( TimestepEmbedSequential( get_resblock( merge_factor = merge_factor, merge_strategy = merge_strategy, video_kernel_size = video_kernel_size, ch = ch, time_embed_dim = time_embed_dim, dropout = dropout, out_channels = out_ch, dims = dims, use_checkpoint = use_checkpoint, use_scale_shift_norm = use_scale_shift_norm, down = True, dtype=self.dtype, device=device, operations=operations, ) if resblock_updown else Downsample(ch, conv_resample, dims=dims, out_channels=out_ch, dtype=self.dtype, device=device, operations=operations) ) ) ch = out_ch input_block_chans.append(ch) ds *= 2 self._feature_size += ch if num_head_channels == -1: dim_head = ch // num_heads else: num_heads = ch // num_head_channels dim_head = num_head_channels if legacy: #num_heads = 1 dim_head = ch // num_heads if use_spatial_transformer else num_head_channels mid_block = [ get_resblock( merge_factor = merge_factor, merge_strategy = merge_strategy, video_kernel_size = video_kernel_size, ch = ch, time_embed_dim = time_embed_dim, dropout = dropout, out_channels = None, dims = dims, use_checkpoint = use_checkpoint, use_scale_shift_norm = use_scale_shift_norm, dtype=self.dtype, device=device, operations=operations, )] self.middle_block = None if transformer_depth_middle >= -1: if transformer_depth_middle >= 0: mid_block += [get_attention_layer( # always uses a self-attn ch, num_heads, dim_head, depth=transformer_depth_middle, context_dim=context_dim, disable_self_attn=disable_middle_self_attn, use_checkpoint=use_checkpoint ), get_resblock( merge_factor = merge_factor, merge_strategy = merge_strategy, video_kernel_size = video_kernel_size, ch = ch, time_embed_dim = time_embed_dim, dropout = dropout, out_channels = None, dims = dims, use_checkpoint = use_checkpoint, use_scale_shift_norm = use_scale_shift_norm, dtype=self.dtype, device=device, operations=operations, )] 
self.middle_block = TimestepEmbedSequential(*mid_block) self._feature_size += ch self.output_blocks = nn.ModuleList([]) for level, mult in list(enumerate(channel_mult))[::-1]: for i in range(self.num_res_blocks[level] + 1): ich = input_block_chans.pop() layers = [ get_resblock( merge_factor = merge_factor, merge_strategy = merge_strategy, video_kernel_size = video_kernel_size, ch = ch + ich, time_embed_dim = time_embed_dim, dropout = dropout, out_channels = model_channels * mult, dims = dims, use_checkpoint = use_checkpoint, use_scale_shift_norm = use_scale_shift_norm, dtype=self.dtype, device=device, operations=operations, ) ] ch = model_channels * mult num_transformers = transformer_depth_output.pop() if num_transformers > 0: if num_head_channels == -1: dim_head = ch // num_heads else: num_heads = ch // num_head_channels dim_head = num_head_channels if legacy: #num_heads = 1 dim_head = ch // num_heads if use_spatial_transformer else num_head_channels if exists(disable_self_attentions): disabled_sa = disable_self_attentions[level] else: disabled_sa = False if not exists(num_attention_blocks) or i < num_attention_blocks[level]: layers.append( get_attention_layer( ch, num_heads, dim_head, depth=num_transformers, context_dim=context_dim, disable_self_attn=disabled_sa, use_checkpoint=use_checkpoint ) ) if level and i == self.num_res_blocks[level]: out_ch = ch layers.append( get_resblock( merge_factor = merge_factor, merge_strategy = merge_strategy, video_kernel_size = video_kernel_size, ch = ch, time_embed_dim = time_embed_dim, dropout = dropout, out_channels = out_ch, dims = dims, use_checkpoint = use_checkpoint, use_scale_shift_norm = use_scale_shift_norm, up = True, dtype=self.dtype, device=device, operations=operations, ) if resblock_updown else Upsample(ch, conv_resample, dims=dims, out_channels=out_ch, dtype=self.dtype, device=device, operations=operations) ) ds //= 2 self.output_blocks.append(TimestepEmbedSequential(*layers)) self._feature_size += ch self.out = 
nn.Sequential( operations.GroupNorm(32, ch, dtype=self.dtype, device=device), nn.SiLU(), operations.conv_nd(dims, model_channels, out_channels, 3, padding=1, dtype=self.dtype, device=device), ) if self.predict_codebook_ids: self.id_predictor = nn.Sequential( operations.GroupNorm(32, ch, dtype=self.dtype, device=device), operations.conv_nd(dims, model_channels, n_embed, 1, dtype=self.dtype, device=device), #nn.LogSoftmax(dim=1) # change to cross_entropy and produce non-normalized logits ) def forward(self, x, timesteps=None, context=None, y=None, control=None, transformer_options={}, **kwargs): return comfy.patcher_extension.WrapperExecutor.new_class_executor( self._forward, self, comfy.patcher_extension.get_all_wrappers(comfy.patcher_extension.WrappersMP.DIFFUSION_MODEL, transformer_options) ).execute(x, timesteps, context, y, control, transformer_options, **kwargs) def _forward(self, x, timesteps=None, context=None, y=None, control=None, transformer_options={}, **kwargs): """ Apply the model to an input batch. :param x: an [N x C x ...] Tensor of inputs. :param timesteps: a 1-D batch of timesteps. :param context: conditioning plugged in via crossattn :param y: an [N] Tensor of labels, if class-conditional. :return: an [N x C x ...] Tensor of outputs. """ h_len, w_len = x.shape[-2:] img_len = h_len * w_len transformer_options["original_shape"] = list(x.shape) transformer_options["transformer_index"] = 0 transformer_patches = transformer_options.get("patches", {}) SIGMA = transformer_options['sigmas'].to(x) # timestep[0].unsqueeze(0) #/ 1000 img_slice = slice(None, -1) #slice(None, img_len) # for the sake of cross attn... 
:-1 txt_slice = slice(None, -1) EO = transformer_options.get("ExtraOptions", ExtraOptions("")) if EO is not None: EO.mute = True if EO("zero_heads"): HEADS = 0 else: HEADS = 10 # self.input_blocks[4][1].transformer_blocks[0].attn2.heads # HEADS = 10 StyleMMDiT = transformer_options.get('StyleMMDiT', StyleUNet_Model()) StyleMMDiT.set_len(h_len, w_len, img_slice, txt_slice, HEADS=HEADS) StyleMMDiT.Retrojector = self.Retrojector if hasattr(self, "Retrojector") else None transformer_options['StyleMMDiT'] = None x_tmp = transformer_options.get("x_tmp") if x_tmp is not None: x_tmp = x_tmp.clone() / ((SIGMA ** 2 + 1) ** 0.5) x_tmp = x_tmp.expand_as(x) # (x.shape[0], -1, -1, -1) # .clone().to(x) y0_style, img_y0_style = None, None x_orig, timesteps_orig, y_orig, context_orig = clone_inputs(x, timesteps, y, context) h_orig = x_orig.clone() weight = -1 * transformer_options.get("regional_conditioning_weight", 0.0) floor = -1 * transformer_options.get("regional_conditioning_floor", 0.0) #floor = min(floor, weight) mask_zero, mask_up_zero, mask_down_zero, mask_down2_zero = None, None, None, None txt_len = context.shape[1] # mask_obj[0].text_len z_ = transformer_options.get("z_") # initial noise and/or image+noise from start of rk_sampler_beta() rk_row = transformer_options.get("row") # for "smart noise" if z_ is not None: x_init = z_[rk_row].to(x) elif 'x_init' in transformer_options: x_init = transformer_options.get('x_init').to(x) # recon loop to extract exact noise pred for scattersort guide assembly RECON_MODE = StyleMMDiT.noise_mode == "recon" recon_iterations = 2 if StyleMMDiT.noise_mode == "recon" else 1 for recon_iter in range(recon_iterations): y0_style = StyleMMDiT.guides y0_style_active = True if type(y0_style) == torch.Tensor else False RECON_MODE = True if StyleMMDiT.noise_mode == "recon" and recon_iter == 0 else False ISIGMA = SIGMA if StyleMMDiT.noise_mode == "recon" and recon_iter == 1: ISIGMA = SIGMA * EO("ISIGMA_FACTOR", 1.0) model_sampling = 
transformer_options.get('model_sampling') timesteps_orig = model_sampling.timestep(ISIGMA).expand_as(timesteps_orig) x_recon = x_tmp if x_tmp is not None else x_orig #noise_prediction = x_recon + (1-SIGMA.to(x_recon)) * eps.to(x_recon) noise_prediction = eps.to(x_recon) denoised = x_recon * ((SIGMA.to(x_recon) ** 2 + 1) ** 0.5) - SIGMA.to(x_recon) * eps.to(x_recon) denoised = StyleMMDiT.apply_recon_lure(denoised, y0_style.to(x_recon)) # .to(denoised) new_x = (denoised + ISIGMA.to(x_recon) * noise_prediction) / ((ISIGMA.to(x_recon) ** 2 + 1) ** 0.5) h_orig = new_x.clone().to(x) x_init = noise_prediction elif StyleMMDiT.noise_mode == "bonanza": x_init = torch.randn_like(x_init) if y0_style_active: if y0_style.sum() == 0.0 and y0_style.std() == 0.0: y0_style_noised = x.clone() else: y0_style_noised = (y0_style + ISIGMA.to(y0_style) * x_init.expand_as(x).to(y0_style)) / ((ISIGMA.to(y0_style) ** 2 + 1) ** 0.5) #x_init.expand(x.shape[0],-1,-1,-1).to(y0_style)) out_list = [] for cond_iter in range(len(transformer_options['cond_or_uncond'])): UNCOND = transformer_options['cond_or_uncond'][cond_iter] == 1 bsz_style = y0_style.shape[0] if y0_style_active else 0 bsz = 1 if RECON_MODE else bsz_style + 1 h, timesteps, context = clone_inputs(h_orig[cond_iter].unsqueeze(0), timesteps_orig[cond_iter].unsqueeze(0), context_orig[cond_iter].unsqueeze(0)) y = y_orig[cond_iter].unsqueeze(0).clone() if y_orig is not None else None mask, mask_up, mask_down, mask_down2 = None, None, None, None if not UNCOND and 'AttnMask' in transformer_options: # and weight != 0: AttnMask = transformer_options['AttnMask'] mask = transformer_options['AttnMask'].attn_mask.mask.to('cuda') mask_up = transformer_options['AttnMask'].mask_up.to('cuda') mask_down = transformer_options['AttnMask'].mask_down.to('cuda') if hasattr(transformer_options['AttnMask'], "mask_down2"): mask_down2 = transformer_options['AttnMask'].mask_down2.to('cuda') if weight == 0: context = 
transformer_options['RegContext'].context.to(context.dtype).to(context.device) mask, mask_up, mask_down, mask_down2 = None, None, None, None else: context = transformer_options['RegContext'].context.to(context.dtype).to(context.device) txt_len = context.shape[1] if mask_zero is None: mask_zero = torch.ones_like(mask) mask_zero[:, :txt_len] = mask[:, :txt_len] if mask_up_zero is None: mask_up_zero = torch.ones_like(mask_up) mask_up_zero[:, :txt_len] = mask_up[:, :txt_len] if mask_down_zero is None: mask_down_zero = torch.ones_like(mask_down) mask_down_zero[:, :txt_len] = mask_down[:, :txt_len] if mask_down2_zero is None and mask_down2 is not None: mask_down2_zero = torch.ones_like(mask_down2) mask_down2_zero[:, :txt_len] = mask_down2[:, :txt_len] if UNCOND and 'AttnMask_neg' in transformer_options: # and weight != 0: AttnMask = transformer_options['AttnMask_neg'] mask = transformer_options['AttnMask_neg'].attn_mask.mask.to('cuda') mask_up = transformer_options['AttnMask_neg'].mask_up.to('cuda') mask_down = transformer_options['AttnMask_neg'].mask_down.to('cuda') if hasattr(transformer_options['AttnMask_neg'], "mask_down2"): mask_down2 = transformer_options['AttnMask_neg'].mask_down2.to('cuda') if weight == 0: context = transformer_options['RegContext_neg'].context.to(context.dtype).to(context.device) mask, mask_up, mask_down, mask_down2 = None, None, None, None else: context = transformer_options['RegContext_neg'].context.to(context.dtype).to(context.device) txt_len = context.shape[1] if mask_zero is None: mask_zero = torch.ones_like(mask) mask_zero[:, :txt_len] = mask[:, :txt_len] if mask_up_zero is None: mask_up_zero = torch.ones_like(mask_up) mask_up_zero[:, :txt_len] = mask_up[:, :txt_len] if mask_down_zero is None: mask_down_zero = torch.ones_like(mask_down) mask_down_zero[:, :txt_len] = mask_down[:, :txt_len] if mask_down2_zero is None and mask_down2 is not None: mask_down2_zero = torch.ones_like(mask_down2) mask_down2_zero[:, :txt_len] = mask_down2[:, 
:txt_len] elif UNCOND and 'AttnMask' in transformer_options: AttnMask = transformer_options['AttnMask'] mask = transformer_options['AttnMask'].attn_mask.mask.to('cuda') mask_up = transformer_options['AttnMask'].mask_up.to('cuda') mask_down = transformer_options['AttnMask'].mask_down.to('cuda') if hasattr(transformer_options['AttnMask'], "mask_down2"): mask_down2 = transformer_options['AttnMask'].mask_down2.to('cuda') A = context B = transformer_options['RegContext'].context context = A.repeat(1, (B.shape[1] // A.shape[1]) + 1, 1)[:, :B.shape[1], :] txt_len = context.shape[1] if mask_zero is None: mask_zero = torch.ones_like(mask) mask_zero[:, :txt_len] = mask[:, :txt_len] if mask_up_zero is None: mask_up_zero = torch.ones_like(mask_up) mask_up_zero[:, :txt_len] = mask_up[:, :txt_len] if mask_down_zero is None: mask_down_zero = torch.ones_like(mask_down) mask_down_zero[:, :txt_len] = mask_down[:, :txt_len] if mask_down2_zero is None and mask_down2 is not None: mask_down2_zero = torch.ones_like(mask_down2) mask_down2_zero[:, :txt_len] = mask_down2[:, :txt_len] if weight == 0: # ADDED 5/23/2025 mask, mask_up, mask_down, mask_down2 = None, None, None, None if mask is not None: if mask is not None and not type(mask[0][0] .item()) == bool: mask = mask .to(x.dtype) if mask_up is not None and not type(mask_up[0][0] .item()) == bool: mask_up = mask_up .to(x.dtype) if mask_down is not None and not type(mask_down[0][0] .item()) == bool: mask_down = mask_down .to(x.dtype) if mask_down2 is not None and not type(mask_down2[0][0] .item()) == bool: mask_down2 = mask_down2 .to(x.dtype) if mask_zero is not None and not type(mask_zero[0][0] .item()) == bool: mask_zero = mask_zero .to(x.dtype) if mask_up_zero is not None and not type(mask_up_zero[0][0] .item()) == bool: mask_up_zero = mask_up_zero .to(x.dtype) if mask_down_zero is not None and not type(mask_down_zero[0][0] .item()) == bool: mask_down_zero = mask_down_zero .to(x.dtype) if mask_down2_zero is not None and not 
type(mask_down2_zero[0][0].item()) == bool: mask_down2_zero = mask_down2_zero.to(x.dtype) transformer_options['cross_mask'] = mask [:,:txt_len] transformer_options['self_mask'] = mask [:,txt_len:] transformer_options['cross_mask_up'] = mask_up [:,:txt_len] transformer_options['self_mask_up'] = mask_up [:,txt_len:] transformer_options['cross_mask_down'] = mask_down [:,:txt_len] transformer_options['self_mask_down'] = mask_down [:,txt_len:] transformer_options['cross_mask_down2'] = mask_down2[:,:txt_len] if mask_down2 is not None else None transformer_options['self_mask_down2'] = mask_down2[:,txt_len:] if mask_down2 is not None else None #h = x if y0_style_active and not RECON_MODE: if mask is None: context, y, _ = StyleMMDiT.apply_style_conditioning( UNCOND = UNCOND, base_context = context, base_y = y, base_llama3 = None, ) else: context = context.repeat(bsz_style + 1, 1, 1) y = y.repeat(bsz_style + 1, 1) if y is not None else None h = torch.cat([h, y0_style_noised[cond_iter:cond_iter+1]], dim=0).to(h) total_layers = len(self.input_blocks) + len(self.middle_block) + len(self.output_blocks) num_video_frames = kwargs.get("num_video_frames", self.default_num_video_frames) image_only_indicator = kwargs.get("image_only_indicator", None) time_context = kwargs.get("time_context", None) assert (y is not None) == ( self.num_classes is not None ), "must specify y if and only if the model is class-conditional" hs, hs_adain = [], [] t_emb = timestep_embedding(timesteps, self.model_channels, repeat_only=False).to(x.dtype) emb = self.time_embed(t_emb) if "emb_patch" in transformer_patches: patch = transformer_patches["emb_patch"] for p in patch: emb = p(emb, self.model_channels, transformer_options) if self.num_classes is not None: assert y.shape[0] == h.shape[0] emb = emb + self.label_emb(y) #for id, module in enumerate(self.input_blocks): for id, (module, style_block) in enumerate(zip(self.input_blocks, StyleMMDiT.input_blocks)): transformer_options["block"] = ("input", id) if 
mask is not None: transformer_options['cross_mask'] = mask [:,:txt_len] transformer_options['self_mask'] = mask [:,txt_len:] transformer_options['cross_mask_up'] = mask_up [:,:txt_len] transformer_options['self_mask_up'] = mask_up [:,txt_len:] transformer_options['cross_mask_down'] = mask_down [:,:txt_len] transformer_options['self_mask_down'] = mask_down [:,txt_len:] transformer_options['cross_mask_down2'] = mask_down2[:,:txt_len] if mask_down2 is not None else None transformer_options['self_mask_down2'] = mask_down2[:,txt_len:] if mask_down2 is not None else None if weight > 0 and mask is not None and weight < id/total_layers: transformer_options['cross_mask'] = None transformer_options['self_mask'] = None elif weight < 0 and mask is not None and abs(weight) < (1 - id/total_layers): transformer_options['cross_mask'] = None transformer_options['self_mask'] = None elif floor > 0 and mask is not None and floor > id/total_layers: transformer_options['cross_mask'] = mask_zero [:,:txt_len] transformer_options['self_mask'] = mask_zero [:,txt_len:] transformer_options['cross_mask_up'] = mask_up_zero [:,:txt_len] transformer_options['self_mask_up'] = mask_up_zero [:,txt_len:] transformer_options['cross_mask_down'] = mask_down_zero [:,:txt_len] transformer_options['self_mask_down'] = mask_down_zero [:,txt_len:] transformer_options['cross_mask_down2'] = mask_down2_zero[:,:txt_len] if mask_down2_zero is not None else None transformer_options['self_mask_down2'] = mask_down2_zero[:,txt_len:] if mask_down2_zero is not None else None elif floor < 0 and mask is not None and abs(floor) > (1 - id/total_layers): transformer_options['cross_mask'] = mask_zero [:,:txt_len] transformer_options['self_mask'] = mask_zero [:,txt_len:] transformer_options['cross_mask_up'] = mask_up_zero [:,:txt_len] transformer_options['self_mask_up'] = mask_up_zero [:,txt_len:] transformer_options['cross_mask_down'] = mask_down_zero [:,:txt_len] transformer_options['self_mask_down'] = mask_down_zero 
[:,txt_len:] transformer_options['cross_mask_down2'] = mask_down2_zero[:,:txt_len] if mask_down2_zero is not None else None transformer_options['self_mask_down2'] = mask_down2_zero[:,txt_len:] if mask_down2_zero is not None else None h = forward_timestep_embed(module, h, emb, context, transformer_options, time_context=time_context, num_video_frames=num_video_frames, image_only_indicator=image_only_indicator, style_block=style_block) if id == 0: h = StyleMMDiT(h, "proj_in") h = apply_control(h, control, 'input') if "input_block_patch" in transformer_patches: patch = transformer_patches["input_block_patch"] for p in patch: h = p(h, transformer_options) hs.append(h) if "input_block_patch_after_skip" in transformer_patches: patch = transformer_patches["input_block_patch_after_skip"] for p in patch: h = p(h, transformer_options) transformer_options["block"] = ("middle", 0) if self.middle_block is not None: style_block = StyleMMDiT.middle_blocks[0] if mask is not None: transformer_options['cross_mask'] = mask [:,:txt_len] transformer_options['self_mask'] = mask [:,txt_len:] transformer_options['cross_mask_up'] = mask_up [:,:txt_len] transformer_options['self_mask_up'] = mask_up [:,txt_len:] transformer_options['cross_mask_down'] = mask_down [:,:txt_len] transformer_options['self_mask_down'] = mask_down [:,txt_len:] transformer_options['cross_mask_down2'] = mask_down2[:,:txt_len] if mask_down2 is not None else None transformer_options['self_mask_down2'] = mask_down2[:,txt_len:] if mask_down2 is not None else None if weight > 0 and mask is not None and weight < (len(self.input_blocks) + 1)/total_layers: transformer_options['cross_mask'] = None transformer_options['self_mask'] = None elif weight < 0 and mask is not None and abs(weight) < (1 - (len(self.input_blocks) + 1)/total_layers): transformer_options['cross_mask'] = None transformer_options['self_mask'] = None elif floor > 0 and mask is not None and floor > (len(self.input_blocks) + 1)/total_layers: 
transformer_options['cross_mask'] = mask_zero [:,:txt_len] transformer_options['self_mask'] = mask_zero [:,txt_len:] transformer_options['cross_mask_up'] = mask_up_zero [:,:txt_len] transformer_options['self_mask_up'] = mask_up_zero [:,txt_len:] transformer_options['cross_mask_down'] = mask_down_zero [:,:txt_len] transformer_options['self_mask_down'] = mask_down_zero [:,txt_len:] transformer_options['cross_mask_down2'] = mask_down2_zero[:,:txt_len] if mask_down2_zero is not None else None transformer_options['self_mask_down2'] = mask_down2_zero[:,txt_len:] if mask_down2_zero is not None else None elif floor < 0 and mask is not None and abs(floor) > (1 - (len(self.input_blocks) + 1)/total_layers): transformer_options['cross_mask'] = mask_zero [:,:txt_len] transformer_options['self_mask'] = mask_zero [:,txt_len:] transformer_options['cross_mask_up'] = mask_up_zero [:,:txt_len] transformer_options['self_mask_up'] = mask_up_zero [:,txt_len:] transformer_options['cross_mask_down'] = mask_down_zero [:,:txt_len] transformer_options['self_mask_down'] = mask_down_zero [:,txt_len:] transformer_options['cross_mask_down2'] = mask_down2_zero[:,:txt_len] if mask_down2_zero is not None else None transformer_options['self_mask_down2'] = mask_down2_zero[:,txt_len:] if mask_down2_zero is not None else None h = forward_timestep_embed(self.middle_block, h, emb, context, transformer_options, time_context=time_context, num_video_frames=num_video_frames, image_only_indicator=image_only_indicator, style_block=style_block) h = apply_control(h, control, 'middle') #for id, module in enumerate(self.output_blocks): for id, (module, style_block) in enumerate(zip(self.output_blocks, StyleMMDiT.output_blocks)): transformer_options["block"] = ("output", id) hsp = hs.pop() hsp = apply_control(hsp, control, 'output') if "output_block_patch" in transformer_patches: patch = transformer_patches["output_block_patch"] for p in patch: h, hsp = p(h, hsp, transformer_options) h = th.cat([h, hsp], dim=1) del 
hsp if len(hs) > 0: output_shape = hs[-1].shape else: output_shape = None if mask is not None: transformer_options['cross_mask'] = mask [:,:txt_len] transformer_options['self_mask'] = mask [:,txt_len:] transformer_options['cross_mask_up'] = mask_up [:,:txt_len] transformer_options['self_mask_up'] = mask_up [:,txt_len:] transformer_options['cross_mask_down'] = mask_down [:,:txt_len] transformer_options['self_mask_down'] = mask_down [:,txt_len:] transformer_options['cross_mask_down2'] = mask_down2[:,:txt_len] if mask_down2 is not None else None transformer_options['self_mask_down2'] = mask_down2[:,txt_len:] if mask_down2 is not None else None if weight > 0 and mask is not None and weight < (len(self.input_blocks) + 1 + id)/total_layers: transformer_options['cross_mask'] = None transformer_options['self_mask'] = None elif weight < 0 and mask is not None and abs(weight) < (1 - (len(self.input_blocks) + 1 + id)/total_layers): transformer_options['cross_mask'] = None transformer_options['self_mask'] = None elif floor > 0 and mask is not None and floor > (len(self.input_blocks) + 1 + id)/total_layers: transformer_options['cross_mask'] = mask_zero [:,:txt_len] transformer_options['self_mask'] = mask_zero [:,txt_len:] transformer_options['cross_mask_up'] = mask_up_zero [:,:txt_len] transformer_options['self_mask_up'] = mask_up_zero [:,txt_len:] transformer_options['cross_mask_down'] = mask_down_zero [:,:txt_len] transformer_options['self_mask_down'] = mask_down_zero [:,txt_len:] transformer_options['cross_mask_down2'] = mask_down2_zero[:,:txt_len] if mask_down2_zero is not None else None transformer_options['self_mask_down2'] = mask_down2_zero[:,txt_len:] if mask_down2_zero is not None else None elif floor < 0 and mask is not None and abs(floor) > (1 - (len(self.input_blocks) + 1 + id)/total_layers): transformer_options['cross_mask'] = mask_zero [:,:txt_len] transformer_options['self_mask'] = mask_zero [:,txt_len:] transformer_options['cross_mask_up'] = mask_up_zero 
[:,:txt_len] transformer_options['self_mask_up'] = mask_up_zero [:,txt_len:] transformer_options['cross_mask_down'] = mask_down_zero [:,:txt_len] transformer_options['self_mask_down'] = mask_down_zero [:,txt_len:] transformer_options['cross_mask_down2'] = mask_down2_zero[:,:txt_len] if mask_down2_zero is not None else None transformer_options['self_mask_down2'] = mask_down2_zero[:,txt_len:] if mask_down2_zero is not None else None h = forward_timestep_embed(module, h, emb, context, transformer_options, output_shape, time_context=time_context, num_video_frames=num_video_frames, image_only_indicator=image_only_indicator, style_block=style_block) h = h.type(x.dtype) if self.predict_codebook_ids: eps = self.id_predictor(h) else: eps = self.out(h) eps = StyleMMDiT(eps, "proj_out") out_list.append(eps[0:1]) eps = torch.stack(out_list, dim=0).squeeze(dim=1) if recon_iter == 1: denoised = new_x * ((ISIGMA ** 2 + 1) ** 0.5) - ISIGMA.to(new_x) * eps.to(new_x) if x_tmp is not None: eps = (x_tmp * ((SIGMA ** 2 + 1) ** 0.5) - denoised.to(x_tmp)) / SIGMA.to(x_tmp) else: eps = (x_orig * ((SIGMA ** 2 + 1) ** 0.5) - denoised.to(x_orig)) / SIGMA.to(x_orig) y0_style_pos = transformer_options.get("y0_style_pos") y0_style_neg = transformer_options.get("y0_style_neg") y0_style_pos_weight = transformer_options.get("y0_style_pos_weight", 0.0) y0_style_pos_synweight = transformer_options.get("y0_style_pos_synweight", 0.0) y0_style_pos_synweight *= y0_style_pos_weight y0_style_neg_weight = transformer_options.get("y0_style_neg_weight", 0.0) y0_style_neg_synweight = transformer_options.get("y0_style_neg_synweight", 0.0) y0_style_neg_synweight *= y0_style_neg_weight freqsep_lowpass_method = transformer_options.get("freqsep_lowpass_method") freqsep_sigma = transformer_options.get("freqsep_sigma") freqsep_kernel_size = transformer_options.get("freqsep_kernel_size") freqsep_inner_kernel_size = transformer_options.get("freqsep_inner_kernel_size") freqsep_stride = 
transformer_options.get("freqsep_stride") freqsep_lowpass_weight = transformer_options.get("freqsep_lowpass_weight") freqsep_highpass_weight= transformer_options.get("freqsep_highpass_weight") freqsep_mask = transformer_options.get("freqsep_mask") dtype = eps.dtype if self.style_dtype is None else self.style_dtype h_len //= self.Retrojector.patch_size w_len //= self.Retrojector.patch_size if y0_style_pos is not None: y0_style_pos_weight = transformer_options.get("y0_style_pos_weight") y0_style_pos_synweight = transformer_options.get("y0_style_pos_synweight") y0_style_pos_synweight *= y0_style_pos_weight y0_style_pos_mask = transformer_options.get("y0_style_pos_mask") y0_style_pos_mask_edge = transformer_options.get("y0_style_pos_mask_edge") y0_style_pos = y0_style_pos.to(dtype) #x = x.to(dtype) x = x_orig.clone().to(torch.float64) * ((SIGMA ** 2 + 1) ** 0.5) eps = eps.to(dtype) eps_orig = eps.clone() sigma = SIGMA #t_orig[0].to(torch.float32) / 1000 denoised = x - sigma * eps denoised_embed = self.Retrojector.embed(denoised) # 2,4,96,168 -> 2,16128,320 y0_adain_embed = self.Retrojector.embed(y0_style_pos) if transformer_options['y0_style_method'] == "scattersort": tile_h, tile_w = transformer_options.get('y0_style_tile_height'), transformer_options.get('y0_style_tile_width') pad = transformer_options.get('y0_style_tile_padding') if pad is not None and tile_h is not None and tile_w is not None: denoised_spatial = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) y0_adain_spatial = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) if EO("scattersort_median_LP"): denoised_spatial_LP = median_blur_2d(denoised_spatial, kernel_size=EO("scattersort_median_LP",7)) y0_adain_spatial_LP = median_blur_2d(y0_adain_spatial, kernel_size=EO("scattersort_median_LP",7)) denoised_spatial_HP = denoised_spatial - denoised_spatial_LP y0_adain_spatial_HP = y0_adain_spatial - y0_adain_spatial_LP denoised_spatial_LP = 
apply_scattersort_tiled(denoised_spatial_LP, y0_adain_spatial_LP, tile_h, tile_w, pad) denoised_spatial = denoised_spatial_LP + denoised_spatial_HP denoised_embed = rearrange(denoised_spatial, "b c h w -> b (h w) c") else: denoised_spatial = apply_scattersort_tiled(denoised_spatial, y0_adain_spatial, tile_h, tile_w, pad) denoised_embed = rearrange(denoised_spatial, "b c h w -> b (h w) c") else: denoised_embed = apply_scattersort_masked(denoised_embed, y0_adain_embed, y0_style_pos_mask, y0_style_pos_mask_edge, h_len, w_len) elif transformer_options['y0_style_method'] == "AdaIN": if freqsep_mask is not None: freqsep_mask = freqsep_mask.view(1, 1, *freqsep_mask.shape[-2:]).float() freqsep_mask = F.interpolate(freqsep_mask.float(), size=(h_len, w_len), mode='nearest-exact') if hasattr(self, "adain_tile"): tile_h, tile_w = self.adain_tile denoised_pretile = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) y0_adain_pretile = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) if self.adain_flag: h_off = tile_h // 2 w_off = tile_w // 2 denoised_pretile = denoised_pretile[:,:,h_off:-h_off, w_off:-w_off] self.adain_flag = False else: h_off = 0 w_off = 0 self.adain_flag = True tiles, orig_shape, grid, strides = tile_latent(denoised_pretile, tile_size=(tile_h,tile_w)) y0_tiles, orig_shape, grid, strides = tile_latent(y0_adain_pretile, tile_size=(tile_h,tile_w)) tiles_out = [] for i in range(tiles.shape[0]): tile = tiles[i].unsqueeze(0) y0_tile = y0_tiles[i].unsqueeze(0) tile = rearrange(tile, "b c h w -> b (h w) c", h=tile_h, w=tile_w) y0_tile = rearrange(y0_tile, "b c h w -> b (h w) c", h=tile_h, w=tile_w) tile = adain_seq_inplace(tile, y0_tile) tiles_out.append(rearrange(tile, "b (h w) c -> b c h w", h=tile_h, w=tile_w)) tiles_out_tensor = torch.cat(tiles_out, dim=0) tiles_out_tensor = untile_latent(tiles_out_tensor, orig_shape, grid, strides) if h_off == 0: denoised_pretile = tiles_out_tensor else: denoised_pretile[:,:,h_off:-h_off, 
w_off:-w_off] = tiles_out_tensor denoised_embed = rearrange(denoised_pretile, "b c h w -> b (h w) c", h=h_len, w=w_len) elif freqsep_lowpass_method is not None and freqsep_lowpass_method.endswith("pw"): #EO("adain_pw"): denoised_spatial = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) y0_adain_spatial = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) if freqsep_lowpass_method == "median_pw": denoised_spatial_new = adain_patchwise_row_batch_med(denoised_spatial.clone(), y0_adain_spatial.clone().repeat(denoised_spatial.shape[0],1,1,1), sigma=freqsep_sigma, kernel_size=freqsep_kernel_size, use_median_blur=True, lowpass_weight=freqsep_lowpass_weight, highpass_weight=freqsep_highpass_weight) elif freqsep_lowpass_method == "gaussian_pw": denoised_spatial_new = adain_patchwise_row_batch(denoised_spatial.clone(), y0_adain_spatial.clone().repeat(denoised_spatial.shape[0],1,1,1), sigma=freqsep_sigma, kernel_size=freqsep_kernel_size) denoised_embed = rearrange(denoised_spatial_new, "b c h w -> b (h w) c", h=h_len, w=w_len) elif freqsep_lowpass_method is not None: denoised_spatial = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) y0_adain_spatial = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) if freqsep_lowpass_method == "median": denoised_spatial_LP = median_blur_2d(denoised_spatial, kernel_size=freqsep_kernel_size) y0_adain_spatial_LP = median_blur_2d(y0_adain_spatial, kernel_size=freqsep_kernel_size) elif freqsep_lowpass_method == "gaussian": denoised_spatial_LP = gaussian_blur_2d(denoised_spatial, sigma=freqsep_sigma, kernel_size=freqsep_kernel_size) y0_adain_spatial_LP = gaussian_blur_2d(y0_adain_spatial, sigma=freqsep_sigma, kernel_size=freqsep_kernel_size) denoised_spatial_HP = denoised_spatial - denoised_spatial_LP if EO("adain_fs_uhp"): y0_adain_spatial_HP = y0_adain_spatial - y0_adain_spatial_LP denoised_spatial_ULP = gaussian_blur_2d(denoised_spatial, sigma=EO("adain_fs_uhp_sigma", 
1.0), kernel_size=EO("adain_fs_uhp_kernel_size", 3)) y0_adain_spatial_ULP = gaussian_blur_2d(y0_adain_spatial, sigma=EO("adain_fs_uhp_sigma", 1.0), kernel_size=EO("adain_fs_uhp_kernel_size", 3)) denoised_spatial_UHP = denoised_spatial_HP - denoised_spatial_ULP y0_adain_spatial_UHP = y0_adain_spatial_HP - y0_adain_spatial_ULP #denoised_spatial_HP = y0_adain_spatial_ULP + denoised_spatial_UHP denoised_spatial_HP = denoised_spatial_ULP + y0_adain_spatial_UHP denoised_spatial_new = freqsep_lowpass_weight * y0_adain_spatial_LP + freqsep_highpass_weight * denoised_spatial_HP denoised_embed = rearrange(denoised_spatial_new, "b c h w -> b (h w) c", h=h_len, w=w_len) else: denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed) for adain_iter in range(EO("style_iter", 0)): denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed) denoised_embed = self.Retrojector.embed(self.Retrojector.unembed(denoised_embed)) denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed) elif transformer_options['y0_style_method'] == "WCT": self.StyleWCT.set(y0_adain_embed) denoised_embed = self.StyleWCT.get(denoised_embed) if transformer_options.get('y0_standard_guide') is not None: y0_standard_guide = transformer_options.get('y0_standard_guide') y0_standard_guide_embed = self.Retrojector.embed(y0_standard_guide) f_cs = self.StyleWCT.get(y0_standard_guide_embed) self.y0_standard_guide = self.Retrojector.unembed(f_cs) if transformer_options.get('y0_inv_standard_guide') is not None: y0_inv_standard_guide = transformer_options.get('y0_inv_standard_guide') y0_inv_standard_guide_embed = self.Retrojector.embed(y0_inv_standard_guide) f_cs = self.StyleWCT.get(y0_inv_standard_guide_embed) self.y0_inv_standard_guide = self.Retrojector.unembed(f_cs) denoised_approx = self.Retrojector.unembed(denoised_embed) eps = (x - denoised_approx) / sigma if not UNCOND: if eps.shape[0] == 2: eps[1] = eps_orig[1] + y0_style_pos_weight * (eps[1] - eps_orig[1]) eps[0] = eps_orig[0] + 
y0_style_pos_synweight * (eps[0] - eps_orig[0]) else: eps[0] = eps_orig[0] + y0_style_pos_weight * (eps[0] - eps_orig[0]) elif eps.shape[0] == 1 and UNCOND: eps[0] = eps_orig[0] + y0_style_pos_synweight * (eps[0] - eps_orig[0]) eps = eps.float() if y0_style_neg is not None: y0_style_neg_weight = transformer_options.get("y0_style_neg_weight") y0_style_neg_synweight = transformer_options.get("y0_style_neg_synweight") y0_style_neg_synweight *= y0_style_neg_weight y0_style_neg_mask = transformer_options.get("y0_style_neg_mask") y0_style_neg_mask_edge = transformer_options.get("y0_style_neg_mask_edge") y0_style_neg = y0_style_neg.to(dtype) #x = x.to(dtype) x = x_orig.clone().to(torch.float64) * ((SIGMA ** 2 + 1) ** 0.5) eps = eps.to(dtype) eps_orig = eps.clone() sigma = SIGMA #t_orig[0].to(torch.float32) / 1000 denoised = x - sigma * eps denoised_embed = self.Retrojector.embed(denoised) y0_adain_embed = self.Retrojector.embed(y0_style_neg) if transformer_options['y0_style_method'] == "scattersort": tile_h, tile_w = transformer_options.get('y0_style_tile_height'), transformer_options.get('y0_style_tile_width') pad = transformer_options.get('y0_style_tile_padding') if pad is not None and tile_h is not None and tile_w is not None: denoised_spatial = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) y0_adain_spatial = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) denoised_spatial = apply_scattersort_tiled(denoised_spatial, y0_adain_spatial, tile_h, tile_w, pad) denoised_embed = rearrange(denoised_spatial, "b c h w -> b (h w) c") else: denoised_embed = apply_scattersort_masked(denoised_embed, y0_adain_embed, y0_style_neg_mask, y0_style_neg_mask_edge, h_len, w_len) elif transformer_options['y0_style_method'] == "AdaIN": denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed) for adain_iter in range(EO("style_iter", 0)): denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed) denoised_embed = 
def clone_inputs(*args, index: int = None):
    """
    Clone every positional argument, passing ``None`` entries through untouched.

    When ``index`` is ``None`` each argument is cloned whole. Otherwise only
    ``arg[index]`` is cloned, after a leading dimension of size 1 has been
    restored with ``unsqueeze(0)``.

    Returns a tuple with one entry per positional argument, in call order.
    """
    def _copy(item):
        # None placeholders are preserved so positions line up with the inputs.
        if item is None:
            return None
        if index is None:
            return item.clone()
        return item[index].unsqueeze(0).clone()

    return tuple(map(_copy, args))
class PatchEmbed(nn.Module):
    """
    2D Image to Patch Embedding

    Projects the input with a convolution whose kernel size and stride are both
    ``patch_size`` (``Conv3d`` when ``conv3d`` is set, ``Conv2d`` otherwise), then
    optionally flattens the spatial dimensions into a token axis and applies a norm.

    Args:
        img_size: accepted for signature compatibility; not read by this class.
        patch_size: an int (expanded to a 2-tuple, or a 3-tuple when ``conv3d``),
            or any sized sequence, which is stored as given.
        in_chans: input channels of the projection.
        embed_dim: output channels of the projection.
        norm_layer: optional callable invoked as ``norm_layer(embed_dim)``;
            ``nn.Identity`` is used when it is falsy.
        flatten: flatten the spatial dims and move channels last in ``forward``.
        bias: whether the projection has a bias.
        strict_img_size: stored on the instance; not otherwise used here.
        dynamic_img_pad: run the input through
            ``comfy.ldm.common_dit.pad_to_patch_size`` before projecting.
        padding_mode: padding mode forwarded to ``pad_to_patch_size``.
        conv3d: use a 3D projection instead of a 2D one.
        dtype, device: forwarded to the projection layer.
        operations: namespace providing ``Conv2d`` / ``Conv3d``.
    """
    dynamic_img_pad: torch.jit.Final[bool]

    def __init__(
            self,
            img_size        : Optional[int] = 224,
            patch_size      : int           = 16,
            in_chans        : int           = 3,
            embed_dim       : int           = 768,
            norm_layer                      = None,
            flatten         : bool          = True,
            bias            : bool          = True,
            strict_img_size : bool          = True,
            dynamic_img_pad : bool          = True,
            padding_mode                    ='circular',
            conv3d                          = False,
            dtype                           = None,
            device                          = None,
            operations                      = None,
    ):
        super().__init__()
        try:
            len(patch_size)
            self.patch_size = patch_size
        except TypeError:
            # Only "object has no len()" means a scalar patch size was given.
            # Anything else (including KeyboardInterrupt) must propagate.
            if conv3d:
                self.patch_size = (patch_size, patch_size, patch_size)
            else:
                self.patch_size = (patch_size, patch_size)
        self.padding_mode = padding_mode

        # flatten spatial dim and transpose to channels last, kept for bwd compat
        self.flatten         = flatten
        self.strict_img_size = strict_img_size
        self.dynamic_img_pad = dynamic_img_pad

        # The raw patch_size (int or sequence) is what the conv layer receives.
        conv_cls = operations.Conv3d if conv3d else operations.Conv2d
        self.proj = conv_cls(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size, bias=bias, dtype=dtype, device=device)
        self.norm = norm_layer(embed_dim) if norm_layer else nn.Identity()

    def forward(self, x):
        """Pad (optionally), project, flatten (optionally) and normalise ``x``."""
        if self.dynamic_img_pad:
            x = comfy.ldm.common_dit.pad_to_patch_size(x, self.patch_size, padding_mode=self.padding_mode)
        x = self.proj(x)
        if self.flatten:
            x = x.flatten(2).transpose(1, 2)  # NCHW -> NLC
        x = self.norm(x)
        return x
def get_1d_sincos_pos_embed_from_grid_torch(embed_dim, pos, device=None, dtype=torch.float32):
    """
    Sine/cosine positional embedding for a tensor of positions.

    embed_dim: output dimension for each position (embed_dim // 2 frequencies are used)
    pos: tensor of positions to encode; flattened to (M,)
    out: (M, 2 * (embed_dim // 2)), sine half first, cosine half second
    """
    half_dim = embed_dim // 2
    freqs = torch.arange(half_dim, device=device, dtype=dtype)
    freqs.div_(embed_dim / 2.0)
    freqs = 1.0 / 10000**freqs                                   # (D/2,)

    flat_pos = pos.reshape(-1)                                   # (M,)
    angles = torch.einsum("m,d->md", flat_pos, freqs)            # (M, D/2), outer product

    sin_part = torch.sin(angles)                                 # (M, D/2)
    cos_part = torch.cos(angles)                                 # (M, D/2)
    return torch.cat([sin_part, cos_part], dim=1)                # (M, D)
def split_qkv(qkv, head_dim):
    """
    Split a fused qkv projection into separate q, k and v tensors.

    The last axis of ``qkv`` is reshaped to (3, heads, head_dim), with the number
    of heads inferred, and the size-3 axis is then separated out.

    Returns a (q, k, v) tuple, each shaped (qkv.shape[0], qkv.shape[1], heads, head_dim).
    """
    batch, seq_len = qkv.shape[0], qkv.shape[1]
    fused = qkv.reshape(batch, seq_len, 3, -1, head_dim)
    q, k, v = fused.unbind(2)
    return q, k, v
class RMSNorm(torch.nn.Module):
    """
    RMS normalization layer with an optional learnable per-feature scale.

    The normalization itself is delegated to ``comfy.ldm.common_dit.rms_norm``.
    """

    def __init__(
        self,
        dim: int,
        elementwise_affine: bool = False,
        eps: float = 1e-6,
        device=None,
        dtype=None,
    ):
        """
        Initialize the RMSNorm normalization layer.
        Args:
            dim (int): The dimension of the input tensor.
            elementwise_affine (bool, optional): Whether to create a learnable scale. Default is False.
            eps (float, optional): A small value added to the denominator for numerical stability. Default is 1e-6.
        Attributes:
            eps (float): A small value added to the denominator for numerical stability.
            learnable_scale (bool): Mirrors ``elementwise_affine``.
            weight (nn.Parameter or None): Learnable scaling parameter, or None when disabled.
        """
        super().__init__()
        self.eps = eps
        self.learnable_scale = elementwise_affine

        # The scale is allocated with torch.empty, i.e. left uninitialised here.
        scale = None
        if elementwise_affine:
            scale = nn.Parameter(torch.empty(dim, device=device, dtype=dtype))
        self.register_parameter("weight", scale)

    def forward(self, x):
        """Normalise ``x`` using the stored scale (possibly None) and epsilon."""
        scale, eps = self.weight, self.eps
        return comfy.ldm.common_dit.rms_norm(x, scale, eps)
""" ATTENTION_MODES = ("xformers", "torch", "torch-hb", "math", "debug") def __init__( self, hidden_size : int, num_heads : int, mlp_ratio : float = 4.0, attn_mode : str = "xformers", qkv_bias : bool = False, pre_only : bool = False, rmsnorm : bool = False, scale_mod_only : bool = False, swiglu : bool = False, qk_norm : Optional[str] = None, x_block_self_attn : bool = False, dtype = None, device = None, operations = None, **block_kwargs, ): super().__init__() assert attn_mode in self.ATTENTION_MODES if not rmsnorm: self.norm1 = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device) else: self.norm1 = RMSNorm(hidden_size, elementwise_affine=False, eps=1e-6) self.attn = SelfAttention( dim = hidden_size, num_heads = num_heads, qkv_bias = qkv_bias, attn_mode = attn_mode, pre_only = pre_only, qk_norm = qk_norm, rmsnorm = rmsnorm, dtype = dtype, device = device, operations = operations ) if x_block_self_attn: assert not pre_only assert not scale_mod_only self.x_block_self_attn = True self.attn2 = SelfAttention( dim = hidden_size, num_heads = num_heads, qkv_bias = qkv_bias, attn_mode = attn_mode, pre_only = False, qk_norm = qk_norm, rmsnorm = rmsnorm, dtype = dtype, device = device, operations = operations ) else: self.x_block_self_attn = False if not pre_only: if not rmsnorm: self.norm2 = operations.LayerNorm( hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device ) else: self.norm2 = RMSNorm(hidden_size, elementwise_affine=False, eps=1e-6) mlp_hidden_dim = int(hidden_size * mlp_ratio) if not pre_only: if not swiglu: self.mlp = Mlp( in_features = hidden_size, hidden_features = mlp_hidden_dim, act_layer = lambda: nn.GELU(approximate = "tanh"), drop = 0, dtype = dtype, device = device, operations = operations ) else: self.mlp = SwiGLUFeedForward( dim = hidden_size, hidden_dim = mlp_hidden_dim, multiple_of = 256, ) self.scale_mod_only = scale_mod_only if x_block_self_attn: assert not pre_only assert not 
scale_mod_only n_mods = 9 elif not scale_mod_only: n_mods = 6 if not pre_only else 2 else: n_mods = 4 if not pre_only else 1 self.adaLN_modulation = nn.Sequential( nn.SiLU(), operations.Linear(hidden_size, n_mods * hidden_size, bias=True, dtype=dtype, device=device) ) self.pre_only = pre_only def pre_attention(self, x: torch.Tensor, c: torch.Tensor) -> torch.Tensor: if not self.pre_only: if not self.scale_mod_only: ( shift_msa, scale_msa, gate_msa, shift_mlp, scale_mlp, gate_mlp, ) = self.adaLN_modulation(c).chunk(6, dim=1) else: shift_msa = None shift_mlp = None ( scale_msa, gate_msa, scale_mlp, gate_mlp, ) = self.adaLN_modulation( c ).chunk(4, dim=1) qkv = self.attn.pre_attention(modulate(self.norm1(x), shift_msa, scale_msa)) return qkv, ( x, gate_msa, shift_mlp, scale_mlp, gate_mlp, ) else: if not self.scale_mod_only: ( shift_msa, scale_msa, ) = self.adaLN_modulation( c ).chunk(2, dim=1) else: shift_msa = None scale_msa = self.adaLN_modulation(c) qkv = self.attn.pre_attention(modulate(self.norm1(x), shift_msa, scale_msa)) return qkv, None def post_attention(self, attn, x, gate_msa, shift_mlp, scale_mlp, gate_mlp): assert not self.pre_only x = x + gate_msa.unsqueeze(1) * self.attn.post_attention(attn) x = x + gate_mlp.unsqueeze(1) * self.mlp( modulate(self.norm2(x), shift_mlp, scale_mlp) ) return x def pre_attention_x(self, x: torch.Tensor, c: torch.Tensor) -> torch.Tensor: assert self.x_block_self_attn ( shift_msa, scale_msa, gate_msa, shift_mlp, scale_mlp, gate_mlp, shift_msa2, scale_msa2, gate_msa2, ) = self.adaLN_modulation(c).chunk(9, dim=1) x_norm = self.norm1(x) qkv = self.attn .pre_attention(modulate(x_norm, shift_msa, scale_msa )) qkv2 = self.attn2.pre_attention(modulate(x_norm, shift_msa2, scale_msa2)) return qkv, qkv2, ( x, gate_msa, shift_mlp, scale_mlp, gate_mlp, gate_msa2, ) def post_attention_x(self, attn, attn2, x, gate_msa, shift_mlp, scale_mlp, gate_mlp, gate_msa2): assert not self.pre_only attn1 = self.attn .post_attention(attn) attn2 = 
def forward(self, x: torch.Tensor, c: torch.Tensor) -> torch.Tensor:
    """
    Run this DismantledBlock on its own (attention over ``x`` only).

    Args:
        x: input tokens handed to ``pre_attention`` / ``pre_attention_x``.
        c: conditioning vector fed to ``adaLN_modulation`` by those helpers.

    Returns:
        The tensor produced by ``post_attention`` (or ``post_attention_x`` when
        the block carries the extra ``attn2`` self-attention).

    Raises:
        AssertionError: if the block was built with ``pre_only=True``, since
            such a block has no post-attention half.
    """
    assert not self.pre_only

    if self.x_block_self_attn:
        qkv, qkv2, intermediates = self.pre_attention_x(x, c)
        # optimized_attention takes ``heads=`` and returns a single tensor, as at
        # every other call site in this file. The previous ``num_heads=`` keyword
        # and ``attn, _ = ...`` unpacking made this branch fail.
        attn = optimized_attention(
            qkv[0], qkv[1], qkv[2],
            heads=self.attn.num_heads,
        )
        attn2 = optimized_attention(
            qkv2[0], qkv2[1], qkv2[2],
            heads=self.attn2.num_heads,
        )
        return self.post_attention_x(attn, attn2, *intermediates)

    qkv, intermediates = self.pre_attention(x, c)
    attn = optimized_attention(
        qkv[0], qkv[1], qkv[2],
        heads=self.attn.num_heads,
    )
    return self.post_attention(attn, *intermediates)
class FinalLayer(nn.Module):
    """
    The final layer of DiT.

    Applies a non-affine LayerNorm, modulates it with a shift/scale pair derived
    from the conditioning vector, and projects to the output width.
    """

    def __init__(
            self,
            hidden_size        : int,
            patch_size         : int,
            out_channels       : int,
            total_out_channels : Optional[int] = None,
            dtype                              = None,
            device                             = None,
            operations                         = None,
    ):
        super().__init__()
        factory = {"dtype": dtype, "device": device}

        # Output width: an explicit total wins, otherwise one patch worth of channels.
        if total_out_channels is None:
            out_features = patch_size * patch_size * out_channels
        else:
            out_features = total_out_channels

        self.norm_final = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, **factory)
        self.linear = operations.Linear(hidden_size, out_features, bias=True, **factory)

        # Produces the shift and scale halves consumed in forward().
        to_shift_scale = operations.Linear(hidden_size, 2 * hidden_size, bias=True, **factory)
        self.adaLN_modulation = nn.Sequential(nn.SiLU(), to_shift_scale)

    def forward(self, x: torch.Tensor, c: torch.Tensor) -> torch.Tensor:
        """Modulate the normalised ``x`` with shift/scale from ``c`` and project it."""
        shift, scale = self.adaLN_modulation(c).chunk(2, dim=1)
        modulated = modulate(self.norm_final(x), shift, scale)
        return self.linear(modulated)
class ContextProcessor(nn.Module):
    """
    Stack of ``ContextProcessorBlock`` layers followed by a non-affine LayerNorm.

    Args:
        context_size: feature width passed to every block and to the final norm.
        num_layers: number of ``ContextProcessorBlock`` instances to stack.
        dtype, device: forwarded to the blocks and the norm.
        operations: namespace providing ``LayerNorm`` (also forwarded to the blocks).
    """

    def __init__(self, context_size, num_layers, dtype=None, device=None, operations=None):
        super().__init__()
        self.layers = torch.nn.ModuleList([
            ContextProcessorBlock(context_size, dtype=dtype, device=device, operations=operations)
            for _ in range(num_layers)
        ])
        self.norm = operations.LayerNorm(context_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device)

    def forward(self, x):
        """Apply each block in order, then the final norm."""
        for layer in self.layers:
            x = layer(x)
        return self.norm(x)
self.out_channels = default(out_channels, default_out_channels) self.patch_size = patch_size self.pos_embed_scaling_factor = pos_embed_scaling_factor self.pos_embed_offset = pos_embed_offset self.pos_embed_max_size = pos_embed_max_size self.x_block_self_attn_layers = x_block_self_attn_layers # hidden_size = default(hidden_size, 64 * depth) # num_heads = default(num_heads, hidden_size // 64) # apply magic --> this defines a head_size of 64 self.hidden_size = 64 * depth num_heads = depth if num_blocks is None: num_blocks = depth self.depth = depth self.num_heads = num_heads self.x_embedder = PatchEmbed( input_size, patch_size, in_channels, self.hidden_size, bias = True, strict_img_size = self.pos_embed_max_size is None, dtype = dtype, device = device, operations = operations ) self.t_embedder = TimestepEmbedder(self.hidden_size, dtype=dtype, device=device, operations=operations) self.y_embedder = None if adm_in_channels is not None: assert isinstance(adm_in_channels, int) self.y_embedder = VectorEmbedder(adm_in_channels, self.hidden_size, dtype=dtype, device=device, operations=operations) if context_processor_layers is not None: self.context_processor = ContextProcessor(context_size, context_processor_layers, dtype=dtype, device=device, operations=operations) else: self.context_processor = None self.context_embedder = nn.Identity() if context_embedder_config is not None: if context_embedder_config["target"] == "torch.nn.Linear": self.context_embedder = operations.Linear(**context_embedder_config["params"], dtype=dtype, device=device) self.register_length = register_length if self.register_length > 0: self.register = nn.Parameter(torch.randn(1, register_length, self.hidden_size, dtype=dtype, device=device)) # num_patches = self.x_embedder.num_patches # Will use fixed sin-cos embedding: # just use a buffer already if num_patches is not None: self.register_buffer( "pos_embed", torch.empty(1, num_patches, self.hidden_size, dtype=dtype, device=device), ) else: 
def cropped_pos_embed(self, hw, device=None):
    """Return the positional embedding for an input of spatial size ``hw``.

    Args:
        hw: ``(height, width)`` of the un-patched input.
        device: Device forwarded to the sin-cos generator when no learned
            table is stored.

    Returns:
        When ``self.pos_embed`` is ``None``, whatever
        ``get_2d_sincos_pos_embed_torch`` produces for the patch grid.
        Otherwise a ``(1, h * w, C)`` centre crop of the stored
        ``pos_embed_max_size x pos_embed_max_size`` table.
    """
    p = self.x_embedder.patch_size[0]
    h, w = hw
    # patched size
    h = (h + 1) // p
    w = (w + 1) // p
    if self.pos_embed is None:
        return get_2d_sincos_pos_embed_torch(self.hidden_size, w, h, device=device)

    max_size = self.pos_embed_max_size
    assert max_size is not None
    assert h <= max_size, (h, max_size)
    assert w <= max_size, (w, max_size)

    # Centre the crop inside the square table.
    top = (max_size - h) // 2
    left = (max_size - w) // 2
    channels = self.pos_embed.shape[-1]
    grid = self.pos_embed.reshape(1, max_size, max_size, channels)
    grid = grid[:, top : top + h, left : left + w, :]
    return grid.reshape(1, h * w, channels)


def unpatchify(self, x, hw=None):
    """Fold a token sequence back into an image-shaped tensor.

    Args:
        x: ``(N, T, patch_size**2 * C)`` tokens.
        hw: Optional ``(height, width)`` of the un-patched image. When
            omitted a square patch grid of side ``int(sqrt(T))`` is used.

    Returns:
        Tensor of shape ``(N, C, h * p, w * p)`` where ``h, w`` is the patch
        grid and ``p`` the patch size.
    """
    c = self.out_channels
    p = self.x_embedder.patch_size[0]
    if hw is None:
        h = w = int(x.shape[1] ** 0.5)
    else:
        h, w = hw
        h = (h + 1) // p
        w = (w + 1) // p
    assert h * w == x.shape[1]

    x = x.reshape(shape=(x.shape[0], h, w, p, p, c))
    # (n, h, w, p, q, c) -> (n, c, h, p, w, q): interleave grid and in-patch axes.
    x = x.permute(0, 5, 1, 3, 2, 4)
    return x.reshape(shape=(x.shape[0], c, h * p, w * p))


def forward_core_with_concat(
    self,
    x: torch.Tensor,
    c_mod: torch.Tensor,
    c_mod_base: torch.Tensor,
    context: Optional[torch.Tensor] = None,
    context_base: Optional[torch.Tensor] = None,
    control=None,
    transformer_options=None,
) -> torch.Tensor:
    """Run the joint blocks and the final layer on embedded tokens.

    Args:
        x: Embedded image tokens.
        c_mod: Modulation vector used when regional conditioning is active.
        c_mod_base: Modulation vector used when ``reg_cond_weight <= 0``.
        context: Text tokens used when regional conditioning is active.
        context_base: Text tokens used when ``reg_cond_weight <= 0``.
        control: Optional mapping; ``control["output"][i]`` (if present and
            not ``None``) is added to ``x`` after block ``i``.
        transformer_options: Mapping read for ``patches_replace``,
            ``reg_cond_weight``, ``reg_cond_floor`` and ``AttnMask``.

    Returns:
        Output of ``self.final_layer(x, c_mod)``.
    """
    if transformer_options is None:
        transformer_options = {}
    patches_replace = transformer_options.get("patches_replace", {})

    if self.register_length > 0:
        context = torch.cat(
            (
                repeat(self.register, "1 ... -> b ...", b=x.shape[0]),
                default(context, torch.Tensor([]).type_as(x)),
            ),
            1,
        )

    weight = transformer_options.get('reg_cond_weight', 0.0)
    floor = transformer_options.get('reg_cond_floor', 0.0)
    floor = min(floor, weight)
    if not isinstance(weight, (int, float)):
        weight = weight.item()

    AttnMask = transformer_options.get('AttnMask')
    mask = None
    if AttnMask is not None and weight > 0:
        mask = AttnMask.get(weight=weight)
        if mask is not None:
            if mask.dtype != torch.bool:
                mask = mask.to(x.dtype)
            text_len = context.shape[1]
            # floor may be a tensor or a plain number; only tensors have .to().
            floor_min = floor.to(mask.device) if torch.is_tensor(floor) else floor
            # Raise the image-to-image region of the mask to at least `floor`.
            mask[text_len:, text_len:] = torch.clamp(mask[text_len:, text_len:], min=floor_min)
    mask_type_bool = mask is not None and mask.dtype == torch.bool

    if weight <= 0.0:
        mask = None
        context = context_base
        c_mod = c_mod_base

    # context is B, L', D
    # x is B, L, D
    blocks_replace = patches_replace.get("dit", {})
    blocks = len(self.joint_blocks)
    depth_span = max(blocks - 1, 1)  # avoids 0-division with a single block
    for i in range(blocks):
        # Past the relative depth `weight`, a boolean mask is recast to x's dtype.
        if mask_type_bool and mask is not None and weight < (i / depth_span):
            mask = mask.to(x.dtype)

        if ("double_block", i) in blocks_replace:
            def block_wrap(args):
                out = {}
                out["txt"], out["img"] = self.joint_blocks[i](args["txt"], args["img"], c=args["vec"])
                return out

            out = blocks_replace[("double_block", i)](
                {"img": x, "txt": context, "vec": c_mod},
                {"original_block": block_wrap},
            )
            context = out["txt"]
            x = out["img"]
        else:
            context, x = self.joint_blocks[i](
                context,
                x,
                c=c_mod,
                use_checkpoint=self.use_checkpoint,
                mask=mask,
            )

        if control is not None:
            control_o = control.get("output")
            if i < len(control_o):
                add = control_o[i]
                if add is not None:
                    x += add

    x = self.final_layer(x, c_mod)  # (N, T, patch_size ** 2 * out_channels)
    return x
x: (N, C, H, W) tensor of spatial inputs (images or latent representations of images) t: (N,) tensor of diffusion timesteps y: (N,) tensor of class labels """ SIGMA = t[0].clone() / 1000 EO = transformer_options.get("ExtraOptions", ExtraOptions("")) if EO is not None: EO.mute = True y0_style_pos = transformer_options.get("y0_style_pos") y0_style_neg = transformer_options.get("y0_style_neg") y0_style_pos_weight = transformer_options.get("y0_style_pos_weight", 0.0) y0_style_pos_synweight = transformer_options.get("y0_style_pos_synweight", 0.0) y0_style_pos_synweight *= y0_style_pos_weight y0_style_neg_weight = transformer_options.get("y0_style_neg_weight", 0.0) y0_style_neg_synweight = transformer_options.get("y0_style_neg_synweight", 0.0) y0_style_neg_synweight *= y0_style_neg_weight weight = -1 * transformer_options.get("regional_conditioning_weight", 0.0) floor = -1 * transformer_options.get("regional_conditioning_floor", 0.0) freqsep_lowpass_method = transformer_options.get("freqsep_lowpass_method") freqsep_sigma = transformer_options.get("freqsep_sigma") freqsep_kernel_size = transformer_options.get("freqsep_kernel_size") freqsep_inner_kernel_size = transformer_options.get("freqsep_inner_kernel_size") freqsep_stride = transformer_options.get("freqsep_stride") freqsep_lowpass_weight = transformer_options.get("freqsep_lowpass_weight") freqsep_highpass_weight= transformer_options.get("freqsep_highpass_weight") freqsep_mask = transformer_options.get("freqsep_mask") x_orig = x.clone() y_orig = y.clone() h,w = x.shape[-2:] h_len = ((h + (self.patch_size // 2)) // self.patch_size) # h_len 96 w_len = ((w + (self.patch_size // 2)) // self.patch_size) # w_len 96 out_list = [] for i in range(len(transformer_options['cond_or_uncond'])): UNCOND = transformer_options['cond_or_uncond'][i] == 1 x = x_orig.clone() y = y_orig.clone() context_base = context[i][None,...].clone() if UNCOND: #transformer_options['reg_cond_weight'] = -1 #context_tmp = context[i][None,...].clone() 
transformer_options['reg_cond_weight'] = transformer_options.get("regional_conditioning_weight", 0.0) #transformer_options['regional_conditioning_weight'] transformer_options['reg_cond_floor'] = transformer_options.get("regional_conditioning_floor", 0.0) #transformer_options['regional_conditioning_floor'] #if "regional_conditioning_floor" in transformer_options else 0.0 transformer_options['reg_cond_mask_orig'] = transformer_options.get('regional_conditioning_mask_orig') AttnMask = transformer_options.get('AttnMask', None) RegContext = transformer_options.get('RegContext', None) if AttnMask is not None and transformer_options['reg_cond_weight'] > 0.0: AttnMask.attn_mask_recast(x.dtype) context_tmp = RegContext.get().to(context.dtype) #context_tmp = 0 * context_tmp.clone() A = context[i][None,...].clone() B = context_tmp context_tmp = A.repeat(1, (B.shape[1] // A.shape[1]) + 1, 1)[:, :B.shape[1], :] else: context_tmp = context[i][None,...].clone() elif UNCOND == False: transformer_options['reg_cond_weight'] = transformer_options.get("regional_conditioning_weight", 0.0) #transformer_options['regional_conditioning_weight'] transformer_options['reg_cond_floor'] = transformer_options.get("regional_conditioning_floor", 0.0) #transformer_options['regional_conditioning_floor'] #if "regional_conditioning_floor" in transformer_options else 0.0 transformer_options['reg_cond_mask_orig'] = transformer_options.get('regional_conditioning_mask_orig') AttnMask = transformer_options.get('AttnMask', None) RegContext = transformer_options.get('RegContext', None) if AttnMask is not None and transformer_options['reg_cond_weight'] > 0.0: AttnMask.attn_mask_recast(x.dtype) context_tmp = RegContext.get().to(context.dtype) else: context_tmp = context[i][None,...].clone() if context_tmp is None: context_tmp = context[i][None,...].clone() #context = context_tmp if self.context_processor is not None: context_tmp = self.context_processor(context_tmp) hw = x.shape[-2:] x = self.x_embedder(x) + 
comfy.ops.cast_to_input(self.cropped_pos_embed(hw, device=x.device), x) c = self.t_embedder(t, dtype=x.dtype) # (N, D) # c is like vec... if y is not None and self.y_embedder is not None: y = self.y_embedder(y_orig.clone()) # (N, D) c = c + y # (N, D) # vec = vec + y (y = pooled_output 1,2048) if context_tmp is not None: context_tmp = self.context_embedder(context_tmp) if self.context_processor is not None: context_base = self.context_processor(context_base) #hw = x.shape[-2:] #x = self.x_embedder(x) + comfy.ops.cast_to_input(self.cropped_pos_embed(hw, device=x.device), x) c_base = self.t_embedder(t, dtype=x.dtype) # (N, D) # c is like vec... if y is not None and self.y_embedder is not None: y = self.y_embedder(y_orig.clone()) # (N, D) c_base = c_base + y # (N, D) # vec = vec + y (y = pooled_output 1,2048) if context_base is not None: context_base = self.context_embedder(context_base) x = self.forward_core_with_concat( x[i][None,...], c[i][None,...], c_base[i][None,...], context_tmp, context_base, #context[i][None,...].clone(), control, transformer_options, ) x = self.unpatchify(x, hw=hw) # (N, out_channels, H, W) out_list.append(x) x = torch.stack(out_list, dim=0).squeeze(dim=1) eps = x[:,:,:hw[-2],:hw[-1]] dtype = eps.dtype if self.style_dtype is None else self.style_dtype if y0_style_pos is not None: y0_style_pos_weight = transformer_options.get("y0_style_pos_weight") y0_style_pos_synweight = transformer_options.get("y0_style_pos_synweight") y0_style_pos_synweight *= y0_style_pos_weight y0_style_pos_mask = transformer_options.get("y0_style_pos_mask") y0_style_pos_mask_edge = transformer_options.get("y0_style_pos_mask_edge") y0_style_pos = y0_style_pos.to(dtype) x = x_orig.clone().to(dtype) eps = eps.to(dtype) eps_orig = eps.clone() sigma = SIGMA #t_orig[0].to(torch.float32) / 1000 denoised = x - sigma * eps denoised_embed = self.Retrojector.embed(denoised) y0_adain_embed = self.Retrojector.embed(y0_style_pos) if transformer_options['y0_style_method'] == 
"scattersort": tile_h, tile_w = transformer_options.get('y0_style_tile_height'), transformer_options.get('y0_style_tile_width') pad = transformer_options.get('y0_style_tile_padding') if pad is not None and tile_h is not None and tile_w is not None: denoised_spatial = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) y0_adain_spatial = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) if EO("scattersort_median_LP"): denoised_spatial_LP = median_blur_2d(denoised_spatial, kernel_size=EO("scattersort_median_LP",7)) y0_adain_spatial_LP = median_blur_2d(y0_adain_spatial, kernel_size=EO("scattersort_median_LP",7)) denoised_spatial_HP = denoised_spatial - denoised_spatial_LP y0_adain_spatial_HP = y0_adain_spatial - y0_adain_spatial_LP denoised_spatial_LP = apply_scattersort_tiled(denoised_spatial_LP, y0_adain_spatial_LP, tile_h, tile_w, pad) denoised_spatial = denoised_spatial_LP + denoised_spatial_HP denoised_embed = rearrange(denoised_spatial, "b c h w -> b (h w) c") else: denoised_spatial = apply_scattersort_tiled(denoised_spatial, y0_adain_spatial, tile_h, tile_w, pad) denoised_embed = rearrange(denoised_spatial, "b c h w -> b (h w) c") else: denoised_embed = apply_scattersort_masked(denoised_embed, y0_adain_embed, y0_style_pos_mask, y0_style_pos_mask_edge, h_len, w_len) elif transformer_options['y0_style_method'] == "AdaIN": if freqsep_mask is not None: freqsep_mask = freqsep_mask.view(1, 1, *freqsep_mask.shape[-2:]).float() freqsep_mask = F.interpolate(freqsep_mask.float(), size=(h_len, w_len), mode='nearest-exact') if hasattr(self, "adain_tile"): tile_h, tile_w = self.adain_tile denoised_pretile = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) y0_adain_pretile = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) if self.adain_flag: h_off = tile_h // 2 w_off = tile_w // 2 denoised_pretile = denoised_pretile[:,:,h_off:-h_off, w_off:-w_off] self.adain_flag = False else: h_off = 0 w_off = 0 
self.adain_flag = True tiles, orig_shape, grid, strides = tile_latent(denoised_pretile, tile_size=(tile_h,tile_w)) y0_tiles, orig_shape, grid, strides = tile_latent(y0_adain_pretile, tile_size=(tile_h,tile_w)) tiles_out = [] for i in range(tiles.shape[0]): tile = tiles[i].unsqueeze(0) y0_tile = y0_tiles[i].unsqueeze(0) tile = rearrange(tile, "b c h w -> b (h w) c", h=tile_h, w=tile_w) y0_tile = rearrange(y0_tile, "b c h w -> b (h w) c", h=tile_h, w=tile_w) tile = adain_seq_inplace(tile, y0_tile) tiles_out.append(rearrange(tile, "b (h w) c -> b c h w", h=tile_h, w=tile_w)) tiles_out_tensor = torch.cat(tiles_out, dim=0) tiles_out_tensor = untile_latent(tiles_out_tensor, orig_shape, grid, strides) if h_off == 0: denoised_pretile = tiles_out_tensor else: denoised_pretile[:,:,h_off:-h_off, w_off:-w_off] = tiles_out_tensor denoised_embed = rearrange(denoised_pretile, "b c h w -> b (h w) c", h=h_len, w=w_len) elif freqsep_lowpass_method is not None and freqsep_lowpass_method.endswith("pw"): #EO("adain_pw"): denoised_spatial = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) y0_adain_spatial = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) if freqsep_lowpass_method == "median_pw": denoised_spatial_new = adain_patchwise_row_batch_med(denoised_spatial.clone(), y0_adain_spatial.clone().repeat(denoised_spatial.shape[0],1,1,1), sigma=freqsep_sigma, kernel_size=freqsep_kernel_size, use_median_blur=True, lowpass_weight=freqsep_lowpass_weight, highpass_weight=freqsep_highpass_weight) elif freqsep_lowpass_method == "gaussian_pw": denoised_spatial_new = adain_patchwise_row_batch(denoised_spatial.clone(), y0_adain_spatial.clone().repeat(denoised_spatial.shape[0],1,1,1), sigma=freqsep_sigma, kernel_size=freqsep_kernel_size) denoised_embed = rearrange(denoised_spatial_new, "b c h w -> b (h w) c", h=h_len, w=w_len) elif freqsep_lowpass_method is not None: denoised_spatial = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) 
y0_adain_spatial = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) if freqsep_lowpass_method == "median": denoised_spatial_LP = median_blur_2d(denoised_spatial, kernel_size=freqsep_kernel_size) y0_adain_spatial_LP = median_blur_2d(y0_adain_spatial, kernel_size=freqsep_kernel_size) elif freqsep_lowpass_method == "gaussian": denoised_spatial_LP = gaussian_blur_2d(denoised_spatial, sigma=freqsep_sigma, kernel_size=freqsep_kernel_size) y0_adain_spatial_LP = gaussian_blur_2d(y0_adain_spatial, sigma=freqsep_sigma, kernel_size=freqsep_kernel_size) denoised_spatial_HP = denoised_spatial - denoised_spatial_LP if EO("adain_fs_uhp"): y0_adain_spatial_HP = y0_adain_spatial - y0_adain_spatial_LP denoised_spatial_ULP = gaussian_blur_2d(denoised_spatial, sigma=EO("adain_fs_uhp_sigma", 1.0), kernel_size=EO("adain_fs_uhp_kernel_size", 3)) y0_adain_spatial_ULP = gaussian_blur_2d(y0_adain_spatial, sigma=EO("adain_fs_uhp_sigma", 1.0), kernel_size=EO("adain_fs_uhp_kernel_size", 3)) denoised_spatial_UHP = denoised_spatial_HP - denoised_spatial_ULP y0_adain_spatial_UHP = y0_adain_spatial_HP - y0_adain_spatial_ULP #denoised_spatial_HP = y0_adain_spatial_ULP + denoised_spatial_UHP denoised_spatial_HP = denoised_spatial_ULP + y0_adain_spatial_UHP denoised_spatial_new = freqsep_lowpass_weight * y0_adain_spatial_LP + freqsep_highpass_weight * denoised_spatial_HP denoised_embed = rearrange(denoised_spatial_new, "b c h w -> b (h w) c", h=h_len, w=w_len) else: denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed) for adain_iter in range(EO("style_iter", 0)): denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed) denoised_embed = self.Retrojector.embed(self.Retrojector.unembed(denoised_embed)) denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed) elif transformer_options['y0_style_method'] == "WCT": self.StyleWCT.set(y0_adain_embed) denoised_embed = self.StyleWCT.get(denoised_embed) if transformer_options.get('y0_standard_guide') is not 
None: y0_standard_guide = transformer_options.get('y0_standard_guide') y0_standard_guide_embed = self.Retrojector.embed(y0_standard_guide) f_cs = self.StyleWCT.get(y0_standard_guide_embed) self.y0_standard_guide = self.Retrojector.unembed(f_cs) if transformer_options.get('y0_inv_standard_guide') is not None: y0_inv_standard_guide = transformer_options.get('y0_inv_standard_guide') y0_inv_standard_guide_embed = self.Retrojector.embed(y0_inv_standard_guide) f_cs = self.StyleWCT.get(y0_inv_standard_guide_embed) self.y0_inv_standard_guide = self.Retrojector.unembed(f_cs) denoised_approx = self.Retrojector.unembed(denoised_embed) eps = (x - denoised_approx) / sigma if not UNCOND: if eps.shape[0] == 2: eps[1] = eps_orig[1] + y0_style_pos_weight * (eps[1] - eps_orig[1]) eps[0] = eps_orig[0] + y0_style_pos_synweight * (eps[0] - eps_orig[0]) else: eps[0] = eps_orig[0] + y0_style_pos_weight * (eps[0] - eps_orig[0]) elif eps.shape[0] == 1 and UNCOND: eps[0] = eps_orig[0] + y0_style_pos_synweight * (eps[0] - eps_orig[0]) eps = eps.float() if y0_style_neg is not None: y0_style_neg_weight = transformer_options.get("y0_style_neg_weight") y0_style_neg_synweight = transformer_options.get("y0_style_neg_synweight") y0_style_neg_synweight *= y0_style_neg_weight y0_style_neg_mask = transformer_options.get("y0_style_neg_mask") y0_style_neg_mask_edge = transformer_options.get("y0_style_neg_mask_edge") y0_style_neg = y0_style_neg.to(dtype) x = x_orig.clone().to(dtype) eps = eps.to(dtype) eps_orig = eps.clone() sigma = SIGMA #t_orig[0].to(torch.float32) / 1000 denoised = x - sigma * eps denoised_embed = self.Retrojector.embed(denoised) y0_adain_embed = self.Retrojector.embed(y0_style_neg) if transformer_options['y0_style_method'] == "scattersort": tile_h, tile_w = transformer_options.get('y0_style_tile_height'), transformer_options.get('y0_style_tile_width') pad = transformer_options.get('y0_style_tile_padding') if pad is not None and tile_h is not None and tile_w is not None: 
denoised_spatial = rearrange(denoised_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) y0_adain_spatial = rearrange(y0_adain_embed, "b (h w) c -> b c h w", h=h_len, w=w_len) denoised_spatial = apply_scattersort_tiled(denoised_spatial, y0_adain_spatial, tile_h, tile_w, pad) denoised_embed = rearrange(denoised_spatial, "b c h w -> b (h w) c") else: denoised_embed = apply_scattersort_masked(denoised_embed, y0_adain_embed, y0_style_neg_mask, y0_style_neg_mask_edge, h_len, w_len) elif transformer_options['y0_style_method'] == "AdaIN": denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed) for adain_iter in range(EO("style_iter", 0)): denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed) denoised_embed = self.Retrojector.embed(self.Retrojector.unembed(denoised_embed)) denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed) elif transformer_options['y0_style_method'] == "WCT": self.StyleWCT.set(y0_adain_embed) denoised_embed = self.StyleWCT.get(denoised_embed) denoised_approx = self.Retrojector.unembed(denoised_embed) if UNCOND: eps = (x - denoised_approx) / sigma eps[0] = eps_orig[0] + y0_style_neg_weight * (eps[0] - eps_orig[0]) if eps.shape[0] == 2: eps[1] = eps_orig[1] + y0_style_neg_synweight * (eps[1] - eps_orig[1]) elif eps.shape[0] == 1 and not UNCOND: eps[0] = eps_orig[0] + y0_style_neg_synweight * (eps[0] - eps_orig[0]) eps = eps.float() return eps dtype = eps.dtype if self.style_dtype is None else self.style_dtype pinv_dtype = torch.float32 if dtype != torch.float64 else dtype W_inv = None #if eps.shape[0] == 2 or (eps.shape[0] == 1 and not UNCOND): if y0_style_pos is not None: y0_style_pos_weight = transformer_options.get("y0_style_pos_weight") y0_style_pos_synweight = transformer_options.get("y0_style_pos_synweight") y0_style_pos_synweight *= y0_style_pos_weight y0_style_pos = y0_style_pos.to(torch.float64) x = x_orig.to(torch.float64) eps = eps.to(torch.float64) eps_orig = eps.clone() sigma = SIGMA# 
t_orig[0].to(torch.float64) / 1000 denoised = x - sigma * eps hw = denoised.shape[-2:] features = 1536# denoised_embed.shape[-1] # should be 1536 W_conv = self.x_embedder.proj.weight.to(torch.float64) # [1536, 16, 2, 2] W_flat = W_conv.view(features, -1).to(torch.float64) # [1536, 64] W_pinv = torch.linalg.pinv(W_flat) # [64, 1536] x_embedder64 = copy.deepcopy(self.x_embedder.proj).to(denoised) #y = self.x_embedder.proj(denoised.to(torch.float16)).float() y = x_embedder64(denoised) B, C_out, H_out, W_out = y.shape y_flat = y.view(B, C_out, -1) # [B, 1536, N] y_flat = y_flat.permute(0, 2, 1) # [B, N, 1536] bias = self.x_embedder.proj.bias.to(torch.float64) # [1536] denoised_embed = y_flat - bias.view(1, 1, -1) #y = self.x_embedder.proj(y0_style_pos.to(torch.float16)).float() y = x_embedder64(y0_style_pos) B, C_out, H_out, W_out = y.shape y_flat = y.view(B, C_out, -1) # [B, 1536, N] y_flat = y_flat.permute(0, 2, 1) # [B, N , 1536] bias = self.x_embedder.proj.bias.to(torch.float64) # [1536] y0_adain_embed = y_flat - bias.view(1, 1, -1) #denoised_embed = adain_seq(denoised_embed, y0_adain_embed) if transformer_options['y0_style_method'] == "AdaIN": denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed) """for adain_iter in range(EO("style_iter", 0)): denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed) denoised_embed = (denoised_embed - b) @ torch.linalg.pinv(W.to(pinv_dtype)).T.to(dtype) # not going to work! 
needs denoised_embed = F.linear(denoised_embed .to(W), W, b).to(img) denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed)""" elif transformer_options['y0_style_method'] == "WCT": if self.y0_adain_embed is None or self.y0_adain_embed.shape != y0_adain_embed.shape or torch.norm(self.y0_adain_embed - y0_adain_embed) > 0: self.y0_adain_embed = y0_adain_embed f_s = y0_adain_embed[0].clone() self.mu_s = f_s.mean(dim=0, keepdim=True) f_s_centered = f_s - self.mu_s cov = (f_s_centered.T.double() @ f_s_centered.double()) / (f_s_centered.size(0) - 1) S_eig, U_eig = torch.linalg.eigh(cov + 1e-5 * torch.eye(cov.size(0), dtype=cov.dtype, device=cov.device)) S_eig_sqrt = S_eig.clamp(min=0).sqrt() # eigenvalues -> singular values whiten = U_eig @ torch.diag(S_eig_sqrt) @ U_eig.T self.y0_color = whiten.to(f_s_centered) for wct_i in range(eps.shape[0]): f_c = denoised_embed[wct_i].clone() mu_c = f_c.mean(dim=0, keepdim=True) f_c_centered = f_c - mu_c cov = (f_c_centered.T.double() @ f_c_centered.double()) / (f_c_centered.size(0) - 1) S_eig, U_eig = torch.linalg.eigh(cov + 1e-5 * torch.eye(cov.size(0), dtype=cov.dtype, device=cov.device)) inv_sqrt_eig = S_eig.clamp(min=0).rsqrt() whiten = U_eig @ torch.diag(inv_sqrt_eig) @ U_eig.T whiten = whiten.to(f_c_centered) f_c_whitened = f_c_centered @ whiten.T f_cs = f_c_whitened @ self.y0_color.T + self.mu_s denoised_embed[wct_i] = f_cs x_patches = denoised_embed @ W_pinv.T # [B,N,64] x_patches = x_patches.permute(0, 2, 1) # [B,64,N] x_reconstructed = torch.nn.functional.fold( x_patches, # [B, 64, N] output_size=(H_out * 2, W_out * 2), # restore original input shape kernel_size=2, stride=2 ) denoised_approx = x_reconstructed #.view(B, 16, H_out * 2, W_out * 2) eps = (x - denoised_approx) / sigma #if eps.shape[0] == 2: # eps[1] = eps_orig[1] + y0_style_pos_weight * (eps[1] - eps_orig[1]) # eps[0] = eps_orig[0] + y0_style_pos_synweight * (eps[0] - eps_orig[0]) #else: # eps[0] = eps_orig[0] + y0_style_pos_weight * (eps[0] - 
eps_orig[0]) if not UNCOND: if eps.shape[0] == 2: eps[1] = eps_orig[1] + y0_style_pos_weight * (eps[1] - eps_orig[1]) eps[0] = eps_orig[0] + y0_style_pos_synweight * (eps[0] - eps_orig[0]) else: eps[0] = eps_orig[0] + y0_style_pos_weight * (eps[0] - eps_orig[0]) elif eps.shape[0] == 1 and UNCOND: eps[0] = eps_orig[0] + y0_style_pos_synweight * (eps[0] - eps_orig[0]) eps = eps.float() #if eps.shape[0] == 2 or (eps.shape[0] == 1 and UNCOND): if y0_style_neg is not None: y0_style_neg_weight = transformer_options.get("y0_style_neg_weight") y0_style_neg_synweight = transformer_options.get("y0_style_neg_synweight") y0_style_neg_synweight *= y0_style_neg_weight y0_style_neg = y0_style_neg.to(torch.float64) x = x_orig.to(torch.float64) eps = eps.to(torch.float64) eps_orig = eps.clone() sigma = SIGMA# t_orig[0].to(torch.float64) / 1000 denoised = x - sigma * eps hw = denoised.shape[-2:] features = 1536# denoised_embed.shape[-1] # should be 1536 W_conv = self.x_embedder.proj.weight.float() # [1536, 16, 2, 2] W_flat = W_conv.view(features, -1).float() # [1536, 64] W_pinv = torch.linalg.pinv(W_flat) # [64, 1536] y = self.x_embedder.proj(denoised.to(torch.float16)).float() B, C_out, H_out, W_out = y.shape y_flat = y.view(B, C_out, -1) # [B, 1536, N] y_flat = y_flat.permute(0, 2, 1) # [B, N, 1536] bias = self.x_embedder.proj.bias.float() # [1536] denoised_embed = y_flat - bias.view(1, 1, -1) y = self.x_embedder.proj(y0_style_neg.to(torch.float16)).float() B, C_out, H_out, W_out = y.shape y_flat = y.view(B, C_out, -1) # [B, 1536, N] y_flat = y_flat.permute(0, 2, 1) # [B, N , 1536] bias = self.x_embedder.proj.bias.float() # [1536] y0_adain_embed = y_flat - bias.view(1, 1, -1) #denoised_embed = adain_seq(denoised_embed, y0_adain_embed) if transformer_options['y0_style_method'] == "AdaIN": denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed) """for adain_iter in range(EO("style_iter", 0)): denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed) 
denoised_embed = (denoised_embed - b) @ torch.linalg.pinv(W.to(pinv_dtype)).T.to(dtype) denoised_embed = F.linear(denoised_embed .to(W), W, b).to(img) denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed)""" elif transformer_options['y0_style_method'] == "WCT": if self.y0_adain_embed is None or self.y0_adain_embed.shape != y0_adain_embed.shape or torch.norm(self.y0_adain_embed - y0_adain_embed) > 0: self.y0_adain_embed = y0_adain_embed f_s = y0_adain_embed[0].clone() self.mu_s = f_s.mean(dim=0, keepdim=True) f_s_centered = f_s - self.mu_s cov = (f_s_centered.T.double() @ f_s_centered.double()) / (f_s_centered.size(0) - 1) S_eig, U_eig = torch.linalg.eigh(cov + 1e-5 * torch.eye(cov.size(0), dtype=cov.dtype, device=cov.device)) S_eig_sqrt = S_eig.clamp(min=0).sqrt() # eigenvalues -> singular values whiten = U_eig @ torch.diag(S_eig_sqrt) @ U_eig.T self.y0_color = whiten.to(f_s_centered) for wct_i in range(eps.shape[0]): f_c = denoised_embed[wct_i].clone() mu_c = f_c.mean(dim=0, keepdim=True) f_c_centered = f_c - mu_c cov = (f_c_centered.T.double() @ f_c_centered.double()) / (f_c_centered.size(0) - 1) S_eig, U_eig = torch.linalg.eigh(cov + 1e-5 * torch.eye(cov.size(0), dtype=cov.dtype, device=cov.device)) inv_sqrt_eig = S_eig.clamp(min=0).rsqrt() whiten = U_eig @ torch.diag(inv_sqrt_eig) @ U_eig.T whiten = whiten.to(f_c_centered) f_c_whitened = f_c_centered @ whiten.T f_cs = f_c_whitened @ self.y0_color.T + self.mu_s denoised_embed[wct_i] = f_cs x_patches = denoised_embed @ W_pinv.T # [B,N,64] x_patches = x_patches.permute(0, 2, 1) # [B,64,N] x_reconstructed = torch.nn.functional.fold( x_patches, # [B, 64, N] output_size=(H_out * 2, W_out * 2), # restore original input shape kernel_size=2, stride=2 ) denoised_approx = x_reconstructed #.view(B, 16, H_out * 2, W_out * 2) #eps = (x - denoised_approx) / sigma #eps[0] = eps_orig[0] + y0_style_neg_weight * (eps[0] - eps_orig[0]) #if eps.shape[0] == 2: # eps[1] = eps_orig[1] + y0_style_neg_synweight * (eps[1] - 
class ReOpenAISignatureMMDITWrapper(MMDiT):
    """MMDiT whose ``forward`` takes ``(x, timesteps, context, y, ...)``."""

    def forward(
        self,
        x: torch.Tensor,
        timesteps: torch.Tensor,
        context: Optional[torch.Tensor] = None,
        y: Optional[torch.Tensor] = None,
        control=None,
        transformer_options={},
        **kwargs,
    ) -> torch.Tensor:
        """Delegate to ``MMDiT.forward``; extra keyword arguments are dropped."""
        forwarded = dict(
            context=context,
            y=y,
            control=control,
            transformer_options=transformer_options,
        )
        return super().forward(x, timesteps, **forwarded)


def adain_seq_inplace(content: torch.Tensor, style: torch.Tensor, eps: float = 1e-7) -> torch.Tensor:
    """Match the dim-1 mean/std of ``content`` to ``style``, mutating ``content``.

    Statistics are taken along dimension 1 with ``keepdim=True``; ``eps`` is
    added to both standard deviations. Returns the same tensor object that
    was passed as ``content``.
    """
    content_mean = content.mean(1, keepdim=True)
    content_std = content.std(1, keepdim=True)
    content_std.add_(eps)
    style_mean = style.mean(1, keepdim=True)
    style_std = style.std(1, keepdim=True)
    style_std.add_(eps)

    # Normalise, then re-scale and shift, all on the caller's storage.
    content.sub_(content_mean)
    content.div_(content_std)
    content.mul_(style_std)
    content.add_(style_mean)
    return content


def adain_seq(content: torch.Tensor, style: torch.Tensor, eps: float = 1e-7) -> torch.Tensor:
    """Out-of-place variant of ``adain_seq_inplace``; inputs are left untouched."""
    content_mean = content.mean(1, keepdim=True)
    content_scale = content.std(1, keepdim=True) + eps
    style_mean = style.mean(1, keepdim=True)
    style_scale = style.std(1, keepdim=True) + eps
    normalised = (content - content_mean) / content_scale
    return normalised * style_scale + style_mean
get_res4lyf_scheduler_list def rescale_linear(input, input_min, input_max, output_min, output_max): output = ((input - input_min) / (input_max - input_min)) * (output_max - output_min) + output_min; return output class set_precision_sigmas: def __init__(self): pass @classmethod def INPUT_TYPES(s): return { "required": { "sigmas": ("SIGMAS", ), "precision": (["16", "32", "64"], ), "set_default": ("BOOLEAN", {"default": False}) }, } RETURN_TYPES = ("SIGMAS",) RETURN_NAMES = ("passthrough",) CATEGORY = "RES4LYF/precision" FUNCTION = "main" def main(self, precision="32", sigmas=None, set_default=False): match precision: case "16": if set_default is True: torch.set_default_dtype(torch.float16) sigmas = sigmas.to(torch.float16) case "32": if set_default is True: torch.set_default_dtype(torch.float32) sigmas = sigmas.to(torch.float32) case "64": if set_default is True: torch.set_default_dtype(torch.float64) sigmas = sigmas.to(torch.float64) return (sigmas, ) class SimpleInterpolator(nn.Module): def __init__(self): super(SimpleInterpolator, self).__init__() self.net = nn.Sequential( nn.Linear(1, 16), nn.ReLU(), nn.Linear(16, 32), nn.ReLU(), nn.Linear(32, 1) ) def forward(self, x): return self.net(x) def train_interpolator(model, sigma_schedule, steps, epochs=5000, lr=0.01): with torch.inference_mode(False): model = SimpleInterpolator() sigma_schedule = sigma_schedule.clone() criterion = nn.MSELoss() optimizer = optim.Adam(model.parameters(), lr=lr) x_train = torch.linspace(0, 1, steps=steps).unsqueeze(1) y_train = sigma_schedule.unsqueeze(1) # disable inference mode for training model.train() for epoch in range(epochs): optimizer.zero_grad() # fwd pass outputs = model(x_train) loss = criterion(outputs, y_train) loss.backward() optimizer.step() return model def interpolate_sigma_schedule_model(sigma_schedule, target_steps): model = SimpleInterpolator() sigma_schedule = sigma_schedule.float().detach() # train on original sigma schedule trained_model = 
class sigmas_interpolate:
    """Node that resamples a sigma schedule to ``output_length`` entries.

    ``mode`` selects the resampling strategy. With ``rescale_after`` the result
    is linearly remapped so that its maximum lands on the first input sigma and
    its minimum on the last input sigma.
    """

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas_in": ("SIGMAS", {"forceInput": True}),
                "output_length": ("INT", {"default": 0, "min": 0,"max": 10000,"step": 1}),
                "mode": (["linear", "nearest", "polynomial", "exponential", "power", "model"],),
                "order": ("INT", {"default": 8, "min": 1,"max": 64,"step": 1}),
                "rescale_after": ("BOOLEAN", {"default": True, "tooltip": "Rescale the output to the original min/max range after interpolation."}),
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    RETURN_NAMES = ("sigmas",)
    CATEGORY = "RES4LYF/sigmas"
    DESCRIPTION = "Interpolate the sigmas schedule to a new length clamping the start and end values."

    def interpolate_sigma_schedule_poly(self, sigma_schedule, target_steps):
        """Fit a polynomial of degree ``self.order`` and evaluate it on the new grid.

        Both the source and target grids are evenly spaced on [0, 1].
        """
        order = self.order
        sigma_schedule_np = sigma_schedule.cpu().numpy()

        # Source samples are assumed to be evenly spaced.
        original_steps = np.linspace(0, 1, len(sigma_schedule_np))
        coefficients = np.polyfit(original_steps, sigma_schedule_np, deg=order)

        target_steps_np = np.linspace(0, 1, target_steps)
        interpolated_sigma_np = np.polyval(coefficients, target_steps_np)

        return torch.tensor(interpolated_sigma_np, device=sigma_schedule.device, dtype=sigma_schedule.dtype)

    def interpolate_sigma_schedule_constrained(self, sigma_schedule, target_steps):
        """Resample with a cubic spline built with ``bc_type=((1, 0.0), (1, 0.0))``.

        Not selected by any ``mode`` in ``main``; kept for direct callers.
        """
        sigma_schedule_np = sigma_schedule.cpu().numpy()

        original_steps = np.linspace(0, 1, len(sigma_schedule_np))
        target_steps_np = np.linspace(0, 1, target_steps)

        cs = CubicSpline(original_steps, sigma_schedule_np, bc_type=((1, 0.0), (1, 0.0)))
        interpolated_sigma_np = cs(target_steps_np)

        return torch.tensor(interpolated_sigma_np, device=sigma_schedule.device, dtype=sigma_schedule.dtype)

    def interpolate_sigma_schedule_exp(self, sigma_schedule, target_steps):
        """Linearly interpolate ``log(sigma)`` and exponentiate the result."""
        log_sigma_schedule = torch.log(sigma_schedule)

        interpolated_log_sigma = F.interpolate(
            log_sigma_schedule.unsqueeze(0).unsqueeze(0),  # fake batch and channel dimensions
            size=target_steps,
            mode='linear',
            align_corners=True
        ).squeeze()

        return torch.exp(interpolated_log_sigma)

    def interpolate_sigma_schedule_power(self, sigma_schedule, target_steps):
        """Fit ``y = a * x**b`` by linear regression in log-log space and sample it.

        ``x`` runs from 1 to ``len(sigma_schedule)``; the fitted curve replaces
        the data rather than passing through it.
        """
        sigma_schedule_np = sigma_schedule.cpu().numpy()
        original_steps = np.linspace(1, len(sigma_schedule_np), len(sigma_schedule_np))

        log_x = np.log(original_steps)
        log_y = np.log(sigma_schedule_np)

        # Degree-1 fit in log-log space: slope is the exponent, intercept is log(a).
        coefficients = np.polyfit(log_x, log_y, deg=1)
        a = np.exp(coefficients[1])
        b = coefficients[0]

        target_steps_np = np.linspace(1, len(sigma_schedule_np), target_steps)
        interpolated_sigma_np = a * (target_steps_np ** b)

        return torch.tensor(interpolated_sigma_np, device=sigma_schedule.device, dtype=sigma_schedule.dtype)

    def interpolate_sigma_schedule_linear(self, sigma_schedule, target_steps):
        """Resample with ``F.interpolate(..., mode='linear')``."""
        return F.interpolate(sigma_schedule.unsqueeze(0).unsqueeze(0), target_steps, mode='linear').squeeze(0).squeeze(0)

    def interpolate_sigma_schedule_nearest(self, sigma_schedule, target_steps):
        """Resample with ``F.interpolate(..., mode='nearest')``.

        Not selected by any ``mode`` in ``main``; kept for direct callers.
        """
        return F.interpolate(sigma_schedule.unsqueeze(0).unsqueeze(0), target_steps, mode='nearest').squeeze(0).squeeze(0)

    def interpolate_nearest_neighbor(self, sigma_schedule, target_steps):
        """Pick, for every target position, a source sample via ``torch.searchsorted``.

        The chosen index is the insertion point of the target position in the
        source grid, clamped to the valid index range.
        """
        original_steps = torch.linspace(0, 1, steps=len(sigma_schedule))
        target_steps = torch.linspace(0, 1, steps=target_steps)

        indices = torch.searchsorted(original_steps, target_steps)
        indices = torch.clamp(indices, 0, len(sigma_schedule) - 1)

        return sigma_schedule[indices]

    def main(self, sigmas_in, output_length, mode, order, rescale_after=True):
        """Resample ``sigmas_in`` to ``output_length`` values.

        Args:
            sigmas_in: 1-D tensor of sigmas.
            output_length: number of values to produce.
            mode: "linear", "nearest", "polynomial", "exponential", "power" or "model".
            order: polynomial degree, used by the "polynomial" mode.
            rescale_after: remap the result onto the range spanned by the first
                and last input sigma.

        Returns:
            A 1-tuple holding the resampled tensor.

        Raises:
            ValueError: if ``mode`` is not one of the supported names.
        """
        self.order = order

        sigmas_in = sigmas_in.clone()
        start = sigmas_in[0]
        end = sigmas_in[-1]

        if mode == "model":
            # Module-level helper; looked up only when this mode is requested.
            interpolate = interpolate_sigma_schedule_model
        else:
            by_mode = {
                "linear": self.interpolate_sigma_schedule_linear,
                "nearest": self.interpolate_nearest_neighbor,
                "polynomial": self.interpolate_sigma_schedule_poly,
                "exponential": self.interpolate_sigma_schedule_exp,
                "power": self.interpolate_sigma_schedule_power,
            }
            try:
                interpolate = by_mode[mode]
            except KeyError:
                raise ValueError(f"sigmas_interpolate: unknown mode {mode!r}") from None

        sigmas_interp = interpolate(sigmas_in, output_length)

        if rescale_after and sigmas_interp.numel() > 0:
            lowest = sigmas_interp.min()
            highest = sigmas_interp.max()
            # A flat (or single-element) result has no range to stretch; rescaling
            # it would divide by zero and fill the schedule with NaN.
            if highest > lowest:
                sigmas_interp = ((sigmas_interp - lowest) * (start - end)) / (highest - lowest) + end

        return (sigmas_interp,)
class sigmas_variance_floor:
    """Node that raises sigmas which drop too far below their predecessor.

    For each sigma the lower bound derived from the previous (already
    processed) sigma is ``(-1 + sqrt(1 + 4 * previous)) / 2``.
    """

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    DESCRIPTION = ("Process a sigma schedule so that any steps that are too large for variance-locked SDE sampling are replaced with the maximum permissible value."
                   "Will be very difficult to approach sigma = 0 due to the nature of the math, as steps become very small much below approximately sigma = 0.15 to 0.2.")

    def main(self, sigmas):
        """Return a copy of ``sigmas`` with too-small positive entries raised.

        The walk is sequential on purpose: each bound is computed from the
        previous entry *after* it may itself have been raised. Entries that are
        zero or negative are never touched. Every replacement is printed.
        """
        original_dtype = sigmas.dtype
        schedule = sigmas.clone()

        for idx in range(1, len(schedule)):
            floor = (-1 + torch.sqrt(1 + 4 * schedule[idx - 1])) / 2
            current = schedule[idx]
            is_positive = bool(current > 0.0)
            below_floor = bool(current < floor)
            if is_positive and below_floor:
                raised = floor + 0.001
                print("swapped i+1 with sigma_next+0.001: ", current, raised)
                schedule[idx] = raised

        return (schedule.to(original_dtype),)
class sigmas_split:
    """Node that returns the slice ``sigmas[sigmas_start:sigmas_end]``."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "sigmas_start": ("INT", {"default": 0, "min": 0,"max": 1000,"step": 1}),
                "sigmas_end": ("INT", {"default": 1000, "min": 0,"max": 1000,"step": 1}),
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas, sigmas_start, sigmas_end):
        """Return an independent copy of ``sigmas[sigmas_start:sigmas_end]``.

        Args:
            sigmas: 1-D tensor of sigmas.
            sigmas_start: first index kept (inclusive).
            sigmas_end: index at which the slice stops (exclusive).

        Returns:
            A 1-tuple holding the copied slice; empty when the bounds select
            nothing.
        """
        # Clone only the slice so the result shares no storage with the input.
        return (sigmas[sigmas_start:sigmas_end].clone(),)
class sigmas_set_floor:
    """Node that overwrites every sigma at or below ``floor`` with ``new_floor``."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "floor": ("FLOAT", {"default": 0.0291675, "min": -10000,"max": 10000,"step": 0.01}),
                "new_floor": ("FLOAT", {"default": 0.0291675, "min": -10000,"max": 10000,"step": 0.01})
            }
        }

    RETURN_TYPES = ("SIGMAS",)
    FUNCTION = "set_floor"

    CATEGORY = "RES4LYF/sigmas"

    def set_floor(self, sigmas, floor, new_floor):
        """Return a copy of ``sigmas`` where entries ``<= floor`` become ``new_floor``.

        The input tensor is left unmodified.
        """
        floored = sigmas.clone()
        at_or_below = floored <= floor
        floored[at_or_below] = new_floor
        return (floored,)
class sigmas_cleanup:
    """Node that drops small sigmas, removes consecutive repeats and appends 0."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "sigmin": ("FLOAT", {"default": 0.0291675, "min": 0,"max": 1000,"step": 0.01})
            }
        }

    RETURN_TYPES = ("SIGMAS",)
    FUNCTION = "cleanup"

    CATEGORY = "RES4LYF/sigmas"

    def cleanup(self, sigmas, sigmin):
        """Return the cleaned schedule terminated by a single 0.

        Steps:
            1. keep only entries ``>= sigmin``;
            2. of each run of equal consecutive values keep the last one;
            3. append a trailing 0.

        Helper tensors are created from the filtered schedule, so they match
        its device and dtype. If nothing survives step 1 the result is just
        the trailing 0.

        Args:
            sigmas: 1-D tensor of sigmas.
            sigmin: smallest sigma that is kept.

        Returns:
            A 1-tuple holding the cleaned tensor.
        """
        kept = sigmas[sigmas >= sigmin]

        if kept.numel() > 1:
            # The last element is always kept; earlier ones only when the
            # following value differs.
            keep = torch.ones_like(kept, dtype=torch.bool)
            keep[:-1] = kept[:-1] != kept[1:]
            kept = kept[keep]

        return (torch.cat((kept, kept.new_zeros(1))),)
class sigmas_rescale:
    """Node that linearly remaps a schedule so its max becomes ``start`` and its min ``end``."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "start": ("FLOAT", {"default": 1.0, "min": -10000,"max": 10000,"step": 0.01}),
                "end": ("FLOAT", {"default": 0.0, "min": -10000,"max": 10000,"step": 0.01}),
                "sigmas": ("SIGMAS", ),
            },
            "optional": {
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    RETURN_NAMES = ("sigmas_rescaled",)
    CATEGORY = "RES4LYF/sigmas"
    DESCRIPTION = ("Can be used to set denoise. Results are generally better than with the approach used by KSampler and most nodes with denoise values "
                   "(which slice the sigmas schedule according to step count, not the noise level). Will also flip the sigma schedule if the start and end values are reversed."
                   )

    def main(self, start=0, end=-1, sigmas=None):
        """Rescale ``sigmas`` so that its maximum maps to ``start`` and its minimum to ``end``.

        Args:
            start: value the largest sigma is mapped to.
            end: value the smallest sigma is mapped to.
            sigmas: tensor to rescale.

        Returns:
            A 1-tuple holding the rescaled tensor. A schedule whose values are
            all equal has no range to stretch; it is returned filled with
            ``end`` instead of the NaNs a 0/0 division would produce.
        """
        lowest = sigmas.min()
        highest = sigmas.max()
        span = highest - lowest

        if span == 0:
            return (torch.full_like(sigmas, end),)

        s_out_1 = ((sigmas - lowest) * (start - end)) / span + end
        return (s_out_1,)
class sigmas_math3:
    """Node that evaluates three user-written expressions over sigma tensors.

    The expressions may refer to ``a``, ``b``, ``c`` (optional input schedules,
    sliced to ``[start:stop]``), ``x``, ``y``, ``z`` (constant tensors), ``s``
    (the index ramp 0, 1, 2, ...), ``np``, ``torch`` and every name exported by
    ``numpy``.
    """

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "start": ("INT", {"default": 0, "min": 0,"max": 10000,"step": 1}),
                "stop": ("INT", {"default": 0, "min": 0,"max": 10000,"step": 1}),
                "trim": ("INT", {"default": 0, "min": -10000,"max": 0,"step": 1}),
            },
            "optional": {
                "a": ("SIGMAS", {"forceInput": False}),
                "b": ("SIGMAS", {"forceInput": False}),
                "c": ("SIGMAS", {"forceInput": False}),
                "x": ("FLOAT", {"default": 1, "min": -10000,"max": 10000,"step": 0.01}),
                "y": ("FLOAT", {"default": 1, "min": -10000,"max": 10000,"step": 0.01}),
                "z": ("FLOAT", {"default": 1, "min": -10000,"max": 10000,"step": 0.01}),
                "f1": ("STRING", {"default": "s", "multiline": True}),
                "rescale1" : ("BOOLEAN", {"default": False}),
                "max1": ("FLOAT", {"default": 14.614642, "min": -10000,"max": 10000,"step": 0.01}),
                "min1": ("FLOAT", {"default": 0.0291675, "min": -10000,"max": 10000,"step": 0.01}),
                "f2": ("STRING", {"default": "s", "multiline": True}),
                "rescale2" : ("BOOLEAN", {"default": False}),
                "max2": ("FLOAT", {"default": 14.614642, "min": -10000,"max": 10000,"step": 0.01}),
                "min2": ("FLOAT", {"default": 0.0291675, "min": -10000,"max": 10000,"step": 0.01}),
                "f3": ("STRING", {"default": "s", "multiline": True}),
                "rescale3" : ("BOOLEAN", {"default": False}),
                "max3": ("FLOAT", {"default": 14.614642, "min": -10000,"max": 10000,"step": 0.01}),
                "min3": ("FLOAT", {"default": 0.0291675, "min": -10000,"max": 10000,"step": 0.01}),
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS","SIGMAS","SIGMAS")
    CATEGORY = "RES4LYF/sigmas"

    @staticmethod
    def _rescale(values, new_min, new_max):
        """Linearly map ``values`` so its min/max become ``new_min``/``new_max``."""
        lowest = values.min()
        highest = values.max()
        if highest == lowest:
            # No range to stretch: avoid 0/0 and place everything on new_min.
            return (values - lowest) + new_min
        return ((values - lowest) * (new_max - new_min)) / (highest - lowest) + new_min

    def main(self, start=0, stop=0, trim=0, a=None, b=None, c=None, x=1.0, y=1.0, z=1.0, f1="s", f2="s", f3="s", rescale1=False, rescale2=False, rescale3=False, min1=1.0, max1=1.0, min2=1.0, max2=1.0, min3=1.0, max3=1.0):
        """Evaluate ``f1``, ``f2`` and ``f3`` and optionally rescale each result.

        Args:
            start, stop, trim: slice bounds for ``a``/``b``/``c``. ``stop == 0``
                means "up to the shortest provided input"; otherwise ``stop`` is
                treated as inclusive. ``trim`` (<= 0) is added to ``stop``.
            a, b, c: optional input schedules.
            x, y, z: scalars exposed to the expressions as constant tensors.
            f1, f2, f3: Python expressions.
            rescaleN, minN, maxN: when ``rescaleN`` is set, result N is mapped
                linearly onto ``[minN, maxN]``.

        Returns:
            A 3-tuple with the three results.
        """
        if stop == 0:
            t_lens = [len(tensor) for tensor in [a, b, c] if tensor is not None]
            t_len = stop = min(t_lens) if t_lens else 0
            # NOTE(review): t_len ignores `start` here, so `s`/`x`/`y`/`z` are
            # longer than the sliced inputs when start > 0 — confirm intent.
        else:
            stop = stop + 1
            t_len = stop - start

        stop = stop + trim
        t_len = t_len + trim

        t_a = a[start:stop] if a is not None else None
        t_b = b[start:stop] if b is not None else None
        t_c = c[start:stop] if c is not None else None

        t_s = torch.arange(0.0, t_len)

        t_x = torch.full((t_len,), x)
        t_y = torch.full((t_len,), y)
        t_z = torch.full((t_len,), z)

        # SECURITY: the expressions are evaluated with eval(). numpy's module
        # dict carries its own "__builtins__" entry, so builtins ARE reachable
        # from the expressions; this is not a sandbox. Only feed it trusted
        # workflow text.
        eval_namespace = dict(np.__dict__)
        eval_namespace.update({"np": np, "a": t_a, "b": t_b, "c": t_c, "x": t_x, "y": t_y, "z": t_z, "s": t_s, "torch": torch})

        s_out_1 = eval(f1, eval_namespace)
        s_out_2 = eval(f2, eval_namespace)
        s_out_3 = eval(f3, eval_namespace)

        if rescale1:
            s_out_1 = self._rescale(s_out_1, min1, max1)
        if rescale2:
            s_out_2 = self._rescale(s_out_2, min2, max2)
        if rescale3:
            s_out_3 = self._rescale(s_out_3, min3, max3)

        return s_out_1, s_out_2, s_out_3
{"default":0.0291675, "min": -10000,"max": 10000,"step": 0.01}), "s_max": ("FLOAT", {"default": 2, "min": -10000,"max": 10000,"step": 0.01}), "s_min_end": ("FLOAT", {"default": 0.0291675, "min": -10000,"max": 10000,"step": 0.01}), }, "optional": { "momentums": ("SIGMAS", {"forceInput": False}), "sigmas": ("SIGMAS", {"forceInput": False}), } } FUNCTION = "main" RETURN_TYPES = ("SIGMAS","SIGMAS") RETURN_NAMES = ("momentums","sigmas") CATEGORY = "RES4LYF/schedulers" def main(self, steps_up, steps_down, rho_up, rho_down, s_min_start, s_max, s_min_end, sigmas=None, momentums=None): s_up = get_sigmas_karras(steps_up, s_min_start, s_max, rho_up) s_down = get_sigmas_karras(steps_down, s_min_end, s_max, rho_down) s_up = s_up[:-1] s_down = s_down[:-1] s_up = torch.flip(s_up, dims=[0]) sigmas_new = torch.cat((s_up, s_down), dim=0) momentums_new = torch.cat((s_up, -1*s_down), dim=0) if sigmas is not None: sigmas = torch.cat([sigmas, sigmas_new]) else: sigmas = sigmas_new if momentums is not None: momentums = torch.cat([momentums, momentums_new]) else: momentums = momentums_new return (momentums,sigmas) class sigmas_iteration_polyexp: def __init__(self): pass @classmethod def INPUT_TYPES(s): return { "required": { "steps_up": ("INT", {"default": 30, "min": 0,"max": 10000,"step": 1}), "steps_down": ("INT", {"default": 30, "min": 0,"max": 10000,"step": 1}), "rho_up": ("FLOAT", {"default": 0.6, "min": -10000,"max": 10000,"step": 0.01}), "rho_down": ("FLOAT", {"default": 0.8, "min": -10000,"max": 10000,"step": 0.01}), "s_min_start": ("FLOAT", {"default":0.0291675, "min": -10000,"max": 10000,"step": 0.01}), "s_max": ("FLOAT", {"default": 2, "min": -10000,"max": 10000,"step": 0.01}), "s_min_end": ("FLOAT", {"default": 0.0291675, "min": -10000,"max": 10000,"step": 0.01}), }, "optional": { "momentums": ("SIGMAS", {"forceInput": False}), "sigmas": ("SIGMAS", {"forceInput": False}), } } FUNCTION = "main" RETURN_TYPES = ("SIGMAS","SIGMAS") RETURN_NAMES = ("momentums","sigmas") CATEGORY = 
class tan_scheduler:
    """Node that builds a schedule from an arctangent curve scaled to ``[end, start]``."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "steps": ("INT", {"default": 20, "min": 0,"max": 100000,"step": 1}),
                "offset": ("FLOAT", {"default": 20, "min": 0,"max": 100000,"step": 0.1}),
                "slope": ("FLOAT", {"default": 20, "min": -100000,"max": 100000,"step": 0.1}),
                "start": ("FLOAT", {"default": 20, "min": -100000,"max": 100000,"step": 0.1}),
                "end": ("FLOAT", {"default": 20, "min": -100000,"max": 100000,"step": 0.1}),
                "sgm" : ("BOOLEAN", {"default": False}),
                "pad" : ("BOOLEAN", {"default": False}),
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/schedulers"

    def main(self, steps, slope, offset, start, end, sgm, pad):
        """Sample the curve at x = 0 .. steps-1 and map it linearly onto ``[end, start]``.

        Args:
            steps: number of samples.
            slope: steepness of the arctangent.
            offset: x position of the curve's midpoint.
            start: value produced at x = 0.
            end: value produced at x = steps - 1.
            sgm: sample one extra point and drop it again.
            pad: append a trailing 0.

        Returns:
            A 1-tuple holding the schedule tensor.
        """
        def curve(x):
            # Arctangent squashed into (0, 1), decreasing in x for positive slope.
            return ((2/pi)*atan(-slope*(x-offset))+1)/2

        # Normalisation bounds are taken before any sgm adjustment.
        smax = curve(0)
        smin = curve(steps-1)

        srange = smax-smin
        sscale = start - end

        count = steps + 1 if sgm else steps

        values = []
        for x in range(count):
            values.append((curve(x) - smin) * (1/srange) * sscale + end)

        if sgm:
            # NOTE(review): the extra sample is discarded again, so the values
            # are the same as without sgm — confirm this is intended.
            values = values[:-1]

        if pad:
            values = values+[0]

        return (torch.tensor(values),)
class tan_scheduler_2stage_simple:
    """Node that joins two arctangent stages: ``start -> middle`` then ``middle -> end``.

    Pivots are given as fractions of the step count and the slopes are scaled
    by ``steps / 40``.
    """

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "steps": ("INT", {"default": 40, "min": 0,"max": 100000,"step": 1}),
                "pivot_1": ("FLOAT", {"default": 1, "min": -100000,"max": 100000,"step": 0.01}),
                "pivot_2": ("FLOAT", {"default": 1, "min": -100000,"max": 100000,"step": 0.01}),
                "slope_1": ("FLOAT", {"default": 1, "min": -100000,"max": 100000,"step": 0.01}),
                "slope_2": ("FLOAT", {"default": 1, "min": -100000,"max": 100000,"step": 0.01}),
                "start": ("FLOAT", {"default": 1.0, "min": -100000,"max": 100000,"step": 0.01}),
                "middle": ("FLOAT", {"default": 0.5, "min": -100000,"max": 100000,"step": 0.01}),
                "end": ("FLOAT", {"default": 0.0, "min": -100000,"max": 100000,"step": 0.01}),
                "pad" : ("BOOLEAN", {"default": False}),
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    RETURN_NAMES = ("sigmas",)
    CATEGORY = "RES4LYF/schedulers"

    def get_tan_sigmas(self, steps, slope, pivot, start, end):
        """Return ``steps`` arctangent samples mapped linearly onto ``[end, start]``."""
        def curve(x):
            return ((2/pi)*atan(-slope*(x-pivot))+1)/2

        smax = curve(0)
        smin = curve(steps-1)

        srange = smax-smin
        sscale = start - end

        samples = []
        for x in range(steps):
            samples.append((curve(x) - smin) * (1/srange) * sscale + end)
        return samples

    def main(self, steps, start=1.0, middle=0.5, end=0.0, pivot_1=0.6, pivot_2=0.6, slope_1=0.2, slope_2=0.2, pad=False, model_sampling=None):
        """Build the two-stage schedule.

        Args:
            steps: requested step count; two extra samples are generated and the
                duplicated junction sample of stage one is dropped.
            start, middle, end: values at the beginning, junction and end.
            pivot_1, pivot_2: stage midpoints as fractions of the step count.
            slope_1, slope_2: stage steepness, expressed for a 40-step schedule.
            pad: append a trailing 0.
            model_sampling: accepted but not used.

        Returns:
            A 1-tuple holding the schedule tensor.
        """
        steps += 2

        # The stage boundary sits at the mean of the two fractional pivots.
        midpoint = int( (steps*pivot_1 + steps*pivot_2) / 2 )
        pivot_1 = int(steps * pivot_1)
        pivot_2 = int(steps * pivot_2)

        slope_scale = steps/40
        slope_1 = slope_1 / slope_scale
        slope_2 = slope_2 / slope_scale

        second_len = steps - midpoint
        first_len = midpoint

        first_stage = self.get_tan_sigmas(first_len, slope_1, pivot_1, start, middle)
        second_stage = self.get_tan_sigmas(second_len, slope_2, pivot_2 - first_len, middle, end)

        # Stage one ends on `middle`, which stage two starts with again.
        first_stage = first_stage[:-1]
        if pad:
            second_stage = second_stage+[0]

        return (torch.tensor(first_stage + second_stage),)
class constant_scheduler:
    """Node that emits ``value_start`` up to a cutoff and ``value_end`` afterwards."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "steps": ("INT", {"default": 40, "min": 0,"max": 100000,"step": 1}),
                "value_start": ("FLOAT", {"default": 1.0, "min": -100000,"max": 100000,"step": 0.01}),
                "value_end": ("FLOAT", {"default": 0.0, "min": -100000,"max": 100000,"step": 0.01}),
                "cutoff_percent": ("FLOAT", {"default": 1.0, "min": 0,"max": 1,"step": 0.01}),
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    RETURN_NAMES = ("sigmas",)
    CATEGORY = "RES4LYF/schedulers"

    def main(self, steps, value_start, value_end, cutoff_percent):
        """Return ``steps + 1`` values: a ``value_start`` head and a ``value_end`` tail.

        The head holds ``round(steps * cutoff_percent) + 1`` entries, so at
        least the first entry is always ``value_start``.
        """
        total = steps + 1
        cutoff_step = int(round(steps * cutoff_percent)) + 1

        head = (torch.ones(total) * value_start)[:cutoff_step]
        tail = torch.ones(total - cutoff_step) * value_end

        return (torch.concat((head, tail), dim=0),)
"RES4LYF/schedulers" def create_callback(self, **kwargs): def callback(model): kwargs["model"] = model schedule, = self.prepare_schedule(**kwargs) return schedule return callback def main(self, model = None, pad_start_value : float = 1.0, start_value : float = 0.0, end_value : float = 1.0, pad_end_value = None, denoise : int = 1.0, scheduler = None, scheduler_start_step : int = 0, scheduler_end_step : int = 30, total_steps : int = 60, flip_schedule = False, ) -> Tuple[Tensor]: if model is None: callback = self.create_callback(pad_start_value = pad_start_value, start_value = start_value, end_value = end_value, pad_end_value = pad_end_value, scheduler = scheduler, start_step = scheduler_start_step, end_step = scheduler_end_step, flip_schedule = flip_schedule, ) else: default_dtype = torch.float64 default_device = torch.device("cuda") if scheduler_end_step == -1: scheduler_total_steps = total_steps - scheduler_start_step else: scheduler_total_steps = scheduler_end_step - scheduler_start_step if total_steps == -1: total_steps = scheduler_start_step + scheduler_end_step end_pad_steps = total_steps - scheduler_end_step if scheduler != "constant": values = get_sigmas(model, scheduler, scheduler_total_steps, denoise).to(dtype=default_dtype, device=default_device) values = ((values - values.min()) * (start_value - end_value)) / (values.max() - values.min()) + end_value else: values = torch.linspace(start_value, end_value, scheduler_total_steps, dtype=default_dtype, device=default_device) if flip_schedule: values = torch.flip(values, dims=[0]) prepend = torch.full((scheduler_start_step,), pad_start_value, dtype=default_dtype, device=default_device) postpend = torch.full((end_pad_steps,), pad_end_value, dtype=default_dtype, device=default_device) values = torch.cat((prepend, values, postpend), dim=0) #ositive[0][1]['callback_regional'] = callback return (values,) def prepare_schedule(self, model = None, pad_start_value : float = 1.0, start_value : float = 0.0, end_value : 
def get_sigmas_simple_exponential(model, steps):
    """Return ``steps + 1`` sigmas: a strided pick of the model's sigma table, linearly faded to 0.

    Args:
        model: Object exposing ``model_sampling.sigmas`` (an indexable sigma table).
        steps: Number of sampling steps; the result has ``steps + 1`` entries.
    """
    s = model.model_sampling
    ss = len(s.sigmas) / steps
    # Walk the table from its last entry backwards in strides of `ss`.
    sigs = [float(s.sigmas[-(1 + int(x * ss))]) for x in range(steps)]
    sigs.append(0.0)
    sigs = torch.FloatTensor(sigs)
    # exp(log(x)) round-trips the ramp, so this is a linear 1 -> 0 weight.
    exp = torch.exp(torch.log(torch.linspace(1, 0, steps + 1)))
    return sigs * exp


extra_schedulers = {
    "simple_exponential": get_sigmas_simple_exponential
}


def get_sigmas(model, scheduler, steps, denoise, shift=0.0, lq_inflection_percent=0.5, lq_threshold_noise=0.025): #adapted from comfyui
    """Compute a sigma schedule for ``model`` with the named ``scheduler``.

    Args:
        model: Patched model exposing ``get_model_object``, or a wrapper with
            ``.model`` / ``.inner_model`` from which ``model_sampling`` is read.
        scheduler: Scheduler name. ``"beta57"`` and ``"linear_quadratic"`` are
            handled here; anything else goes to ``comfy.samplers.calculate_sigmas``.
        steps: Number of steps to return (the result has ``steps + 1`` sigmas).
        denoise: Fraction of the full schedule to keep. Values below 1 compute
            ``int(steps / denoise)`` steps and keep only the tail.
        shift: When > 1e-6, applied to a deep copy of ``model_sampling``.
        lq_inflection_percent: Linear portion for ``"linear_quadratic"``.
        lq_threshold_noise: Threshold noise for ``"linear_quadratic"``.

    Returns:
        A 1-D CPU tensor of sigmas. It is empty when ``denoise <= 0``.

    Raises:
        Exception: If ``model_sampling`` cannot be located on ``model``.
    """
    total_steps = steps
    if denoise < 1.0:
        if denoise <= 0.0:
            # Return a bare tensor like every other path. The former 1-tuple
            # broke callers that index the result or call .to() on it.
            return torch.FloatTensor([])
        total_steps = int(steps/denoise)

    try:
        model_sampling = model.get_model_object("model_sampling")
    except Exception:
        # Not a patcher-style model (or the lookup failed): try known wrappers.
        if hasattr(model, "model"):
            model_sampling = model.model.model_sampling
        elif hasattr(model, "inner_model"):
            model_sampling = model.inner_model.inner_model.model_sampling
        else:
            raise Exception("get_sigmas: Could not get model_sampling")

    if shift > 1e-6:
        import copy
        # Work on a copy so the caller's model keeps its own shift.
        model_sampling = copy.deepcopy(model_sampling)
        model_sampling.set_parameters(shift=shift)
        RESplain("model_sampling shift manually set to " + str(shift), debug=True)

    if scheduler == "beta57":
        sigmas = comfy.samplers.beta_scheduler(model_sampling, total_steps, alpha=0.5, beta=0.7).cpu()
    elif scheduler == "linear_quadratic":
        linear_steps = int(total_steps * lq_inflection_percent)
        sigmas = comfy.samplers.linear_quadratic_schedule(model_sampling, total_steps, threshold_noise=lq_threshold_noise, linear_steps=linear_steps).cpu()
    else:
        sigmas = comfy.samplers.calculate_sigmas(model_sampling, scheduler, total_steps).cpu()

    # Keep only the tail that corresponds to the requested denoise fraction.
    sigmas = sigmas[-(steps + 1):]
    return sigmas
def main(self, sigmas, variant, gain, offset, normalize_output):
    """Pass ``gain * (sigmas + offset)`` through a sigmoid-family activation.

    ``variant`` picks the activation (logistic, tanh, softsign, hardswish,
    mish or swish). With ``normalize_output`` the activated values are
    min-max rescaled back onto the value range of the input ``sigmas``.
    Returns a 1-tuple holding the resulting tensor.
    """
    x = gain * (sigmas + offset)

    activations = {
        "logistic":  lambda v: 1.0 / (1.0 + torch.exp(-v)),
        "tanh":      lambda v: torch.tanh(v),
        "softsign":  lambda v: v / (1.0 + torch.abs(v)),
        "hardswish": lambda v: v * torch.minimum(torch.maximum(v + 3, torch.zeros_like(v)), torch.tensor(6.0)) / 6.0,
        "mish":      lambda v: v * torch.tanh(torch.log(1.0 + torch.exp(v))),
        "swish":     lambda v: v * torch.sigmoid(v),
    }
    if variant in activations:
        result = activations[variant](x)

    if not normalize_output:
        return (result,)

    # Min-max rescale onto [min(sigmas), max(sigmas)].
    unit = (result - result.min()) / (result.max() - result.min())
    result = unit * (sigmas.max() - sigmas.min()) + sigmas.min()
    return (result,)
normalize_input: t = (sigmas - sigmas.min()) / (sigmas.max() - sigmas.min()) else: t = torch.clamp(sigmas, 0.0, 1.0) # Apply strength t_orig = t.clone() t = t ** strength # Apply easing function based on type and mode if easing_mode == "in": result = self._ease_in(t, easing_type) elif easing_mode == "out": result = self._ease_out(t, easing_type) else: # in_out result = self._ease_in_out(t, easing_type) # Normalize output if requested if normalize_output: if normalize_input: result = ((result - result.min()) / (result.max() - result.min())) * (sigmas.max() - sigmas.min()) + sigmas.min() else: result = ((result - result.min()) / (result.max() - result.min())) return (result,) def _ease_in(self, t, easing_type): if easing_type == "sine": return 1 - torch.cos((t * math.pi) / 2) elif easing_type == "quad": return t * t elif easing_type == "cubic": return t * t * t elif easing_type == "quart": return t * t * t * t elif easing_type == "quint": return t * t * t * t * t elif easing_type == "expo": return torch.where(t == 0, torch.zeros_like(t), torch.pow(2, 10 * t - 10)) elif easing_type == "circ": return 1 - torch.sqrt(1 - torch.pow(t, 2)) elif easing_type == "back": c1 = 1.70158 c3 = c1 + 1 return c3 * t * t * t - c1 * t * t elif easing_type == "elastic": c4 = (2 * math.pi) / 3 return torch.where( t == 0, torch.zeros_like(t), torch.where( t == 1, torch.ones_like(t), -torch.pow(2, 10 * t - 10) * torch.sin((t * 10 - 10.75) * c4) ) ) elif easing_type == "bounce": return 1 - self._ease_out_bounce(1 - t) def _ease_out(self, t, easing_type): if easing_type == "sine": return torch.sin((t * math.pi) / 2) elif easing_type == "quad": return 1 - (1 - t) * (1 - t) elif easing_type == "cubic": return 1 - torch.pow(1 - t, 3) elif easing_type == "quart": return 1 - torch.pow(1 - t, 4) elif easing_type == "quint": return 1 - torch.pow(1 - t, 5) elif easing_type == "expo": return torch.where(t == 1, torch.ones_like(t), 1 - torch.pow(2, -10 * t)) elif easing_type == "circ": return 
torch.sqrt(1 - torch.pow(t - 1, 2)) elif easing_type == "back": c1 = 1.70158 c3 = c1 + 1 return 1 + c3 * torch.pow(t - 1, 3) + c1 * torch.pow(t - 1, 2) elif easing_type == "elastic": c4 = (2 * math.pi) / 3 return torch.where( t == 0, torch.zeros_like(t), torch.where( t == 1, torch.ones_like(t), torch.pow(2, -10 * t) * torch.sin((t * 10 - 0.75) * c4) + 1 ) ) elif easing_type == "bounce": return self._ease_out_bounce(t) def _ease_in_out(self, t, easing_type): if easing_type == "sine": return -(torch.cos(math.pi * t) - 1) / 2 elif easing_type == "quad": return torch.where(t < 0.5, 2 * t * t, 1 - torch.pow(-2 * t + 2, 2) / 2) elif easing_type == "cubic": return torch.where(t < 0.5, 4 * t * t * t, 1 - torch.pow(-2 * t + 2, 3) / 2) elif easing_type == "quart": return torch.where(t < 0.5, 8 * t * t * t * t, 1 - torch.pow(-2 * t + 2, 4) / 2) elif easing_type == "quint": return torch.where(t < 0.5, 16 * t * t * t * t * t, 1 - torch.pow(-2 * t + 2, 5) / 2) elif easing_type == "expo": return torch.where( t < 0.5, torch.pow(2, 20 * t - 10) / 2, (2 - torch.pow(2, -20 * t + 10)) / 2 ) elif easing_type == "circ": return torch.where( t < 0.5, (1 - torch.sqrt(1 - torch.pow(2 * t, 2))) / 2, (torch.sqrt(1 - torch.pow(-2 * t + 2, 2)) + 1) / 2 ) elif easing_type == "back": c1 = 1.70158 c2 = c1 * 1.525 return torch.where( t < 0.5, (torch.pow(2 * t, 2) * ((c2 + 1) * 2 * t - c2)) / 2, (torch.pow(2 * t - 2, 2) * ((c2 + 1) * (t * 2 - 2) + c2) + 2) / 2 ) elif easing_type == "elastic": c5 = (2 * math.pi) / 4.5 return torch.where( t < 0.5, -(torch.pow(2, 20 * t - 10) * torch.sin((20 * t - 11.125) * c5)) / 2, (torch.pow(2, -20 * t + 10) * torch.sin((20 * t - 11.125) * c5)) / 2 + 1 ) elif easing_type == "bounce": return torch.where( t < 0.5, (1 - self._ease_out_bounce(1 - 2 * t)) / 2, (1 + self._ease_out_bounce(2 * t - 1)) / 2 ) def _ease_out_bounce(self, t): n1 = 7.5625 d1 = 2.75 mask1 = t < 1 / d1 mask2 = t < 2 / d1 mask3 = t < 2.5 / d1 result = torch.zeros_like(t) result = torch.where(mask1, 
# ----- Gaussian Distribution Function -----
class sigmas_gaussian:
    """Node that reshapes a sigma schedule with Gaussian-distribution functions.

    Operations (``mean`` / ``std`` parameterise the Gaussian):
      * ``pdf``         - Gaussian density evaluated at each sigma.
      * ``cdf``         - Gaussian cumulative distribution at each sigma.
      * ``inverse_cdf`` - sigmas min-max mapped to [0.01, 0.99], then passed
                          through the Gaussian quantile function.
      * ``transform``   - z-score the sigmas, then rescale to ``mean`` / ``std``.
      * ``modulate``    - multiply the sigmas by an unnormalised Gaussian bump.
    """

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "mean": ("FLOAT", {"default": 0.0, "min": -10.0, "max": 10.0, "step": 0.01}),
                "std": ("FLOAT", {"default": 1.0, "min": 0.01, "max": 10.0, "step": 0.01}),
                "operation": (["pdf", "cdf", "inverse_cdf", "transform", "modulate"], {"default": "transform"}),
                "normalize_output": ("BOOLEAN", {"default": True})
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas, mean, std, operation, normalize_output):
        """Apply ``operation`` to ``sigmas`` and return the result as a 1-tuple.

        With ``normalize_output`` the result is min-max rescaled onto the
        value range of the input ``sigmas``.

        Raises:
            ValueError: If ``operation`` is not one of the supported names.
        """
        if operation == "pdf":
            # Probability density function
            result = (1 / (std * math.sqrt(2 * math.pi))) * torch.exp(-0.5 * ((sigmas - mean) / std) ** 2)

        elif operation == "cdf":
            # Cumulative distribution function
            result = 0.5 * (1 + torch.erf((sigmas - mean) / (std * math.sqrt(2))))

        elif operation == "inverse_cdf":
            # Quantile function. Map to [0.01, 0.99] first so erfinv never
            # sees +/-1, where it is infinite.
            normalized = ((sigmas - sigmas.min()) / (sigmas.max() - sigmas.min())) * 0.98 + 0.01
            # math.sqrt, not torch.sqrt: torch.sqrt rejects a Python number.
            result = mean + std * math.sqrt(2) * torch.erfinv(2 * normalized - 1)

        elif operation == "transform":
            # Standardise (z-score), then impose the requested mean and std.
            z = (sigmas - sigmas.mean()) / sigmas.std()
            result = z * std + mean

        elif operation == "modulate":
            # Modulate with a Gaussian curve centered at mean
            result = sigmas * torch.exp(-0.5 * ((sigmas - mean) / std) ** 2)

        else:
            raise ValueError(f"sigmas_gaussian: unknown operation {operation!r}")

        if normalize_output:
            # Normalize to [min(sigmas), max(sigmas)]
            result = ((result - result.min()) / (result.max() - result.min())) * (sigmas.max() - sigmas.min()) + sigmas.min()

        return (result,)
# ----- Kernel Smooth Function -----
class sigmas_kernel_smooth:
    """Node that smooths a sigma schedule by convolving it with a 1-D kernel."""

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "kernel": (["gaussian", "box", "triangle", "epanechnikov", "cosine"], {"default": "gaussian"}),
                "kernel_size": ("INT", {"default": 5, "min": 3, "max": 51, "step": 2}),  # Must be odd
                "sigma": ("FLOAT", {"default": 1.0, "min": 0.1, "max": 10.0, "step": 0.1}),
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas, kernel, kernel_size, sigma):
        """Return ``sigmas`` smoothed with the chosen kernel (same length, 1-tuple).

        Args:
            sigmas: 1-D schedule tensor.
            kernel: Kernel shape name.
            kernel_size: Window width; even values are bumped to the next odd.
            sigma: Standard deviation, used by the "gaussian" kernel only.

        Raises:
            ValueError: If ``kernel`` is not one of the supported names.
        """
        n = sigmas.shape[-1]
        if n == 0:
            return (sigmas,)

        # Ensure kernel_size is odd
        if kernel_size % 2 == 0:
            kernel_size += 1
        half = kernel_size // 2
        like = {"device": sigmas.device, "dtype": sigmas.dtype}

        if kernel == "gaussian":
            kernel_1d = self._gaussian_kernel(kernel_size, sigma, **like)
        elif kernel == "box":
            kernel_1d = torch.ones(kernel_size, **like) / kernel_size
        elif kernel in ("triangle", "epanechnikov", "cosine"):
            # Integer sample offsets -half..half, rescaled to [-1, 1].
            x = torch.linspace(-half, half, kernel_size, **like) / half
            if kernel == "triangle":
                kernel_1d = 1.0 - torch.abs(x)
            elif kernel == "epanechnikov":
                kernel_1d = 0.75 * (1 - x**2)
            else:
                kernel_1d = torch.cos(x * (math.pi / 2))
            kernel_1d = kernel_1d / kernel_1d.sum()
        else:
            raise ValueError(f"sigmas_kernel_smooth: unknown kernel {kernel!r}")

        # Reflect padding needs pad < length, so fall back to edge replication
        # for schedules that are shorter than the kernel half-width.
        pad_mode = "reflect" if half < n else "replicate"
        padded = F.pad(sigmas.reshape(1, 1, -1), (half, half), mode=pad_mode)

        smoothed = F.conv1d(padded, kernel_1d.reshape(1, 1, -1))

        # Index instead of squeeze() so a length-1 schedule stays 1-D.
        return (smoothed[0, 0],)

    def _gaussian_kernel(self, kernel_size, sigma, device=None, dtype=None):
        """Return a normalised 1-D Gaussian kernel of ``kernel_size`` taps.

        ``device`` / ``dtype`` let the kernel match the tensor it is convolved
        with; a mismatch makes ``conv1d`` fail.
        """
        x = torch.linspace(-(kernel_size//2), kernel_size//2, kernel_size, device=device, dtype=dtype)
        kernel = torch.exp(-x**2 / (2*sigma**2))
        return kernel / kernel.sum()
# ----- Adaptive Step Function -----
class sigmas_adaptive_step:
    """Node that re-spaces a sigma schedule with data-dependent step sizes.

    A relative step size is derived for every interval of the input schedule
    (smaller where the chosen measure is large), scaled to
    ``[min_step, max_step]`` and accumulated into a monotone parameterisation.
    With ``target_steps == 0`` the re-spaced positions (spanning
    ``sigmas[0]`` to ``sigmas[-1]``) are returned. Otherwise the input sigmas
    are linearly resampled at ``target_steps`` uniform points of that
    parameterisation.
    """

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "adaptation_type": (["gradient", "curvature", "importance", "density"], {"default": "gradient"}),
                "sensitivity": ("FLOAT", {"default": 1.0, "min": 0.1, "max": 10.0, "step": 0.1}),
                "min_step": ("FLOAT", {"default": 0.01, "min": 0.0001, "max": 1.0, "step": 0.01}),
                "max_step": ("FLOAT", {"default": 1.0, "min": 0.01, "max": 10.0, "step": 0.01}),
                "target_steps": ("INT", {"default": 0, "min": 0, "max": 1000, "step": 1}),
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    @staticmethod
    def _unit_range(values):
        """Min-max normalise ``values`` to [0, 1]; all ones when they are constant."""
        lo, hi = values.min(), values.max()
        if hi > lo:
            return (values - lo) / (hi - lo)
        return torch.ones_like(values)

    def main(self, sigmas, adaptation_type, sensitivity, min_step, max_step, target_steps):
        """Return the adaptively re-spaced (and optionally resampled) schedule.

        Raises:
            ValueError: If ``adaptation_type`` is not a supported name.
        """
        n = len(sigmas)
        if n <= 1:
            return (sigmas,)

        # One relative step size per interval (n - 1 values).
        if adaptation_type == "gradient":
            # Smaller steps where the first difference is large.
            grads = torch.abs(sigmas[1:] - sigmas[:-1])
            step_sizes = 1.0 / (1.0 + self._unit_range(grads) * sensitivity)

        elif adaptation_type == "curvature":
            if n >= 3:
                second_diff = sigmas[2:] - 2*sigmas[1:-1] + sigmas[:-2]
                # Repeat the last value to get n - 1 entries. F.pad's
                # 'replicate' mode does not accept 1-D input.
                second_diff = torch.cat([second_diff, second_diff[-1:]])
            else:
                second_diff = torch.zeros_like(sigmas[:-1])
            step_sizes = 1.0 / (1.0 + self._unit_range(torch.abs(second_diff)) * sensitivity)

        elif adaptation_type == "importance":
            # Values far from the mean count as important.
            importance = self._unit_range(torch.abs(sigmas - sigmas.mean()))
            step_sizes = 1.0 / (1.0 + importance[:-1] * sensitivity)

        elif adaptation_type == "density":
            # Histogram approximation of the value density.
            sigma_min, sigma_max = sigmas.min(), sigmas.max()
            bins = 20
            if sigma_max > sigma_min:
                hist = torch.histc(sigmas, bins=bins, min=sigma_min.item(), max=sigma_max.item())
                hist = hist / hist.sum()
                # Same equal-width binning as histc: scale by `bins` and
                # clamp so the maximum lands in the last bin.
                bin_indices = torch.floor((sigmas - sigma_min) / (sigma_max - sigma_min) * bins).long()
                bin_indices = torch.clamp(bin_indices, 0, bins-1)
                densities = hist[bin_indices]
            else:
                # A constant schedule has all its mass in one bin.
                densities = torch.ones_like(sigmas)
            step_sizes = 1.0 / (1.0 + densities[:-1] * sensitivity)

        else:
            raise ValueError(f"sigmas_adaptive_step: unknown adaptation_type {adaptation_type!r}")

        # Scale step sizes to [min_step, max_step]
        if step_sizes.max() > step_sizes.min():
            step_sizes = self._unit_range(step_sizes) * (max_step - min_step) + min_step
        else:
            step_sizes = torch.ones_like(step_sizes) * min_step

        # Cumulative, normalised parameter u in [0, 1] with u[k] belonging to sigmas[k].
        u = torch.cat([torch.zeros(1, device=step_sizes.device, dtype=step_sizes.dtype),
                       torch.cumsum(step_sizes, dim=0)])
        if u[-1] > 0:
            u = u / u[-1]
        else:
            # Degenerate all-zero steps: fall back to uniform spacing.
            u = torch.linspace(0.0, 1.0, n, device=sigmas.device, dtype=sigmas.dtype)

        if target_steps <= 0:
            # Positions re-spaced between the first and last sigma.
            return (u * (sigmas[-1] - sigmas[0]) + sigmas[0],)

        # Resample in the ascending u-domain so searchsorted stays valid for
        # descending schedules as well.
        u_new = torch.linspace(0.0, 1.0, target_steps, device=sigmas.device, dtype=sigmas.dtype)
        idx = torch.searchsorted(u, u_new).clamp(1, n - 1)
        width = u[idx] - u[idx - 1]
        t = torch.where(width > 0, (u_new - u[idx - 1]) / width, torch.zeros_like(width))
        # Interpolate between the two enclosing sigmas.
        new_sigmas = sigmas[idx - 1] * (1 - t) + sigmas[idx] * t

        return (new_sigmas,)
{"forceInput": True}), "system": (["logistic", "henon", "tent", "sine", "cubic"], {"default": "logistic"}), "parameter": ("FLOAT", {"default": 3.9, "min": 0.1, "max": 5.0, "step": 0.01}), "iterations": ("INT", {"default": 10, "min": 1, "max": 100, "step": 1}), "normalize_output": ("BOOLEAN", {"default": True}), "use_as_seed": ("BOOLEAN", {"default": False}) } } FUNCTION = "main" RETURN_TYPES = ("SIGMAS",) CATEGORY = "RES4LYF/sigmas" def main(self, sigmas, system, parameter, iterations, normalize_output, use_as_seed): # Normalize input to [0,1] for chaotic maps if use_as_seed: # Use input as initial seed x = (sigmas - sigmas.min()) / (sigmas.max() - sigmas.min()) else: # Use single initial value and apply iterations x = torch.zeros_like(sigmas) for i in range(len(sigmas)): # Use i/len as initial value for variety x[i] = i / len(sigmas) # Apply chaos map iterations for _ in range(iterations): if system == "logistic": # Logistic map: x_{n+1} = r * x_n * (1 - x_n) x = parameter * x * (1 - x) elif system == "henon": # Simplified 1D version of Henon map x = 1 - parameter * x**2 elif system == "tent": # Tent map x = torch.where(x < 0.5, parameter * x, parameter * (1 - x)) elif system == "sine": # Sine map: x_{n+1} = r * sin(pi * x_n) x = parameter * torch.sin(math.pi * x) elif system == "cubic": # Cubic map: x_{n+1} = r * x_n * (1 - x_n^2) x = parameter * x * (1 - x**2) # Normalize output if requested if normalize_output: result = ((x - x.min()) / (x.max() - x.min())) * (sigmas.max() - sigmas.min()) + sigmas.min() else: result = x return (result,) # ----- Reaction Diffusion Function ----- class sigmas_reaction_diffusion: def __init__(self): pass @classmethod def INPUT_TYPES(s): return { "required": { "sigmas": ("SIGMAS", {"forceInput": True}), "system": (["gray_scott", "fitzhugh_nagumo", "brusselator"], {"default": "gray_scott"}), "iterations": ("INT", {"default": 10, "min": 1, "max": 100, "step": 1}), "dt": ("FLOAT", {"default": 0.1, "min": 0.01, "max": 1.0, "step": 
0.01}), "param_a": ("FLOAT", {"default": 0.04, "min": 0.01, "max": 0.1, "step": 0.001}), "param_b": ("FLOAT", {"default": 0.06, "min": 0.01, "max": 0.1, "step": 0.001}), "diffusion_a": ("FLOAT", {"default": 0.1, "min": 0.01, "max": 1.0, "step": 0.01}), "diffusion_b": ("FLOAT", {"default": 0.05, "min": 0.01, "max": 1.0, "step": 0.01}), "normalize_output": ("BOOLEAN", {"default": True}) } } FUNCTION = "main" RETURN_TYPES = ("SIGMAS",) CATEGORY = "RES4LYF/sigmas" def main(self, sigmas, system, iterations, dt, param_a, param_b, diffusion_a, diffusion_b, normalize_output): # Initialize a and b based on sigmas a = (sigmas - sigmas.min()) / (sigmas.max() - sigmas.min()) b = 1.0 - a # Pad for diffusion calculation (periodic boundary) a_pad = F.pad(a.unsqueeze(0).unsqueeze(0), (1, 1), mode='circular').squeeze() b_pad = F.pad(b.unsqueeze(0).unsqueeze(0), (1, 1), mode='circular').squeeze() # Simple 1D reaction-diffusion for _ in range(iterations): # Compute Laplacian (diffusion term) as second derivative laplacian_a = a_pad[:-2] + a_pad[2:] - 2 * a laplacian_b = b_pad[:-2] + b_pad[2:] - 2 * b if system == "gray_scott": # Gray-Scott model for pattern formation # a is "U" (activator), b is "V" (inhibitor) feed = 0.055 # feed rate kill = 0.062 # kill rate # Update equations a_new = a + dt * (diffusion_a * laplacian_a - a * b**2 + feed * (1 - a)) b_new = b + dt * (diffusion_b * laplacian_b + a * b**2 - (feed + kill) * b) elif system == "fitzhugh_nagumo": # FitzHugh-Nagumo model (simplified) # a is the membrane potential, b is the recovery variable # Update equations a_new = a + dt * (diffusion_a * laplacian_a + a - a**3 - b + param_a) b_new = b + dt * (diffusion_b * laplacian_b + param_b * (a - b)) elif system == "brusselator": # Brusselator model # a is U, b is V # Update equations a_new = a + dt * (diffusion_a * laplacian_a + 1 - (param_b + 1) * a + param_a * a**2 * b) b_new = b + dt * (diffusion_b * laplacian_b + param_b * a - param_a * a**2 * b) # Update and repad a, b = 
a_new, b_new a_pad = F.pad(a.unsqueeze(0).unsqueeze(0), (1, 1), mode='circular').squeeze() b_pad = F.pad(b.unsqueeze(0).unsqueeze(0), (1, 1), mode='circular').squeeze() # Use the activator component as the result result = a # Normalize output if requested if normalize_output: result = ((result - result.min()) / (result.max() - result.min())) * (sigmas.max() - sigmas.min()) + sigmas.min() return (result,) # ----- Attractor Function ----- class sigmas_attractor: def __init__(self): pass @classmethod def INPUT_TYPES(s): return { "required": { "sigmas": ("SIGMAS", {"forceInput": True}), "attractor": (["lorenz", "rossler", "aizawa", "chen", "thomas"], {"default": "lorenz"}), "iterations": ("INT", {"default": 5, "min": 1, "max": 50, "step": 1}), "dt": ("FLOAT", {"default": 0.01, "min": 0.001, "max": 0.1, "step": 0.001}), "component": (["x", "y", "z", "magnitude"], {"default": "x"}), "normalize_output": ("BOOLEAN", {"default": True}) } } FUNCTION = "main" RETURN_TYPES = ("SIGMAS",) CATEGORY = "RES4LYF/sigmas" def main(self, sigmas, attractor, iterations, dt, component, normalize_output): # Initialize 3D state from sigmas n = len(sigmas) # Normalize sigmas to a reasonable range for the attractor norm_sigmas = (sigmas - sigmas.min()) / (sigmas.max() - sigmas.min()) * 2.0 - 1.0 # Create initial state x = norm_sigmas y = torch.roll(norm_sigmas, 1) # Shifted version for variety z = torch.roll(norm_sigmas, 2) # Another shifted version # Parameters for the attractors if attractor == "lorenz": sigma, rho, beta = 10.0, 28.0, 8.0/3.0 elif attractor == "rossler": a, b, c = 0.2, 0.2, 5.7 elif attractor == "aizawa": a, b, c, d, e, f = 0.95, 0.7, 0.6, 3.5, 0.25, 0.1 elif attractor == "chen": a, b, c = 5.0, -10.0, -0.38 elif attractor == "thomas": b = 0.208186 # Run the attractor dynamics for _ in range(iterations): if attractor == "lorenz": # Lorenz attractor dx = sigma * (y - x) dy = x * (rho - z) - y dz = x * y - beta * z elif attractor == "rossler": # Rössler attractor dx = -y - z 
# ----- Catmull-Rom Spline -----
class sigmas_catmull_rom:
    """Node that resamples a sigma schedule to ``points`` values along a cardinal spline.

    The input sigmas are the spline knots, so the curve passes through each
    of them. ``tension = 0.5`` gives the standard Catmull-Rom spline, and
    ``tension = 1`` gives zero tangents at the knots.
    """

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "tension": ("FLOAT", {"default": 0.5, "min": 0.0, "max": 1.0, "step": 0.01}),
                "points": ("INT", {"default": 100, "min": 5, "max": 1000, "step": 5}),
                "boundary_condition": (["repeat", "clamp", "mirror"], {"default": "clamp"})
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas, tension, points, boundary_condition):
        """Return ``points`` spline samples spanning the whole schedule (1-tuple).

        Args:
            sigmas: 1-D knot values.
            tension: Cardinal-spline tension; the tangent scale is ``1 - tension``.
            points: Number of output samples.
            boundary_condition: How the two phantom end control points are
                built. "repeat" duplicates the end knots, "clamp" extrapolates
                linearly past them, and "mirror" reuses the second and
                second-to-last knots.

        Raises:
            ValueError: If ``boundary_condition`` is not a supported name.
        """
        n = len(sigmas)
        if n == 0:
            return (sigmas,)
        if n == 1:
            # A single knot can only yield a constant schedule.
            return (sigmas[:1].expand(points).clone(),)

        # Global parameter in [0, n-1] per sample. max() guards points == 1.
        last = max(points - 1, 1)
        params = [i * (n - 1) / last for i in range(points)]
        segments = [min(int(u), n - 2) for u in params]
        idx = torch.tensor(segments, device=sigmas.device, dtype=torch.long)
        # Local parameter in [0, 1] within each segment.
        t = torch.tensor([u - k for u, k in zip(params, segments)],
                         device=sigmas.device, dtype=sigmas.dtype)

        # Fewer than 4 knots: plain linear interpolation.
        if n < 4:
            return ((1 - t) * sigmas[idx] + t * sigmas[idx + 1],)

        if boundary_condition == "repeat":
            first, final = sigmas[0], sigmas[-1]
        elif boundary_condition == "clamp":
            first = 2 * sigmas[0] - sigmas[1]
            final = 2 * sigmas[-1] - sigmas[-2]
        elif boundary_condition == "mirror":
            first, final = sigmas[1], sigmas[-2]
        else:
            raise ValueError(f"sigmas_catmull_rom: unknown boundary_condition {boundary_condition!r}")

        # Knots plus one phantom control point at each end.
        control_points = torch.cat([first.reshape(1), sigmas, final.reshape(1)])

        p0 = control_points[idx]
        p1 = control_points[idx + 1]
        p2 = control_points[idx + 2]
        p3 = control_points[idx + 3]

        # Tangent scale of the cardinal spline (0.5 == Catmull-Rom).
        alpha = 1.0 - tension
        t2 = t * t
        t3 = t2 * t

        # The four basis weights sum to 1, so no extra scaling is applied.
        # The previous trailing `* 0.5` halved every output value.
        result = (
            (-alpha * t3 + 2 * alpha * t2 - alpha * t) * p0 +
            ((2 - alpha) * t3 + (alpha - 3) * t2 + 1) * p1 +
            ((alpha - 2) * t3 + (3 - 2 * alpha) * t2 + alpha * t) * p2 +
            (alpha * t3 - alpha * t2) * p3
        )

        return (result,)
# ----- Zeta & Eta Functions -----
class sigmas_zeta_eta:
    """Node that maps a sigma schedule through zeta-family special functions.

    The argument is ``s = sigmas * scale + offset``. Supported functions:
      * ``riemann_zeta``  - evaluated per element with ``scipy.special.zeta``.
      * ``dirichlet_eta`` - truncated alternating series sum((-1)^(n+1) / n^s).
      * ``lerch_phi``     - truncated Lerch transcendent with z = 0.5, a = 1.
    """

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "function": (["riemann_zeta", "dirichlet_eta", "lerch_phi"], {"default": "riemann_zeta"}),
                "offset": ("FLOAT", {"default": 0.0, "min": -10.0, "max": 10.0, "step": 0.1}),
                "scale": ("FLOAT", {"default": 1.0, "min": 0.01, "max": 10.0, "step": 0.01}),
                "normalize_output": ("BOOLEAN", {"default": True}),
                "approx_terms": ("INT", {"default": 100, "min": 10, "max": 1000, "step": 10})
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas, function, offset, scale, normalize_output, approx_terms):
        """Evaluate ``function`` on the scaled sigmas and return a 1-tuple.

        Non-finite values are replaced (+inf -> 1e10, -inf -> -1e10,
        NaN -> 0) before the optional min-max rescale onto the value range
        of the input ``sigmas``.

        Raises:
            ValueError: If ``function`` is not a supported name.
        """
        # Apply offset and scaling
        s = sigmas * scale + offset

        if function == "riemann_zeta":
            # scipy works on CPU arrays, so evaluate element by element there.
            s_cpu = s.detach().cpu().numpy()
            result_np = np.zeros_like(s_cpu)

            for i, si in enumerate(s_cpu):
                if si == 1.0:
                    # zeta(1) is the harmonic series, which diverges.
                    result_np[i] = float('inf')
                elif si < 0 and si == int(si) and int(si) % 2 == 0:
                    # Trivial zeros at the negative even integers.
                    result_np[i] = 0.0
                else:
                    try:
                        result_np[i] = float(special.zeta(si))
                    except (ValueError, OverflowError):
                        if si > 1:
                            # Truncated Dirichlet series, valid for s > 1.
                            result_np[i] = sum(1.0 / np.power(n, si) for n in range(1, approx_terms + 1))
                        else:
                            # No cheap fallback for s <= 1; use 0.
                            result_np[i] = 0.0

            result = torch.tensor(result_np, device=sigmas.device, dtype=sigmas.dtype)

        elif function == "dirichlet_eta":
            # eta(s) = sum((-1)^(n+1) / n^s, n = 1 .. approx_terms)
            n = torch.arange(1, approx_terms + 1, device=s.device)
            sign = (1 - 2 * ((n + 1) % 2)).to(s.dtype)  # +1, -1, +1, ...
            # Put the term index on a new leading axis and broadcast over s.
            shape = (-1,) + (1,) * s.dim()
            terms = sign.reshape(shape) * n.to(s.dtype).reshape(shape) ** (-s)
            result = terms.sum(dim=0)

        elif function == "lerch_phi":
            # Phi(z, s, a) = sum(z^n / (n + a)^s, n = 0 .. approx_terms - 1)
            z, a = 0.5, 1.0
            n = torch.arange(approx_terms, device=s.device, dtype=s.dtype)
            n = n.reshape((-1,) + (1,) * s.dim())
            # `z ** n`, not torch.pow(z, i) with two Python scalars, which raises.
            result = (z ** n / (n + a) ** s).sum(dim=0)

        else:
            raise ValueError(f"sigmas_zeta_eta: unknown function {function!r}")

        # sign(NaN) is NaN, so the former torch.where/sign trick let NaNs through.
        result = torch.nan_to_num(result, nan=0.0, posinf=1e10, neginf=-1e10)

        if normalize_output:
            # Normalize to [min(sigmas), max(sigmas)]
            result = ((result - result.min()) / (result.max() - result.min())) * (sigmas.max() - sigmas.min()) + sigmas.min()

        return (result,)
result_np[i] = float('inf') elif function == "beta": # Beta function B(a, x) result_np = np.zeros_like(x_np) for i, xi in enumerate(x_np): try: result_np[i] = float(special.beta(parameter_a, xi)) except (ValueError, OverflowError): # Handle cases where beta is undefined result_np[i] = float('inf') elif function == "incomplete_gamma": # Regularized incomplete gamma function P(a, x) result_np = np.zeros_like(x_np) for i, xi in enumerate(x_np): if xi < 0: # Undefined for negative x result_np[i] = 0.0 else: try: result_np[i] = float(special.gammainc(parameter_a, xi)) except (ValueError, OverflowError): result_np[i] = 1.0 # Approach 1 for large x elif function == "incomplete_beta": # Regularized incomplete beta function I(x; a, b) result_np = np.zeros_like(x_np) for i, xi in enumerate(x_np): # Clamp to [0,1] for domain of incomplete beta xi_clamped = min(max(xi, 0), 1) try: result_np[i] = float(special.betainc(parameter_a, parameter_b, xi_clamped)) except (ValueError, OverflowError): result_np[i] = 0.5 # Default for errors # Convert back to tensor result = torch.tensor(result_np, device=sigmas.device, dtype=sigmas.dtype) # Replace infinities and NaNs result = torch.where(torch.isfinite(result), result, torch.sign(result) * 1e10) # Normalize output if requested if normalize_output: # Handle cases where result has infinities if torch.isinf(result).any() or torch.isnan(result).any(): # Replace inf/nan with max/min finite values max_val = torch.max(result[torch.isfinite(result)]) if torch.any(torch.isfinite(result)) else 1e10 min_val = torch.min(result[torch.isfinite(result)]) if torch.any(torch.isfinite(result)) else -1e10 result = torch.where(torch.isinf(result) & (result > 0), max_val, result) result = torch.where(torch.isinf(result) & (result < 0), min_val, result) result = torch.where(torch.isnan(result), (max_val + min_val) / 2, result) # Now normalize result = ((result - result.min()) / (result.max() - result.min())) * (sigmas.max() - sigmas.min()) + sigmas.min() 
class sigmas_arcsine:
    """Node applying ``asin`` element-wise to a sigma schedule.

    Inputs are clamped to the arcsine domain [-1, 1] before the function is
    evaluated; the result can optionally be mapped from [-pi/2, pi/2] onto
    [out_min, out_max].
    """

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "normalize_input": ("BOOLEAN", {"default": True}),
                "scale_output": ("BOOLEAN", {"default": True}),
                "out_min": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.01}),
                "out_max": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.01})
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas, normalize_input, scale_output, out_min, out_max):
        """Return ``asin(clamp(sigmas, -1, 1))``, optionally rescaled.

        Args:
            sigmas: Tensor of sigma values.
            normalize_input: Accepted for interface compatibility.
                NOTE(review): both settings clamp to [-1, 1]; the flag does
                not currently change the result - confirm intended behaviour.
            scale_output: If True, map the angle range [-pi/2, pi/2] linearly
                onto [out_min, out_max].
            out_min: Lower bound of the rescaled output.
            out_max: Upper bound of the rescaled output.

        Returns:
            One-element tuple holding the transformed tensor.
        """
        angles = torch.asin(torch.clamp(sigmas, -1.0, 1.0))

        if not scale_output:
            return (angles,)

        # [-pi/2, pi/2] -> [0, 1] -> [out_min, out_max]
        unit = (angles + math.pi/2) / math.pi
        return (unit * (out_max - out_min) + out_min,)
class sigmas_arccosine:
    """Node applying ``acos`` element-wise to a sigma schedule.

    Inputs are clamped to the arccosine domain [-1, 1]; the result can
    optionally be mapped from [0, pi] onto [out_min, out_max].
    """

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "normalize_input": ("BOOLEAN", {"default": True}),
                "scale_output": ("BOOLEAN", {"default": True}),
                "out_min": ("FLOAT", {"default": 0.0, "min": -10000.0, "max": 10000.0, "step": 0.01}),
                "out_max": ("FLOAT", {"default": 1.0, "min": -10000.0, "max": 10000.0, "step": 0.01})
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas, normalize_input, scale_output, out_min, out_max):
        """Return ``acos(clamp(sigmas, -1, 1))``, optionally rescaled.

        Args:
            sigmas: Tensor of sigma values.
            normalize_input: Accepted for interface compatibility.
                NOTE(review): both settings clamp to [-1, 1]; the flag does
                not currently change the result - confirm intended behaviour.
            scale_output: If True, map the angle range [0, pi] linearly onto
                [out_min, out_max].
            out_min: Lower bound of the rescaled output.
            out_max: Upper bound of the rescaled output.

        Returns:
            One-element tuple holding the transformed tensor.
        """
        angles = torch.acos(torch.clamp(sigmas, -1.0, 1.0))

        if not scale_output:
            return (angles,)

        # [0, pi] -> [0, 1] -> [out_min, out_max]
        unit = angles / math.pi
        return (unit * (out_max - out_min) + out_min,)
class sigmas_crossproduct:
    """Node returning the 3-D cross product of two sigma sequences.

    Only the first three entries of each input take part; inputs shorter
    than three entries are zero-padded on the right.
    """

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas_a": ("SIGMAS", {"forceInput": True}),
                "sigmas_b": ("SIGMAS", {"forceInput": True}),
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas_a, sigmas_b):
        """Compute ``a x b`` from the leading three entries of each input.

        Args:
            sigmas_a: First operand (1-D tensor).
            sigmas_b: Second operand (1-D tensor).

        Returns:
            One-element tuple holding a length-3 tensor with the dtype and
            device of ``sigmas_a``.
        """
        def leading_triple(vec):
            # Right-pad with zeros up to three entries, then truncate.
            shortfall = 3 - len(vec)
            if shortfall > 0:
                vec = torch.nn.functional.pad(vec, (0, shortfall))
            return vec[:3]

        ax, ay, az = leading_triple(sigmas_a)
        bx, by, bz = leading_triple(sigmas_b)

        components = torch.stack([
            ay * bz - az * by,
            az * bx - ax * bz,
            ax * by - ay * bx,
        ])
        return (components.to(device=sigmas_a.device, dtype=sigmas_a.dtype),)
class sigmas_if:
    """Node selecting element-wise between two sigma sequences.

    Each element of ``condition_sigmas`` is compared against ``threshold``;
    where the comparison holds the value comes from ``true_sigmas``,
    otherwise from ``false_sigmas``.  Inputs of different length are linearly
    resampled to the longest of the three before comparing.
    """

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "condition_sigmas": ("SIGMAS", {"forceInput": True}),
                "true_sigmas": ("SIGMAS", {"forceInput": True}),
                "false_sigmas": ("SIGMAS", {"forceInput": True}),
                "threshold": ("FLOAT", {"default": 0.5, "min": -10000.0, "max": 10000.0, "step": 0.01}),
                "comp_type": (["greater", "less", "equal", "not_equal"], {"default": "greater"})
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    @staticmethod
    def _resize(values, length):
        """Linearly resample a 1-D tensor to ``length`` entries (no-op if equal)."""
        if len(values) == length:
            return values
        return torch.nn.functional.interpolate(
            values.unsqueeze(0).unsqueeze(0),
            size=length,
            mode='linear'
        ).squeeze(0).squeeze(0)

    def main(self, condition_sigmas, true_sigmas, false_sigmas, threshold, comp_type):
        """Build the mask and pick values from the two branches.

        Args:
            condition_sigmas: Values compared against ``threshold``.
            true_sigmas: Values used where the comparison is true.
            false_sigmas: Values used where the comparison is false.
            threshold: Scalar to compare against.
            comp_type: "greater", "less", "equal" or "not_equal".  The two
                equality modes use ``torch.isclose`` rather than exact
                comparison.

        Returns:
            One-element tuple holding the selected tensor.

        Raises:
            ValueError: If ``comp_type`` is not one of the supported names.
        """
        max_length = max(len(condition_sigmas), len(true_sigmas), len(false_sigmas))

        condition_sigmas = self._resize(condition_sigmas, max_length)
        true_sigmas = self._resize(true_sigmas, max_length)
        false_sigmas = self._resize(false_sigmas, max_length)

        if comp_type == "greater":
            mask = condition_sigmas > threshold
        elif comp_type == "less":
            mask = condition_sigmas < threshold
        elif comp_type in ("equal", "not_equal"):
            # torch.isclose requires both operands to share a dtype, so the
            # threshold tensor is created to match the condition tensor
            # (a default float32 tensor fails against float64 sigmas).
            target = torch.full_like(condition_sigmas, threshold)
            mask = torch.isclose(condition_sigmas, target)
            if comp_type == "not_equal":
                mask = ~mask
        else:
            raise ValueError(f"Unknown comp_type: {comp_type!r}")

        return (torch.where(mask, true_sigmas, false_sigmas),)
class sigmas_squareroot:
    """Node taking the element-wise square root of a sigma schedule.

    Negative entries are either mapped to ``-sqrt(-x)`` (odd extension) or
    clamped to zero before the root is taken, depending on
    ``handle_negative``.
    """

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "handle_negative": ("BOOLEAN", {"default": False})
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas, handle_negative):
        """Return the square root of ``sigmas``.

        Args:
            sigmas: Tensor of sigma values.
            handle_negative: If True, negative entries yield ``-sqrt(-x)``;
                if False, they are clamped to 0 first and therefore yield 0.

        Returns:
            One-element tuple holding the transformed tensor.
        """
        if not handle_negative:
            return (torch.clamp(sigmas, min=0).sqrt(),)

        # Both candidate roots are evaluated everywhere; where() keeps the
        # one that is defined for the sign of each entry.
        is_negative = sigmas < 0
        signed_root = torch.where(is_negative, -torch.sqrt(-sigmas), torch.sqrt(sigmas))
        return (signed_root,)
class sigmas_stepwise_multirate:
    """Node generating a piecewise sigma schedule with a power curve per segment.

    The ``steps`` positions are split into one segment per entry of ``rates``.
    Within a segment the value moves from the segment start to the segment
    end along ``t ** rate`` for ``t`` in [0, 1].
    """

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "steps": ("INT", {"default": 30, "min": 1, "max": 1000, "step": 1}),
                "rates": ("STRING", {"default": "1.0,0.5,0.25", "multiline": False}),
                "boundaries": ("STRING", {"default": "0.3,0.7", "multiline": False}),
                "start_value": ("FLOAT", {"default": 10.0, "min": 0.0, "max": 100.0, "step": 0.1}),
                "end_value": ("FLOAT", {"default": 0.01, "min": 0.0, "max": 100.0, "step": 0.01}),
                "pad_end": ("BOOLEAN", {"default": True})
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    @staticmethod
    def _parse_floats(text):
        """Parse a comma-separated list of floats, skipping blank entries."""
        return [float(token) for token in text.split(',') if token.strip()]

    def main(self, steps, rates, boundaries, start_value, end_value, pad_end):
        """Build the multi-rate schedule.

        Args:
            steps: Number of schedule entries (before optional padding).
            rates: Comma-separated exponents, one per segment.  An empty
                string is treated as a single segment with rate 1.0.
            boundaries: Comma-separated fractions of ``steps`` at which the
                segments change.  If the count is not ``len(rates) - 1`` the
                segments are made equal in size instead.
            start_value: Value at the first step.
            end_value: Value at the end of the last segment.
            pad_end: If True, append a trailing 0.0.

        Returns:
            One-element tuple holding the schedule tensor.

        Raises:
            ValueError: If a non-blank entry of ``rates`` or ``boundaries``
                is not a valid float.
        """
        # Blank entries are skipped so that "" (no boundaries, needed for a
        # single-rate schedule) or a trailing comma does not raise.
        rates_list = self._parse_floats(rates) or [1.0]
        boundaries_list = self._parse_floats(boundaries)

        if len(boundaries_list) != len(rates_list) - 1:
            # Fall back to equal-size segments.
            boundaries_list = [i / len(rates_list) for i in range(1, len(rates_list))]

        # Clamp to [0, steps] so an out-of-range boundary cannot produce a
        # segment longer than the slice it is written into.
        boundary_indices = [min(max(int(b * steps), 0), steps) for b in boundaries_list]

        result = torch.zeros(steps)
        last_segment = len(rates_list) - 1

        current_idx = 0
        for i, rate in enumerate(rates_list):
            next_idx = boundary_indices[i] if i < len(boundary_indices) else steps
            segment_length = next_idx - current_idx
            if segment_length <= 0:
                continue

            # Continue from the previously written value.  When nothing has
            # been written yet (earlier segments were empty) start from
            # start_value instead of reading result[-1].
            segment_start = start_value if current_idx == 0 else result[current_idx - 1]
            segment_end = end_value if i == last_segment else start_value * (1 - boundaries_list[i])

            t = torch.linspace(0, 1, segment_length)
            result[current_idx:next_idx] = segment_start + (segment_end - segment_start) * (t ** rate)

            current_idx = next_idx

        if pad_end:
            result = torch.cat([result, torch.tensor([0.0])])

        return (result,)
class sigmas_adaptive_noise_floor:
    """Node raising sigma values that fall below a locally adapted floor.

    For every position ``i >= window_size`` the floor is
    ``min_noise_level + adaptation_factor * var / (mean + 1e-6)``, where
    ``var`` and ``mean`` are taken over the ``window_size`` input values
    preceding ``i``.  The first ``window_size`` entries are passed through
    unchanged.
    """

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS", {"forceInput": True}),
                "min_noise_level": ("FLOAT", {"default": 0.01, "min": 0.0, "max": 1.0, "step": 0.001}),
                "adaptation_factor": ("FLOAT", {"default": 0.5, "min": 0.0, "max": 1.0, "step": 0.01}),
                "window_size": ("INT", {"default": 3, "min": 1, "max": 10, "step": 1})
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, sigmas, min_noise_level, adaptation_factor, window_size):
        """Apply the adaptive floor and return the adjusted schedule.

        Args:
            sigmas: 1-D tensor of sigma values (not modified in place).
            min_noise_level: Base value of the floor.
            adaptation_factor: Weight of the local variance/mean term.
            window_size: Number of preceding input values used for the local
                statistics.

        Returns:
            One-element tuple holding the floored copy of ``sigmas``.
        """
        result = sigmas.clone()

        for i in range(window_size, len(sigmas)):
            # Statistics come from the *input* values before position i.
            window = sigmas[i - window_size:i]
            local_mean = torch.mean(window)

            if window_size > 1:
                local_var = torch.var(window)
            else:
                # The sample variance of a single value is NaN, which would
                # make every comparison below False and silently disable the
                # floor.  A one-element window has no spread, so use 0.
                local_var = torch.zeros_like(local_mean)

            adaptive_floor = min_noise_level + adaptation_factor * local_var / (local_mean + 1e-6)

            if result[i] < adaptive_floor:
                result[i] = adaptive_floor

        return (result,)
class sigmas_conway_sequence:
    """Node generating a sigma schedule from a combinatorial integer sequence.

    Supported ``sequence_type`` values:

    * ``look_and_say`` - lengths of successive look-and-say terms, starting
      from "1" (at most 26 terms).
    * ``audioactive``  - the last term of an iterated "count each distinct
      digit" process (at most 30 iterations), truncated to ``steps`` entries.
      The result may be shorter than ``steps``.
    * ``paperfolding`` - the regular paper-folding (dragon curve) sequence,
      capped at 30 entries.
    * ``thue_morse``   - the first ``steps`` entries of the Thue-Morse
      sequence.
    """

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "steps": ("INT", {"default": 20, "min": 1, "max": 50, "step": 1}),
                "sequence_type": (["look_and_say", "audioactive", "paperfolding", "thue_morse"], {"default": "look_and_say"}),
                "normalize_range": ("BOOLEAN", {"default": True}),
                "min_value": ("FLOAT", {"default": 0.01, "min": 0.0, "max": 10.0, "step": 0.01}),
                "max_value": ("FLOAT", {"default": 10.0, "min": 0.0, "max": 50.0, "step": 0.1})
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    @staticmethod
    def _look_and_say_next(term):
        """Return the look-and-say successor of the digit string ``term``."""
        pieces = []
        i = 0
        while i < len(term):
            j = i
            while j < len(term) and term[j] == term[i]:
                j += 1
            pieces.append(str(j - i) + term[i])
            i = j
        # Joining once avoids quadratic string concatenation.
        return "".join(pieces)

    def main(self, steps, sequence_type, normalize_range, min_value, max_value):
        """Generate the requested sequence as a float32 tensor.

        Args:
            steps: Requested number of entries (see class docstring for the
                per-sequence caps).
            sequence_type: Name of the sequence to generate.
            normalize_range: If True, rescale the values linearly to
                [min_value, max_value]; a constant sequence becomes
                ``min_value`` everywhere.
            min_value: Lower bound of the normalised range.
            max_value: Upper bound of the normalised range.

        Returns:
            One-element tuple holding the sequence tensor.

        Raises:
            ValueError: If ``sequence_type`` is not one of the supported names.
        """
        if sequence_type == "look_and_say":
            term = "1"
            lengths = [1]
            # Terms grow by roughly 30% per step, hence the hard cap.
            for _ in range(min(steps - 1, 25)):
                term = self._look_and_say_next(term)
                lengths.append(len(term))
            result = torch.tensor(lengths, dtype=torch.float32)

        elif sequence_type == "audioactive":
            a = [1]
            for _ in range(min(steps - 1, 30)):
                digit_count = {}
                for digit in a:
                    digit_count[digit] = digit_count.get(digit, 0) + 1
                b = []
                for digit in sorted(digit_count):
                    b.append(digit_count[digit])
                    b.append(digit)
                a = b
            result = torch.tensor(a, dtype=torch.float32)
            if len(result) > steps:
                result = result[:steps]

        elif sequence_type == "paperfolding":
            # Regular paper-folding sequence: write n = 2^k * m with m odd;
            # the n-th term (n >= 1) is 1 when m % 4 == 1 and 0 otherwise.
            # (A test such as `(i & (i + 1)) % 2 == 0` holds for every i and
            # would yield a constant sequence.)
            sequence = []
            for n in range(1, min(steps, 30) + 1):
                odd_part = n // (n & -n)
                sequence.append(1 if odd_part % 4 == 1 else 0)
            result = torch.tensor(sequence, dtype=torch.float32)

        elif sequence_type == "thue_morse":
            sequence = [0]
            while len(sequence) < steps:
                # Each doubling appends the bitwise complement of the prefix.
                sequence.extend([1 - x for x in sequence])
            result = torch.tensor(sequence, dtype=torch.float32)[:steps]

        else:
            raise ValueError(f"Unknown sequence_type: {sequence_type!r}")

        if normalize_range:
            if result.max() > result.min():
                result = (result - result.min()) / (result.max() - result.min())
                result = result * (max_value - min_value) + min_value
            else:
                result = torch.ones_like(result) * min_value

        return (result,)
sieve_of_eratosthenes(limit): sieve = [True] * (limit + 1) sieve[0] = sieve[1] = False for i in range(2, int(limit**0.5) + 1): if sieve[i]: for j in range(i*i, limit + 1, i): sieve[j] = False return [i for i in range(limit + 1) if sieve[i]] # Get primes primes = sieve_of_eratosthenes(steps * 6) # Get enough primes primes = primes[:steps] # Generate Gilbreath sequence levels sequences = [primes] for level in range(1, levels): prev_seq = sequences[level-1] new_seq = [abs(prev_seq[i] - prev_seq[i+1]) for i in range(len(prev_seq)-1)] sequences.append(new_seq) # Select the requested level selected_level = min(levels-1, len(sequences)-1) result_list = sequences[selected_level] # Ensure we have enough values while len(result_list) < steps: result_list.append(1) # Gilbreath conjecture: eventually all 1s # Convert to tensor result = torch.tensor(result_list[:steps], dtype=torch.float32) # Normalize to desired range if normalize_range: if result.max() > result.min(): result = (result - result.min()) / (result.max() - result.min()) result = result * (max_value - min_value) + min_value else: result = torch.ones_like(result) * min_value return (result,) class sigmas_cnf_inverse: def __init__(self): pass @classmethod def INPUT_TYPES(s): return { "required": { "sigmas": ("SIGMAS", {"forceInput": True}), "time_steps": ("INT", {"default": 20, "min": 5, "max": 100, "step": 1}), "flow_type": (["linear", "quadratic", "sigmoid", "exponential"], {"default": "sigmoid"}), "reverse": ("BOOLEAN", {"default": True}) } } FUNCTION = "main" RETURN_TYPES = ("SIGMAS",) CATEGORY = "RES4LYF/sigmas" def main(self, sigmas, time_steps, flow_type, reverse): # Create normalized time steps t = torch.linspace(0, 1, time_steps) # Apply CNF flow transformation if flow_type == "linear": flow = t elif flow_type == "quadratic": flow = t**2 elif flow_type == "sigmoid": flow = 1 / (1 + torch.exp(-10 * (t - 0.5))) elif flow_type == "exponential": flow = torch.exp(3 * t) - 1 flow = flow / flow.max() # Normalize to 
class sigmas_riemannian_flow:
    """Node generating a sigma schedule along a geometry-inspired curve.

    A parameter ``t`` runs over ``steps`` evenly spaced points in [0, 1] and
    is mapped from ``start_value`` to ``end_value`` according to
    ``metric_type``:

    * ``euclidean``  - straight linear interpolation.
    * ``hyperbolic`` - interpolation of the Poincare-disk coordinates
      ``tanh(value / 2)`` with weight ``sinh(t * d) / sinh(d)``.
      ``curvature`` is not used by this mode.
    * ``spherical``  - ``sin`` of a linearly interpolated angle, scaled by
      ``1 / sqrt(curvature)``.
    * ``lorentzian`` - linear interpolation multiplied by the factor
      ``1 / sqrt(1 - curvature * t**2)``.

    The result is always clamped to the closed interval spanned by
    ``start_value`` and ``end_value``.
    """

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "steps": ("INT", {"default": 30, "min": 5, "max": 100, "step": 1}),
                "metric_type": (["euclidean", "hyperbolic", "spherical", "lorentzian"], {"default": "hyperbolic"}),
                "curvature": ("FLOAT", {"default": 1.0, "min": 0.1, "max": 10.0, "step": 0.1}),
                "start_value": ("FLOAT", {"default": 10.0, "min": 0.1, "max": 50.0, "step": 0.1}),
                "end_value": ("FLOAT", {"default": 0.01, "min": 0.0, "max": 10.0, "step": 0.01})
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, steps, metric_type, curvature, start_value, end_value):
        """Build the schedule for the chosen metric.

        Args:
            steps: Number of schedule entries.
            metric_type: "euclidean", "hyperbolic", "spherical" or
                "lorentzian".
            curvature: Positive curvature parameter (unused for "euclidean"
                and "hyperbolic").
            start_value: Value associated with ``t = 0``.
            end_value: Value associated with ``t = 1``.

        Returns:
            One-element tuple holding a float32 tensor of length ``steps``.

        Raises:
            ValueError: If ``metric_type`` is not one of the supported names.
        """
        t = torch.linspace(0, 1, steps)

        if metric_type == "euclidean":
            result = start_value * (1 - t) + end_value * t

        elif metric_type == "hyperbolic":
            # start_value/end_value are Python floats, so the scalar parts use
            # the math module (torch.tanh & co. only accept tensors).
            x_start = math.tanh(start_value / 2)
            x_end = math.tanh(end_value / 2)

            # For u = tanh(a/2), v = tanh(b/2) the Poincare distance
            # acosh(1 + 2(u-v)^2 / ((1-u^2)(1-v^2))) equals |a - b|; the
            # closed form avoids dividing by (1 - u^2), which rounds to 0 for
            # large values.
            d = abs(start_value - end_value)

            if d == 0:
                # sinh(0) / sinh(0) would be NaN; the path is constant.
                result = torch.full_like(t, float(start_value))
            else:
                # float64 keeps atanh accurate for arguments close to 1.
                t64 = t.to(torch.float64)
                d64 = torch.tensor(d, dtype=torch.float64)
                lambda_t = torch.sinh(t64 * d64) / torch.sinh(d64)
                blended = (1 - lambda_t) * x_start + lambda_t * x_end
                # atanh(1) = inf is possible at t = 0; the clamp below maps it
                # back to the upper bound.
                result = (2 * torch.atanh(blended)).to(t.dtype)

        elif metric_type == "spherical":
            root_k = math.sqrt(curvature)
            theta_start = start_value * root_k
            theta_end = end_value * root_k
            result = torch.sin((1 - t) * theta_start + t * theta_end) / root_k

        elif metric_type == "lorentzian":
            # 1 - curvature * t^2 reaches zero or goes negative once
            # curvature * t^2 >= 1; bound it away from zero so the factor
            # stays finite instead of becoming inf/NaN.
            radicand = torch.clamp(1 - curvature * t**2, min=1e-6)
            gamma = 1 / torch.sqrt(radicand)
            result = (start_value * (1 - t) + end_value * t) * gamma

        else:
            raise ValueError(f"Unknown metric_type: {metric_type!r}")

        result = torch.clamp(result, min=min(start_value, end_value), max=max(start_value, end_value))

        # Keep the overall direction start -> end when start is the larger value.
        if start_value > end_value and result[0] < result[-1]:
            result = torch.flip(result, [0])

        return (result,)
class sigmas_persistent_homology:
    """Sigma schedule shaped by a filtration curve and a simulated persistence barcode.

    A monotone "filtration" curve over ``t in [0, 1]`` is multiplied by a
    weight between 0.7 and 1.0 derived from how many simulated birth/death
    intervals cover each step, then mapped onto the
    ``start_value -> end_value`` range.
    """

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "steps": ("INT", {"default": 30, "min": 5, "max": 100, "step": 1}),
                "start_value": ("FLOAT", {"default": 10.0, "min": 0.1, "max": 50.0, "step": 0.1}),
                "end_value": ("FLOAT", {"default": 0.01, "min": 0.0, "max": 10.0, "step": 0.01}),
                "persistence_type": (["linear", "exponential", "logarithmic", "sigmoidal"], {"default": "exponential"}),
                "birth_density": ("FLOAT", {"default": 0.3, "min": 0.0, "max": 1.0, "step": 0.01}),
                "death_density": ("FLOAT", {"default": 0.7, "min": 0.0, "max": 1.0, "step": 0.01})
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS",)
    CATEGORY = "RES4LYF/sigmas"

    def main(self, steps, start_value, end_value, persistence_type, birth_density, death_density):
        """Return a one-element tuple with the generated schedule of length ``steps``."""
        t = torch.linspace(0, 1, steps)

        # Filtration curve for the selected type.
        if persistence_type == "linear":
            filtration = t
        elif persistence_type == "exponential":
            filtration = 1 - torch.exp(-5 * t)
        elif persistence_type == "logarithmic":
            filtration = torch.log(1 + 9 * t) / torch.log(torch.tensor([10.0]))
        elif persistence_type == "sigmoidal":
            filtration = 1 / (1 + torch.exp(-10 * (t - 0.5)))

        # Births are spread over the first half of the steps, deaths over the second.
        half = steps // 2
        n_births = int(steps * birth_density)
        n_deaths = int(steps * death_density)
        births = torch.linspace(0, half, n_births).long().tolist()
        deaths = torch.linspace(half, steps - 1, n_deaths).long().tolist()

        # Each (birth, death) pair with birth < death adds one bar over [birth, death).
        barcode = torch.zeros(steps)
        for born in births:
            for died in deaths:
                if born < died:
                    barcode[born:died] += 1

        # Normalise bar counts to [0, 1] when at least one bar exists.
        peak = barcode.max()
        if peak > 0:
            barcode = barcode / peak

        # Barcode modulates the filtration between 70% and 100% of its value.
        weighted = filtration * (0.7 + 0.3 * barcode)

        # Map onto the requested sigma range.
        result = start_value + (end_value - start_value) * weighted
        return (result,)
class sigmas_split_value:
    """Split a sigma schedule into a high part and a low part at a sigma value.

    The two halves share the boundary sigma, so that running them back to
    back covers the same steps as the unsplit schedule.
    """

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "sigmas": ("SIGMAS",),
                "split_value": ("FLOAT", {"default": 0.875, "min": 0.0, "max": 80085.0, "step": 0.001}),
                "bias_split_up": ("BOOLEAN", {"default": False, "tooltip": "If True, split happens above the split value, so high_sigmas includes the split point."}),
            }
        }

    FUNCTION = "main"
    RETURN_TYPES = ("SIGMAS", "SIGMAS")
    RETURN_NAMES = ("high_sigmas", "low_sigmas")
    CATEGORY = "RES4LYF/sigmas"
    DESCRIPTION = ("Splits sigma schedule at a specific sigma value.")

    def main(self, sigmas, split_value, bias_split_up):
        """Split ``sigmas`` at the first entry that falls to or below ``split_value``.

        Args:
            sigmas: 1-D tensor of sigmas (the search takes the first match,
                so a descending schedule is split at its crossing point).
            split_value: sigma value to split at.
            bias_split_up: if True the first sigma ``<= split_value`` is the
                shared boundary; otherwise the last sigma ``>= split_value``
                (the one before the first sigma ``< split_value``) is.

        Returns:
            ``(high_sigmas, low_sigmas)``. If no sigma is below the split
            value the whole schedule is returned as ``high_sigmas``; if every
            sigma is below it the whole schedule is returned as ``low_sigmas``.
        """
        if len(sigmas) == 0:
            return (sigmas, sigmas)

        empty = torch.tensor([], device=sigmas.device, dtype=sigmas.dtype)

        below = (sigmas <= split_value) if bias_split_up else (sigmas < split_value)
        hits = below.nonzero()

        if hits.numel() == 0:
            # Nothing reaches the split value: the entire schedule is "high".
            # (Both modes agree here; previously the non-biased mode returned
            # the two halves swapped.)
            return (sigmas, empty)

        split_idx = int(hits[0].item())

        if bias_split_up:
            # Boundary is sigmas[split_idx]; it ends high and starts low.
            return (sigmas[:split_idx + 1], sigmas[split_idx:])

        if split_idx == 0:
            # Every sigma is already below the split value. Without this guard
            # sigmas[split_idx - 1:] would wrap around to sigmas[-1:] and
            # silently drop almost the whole schedule.
            return (empty, sigmas)

        # Boundary is sigmas[split_idx - 1]; it ends high and starts low.
        return (sigmas[:split_idx], sigmas[split_idx - 1:])
class StyleTransfer:
    """Helper that embeds latents with a model's patch-embedding layer and inverts it.

    The embedder (``nn.Linear``, ``nn.Conv2d`` or ``nn.Conv3d``) is deep-copied
    and cast to ``pinv_dtype``; inversion uses the Moore-Penrose pseudo-inverse
    of its weight. Marked in the file as work in progress / not yet in use.
    """

    def __init__(self,
                 style_method="WCT",
                 embedder_method=None,
                 patch_size=1,
                 pinv_dtype=torch.float64,
                 dtype=torch.float64,
                 ):
        self.style_method = style_method

        # Must be set before set_embedder_method(), which casts to pinv_dtype.
        self.pinv_dtype = pinv_dtype
        self.dtype = dtype
        self.patch_size = patch_size

        self.embedder_method = None
        self.unembedder_method = None
        self.W = None
        self.B = None

        # Optional externally supplied callables (see the set_*_method setters).
        # Note: these are deliberately NOT stored as self.patchify /
        # self.unpatchify, which would shadow the methods of the same name.
        self.patchify_method = None
        self.unpatchify_method = None

        self.orig_shape = None   # input shape seen by the last embedder() call
        self.grid_sizes = None   # trailing dims of the last embedder() output
        self.img_sizes = None    # per-sample patch grid recorded by patchify()

        if embedder_method is not None:
            self.set_embedder_method(embedder_method)

    def set_patchify_method(self, patchify_method=None):
        self.patchify_method = patchify_method

    def set_unpatchify_method(self, unpatchify_method=None):
        self.unpatchify_method = unpatchify_method

    def set_embedder_method(self, embedder_method):
        """Store a float-cast private copy of the layer and pick the matching inverter."""
        self.embedder_method = copy.deepcopy(embedder_method).to(self.pinv_dtype)
        self.W = self.embedder_method.weight
        self.B = self.embedder_method.bias

        if isinstance(embedder_method, nn.Linear):
            self.unembedder_method = self.invert_linear
        elif isinstance(embedder_method, nn.Conv2d):
            self.unembedder_method = self.invert_conv2d
        elif isinstance(embedder_method, nn.Conv3d):
            self.unembedder_method = self.invert_conv3d

    def set_patch_size(self, patch_size):
        self.patch_size = patch_size

    def unpatchify(self, x: torch.Tensor) -> torch.Tensor:
        """Turn patch tokens back into images using the grid recorded by patchify()."""
        x_arr = []
        for i, img_size in enumerate(self.img_sizes):   # e.g. [[64, 64]]
            pH, pW = img_size
            x_arr.append(
                einops.rearrange(x[i, :pH * pW].reshape(1, pH, pW, -1),
                                 'B H W (p1 p2 C) -> B C (H p1) (W p2)',
                                 p1=self.patch_size, p2=self.patch_size)
            )
        return torch.cat(x_arr, dim=0)

    def patchify(self, x: torch.Tensor):
        """Pad to a multiple of the patch size and flatten into patch tokens."""
        x = comfy.ldm.common_dit.pad_to_patch_size(x, (self.patch_size, self.patch_size))

        pH, pW = x.shape[-2] // self.patch_size, x.shape[-1] // self.patch_size
        self.img_sizes = [[pH, pW]] * x.shape[0]
        x = einops.rearrange(x, 'B C (H p1) (W p2) -> B (H W) (p1 p2 C)',
                             p1=self.patch_size, p2=self.patch_size)
        return x

    def embedder(self, x):
        """Apply the embedding layer (patchifying first for nn.Linear) and remember shapes."""
        if isinstance(self.embedder_method, nn.Linear):
            x = self.patchify(x)

        self.orig_shape = x.shape
        x = self.embedder_method(x)
        self.grid_sizes = x.shape[2:]
        return x

    def unembedder(self, x):
        """Invert embedder() with the inverter chosen in set_embedder_method()."""
        return self.unembedder_method(x)

    def invert_linear(self, x: torch.Tensor) -> torch.Tensor:
        """Invert ``y = x @ W.T + b`` via the pseudo-inverse of ``W``."""
        x = x.to(self.pinv_dtype)
        if self.B is not None:      # layers built with bias=False have no bias to remove
            x = x - self.B
        x = x @ torch.linalg.pinv(self.W).T
        return x.to(self.dtype)

    def invert_conv2d(self, z: torch.Tensor) -> torch.Tensor:
        """Invert the Conv2d embedding of the input whose shape is in ``self.orig_shape``.

        Each output position is mapped back to a kernel-sized input patch with
        the pseudo-inverse of the flattened weight; overlapping patches are
        averaged with ``F.fold`` and the conv padding is cropped away.
        """
        z = z.to(self.pinv_dtype)
        conv = self.embedder_method

        _, _, H, W = self.orig_shape
        C_out, _, kH, kW = conv.weight.shape
        stride_h, stride_w = conv.stride
        pad_h, pad_w = conv.padding

        if conv.bias is not None:
            z_nobias = z - conv.bias.view(1, C_out, 1, 1).to(z)
        else:
            z_nobias = z

        W_flat = conv.weight.view(C_out, -1).to(z)
        W_pinv = torch.linalg.pinv(W_flat)

        Bz, Co, _, _ = z_nobias.shape
        z_flat = z_nobias.reshape(Bz, Co, -1)

        x_patches = W_pinv @ z_flat

        fold_args = dict(
            output_size=(H + 2 * pad_h, W + 2 * pad_w),
            kernel_size=(kH, kW),
            stride=(stride_h, stride_w),
        )
        x_sum = F.fold(x_patches, **fold_args)
        # How many patches contributed to each pixel (for averaging overlaps).
        count = F.fold(torch.ones_like(x_patches), **fold_args)

        x_recon = x_sum / count.clamp(min=1e-6)
        if pad_h > 0 or pad_w > 0:
            x_recon = x_recon[..., pad_h:pad_h + H, pad_w:pad_w + W]

        return x_recon.to(self.dtype)

    def invert_conv3d(self, z: torch.Tensor) -> torch.Tensor:
        """Invert a Conv3d patch embedding whose depth kernel is 1.

        Requires ``self.patch_size`` to be a ``(pD, pH, pW)`` triple and the
        embedding stride to equal the patch size (non-overlapping patches).
        ``z`` may be ``[B, S, C_out]`` tokens or ``[B, C_out, D', H', W']``.
        """
        z = z.to(self.pinv_dtype)
        conv = self.embedder_method
        grid_sizes = self.grid_sizes

        B, _, D, H, W = self.orig_shape
        pD, pH, pW = self.patch_size
        sH, sW = pH, pW

        if z.ndim == 3:
            # [B, S, C_out] -> [B, C_out, D', H', W']
            if grid_sizes is None:
                Dp = D // pD
                Hp = H // pH
                Wp = W // pW
            else:
                Dp, Hp, Wp = grid_sizes
            C_out = z.shape[2]
            z = z.transpose(1, 2).reshape(B, C_out, Dp, Hp, Wp)
        else:
            B2, C_out, Dp, Hp, Wp = z.shape
            assert B2 == B, "Batch size mismatch... ya sharked it."

        # The bias has to be knocked out before inverting via the weight.
        if conv.bias is not None:
            z_nobias = z - conv.bias.view(1, C_out, 1, 1, 1)
        else:
            z_nobias = z

        # Depth kernel is 1, so the filter reduces to a 2D one.
        w2 = conv.weight.squeeze(2)                     # [C_out, C_in, pH, pW]
        out_ch, in_ch, _, _ = w2.shape
        W_flat = w2.reshape(out_ch, -1)                 # [C_out, in_ch*pH*pW]
        W_pinv = torch.linalg.pinv(W_flat)              # [in_ch*pH*pW, C_out]

        # Merge depth into the batch so the 2D fold can be reused per frame.
        z2 = z_nobias.permute(0, 2, 1, 3, 4).reshape(B * Dp, C_out, Hp, Wp)

        z_flat = z2.reshape(B * Dp, C_out, -1)          # [B*Dp, C_out, L]
        x_patches = W_pinv @ z_flat                     # [B*Dp, in_ch*pH*pW, L]

        x2 = F.fold(
            x_patches,
            output_size=(H, W),
            kernel_size=(pH, pW),
            stride=(sH, sW)
        )                                               # [B*Dp, C_in, H, W]

        # Split depth back out of the batch dimension.
        x2 = x2.reshape(B, Dp, in_ch, H, W)             # [B, Dp, C_in, H, W]
        x_recon = x2.permute(0, 2, 1, 3, 4).contiguous()  # [B, C_in, D, H, W]
        return x_recon.to(self.dtype)

    def adain_seq_inplace(self, content: torch.Tensor, style: torch.Tensor, eps: float = 1e-7) -> torch.Tensor:
        """AdaIN over dim 1: renormalise ``content`` in place to ``style``'s mean/std."""
        mean_c = content.mean(1, keepdim=True)
        std_c = content.std(1, keepdim=True).add_(eps)
        mean_s = style.mean(1, keepdim=True)
        std_s = style.std(1, keepdim=True).add_(eps)

        content.sub_(mean_c).div_(std_c).mul_(std_s).add_(mean_s)
        return content
y0_adain_embed.ndim > 4 else y0_adain_embed self.mu_s = f_s.mean(dim=0, keepdim=True) f_s_centered = f_s - self.mu_s self.y0_color = self.whiten(f_s_centered, set=True) def get(self, denoised_embed: torch.Tensor): for wct_i in range(denoised_embed.shape[0]): f_c = denoised_embed[wct_i] mu_c = f_c.mean(dim=0, keepdim=True) f_c_centered = f_c - mu_c whiten = self.whiten(f_c_centered) f_c_whitened = f_c_centered @ whiten.T f_cs = f_c_whitened @ self.y0_color.T + self.mu_s denoised_embed[wct_i] = f_cs return denoised_embed class WaveletStyleWCT(StyleWCT): def set(self, y0_adain_embed: torch.Tensor, h_len, w_len): if self.y0_adain_embed is None or self.y0_adain_embed.shape != y0_adain_embed.shape or torch.norm(self.y0_adain_embed - y0_adain_embed) > 0: self.y0_adain_embed = y0_adain_embed.clone() B, HW, C = y0_adain_embed.shape LL, _, _, _ = haar_wavelet_decompose(y0_adain_embed.contiguous().view(B, C, h_len, w_len)) B_LL, C_LL, H_LL, W_LL = LL.shape #flat = rearrange(LL, 'b c h w -> b (h w) c') flat = LL.contiguous().view(B_LL, H_LL * W_LL, C_LL) f_s = flat[0] # assuming batch size 1 or using only the first self.mu_s = f_s.mean(dim=0, keepdim=True) f_s_centered = f_s - self.mu_s self.y0_color = self.whiten(f_s_centered, set=True) #self.y0_adain_embed = flat # cache if needed def get(self, denoised_embed: torch.Tensor, h_len, w_len, stylize_highfreq=False): B, HW, C = denoised_embed.shape denoised_embed = denoised_embed.contiguous().view(B, C, h_len, w_len) for i in range(B): x = denoised_embed[i:i+1] # [1, C, H, W] LL, LH, HL, HH = haar_wavelet_decompose(x) def process_band(band): Bc, Cc, Hc, Wc = band.shape flat = band.contiguous().view(Bc, Hc * Wc, Cc) styled = super(WaveletStyleWCT, self).get(flat) return styled.contiguous().view(Bc, Cc, Hc, Wc) LL_styled = process_band(LL) if stylize_highfreq: LH_styled = process_band(LH) HL_styled = process_band(HL) HH_styled = process_band(HH) else: LH_styled, HL_styled, HH_styled = LH, HL, HH recon = 
class Retrojector:
    """Embed images with a model's input projection and project embeddings back.

    The projection weight is cached in ``pinv_dtype`` together with its
    pseudo-inverse. With ``ENDO=True`` the roles are swapped for linear
    projections: ``embed`` applies the pseudo-inverse and ``unembed`` the
    forward weight.
    """

    def __init__(self, proj=None, patch_size=2, pinv_dtype=torch.float64, dtype=torch.float64, ENDO=False):
        self.proj = proj
        self.patch_size = patch_size
        self.pinv_dtype = pinv_dtype
        self.dtype = dtype

        self.LINEAR = isinstance(proj, nn.Linear)
        self.CONV2D = isinstance(proj, nn.Conv2d)
        self.CONV3D = isinstance(proj, nn.Conv3d)
        self.ENDO = ENDO

        # Prefer the GPU for the pseudo-inverse, but keep working on
        # CPU-only machines instead of failing inside .cuda().
        device = torch.device("cuda") if torch.cuda.is_available() else proj.weight.device

        self.W = proj.weight.data.to(device=device, dtype=pinv_dtype)

        # Conv3d is inverted on the fly in invert_patch_embedding(), so it has
        # no cached pseudo-inverse.
        self.W_inv = None
        if self.LINEAR:
            self.W_inv = torch.linalg.pinv(self.W)
        elif self.CONV2D:
            self.W_inv = torch.linalg.pinv(self.W.reshape(self.W.shape[0], -1))

        if proj.bias is None:
            bias_size = proj.out_features if self.LINEAR else proj.out_channels
            self.b = torch.zeros(bias_size, dtype=pinv_dtype, device=device)
        else:
            self.b = proj.bias.data.to(device=device, dtype=pinv_dtype)

    def embed(self, img: torch.Tensor):
        """Project ``img`` into token space; records the grid needed by unembed()."""
        img = comfy.ldm.common_dit.pad_to_patch_size(img, (self.patch_size, self.patch_size))

        # Measured after padding so that unembed() sees the real token grid
        # even when the input size is not a multiple of the patch size.
        self.h = img.shape[-2] // self.patch_size
        self.w = img.shape[-1] // self.patch_size

        if self.CONV2D:
            self.orig_shape = img.shape     # needed by invert_conv2d()
            img_embed = F.conv2d(
                img.to(self.W),
                weight=self.W,
                bias=self.b,
                stride=self.proj.stride,
                padding=self.proj.padding
            )
            # The token grid is whatever the convolution actually produced.
            self.h, self.w = img_embed.shape[-2:]
            img_embed = rearrange(img_embed, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=1, pw=1)

        elif self.LINEAR:
            if img.ndim == 4:
                img = rearrange(img, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=self.patch_size, pw=self.patch_size)
            if self.ENDO:
                img_embed = F.linear(img.to(self.b) - self.b, self.W_inv)
            else:
                img_embed = F.linear(img.to(self.W), self.W, self.b)

        else:
            raise NotImplementedError("Retrojector.embed supports nn.Linear and nn.Conv2d projections only")

        return img_embed.to(img)

    def unembed(self, img_embed: torch.Tensor):
        """Project tokens produced by embed() back to image space."""
        if self.CONV2D:
            img_embed = rearrange(img_embed, "b (h w) (c ph pw) -> b c (h ph) (w pw)", h=self.h, w=self.w, ph=1, pw=1)
            img = self.invert_conv2d(img_embed)

        elif self.LINEAR:
            if self.ENDO:
                img = F.linear(img_embed.to(self.W), self.W, self.b)
            else:
                img = F.linear(img_embed.to(self.b) - self.b, self.W_inv)
            if img.ndim == 3:
                img = rearrange(img, "b (h w) (c ph pw) -> b c (h ph) (w pw)", h=self.h, w=self.w, ph=self.patch_size, pw=self.patch_size)

        else:
            raise NotImplementedError("Retrojector.unembed supports nn.Linear and nn.Conv2d projections only")

        return img.to(img_embed)

    def invert_conv2d(self, z: torch.Tensor) -> torch.Tensor:
        """Invert the Conv2d projection for the input shape stored in ``self.orig_shape``.

        Uses the cached pseudo-inverse and bias (zeros when the projection has
        no bias). Overlapping patches are averaged and padding is cropped.
        The result is returned with ``z``'s dtype and device.
        """
        z_in = z
        z = z.to(self.W_inv)    # match the cached pseudo-inverse's dtype and device
        conv = self.proj

        _, _, H, W = self.orig_shape
        C_out, _, kH, kW = conv.weight.shape
        stride_h, stride_w = conv.stride
        pad_h, pad_w = conv.padding

        z_nobias = z - self.b.view(1, C_out, 1, 1)

        Bz, Co, _, _ = z_nobias.shape
        z_flat = z_nobias.reshape(Bz, Co, -1)

        x_patches = self.W_inv @ z_flat

        fold_args = dict(
            output_size=(H + 2 * pad_h, W + 2 * pad_w),
            kernel_size=(kH, kW),
            stride=(stride_h, stride_w),
        )
        x_sum = F.fold(x_patches, **fold_args)
        # Number of patches covering each pixel, to average overlaps.
        count = F.fold(torch.ones_like(x_patches), **fold_args)

        x_recon = x_sum / count.clamp(min=1e-6)
        if pad_h > 0 or pad_w > 0:
            x_recon = x_recon[..., pad_h:pad_h + H, pad_w:pad_w + W]

        return x_recon.to(z_in)

    def invert_patch_embedding(self, z: torch.Tensor, original_shape: torch.Size, grid_sizes: Optional[Tuple[int, int, int]] = None) -> torch.Tensor:
        """Invert a Conv3d patch embedding whose depth kernel is 1.

        Args:
            z: ``[B, S, C_out]`` tokens or ``[B, C_out, D', H', W']`` features.
            original_shape: ``(B, C_in, D, H, W)`` of the un-embedded input.
            grid_sizes: optional ``(D', H', W')`` token grid; derived from
                ``original_shape`` and the patch size when omitted.

        Returns:
            Reconstruction of shape ``[B, C_in, D, H, W]``.
        """
        conv = self.proj

        B, _, D, H, W = original_shape
        if isinstance(self.patch_size, int):
            # The weight is squeezed along depth below, i.e. the depth kernel
            # is 1, so a scalar patch size means (1, p, p).
            pD, pH, pW = 1, self.patch_size, self.patch_size
        else:
            pD, pH, pW = self.patch_size
        sH, sW = pH, pW

        if z.ndim == 3:
            # [B, S, C_out] -> [B, C_out, D', H', W']
            if grid_sizes is None:
                Dp = D // pD
                Hp = H // pH
                Wp = W // pW
            else:
                Dp, Hp, Wp = grid_sizes
            C_out = z.shape[2]
            z = z.transpose(1, 2).reshape(B, C_out, Dp, Hp, Wp)
        else:
            B2, C_out, Dp, Hp, Wp = z.shape
            assert B2 == B, "Batch size mismatch... ya sharked it."

        # Knock out the bias before inverting via the weight.
        if conv.bias is not None:
            z_nobias = z - conv.bias.view(1, C_out, 1, 1, 1).to(z)
        else:
            z_nobias = z

        # 2D filter -> pseudo-inverse
        w2 = conv.weight.squeeze(2)                     # [C_out, C_in, pH, pW]
        out_ch, in_ch, _, _ = w2.shape
        W_flat = w2.reshape(out_ch, -1).to(z)           # [C_out, in_ch*pH*pW]
        W_pinv = torch.linalg.pinv(W_flat)              # [in_ch*pH*pW, C_out]

        # Merge depth into the batch so the 2D fold can be reused per frame.
        z2 = z_nobias.permute(0, 2, 1, 3, 4).reshape(B * Dp, C_out, Hp, Wp)

        z_flat = z2.reshape(B * Dp, C_out, -1)          # [B*Dp, C_out, L]
        x_patches = W_pinv @ z_flat                     # [B*Dp, in_ch*pH*pW, L]

        x2 = F.fold(
            x_patches,
            output_size=(H, W),
            kernel_size=(pH, pW),
            stride=(sH, sW)
        )                                               # [B*Dp, C_in, H, W]

        # Split depth back out of the batch dimension.
        x2 = x2.reshape(B, Dp, in_ch, H, W)             # [B, Dp, C_in, H, W]
        x_recon = x2.permute(0, 2, 1, 3, 4).contiguous()  # [B, C_in, D, H, W]
        return x_recon
def apply_scattersort_tiled(
    denoised_spatial : torch.Tensor,
    y0_adain_spatial : torch.Tensor,
    tile_h           : int,
    tile_w           : int,
    pad              : int,
):
    """
    Apply spatial scattersort between denoised_spatial and y0_adain_spatial
    using local tile-wise sorted value matching.

    Every tile (plus a `pad`-wide reflected border used only as context) has
    its values replaced, per channel, by the reference tile's values of the
    same rank. Only the tile core is written back.

    Args:
        denoised_spatial (Tensor): (B, C, H, W) tensor.
        y0_adain_spatial (Tensor): (B, C, H, W) reference tensor (a batch of 1
            is broadcast over the content batch).
        tile_h (int): tile height.
        tile_w (int): tile width.
        pad (int): padding size to apply around tiles.

    Returns:
        Tensor: (B, C, H, W) tensor after sortmatch.
    """
    denoised_padded = F.pad(denoised_spatial, (pad, pad, pad, pad), mode='reflect')
    y0_padded       = F.pad(y0_adain_spatial, (pad, pad, pad, pad), mode='reflect')

    denoised_padded_out = denoised_padded.clone()
    _, _, h_len, w_len = denoised_spatial.shape

    # ix / jx are tile origins in padded coordinates, where the image occupies
    # [pad, pad + len). Looping up to len + pad (not len) keeps the last
    # rows/columns from being skipped when pad > 0.
    for ix in range(pad, h_len + pad, tile_h):
        # Clip the tile core to the image so a ragged last tile still works.
        core_h = min(tile_h, h_len + pad - ix)
        for jx in range(pad, w_len + pad, tile_w):
            core_w = min(tile_w, w_len + pad - jx)

            tile    = denoised_padded[:, :, ix - pad:ix + core_h + pad, jx - pad:jx + core_w + pad]
            y0_tile = y0_padded      [:, :, ix - pad:ix + core_h + pad, jx - pad:jx + core_w + pad]

            tile_flat = tile.reshape(tile.shape[0], tile.shape[1], -1)
            y0_flat   = y0_tile.reshape(y0_tile.shape[0], y0_tile.shape[1], -1)

            src_idx    = tile_flat.argsort(dim=-1)
            ref_sorted = y0_flat.sort(dim=-1)[0]

            # Position of the k-th smallest content value receives the k-th
            # smallest reference value.
            new_tile = tile_flat.scatter(dim=-1, index=src_idx, src=ref_sorted.expand(src_idx.shape))
            new_tile = new_tile.reshape(tile.shape)

            denoised_padded_out[:, :, ix:ix + core_h, jx:jx + core_w] = \
                new_tile[:, :, pad:pad + core_h, pad:pad + core_w]

    return denoised_padded_out[:, :, pad:pad + h_len, pad:pad + w_len]
def apply_scattersort(
    denoised_embed : torch.Tensor,
    y0_adain_embed : torch.Tensor,
):
    """Rank-match ``denoised_embed`` to ``y0_adain_embed`` along dim -2, in place.

    For every channel, the position holding the k-th smallest value of
    ``denoised_embed`` is overwritten with the k-th smallest value of
    ``y0_adain_embed``.

    Args:
        denoised_embed: tensor to restyle; modified in place.
        y0_adain_embed: reference tensor. Its sorted values are expanded to
            ``denoised_embed``'s shape, so a batch of 1 is broadcast.

    Returns:
        ``denoised_embed`` (the same object that was passed in).
    """
    src_idx    = denoised_embed.argsort(dim=-2)
    ref_sorted = y0_adain_embed.sort(dim=-2)[0]

    # scatter_ needs src to cover the index tensor in every dimension, so the
    # reference must be expanded to the content's shape (not to its own).
    denoised_embed.scatter_(dim=-2, index=src_idx, src=ref_sorted.expand(src_idx.shape))

    return denoised_embed
def apply_adain_spatial(
    x_spatial : torch.Tensor,
    y_spatial : torch.Tensor,
):
    """Adaptive instance normalisation of a (b, c, h, w) tensor.

    Each channel of ``x_spatial`` is normalised by its own spatial mean/std
    and then rescaled to the spatial mean/std of the same channel of
    ``y_spatial``.

    Args:
        x_spatial: target tensor, (b, c, h, w).
        y_spatial: reference tensor, (b, c, h', w').

    Returns:
        Tensor with the shape of ``x_spatial``.

    Raises:
        AssertionError: if any channel of either tensor has zero variance.
    """
    # Statistics are taken over the flattened spatial axis, keeping the
    # (b, c, hw) layout so the result maps straight back onto (b, c, h, w).
    # (Computing in (b, hw, c) layout and calling reshape_as, as before,
    # reinterpreted memory and scrambled channels with pixels.)
    x_flat = x_spatial.flatten(2)
    y_flat = y_spatial.flatten(2)

    x_mean = x_flat.mean(-1, keepdim=True)
    x_std  = x_flat.std (-1, keepdim=True)
    y_mean = y_flat.mean(-1, keepdim=True)
    y_std  = y_flat.std (-1, keepdim=True)

    assert (x_std == 0).any() == 0, "Target tensor has no variance!"
    assert (y_std == 0).any() == 0, "Reference tensor has no variance!"

    x_adain = (x_flat - x_mean) / x_std
    x_adain = (x_adain * y_std) + y_mean

    return x_adain.reshape_as(x_spatial)
dtype=torch.float64, device=device) - pad gauss = torch.exp(-0.5 * (coords / sigma) ** 2) gauss = (gauss / gauss.sum()).to(dtype) kernel_2d = (gauss[:, None] * gauss[None, :]) weight = kernel_2d.view(1, 1, kernel_size, kernel_size) content_padded = F.pad(content, (pad, pad, pad, pad), mode='reflect') style_padded = F.pad(style, (pad, pad, pad, pad), mode='reflect') result = torch.zeros_like(content) for i in range(H): c_row_patches = torch.stack([ content_padded[:, :, i:i+kernel_size, j:j+kernel_size] for j in range(W) ], dim=0) # [W, B, C, k, k] s_row_patches = torch.stack([ style_padded[:, :, i:i+kernel_size, j:j+kernel_size] for j in range(W) ], dim=0) w = weight.expand_as(c_row_patches[0]) c_mean = (c_row_patches * w).sum(dim=(-1, -2), keepdim=True) c_std = ((c_row_patches - c_mean) ** 2 * w).sum(dim=(-1, -2), keepdim=True).sqrt() + eps s_mean = (s_row_patches * w).sum(dim=(-1, -2), keepdim=True) s_std = ((s_row_patches - s_mean) ** 2 * w).sum(dim=(-1, -2), keepdim=True).sqrt() + eps center = kernel_size // 2 central = c_row_patches[:, :, :, center:center+1, center:center+1] normed = (central - c_mean) / c_std stylized = normed * s_std + s_mean result[:, :, i, :] = stylized.squeeze(-1).squeeze(-1).permute(1, 2, 0) # [B,C,W] return result def adain_patchwise_row_batch_med(content: torch.Tensor, style: torch.Tensor, sigma: float = 1.0, kernel_size: int = None, eps: float = 1e-5, mask: torch.Tensor = None, use_median_blur: bool = False, lowpass_weight=1.0, highpass_weight=1.0) -> torch.Tensor: B, C, H, W = content.shape device, dtype = content.device, content.dtype if kernel_size is None: kernel_size = int(2 * math.ceil(3 * abs(sigma)) + 1) if kernel_size % 2 == 0: kernel_size += 1 pad = kernel_size // 2 content_padded = F.pad(content, (pad, pad, pad, pad), mode='reflect') style_padded = F.pad(style, (pad, pad, pad, pad), mode='reflect') result = torch.zeros_like(content) scaling = torch.ones((B, 1, H, W), device=device, dtype=dtype) sigma_scale = torch.ones((H, 
W), device=device, dtype=torch.float32) if mask is not None: with torch.no_grad(): padded_mask = F.pad(mask.float(), (pad, pad, pad, pad), mode="reflect") blurred_mask = F.avg_pool2d(padded_mask, kernel_size=kernel_size, stride=1, padding=pad) blurred_mask = blurred_mask[..., pad:-pad, pad:-pad] edge_proximity = blurred_mask * (1.0 - blurred_mask) scaling = 1.0 - (edge_proximity / 0.25).clamp(0.0, 1.0) sigma_scale = scaling[0, 0] # assuming single-channel mask broadcasted across B, C if not use_median_blur: coords = torch.arange(kernel_size, dtype=torch.float64, device=device) - pad base_gauss = torch.exp(-0.5 * (coords / sigma) ** 2) base_gauss = (base_gauss / base_gauss.sum()).to(dtype) gaussian_table = {} for s in sigma_scale.unique(): sig = float((sigma * s + eps).clamp(min=1e-3)) gauss_local = torch.exp(-0.5 * (coords / sig) ** 2) gauss_local = (gauss_local / gauss_local.sum()).to(dtype) kernel_2d = gauss_local[:, None] * gauss_local[None, :] gaussian_table[s.item()] = kernel_2d for i in range(H): row_result = torch.zeros(B, C, W, dtype=dtype, device=device) for j in range(W): c_patch = content_padded[:, :, i:i+kernel_size, j:j+kernel_size] s_patch = style_padded[:, :, i:i+kernel_size, j:j+kernel_size] if use_median_blur: # Median blur with residual restoration unfolded_c = c_patch.reshape(B, C, -1) unfolded_s = s_patch.reshape(B, C, -1) c_median = unfolded_c.median(dim=-1, keepdim=True).values s_median = unfolded_s.median(dim=-1, keepdim=True).values center = kernel_size // 2 central = c_patch[:, :, center, center].view(B, C, 1) residual = central - c_median stylized = lowpass_weight * s_median + residual * highpass_weight else: k = gaussian_table[float(sigma_scale[i, j].item())] local_weight = k.view(1, 1, kernel_size, kernel_size).expand(B, C, kernel_size, kernel_size) c_mean = (c_patch * local_weight).sum(dim=(-1, -2), keepdim=True) c_std = ((c_patch - c_mean) ** 2 * local_weight).sum(dim=(-1, -2), keepdim=True).sqrt() + eps s_mean = (s_patch * 
def weighted_mix_n(tensor_list, weight_list, dim=-1, offset=0):
    """Interleave several same-shaped tensors along ``dim`` in proportion to their weights.

    Every index along ``dim`` is assigned to exactly one input tensor. Slots
    are handed out greedily: at position ``i`` the tensor whose share so far is
    furthest below ``ratio * (i + 1)`` gets the slot (ties go to the earliest
    tensor in ``tensor_list``).

    Fixes over the previous version:
      * the selection mask is shaped to broadcast along ``dim`` (it used to be
        aligned with the last axis whatever ``dim`` was);
      * the mask is created on the tensors' device, so CUDA inputs no longer
        hit a device mismatch in ``torch.where``;
      * unused intermediate computations were removed.

    Args:
        tensor_list: Tensors of identical shape.
        weight_list: One non-negative weight per tensor; only their ratios matter.
        dim: Axis along which the tensors are interleaved.
        offset: Accepted for backward compatibility; it has never influenced
            the result and still does not.

    Returns:
        A new tensor with the common input shape.
    """
    assert all(t.shape == tensor_list[0].shape for t in tensor_list)
    assert len(tensor_list) == len(weight_list)

    first        = tensor_list[0]
    total_weight = sum(weight_list)
    ratios       = [w / total_weight for w in weight_list]
    length       = first.shape[dim]

    # Greedy allocation: give each slot to the most under-allocated tensor.
    counters = [0] * len(ratios)
    slots    = []
    for i in range(length):
        errors = [r * (i + 1) - c for r, c in zip(ratios, counters)]
        k = max(range(len(errors)), key=errors.__getitem__)
        slots.append(k)
        counters[k] += 1

    # Shape the slot table so it broadcasts along ``dim`` only.
    view_shape      = [1] * first.dim()
    view_shape[dim] = length
    slot_map = torch.tensor(slots, dtype=torch.long, device=first.device).view(view_shape)

    out = first.clone()
    for i, tensor in enumerate(tensor_list):
        out = torch.where(slot_map == i, tensor, out)
    return out
def set_weights_recursive(self, **kwargs):
    """Set attributes on this object and on every nested ``Stylizer``.

    Each keyword whose name already exists as an attribute of ``self`` is
    replaced by a one-element list holding the given value. The same keywords
    are then forwarded to every ``Stylizer`` stored directly as an instance
    attribute and to every ``Stylizer`` found inside list attributes whose
    name is in ``BLOCK_NAMES``.
    """
    for key in kwargs:
        if hasattr(self, key):
            setattr(self, key, [kwargs[key]])

    # Directly nested stylizers.
    children = [member for member in vars(self).values() if isinstance(member, Stylizer)]
    for child in children:
        child.set_weights_recursive(**kwargs)

    # Stylizers held in the block lists.
    for block_attr in BLOCK_NAMES:
        blocks = getattr(self, block_attr, None)
        if not isinstance(blocks, list):
            continue
        for block in blocks:
            if isinstance(block, Stylizer):
                block.set_weights_recursive(**kwargs)
setattr(a, attr, val_b) return a return b for attr in vars(self): if attr in BLOCK_NAMES: merged = recursive_merge(getattr(self, attr), getattr(other, attr, []), path=attr) elif hasattr(other, attr): merged = recursive_merge(getattr(self, attr), getattr(other, attr), path=attr) else: continue setattr(self, attr, merged) def set_len(self, h_len, w_len, img_slice, txt_slice, HEADS): self.h_len = h_len self.w_len = w_len self.img_slice = img_slice self.txt_slice = txt_slice self.img_len = h_len * w_len self.HEADS = HEADS @staticmethod def middle_slice(length, weight): """ Returns a slice object that selects the middle `weight` fraction of a dimension. Example: weight=1.0 → full slice; weight=0.5 → middle 50% """ if weight >= 1.0: return slice(None) wr = int((length * (1 - weight)) // 2) return slice(wr, -wr if wr > 0 else None) @staticmethod def get_outer_slice(x, weight): if weight >= 0.0: return x length = x.shape[-2] wr = int((length * (1 - (-weight))) // 2) return torch.cat([x[...,:wr,:], x[...,-wr:,:]], dim=-2) @staticmethod def restore_outer_slice(x, x_outer, weight): if weight >= 0.0: return x length = x.shape[-2] wr = int((length * (1 - (-weight))) // 2) x[...,:wr,:] = x_outer[...,:wr,:] x[...,-wr:,:] = x_outer[...,-wr:,:] return x def __call__(self, x, attr): if x.shape[0] == 1 and not self.KONTEXT: return x weight_list = getattr(self, attr) weights_all_zero = all(weight == 0.0 for weight in weight_list) if weights_all_zero: return x #self.HEADS=24 #x_ndim = x.ndim #if x_ndim == 3: # B, HW, C = x.shape # if x.shape[-2] != self.HEADS and self.HEADS != 0: # x = x.reshape(B,self.HEADS,HW,-1) HEAD_DIM = x.shape[1] if HEAD_DIM == self.HEADS: B, HEAD_DIM, HW, C = x.shape x = x.reshape(B, HW, C*HEAD_DIM) if hasattr(self, "KONTEXT") and self.KONTEXT == 1: x = x.reshape(2, x.shape[1] // 2, x.shape[2]) txt_slice, img_slice, ktx_slice = self.txt_slice, self.img_slice, None if hasattr(self, "KONTEXT") and self.KONTEXT == 2: ktx_slice = self.img_slice # slice(2 * 
self.img_slice.start, None) img_slice = slice(2 * self.img_slice.start, self.img_slice.start) txt_slice = slice(None, 2 * self.txt_slice.stop) weights_all_one = all(weight == 1.0 for weight in weight_list) methods_all_scattersort = all(name == "scattersort" for name in self.method) masks_all_none = all(mask is None for mask in self.mask) if weights_all_one and methods_all_scattersort and len(weight_list) > 1 and masks_all_none: buf = Stylizer.buffer buf['src_idx'] = x[0:1].argsort(dim=-2) buf['ref_sorted'], buf['ref_idx'] = x[1:].reshape(1, -1, x.shape[-1]).sort(dim=-2) buf['src'] = buf['ref_sorted'][:,::len(weight_list)].expand_as(buf['src_idx']) # interleave_stride = len(weight_list) x[0:1] = x[0:1].scatter_(dim=-2, index=buf['src_idx'], src=buf['src'],) else: for i, (weight, mask) in enumerate(zip(weight_list, self.mask)): if mask is not None: x01 = x[0:1].clone() slc = Stylizer.middle_slice(x.shape[-2], weight) #slc = slice(None) txt_method_name = self.method[i].removeprefix("tiled_") txt_method = getattr(self, txt_method_name) method_name = self.method[i].removeprefix("tiled_") if self.img_len > x.shape[-2] or self.h_len < 0 else self.method[i] method = getattr(self, method_name) apply_to = self.apply_to[i] if weight == 0.0: continue else: # if weight == 1.0: if weight > 0 and weight < 1: x_clone = x.clone() if self.img_len == x.shape[-2] or apply_to == "img+txt" or self.h_len < 0: x = method(x, idx=i+1, slc=slc) elif self.img_len < x.shape[-2]: if "img" in apply_to: x[...,img_slice,:] = method(x[...,img_slice,:], idx=i+1, slc=slc) #if ktx_slice is not None: # x[...,ktx_slice,:] = method(x[...,ktx_slice,:], idx=i+1) #x[:,:self.img_len,:] = method(x[:,:self.img_len,:], idx=i+1) if "txt" in apply_to: x[...,txt_slice,:] = txt_method(x[...,txt_slice,:], idx=i+1, slc=slc) #x[:,self.img_len:,:] = method(x[:,self.img_len:,:], idx=i+1) if not "img" in apply_to and not "txt" in apply_to: pass else: x = method(x, idx=i+1, slc=slc) if weight > 0 and weight < 1 and 
txt_method_name != "scattersort": x = torch.lerp(x_clone, x, weight) #else: # x = torch.lerp(x, method(x.clone(), idx=i+1), weight) if mask is not None: x[0:1,...,img_slice,:] = torch.lerp(x01[...,img_slice,:], x[0:1,...,img_slice,:], mask.view(1, -1, 1)) if ktx_slice is not None: x[0:1,...,ktx_slice,:] = torch.lerp(x01[...,ktx_slice,:], x[0:1,...,ktx_slice,:], mask.view(1, -1, 1)) #x[0:1,:self.img_len] = torch.lerp(x01[:,:self.img_len], x[0:1,:self.img_len], mask.view(1, -1, 1)) #if x_ndim == 3: # return x.view(B,HW,C) if hasattr(self, "KONTEXT") and self.KONTEXT == 1: x = x.reshape(1, x.shape[1] * 2, x.shape[2]) if HEAD_DIM == self.HEADS: return x.reshape(B, HEAD_DIM, HW, C) else: return x def WCT(self, x, idx=1): Stylizer.CLS_WCT.set(x[idx:idx+1]) x[0:1] = Stylizer.CLS_WCT.get(x[0:1]) return x def WCT2(self, x, idx=1): Stylizer.CLS_WCT2.set(x[idx:idx+1], self.h_len, self.w_len) x[0:1] = Stylizer.CLS_WCT2.get(x[0:1], self.h_len, self.w_len) return x @staticmethod def AdaIN_(x, y, eps: float = 1e-7) -> torch.Tensor: mean_c = x.mean(-2, keepdim=True) std_c = x.std (-2, keepdim=True).add_(eps) # in-place add mean_s = y.mean (-2, keepdim=True) std_s = y.std (-2, keepdim=True).add_(eps) x.sub_(mean_c).div_(std_c).mul_(std_s).add_(mean_s) # in-place chain return x def AdaIN(self, x, idx=1, eps: float = 1e-7) -> torch.Tensor: mean_c = x[0:1].mean(-2, keepdim=True) std_c = x[0:1].std (-2, keepdim=True).add_(eps) # in-place add mean_s = x[idx:idx+1].mean (-2, keepdim=True) std_s = x[idx:idx+1].std (-2, keepdim=True).add_(eps) x[0:1].sub_(mean_c).div_(std_c).mul_(std_s).add_(mean_s) # in-place chain return x def injection(self, x:torch.Tensor, idx=1) -> torch.Tensor: x[0:1] = x[idx:idx+1] return x @staticmethod def injection_(x:torch.Tensor, y:torch.Tensor) -> torch.Tensor: return y @staticmethod def passthrough(x:torch.Tensor, idx=1) -> torch.Tensor: return x @staticmethod def decompose_magnitude_direction(x, dim=-1, eps=1e-8): magnitude = x.norm(p=2, dim=dim, 
keepdim=True) direction = x / (magnitude + eps) return magnitude, direction @staticmethod def scattersort_dir_(x, y, dim=-2): #buf = Stylizer.buffer #buf['src_sorted'], buf['src_idx'] = x.sort(dim=-2) #buf['ref_sorted'], buf['ref_idx'] = y.sort(dim=-2) #mag, _ = Stylizer.decompose_magnitude_direction(buf['src_sorted'], dim) #_, dir = Stylizer.decompose_magnitude_direction(buf['ref_sorted'], dim) mag, _ = Stylizer.decompose_magnitude_direction(x.to(torch.float64), dim) buf = Stylizer.buffer buf['src_idx'] = x.argsort(dim=-2) buf['ref_sorted'], buf['ref_idx'] = y .sort(dim=-2) x.scatter_(dim=-2, index=buf['src_idx'], src=buf['ref_sorted'].expand_as(buf['src_idx'])) _, dir = Stylizer.decompose_magnitude_direction(x.to(torch.float64), dim) return (mag * dir).to(x) @staticmethod def scattersort_dir2_(x, y, dim=-2): #buf = Stylizer.buffer #buf['src_sorted'], buf['src_idx'] = x.sort(dim=-2) #buf['ref_sorted'], buf['ref_idx'] = y.sort(dim=-2) #mag, _ = Stylizer.decompose_magnitude_direction(buf['src_sorted'], dim) #_, dir = Stylizer.decompose_magnitude_direction(buf['ref_sorted'], dim) buf = Stylizer.buffer buf['src_sorted'], buf['src_idx'] = x.sort(dim=dim) buf['ref_sorted'], buf['ref_idx'] = y.sort(dim=dim) buf['x_sub'], buf['x_sub_idx'] = buf['src_sorted'].sort(dim=-1) buf['y_sub'], buf['y_sub_idx'] = buf['ref_sorted'].sort(dim=-1) mag, _ = Stylizer.decompose_magnitude_direction(buf['x_sub'].to(torch.float64), -1) _, dir = Stylizer.decompose_magnitude_direction(buf['y_sub'].to(torch.float64), -1) buf['y_sub'] = (mag * dir).to(x) buf['ref_sorted'].scatter_(dim=-1, index=buf['y_sub_idx'], src=buf['y_sub'].expand_as(buf['y_sub_idx'])) mag, _ = Stylizer.decompose_magnitude_direction(buf['src_sorted'].to(torch.float64), dim) _, dir = Stylizer.decompose_magnitude_direction(buf['ref_sorted'].to(torch.float64), dim) buf['ref_sorted'] = (mag * dir).to(x) x.scatter_(dim=dim, index=buf['src_idx'], src=buf['ref_sorted'].expand_as(buf['src_idx'])) return x @staticmethod def 
@staticmethod
def scattersort_(x, y, slc=slice(None)):
    """Rank-match ``x`` to ``y`` along dim -2, in place, optionally on a rank window.

    The argsort of ``x`` and the sorted values/indices of ``y`` are stored in
    the class-level ``Stylizer.buffer`` dict under ``'src_idx'``,
    ``'ref_sorted'`` and ``'ref_idx'``. ``slc`` selects which ranks (positions
    in sorted order along dim -2) are written; the default writes all of them.

    Returns:
        ``x`` (the result of ``scatter_``).
    """
    cache = Stylizer.buffer
    cache['src_idx'] = x.argsort(dim=-2)
    cache['ref_sorted'], cache['ref_idx'] = y.sort(dim=-2)

    rank_index  = cache['src_idx'][..., slc, :]
    rank_values = cache['ref_sorted'][..., slc, :].expand_as(rank_index)
    return x.scatter_(dim=-2, index=rank_index, src=rank_values)
Stylizer.get_tiles_as_strided(den, self.h_tile[idx-1], self.w_tile[idx-1]) ref_tile = Stylizer.get_tiles_as_strided(style, self.h_tile[idx-1], self.w_tile[idx-1]) # rearrange for vmap to run on (nH, nW) ( as outer axes) tiles_v = tiles .permute(2, 3, 0, 1, 4, 5) # (nH, nW, B, C, tile_h, tile_w) ref_tile_v = ref_tile.permute(2, 3, 0, 1, 4, 5) # (nH, nW, B, C, tile_h, tile_w) # vmap over spatial dimms (nH, nW)... num of tiles high, num tiles wide vmap2 = torch.vmap(torch.vmap(Stylizer.apply_scattersort_per_tile, in_dims=0), in_dims=0) result = vmap2(tiles_v, ref_tile_v) # (nH, nW, B, C, tile_h, tile_w) # --> (B, C, nH, nW, tile_h, tile_w) result = result.permute(2, 3, 0, 1, 4, 5) #( B, C, nH, nW, tile_h, tile_w) # in-place copy, werx if result has same shape/strides as tiles... overwrites same mem location "content" is using tiles.copy_(result) return x def tiled_AdaIN(self, x, idx=1): #if HDModel.RECON_MODE: # return denoised_embed #den = x[0:1] [:,:self.img_len,:].view(-1, 2560, self.h_len, self.w_len) #style = x[idx:idx+1][:,:self.img_len,:].view(-1, 2560, self.h_len, self.w_len) C = x.shape[-1] den = x[0:1] [:,self.img_slice,:].reshape(-1, C, self.h_len, self.w_len) style = x[idx:idx+1][:,self.img_slice,:].reshape(-1, C, self.h_len, self.w_len) tiles = Stylizer.get_tiles_as_strided(den, self.h_tile[idx-1], self.w_tile[idx-1]) ref_tile = Stylizer.get_tiles_as_strided(style, self.h_tile[idx-1], self.w_tile[idx-1]) # rearrange for vmap to run on (nH, nW) ( as outer axes) tiles_v = tiles .permute(2, 3, 0, 1, 4, 5) # (nH, nW, B, C, tile_h, tile_w) ref_tile_v = ref_tile.permute(2, 3, 0, 1, 4, 5) # (nH, nW, B, C, tile_h, tile_w) # vmap over spatial dimms (nH, nW)... 
num of tiles high, num tiles wide vmap2 = torch.vmap(torch.vmap(Stylizer.apply_AdaIN_per_tile, in_dims=0), in_dims=0) result = vmap2(tiles_v, ref_tile_v) # (nH, nW, B, C, tile_h, tile_w) # --> (B, C, nH, nW, tile_h, tile_w) result = result.permute(2, 3, 0, 1, 4, 5) #( B, C, nH, nW, tile_h, tile_w) # in-place copy, werx if result has same shape/strides as tiles... overwrites same mem location "content" is using tiles.copy_(result) return x @staticmethod def get_tiles_as_strided(x, tile_h, tile_w): B, C, H, W = x.shape stride = x.stride() nH = H // tile_h nW = W // tile_w tiles = x.as_strided( size=(B, C, nH, nW, tile_h, tile_w), stride=(stride[0], stride[1], stride[2] * tile_h, stride[3] * tile_w, stride[2], stride[3]) ) return tiles # shape: (B, C, nH, nW, tile_h, tile_w) @staticmethod def apply_scattersort_per_tile(tile, ref_tile): flat = tile .flatten(-2, -1) ref_flat = ref_tile.flatten(-2, -1) sorted_ref, _ = ref_flat .sort(dim=-1) src_sorted, src_idx = flat.sort(dim=-1) out = flat.scatter(dim=-1, index=src_idx, src=sorted_ref) return out.view_as(tile) @staticmethod def apply_AdaIN_per_tile(tile, ref_tile, eps: float = 1e-7): mean_c = tile.mean(-2, keepdim=True) std_c = tile.std (-2, keepdim=True).add_(eps) # in-place add mean_s = ref_tile.mean (-2, keepdim=True) std_s = ref_tile.std (-2, keepdim=True).add_(eps) tile.sub_(mean_c).div_(std_c).mul_(std_s).add_(mean_s) # in-place chain return tile class StyleMMDiT_Attn(Stylizer): def __init__(self, mode): super().__init__() self.q_proj = [0.0] self.k_proj = [0.0] self.v_proj = [0.0] self.q_norm = [0.0] self.k_norm = [0.0] self.out = [0.0] class StyleMMDiT_FF(Stylizer): # these hit img or joint only, never txt def __init__(self, mode): super().__init__() self.ff_1 = [0.0] self.ff_1_silu = [0.0] self.ff_3 = [0.0] self.ff_13 = [0.0] self.ff_2 = [0.0] class StyleMMDiT_MoE(Stylizer): # these hit img or joint only, never txt def __init__(self, mode): super().__init__() self.FF_SHARED = StyleMMDiT_FF(mode) 
class StyleMMDiT_SubBlock(Stylizer):
    """Per-stream style weights for one MMDiT sub-block.

    Holds an ``ATTN`` stylizer for the attention internals plus one
    single-element weight list (initialised to ``[0.0]``) for each hook point
    named in ``_WEIGHT_NAMES``.
    """

    _WEIGHT_NAMES = (
        "attn_norm", "attn_norm_mod", "attn", "attn_gated", "attn_res",
        "ff_norm",   "ff_norm_mod",   "ff",   "ff_gated",   "ff_res",
    )

    def __init__(self, mode):
        super().__init__()

        self.ATTN = StyleMMDiT_Attn(mode)  # options for attn itself: qkv proj, qk norm, attn out

        # A fresh list per attribute so that no two weights share storage.
        for weight_name in self._WEIGHT_NAMES:
            setattr(self, weight_name, [0.0])

        self.mask = [None]

    def set_len(self, h_len, w_len, img_slice, txt_slice, HEADS):
        """Record the sequence geometry on this block and on ``ATTN``."""
        geometry = (h_len, w_len, img_slice, txt_slice, HEADS)
        super().set_len(*geometry)
        self.ATTN.set_len(*geometry)
class StyleUNet_TransformerBlock(Stylizer):
    """Style weights for one UNet transformer block.

    Owns three nested stylizers (``ATTN1`` self-attention, ``FF``
    feed-forward, ``ATTN2`` cross-attention) and one single-element weight
    list per hook point of the block itself.
    """

    def __init__(self, mode):
        super().__init__()

        self.ATTN1 = StyleUNet_Attn(mode) # self-attn
        self.FF    = StyleUNet_FF  (mode)
        self.ATTN2 = StyleUNet_Attn(mode) # cross-attn

        self.self_attn      = [0.0]
        self.ff             = [0.0]
        self.cross_attn     = [0.0]

        self.self_attn_res  = [0.0]
        self.cross_attn_res = [0.0]
        self.ff_res         = [0.0]

        self.norm1 = [0.0]
        self.norm2 = [0.0]
        self.norm3 = [0.0]

    def set_len(self, h_len, w_len, img_slice, txt_slice, HEADS):
        """Record the sequence geometry on this block and on all nested stylizers.

        ``FF`` is now included: it was previously skipped, which left it
        without ``img_slice``/``txt_slice`` (those attributes are only created
        by ``set_len``), whereas the MMDiT IMG/TXT blocks in this file do
        forward ``set_len`` to their ``FF``.
        """
        super().set_len(h_len, w_len, img_slice, txt_slice, HEADS)
        self.ATTN1.set_len(h_len, w_len, img_slice, txt_slice, HEADS)
        self.FF   .set_len(h_len, w_len, img_slice, txt_slice, HEADS)
        self.ATTN2.set_len(h_len, w_len, img_slice, txt_slice, HEADS)
self.TFMR = StyleUNet_TransformerBlock(mode) self.spatial_norm_in = [0.0] self.spatial_proj_in = [0.0] self.spatial_transformer_block = [0.0] self.spatial_transformer = [0.0] self.spatial_proj_out = [0.0] self.spatial_res = [0.0] def set_len(self, h_len, w_len, img_slice, txt_slice, HEADS): super().set_len(h_len, w_len, img_slice, txt_slice, HEADS) self.TFMR.set_len(h_len, w_len, img_slice, txt_slice, HEADS) class StyleUNet_ResBlock(Stylizer): def __init__(self, mode): super().__init__() self.in_norm = [0.0] self.in_silu = [0.0] self.in_conv = [0.0] self.emb_silu = [0.0] self.emb_linear = [0.0] self.emb_res = [0.0] self.out_norm = [0.0] self.out_silu = [0.0] self.out_conv = [0.0] self.residual = [0.0] class StyleUNet_BaseBlock(Stylizer): def __init__(self, mode="passthrough"): self.resample_block = StyleUNet_Resample(mode) self.res_block = StyleUNet_ResBlock(mode) self.spatial_block = StyleUNet_SpatialTransformer(mode) self.resample = [0.0] self.res = [0.0] self.spatial = [0.0] self.mask = [None] self.attn_mask = [None] self.KONTEXT = 0 def set_len(self, h_len, w_len, img_slice, txt_slice, HEADS): self.h_len = h_len self.w_len = w_len self.img_len = h_len * w_len self.img_slice = img_slice self.txt_slice = txt_slice self.HEADS = HEADS self.resample_block.set_len(h_len, w_len, img_slice, txt_slice, HEADS) self.res_block .set_len(h_len, w_len, img_slice, txt_slice, HEADS) self.spatial_block .set_len(h_len, w_len, img_slice, txt_slice, HEADS) for i, mask in enumerate(self.mask): if mask is not None and mask.ndim > 1: self.mask[i] = F.interpolate(mask.unsqueeze(0), size=(h_len, w_len)).flatten().to(torch.bfloat16).cuda() self.resample_block.mask = self.mask self.res_block.mask = self.mask self.spatial_block.mask = self.mask self.spatial_block.TFMR.mask = self.mask for i, mask in enumerate(self.attn_mask): if mask is not None and mask.ndim > 1: self.attn_mask[i] = F.interpolate(mask.unsqueeze(0), size=(h_len, w_len)).flatten().to(torch.bfloat16).cuda() 
self.spatial_block.TFMR.ATTN1.mask = self.attn_mask def __call__(self, x, attr): B, C, H, W = x.shape x = super().__call__(x.reshape(B, H*W, C), attr) return x.reshape(B,C,H,W) class StyleUNet_InputBlock(StyleUNet_BaseBlock): def __init__(self, mode="passthrough"): super().__init__(mode) class StyleUNet_MiddleBlock(StyleUNet_BaseBlock): def __init__(self, mode="passthrough"): super().__init__(mode) class StyleUNet_OutputBlock(StyleUNet_BaseBlock): def __init__(self, mode="passthrough"): super().__init__(mode) class Style_Model(Stylizer): def __init__(self, dtype=torch.float64, device=torch.device("cuda")): super().__init__(dtype, device) self.guides = [] self.GUIDES_INITIALIZED = False #self.double_blocks = [StyleMMDiT_DoubleBlock() for _ in range(100)] #self.single_blocks = [StyleMMDiT_SingleBlock() for _ in range(100)] self.h_len = -1 self.w_len = -1 self.img_len = -1 self.h_tile = [-1] self.w_tile = [-1] self.proj_in = [0.0] # these are for img only! not sliced self.proj_out = [0.0] self.cond_pos = [None] self.cond_neg = [None] self.noise_mode = "update" self.recon_lure = "none" self.data_shock = "none" self.data_shock_start_step = 0 self.data_shock_end_step = 0 self.Retrojector = None self.Endojector = None self.IMG_1ST = True self.HEADS = 0 self.KONTEXT = 0 def __call__(self, x, attr): if x.shape[0] == 1 and not self.KONTEXT: return x weight_list = getattr(self, attr) weights_all_zero = all(weight == 0.0 for weight in weight_list) if weights_all_zero: return x """x_ndim = x.ndim if x_ndim == 4: B, HEAD, HW, C = x.shape if x_ndim == 3: B, HW, C = x.shape if x.shape[-2] != self.HEADS and self.HEADS != 0: x = x.reshape(B,self.HEADS,HW,-1)""" HEAD_DIM = x.shape[1] if HEAD_DIM == self.HEADS: B, HEAD_DIM, HW, C = x.shape x = x.reshape(B, HW, C*HEAD_DIM) if self.KONTEXT == 1: x = x.reshape(2, x.shape[1] // 2, x.shape[2]) weights_all_one = all(weight == 1.0 for weight in weight_list) methods_all_scattersort = all(name == "scattersort" for name in self.method) 
def set_len(self, h_len, w_len, img_slice, txt_slice, HEADS):
    """Record the sequence geometry and resize any multi-dimensional masks.

    Stores ``h_len``, ``w_len``, their product as ``img_len``, the two slices
    and ``HEADS`` on ``self``. Every entry of ``self.mask`` that is not
    ``None`` and has more than one dimension is replaced by a version
    interpolated to ``(h_len, w_len)``, flattened, cast to bfloat16 and moved
    to CUDA.
    """
    self.h_len, self.w_len = h_len, w_len
    self.img_len = h_len * w_len
    self.img_slice, self.txt_slice = img_slice, txt_slice
    self.HEADS = HEADS

    for slot, entry in enumerate(self.mask):
        if entry is None or entry.ndim <= 1:
            continue
        resized = F.interpolate(entry.unsqueeze(0), size=(h_len, w_len))
        self.mask[slot] = resized.flatten().to(torch.bfloat16).cuda()
def apply_style_conditioning(self, UNCOND, base_context, base_y=None, base_llama3=None):
    """Stack the base conditioning with the stored style conditionings.

    Row 0 of every returned tensor is the base conditioning; row ``i + 1``
    holds the i-th entry of ``self.cond_pos`` (or ``self.cond_neg`` when
    ``UNCOND`` is truthy). Entries that are ``None`` keep a copy of the base
    row. Everything is zero-padded to the longest length found.

    Each style entry is indexed as ``entry[0][0]`` for the context tensor and
    ``entry[0][1]`` for a dict providing ``'conditioning_llama3'`` and
    ``'pooled_output'`` (only read when the matching base tensor is given).

    Args:
        UNCOND: Selects ``self.cond_neg`` when truthy, else ``self.cond_pos``.
        base_context: 3-D context tensor; padded along dim -2.
        base_y: Optional 2-D pooled tensor; padded along dim -1.
        base_llama3: Optional 4-D tensor; padded along dim -2.

    Returns:
        Tuple ``(context, y, llama3)``; ``y`` / ``llama3`` are ``None`` when
        the corresponding base tensor was not supplied.
    """

    def get_max_token_lengths(style_conditioning, base_context, base_y=None, base_llama3=None):
        # Longest length per tensor kind; -1 marks "tensor not in use".
        context_max_len = base_context.shape[-2]
        llama3_max_len = base_llama3.shape[-2] if base_llama3 is not None else -1
        y_max_len = base_y.shape[-1] if base_y is not None else -1

        for style_cond in style_conditioning:
            if style_cond is None:
                continue
            context_max_len = max(context_max_len, style_cond[0][0].shape[-2])
            if base_llama3 is not None:
                llama3_max_len = max(llama3_max_len, style_cond[0][1]['conditioning_llama3'].shape[-2])
            if base_y is not None:
                y_max_len = max(y_max_len, style_cond[0][1]['pooled_output'].shape[-1])

        return context_max_len, llama3_max_len, y_max_len

    def pad_to_len(x, target_len, pad_value=0.0, dim=1):
        # Right-pad x along `dim` up to target_len (negative target = no-op).
        if target_len < 0:
            return x
        cur_len = x.shape[dim]
        if cur_len == target_len:
            return x
        # F.pad lists (left, right) pairs starting from the LAST dimension,
        # so skip one (0, 0) pair per dimension that follows `dim`. The
        # previous fixed spec (0, 0, 0, n) always padded dim -2, which for
        # the 2-D pooled tensor (dim=-1) grew the batch axis instead.
        trailing_dims = x.ndim - 1 - (dim % x.ndim)
        pad_spec = (0, 0) * trailing_dims + (0, target_len - cur_len)
        return F.pad(x, pad_spec, value=pad_value)

    style_conditioning = self.cond_pos if not UNCOND else self.cond_neg

    context_max_len, llama3_max_len, y_max_len = get_max_token_lengths(
        style_conditioning = style_conditioning,
        base_context       = base_context,
        base_y             = base_y,
        base_llama3        = base_llama3,
    )

    bsz_style = len(style_conditioning)

    # Start from copies of the base so rows without a style entry stay valid.
    context = base_context.repeat(bsz_style + 1, 1, 1)
    y       = base_y.repeat(bsz_style + 1, 1) if base_y is not None else None
    llama3  = base_llama3.repeat(bsz_style + 1, 1, 1, 1) if base_llama3 is not None else None

    context = pad_to_len(context, context_max_len, dim=-2)
    llama3  = pad_to_len(llama3, llama3_max_len, dim=-2) if base_llama3 is not None else None
    y       = pad_to_len(y, y_max_len, dim=-1) if base_y is not None else None

    for ci, style_cond in enumerate(style_conditioning):
        if style_cond is None:
            continue
        row = slice(ci + 1, ci + 2)
        # .to(<tensor>) matches the dtype and device of the destination.
        context[row] = pad_to_len(style_cond[0][0], context_max_len, dim=-2).to(context)
        if llama3 is not None:
            llama3[row] = pad_to_len(style_cond[0][1]['conditioning_llama3'], llama3_max_len, dim=-2).to(llama3)
        if y is not None:
            y[row] = pad_to_len(style_cond[0][1]['pooled_output'], y_max_len, dim=-1).to(y)

    return context, y, llama3
class StyleUNet_Model(Style_Model):
    """Style model for UNet backbones, holding per-stage block stylizers.

    Creates 100 stylizer objects for each of the input, middle and output
    stages and forwards geometry updates to all of them.
    """

    def __init__(self, dtype=torch.float64, device=torch.device("cuda")):
        super().__init__(dtype, device)
        blocks_per_stage = 100
        self.input_blocks  = [StyleUNet_InputBlock()  for _ in range(blocks_per_stage)]
        self.middle_blocks = [StyleUNet_MiddleBlock() for _ in range(blocks_per_stage)]
        self.output_blocks = [StyleUNet_OutputBlock() for _ in range(blocks_per_stage)]

    def set_len(self, h_len, w_len, img_slice, txt_slice, HEADS):
        """Update this model's geometry, then every stage block's, in order."""
        super().set_len(h_len, w_len, img_slice, txt_slice, HEADS)

        for stage in (self.input_blocks, self.middle_blocks, self.output_blocks):
            for block in stage:
                block.set_len(h_len, w_len, img_slice, txt_slice, HEADS)

    def __call__(self, x, attr):
        """Apply the parent stylizer to a 4-D tensor and restore its shape.

        NOTE(review): the tensor is reinterpreted with reshape only (no
        permute) on the way in and on the way out - confirm this layout is
        what the parent ``__call__`` expects.
        """
        batch, channels, height, width = x.shape
        tokens = x.reshape(batch, height * width, channels)
        styled = super().__call__(tokens, attr)
        return styled.reshape(batch, channels, height, width)
def sinusoidal_embedding_1d(dim, position):
    """Return sinusoidal embeddings for a 1-D tensor of positions.

    Args:
        dim: Embedding width; must be even.
        position: 1-D tensor of positions (cast to float32 internally).

    Returns:
        float32 tensor of shape ``[len(position), dim]`` whose first half of
        columns holds cosines and second half holds sines.
    """
    assert dim % 2 == 0
    half = dim // 2
    pos = position.type(torch.float32)

    # Frequencies 10000^(-i / half) for i in [0, half).
    exponents = -torch.arange(half).to(pos).div(half)
    inv_freq = torch.pow(10000, exponents)

    angles = torch.outer(pos, inv_freq)
    return torch.cat((torch.cos(angles), torch.sin(angles)), dim=1)
def attention_weights(q, k):
    """Compute softmax(q @ k^T / sqrt(d)) using in-place ops to save memory.

    Args:
        q: Query tensor ``[..., Lq, d]``.
        k: Key tensor ``[..., Lk, d]``.

    Returns:
        Attention weights ``[..., Lq, Lk]``. Any NaN left after normalisation
        (for example from non-finite scores) is replaced by 0.
    """
    scores = torch.matmul(q, k.transpose(-2, -1))
    scores.div_(math.sqrt(q.size(-1)))

    # Subtract the per-row maximum before exponentiating. Softmax is
    # shift-invariant, and without this large logits overflow exp() to inf,
    # turn into NaN on division and were then silently zeroed.
    if scores.size(-1) > 0:
        scores.sub_(scores.amax(dim=-1, keepdim=True))

    torch.exp(scores, out=scores)
    summed = torch.sum(scores, dim=-1, keepdim=True)
    scores /= summed
    return scores.nan_to_num_(0.0, 65504., -65504.)
class ReWanSlidingSelfAttention(nn.Module):
    """Self-attention in which each frame attends to a window of frames.

    The token sequence is treated as consecutive frames of
    ``grid_sizes[1] * grid_sizes[2]`` tokens. Queries of frame ``i`` attend
    only to the keys/values of the frames selected by ``_window_frames``.

    Attributes:
        winderz: Window size in frames (default 15).
        winderz_type: ``"standard"`` (clamped, shifted at the sequence ends)
            or ``"circular"`` (wraps around).
    """

    def __init__(self,
                dim,
                num_heads,
                window_size         = (-1, -1),
                qk_norm             = True,
                eps                 = 1e-6,
                operation_settings  = {}):
        assert dim % num_heads == 0
        super().__init__()
        self.dim          = dim
        self.num_heads    = num_heads
        self.head_dim     = dim // num_heads
        self.window_size  = window_size
        self.qk_norm      = qk_norm
        self.eps          = eps
        self.winderz      = 15
        self.winderz_type = "standard"

        # operation_settings is only read, never mutated.
        ops    = operation_settings.get("operations")
        device = operation_settings.get("device")
        dtype  = operation_settings.get("dtype")

        # layers (creation order kept: q, k, v, o, norm_q, norm_k)
        self.q = ops.Linear(dim, dim, device=device, dtype=dtype)
        self.k = ops.Linear(dim, dim, device=device, dtype=dtype)
        self.v = ops.Linear(dim, dim, device=device, dtype=dtype)
        self.o = ops.Linear(dim, dim, device=device, dtype=dtype)
        self.norm_q = RMSNorm(dim, eps=eps, elementwise_affine=True, device=device, dtype=dtype) if qk_norm else nn.Identity()
        self.norm_k = RMSNorm(dim, eps=eps, elementwise_affine=True, device=device, dtype=dtype) if qk_norm else nn.Identity()

    @staticmethod
    def _window_frames(center, total_frames, window_size, window_type):
        """Return the frame indices that frame ``center`` attends to.

        Raises:
            ValueError: If ``window_type`` is not ``"standard"`` or
                ``"circular"``.
        """
        half_window = window_size // 2

        if window_type == "standard":
            start = max(0, center - half_window)
            end   = min(total_frames, center + half_window + 1)
            # Shift the window inward if clamping made it too short.
            if end - start < window_size:
                if start == 0:
                    end = min(total_frames, start + window_size)
                elif end == total_frames:
                    start = max(0, end - window_size)
            return list(range(start, end))

        if window_type == "circular":
            return [(center + offset) % total_frames
                    for offset in range(-half_window, half_window + 1)]

        raise ValueError(f"Unknown winderz_type: {window_type!r}")

    def forward(self, x, freqs, mask=None, grid_sizes=None):
        r"""
        Args:
            x(Tensor): Shape [B, L, C]
            freqs(Tensor): Rope freqs, passed through to ``apply_rope``
            mask: Accepted for interface compatibility; not used here.
            grid_sizes: Indexable; ``grid_sizes[1] * grid_sizes[2]`` is the
                number of tokens per frame. Required.

        Returns:
            Tensor of shape [B, L, C].
        """
        if grid_sizes is None:
            raise ValueError("grid_sizes is required for sliding-window attention")

        b, s, n, d = *x.shape[:2], self.num_heads, self.head_dim

        q = self.norm_q(self.q(x)).view(b, s, n, d)
        k = self.norm_k(self.k(x)).view(b, s, n, d)
        v = self.v(x).view(b, s, n * d)
        q, k = apply_rope(q, k, freqs)

        img_len      = grid_sizes[1] * grid_sizes[2]
        total_frames = int(q.shape[1] // img_len)

        q_ = q.view(b, s, n * d)
        k_ = k.view(b, s, n * d)

        # Token offsets inside one frame; shared by every window below.
        frame_offsets = torch.arange(img_len, device=q.device)

        x_list = []
        for i in range(total_frames):
            q_start = i * img_len
            q_end   = (i + 1) * img_len

            window_indices = self._window_frames(i, total_frames, self.winderz, self.winderz_type)

            # frame indices -> token indices, frame by frame in window order
            frame_starts  = torch.tensor(window_indices, device=q.device) * img_len
            token_indices = (frame_starts.unsqueeze(1) + frame_offsets).reshape(-1)

            x = optimized_attention(
                q_[:, q_start:q_end, :],                # [B, img_len, C]
                k_.index_select(1, token_indices),      # [B, window * img_len, C]
                v .index_select(1, token_indices),
                heads=self.num_heads,
            )
            x_list.append(x)

        x = torch.cat(x_list, dim=1)
        # Drop references to the large intermediates before the projection.
        del x_list, q, k, v, q_, k_

        x = self.o(x)
        return x
class ReWanSelfAttention(nn.Module):
    """Multi-head self-attention with rotary embeddings and an optional mask."""

    def __init__(self,
                dim,
                num_heads,
                window_size         = (-1, -1),
                qk_norm             = True,
                eps                 = 1e-6,
                operation_settings  = {}):
        assert dim % num_heads == 0
        super().__init__()
        self.dim         = dim
        self.num_heads   = num_heads
        self.head_dim    = dim // num_heads
        self.window_size = window_size
        self.qk_norm     = qk_norm
        self.eps         = eps

        ops    = operation_settings.get("operations")
        device = operation_settings.get("device")
        dtype  = operation_settings.get("dtype")

        # Projections first, then the optional q/k norms (same order as before).
        self.q = ops.Linear(dim, dim, device=device, dtype=dtype)
        self.k = ops.Linear(dim, dim, device=device, dtype=dtype)
        self.v = ops.Linear(dim, dim, device=device, dtype=dtype)
        self.o = ops.Linear(dim, dim, device=device, dtype=dtype)

        if qk_norm:
            self.norm_q = RMSNorm(dim, eps=eps, elementwise_affine=True, device=device, dtype=dtype)
        else:
            self.norm_q = nn.Identity()
        if qk_norm:
            self.norm_k = RMSNorm(dim, eps=eps, elementwise_affine=True, device=device, dtype=dtype)
        else:
            self.norm_k = nn.Identity()

    def forward(self, x, freqs, mask=None, grid_sizes=None):
        r"""
        Args:
            x(Tensor): Shape [B, L, C]
            freqs(Tensor): Rope freqs, passed through to ``apply_rope``
            mask: Optional attention mask. When given and its last dimension
                is non-empty, ``attention_pytorch`` is used with it;
                otherwise ``optimized_attention`` runs unmasked.
            grid_sizes: Accepted for interface compatibility; not used here.
        """
        batch, seq_len = x.shape[:2]
        heads, head_dim = self.num_heads, self.head_dim
        width = heads * head_dim

        q = self.norm_q(self.q(x)).view(batch, seq_len, heads, head_dim)
        k = self.norm_k(self.k(x)).view(batch, seq_len, heads, head_dim)
        v = self.v(x).view(batch, seq_len, width)

        q, k = apply_rope(q, k, freqs)
        q_flat = q.view(batch, seq_len, width)
        k_flat = k.view(batch, seq_len, width)

        use_mask = mask is not None and mask.shape[-1] > 0
        if use_mask:
            attended = attention_pytorch(
                q_flat,
                k_flat,
                v,
                heads=self.num_heads,
                mask=mask
            )
        else:
            attended = optimized_attention(
                q_flat,
                k_flat,
                v,
                heads=self.num_heads,
            )

        return self.o(attended)
class ReWanI2VCrossAttention(ReWanSelfAttention):   # image2video only
    """Cross-attention over text context plus a separate image (CLIP) context.

    Adds dedicated key/value projections for the image context and sums the
    two attention results before the shared output projection.
    """

    def __init__(self,
                dim,
                num_heads,
                window_size=(-1, -1),
                qk_norm=True,
                eps=1e-6,
                operation_settings={},
                ):
        super().__init__(dim, num_heads, window_size, qk_norm, eps, operation_settings=operation_settings)

        ops    = operation_settings.get("operations")
        device = operation_settings.get("device")
        dtype  = operation_settings.get("dtype")

        self.k_img = ops.Linear(dim, dim, device=device, dtype=dtype)
        self.v_img = ops.Linear(dim, dim, device=device, dtype=dtype)
        if qk_norm:
            self.norm_k_img = RMSNorm(dim, eps=eps, elementwise_affine=True, device=device, dtype=dtype)
        else:
            self.norm_k_img = nn.Identity()

    def forward(self, x, context, context_clip=None, mask=None, grid_sizes=None):
        r"""
        Args:
            x(Tensor): Shape [B, L1, C]
            context(Tensor): Shape [B, L2, C]
            context_clip(Tensor): Image context fed to ``k_img`` / ``v_img``.
            mask: Optional mask for the text attention. A resized copy is
                used for the image attention.
            grid_sizes: Accepted for interface compatibility; not used here.
        """
        mask_clip = None
        if mask is not None:
            # Rescale the mask's last axis by 257/512 for the image branch.
            # NOTE(review): presumably 512 text tokens vs 257 CLIP tokens
            # per region - confirm against the mask builder.
            clip_size = (mask.shape[0], 257 * mask.shape[1]//512)
            resized = F.interpolate(mask[None, None, ...].to(torch.float16), clip_size, mode='nearest-exact')
            mask_clip = resized.squeeze().to(mask.dtype)

        # query is shared; keys/values come from text and image separately
        q = self.norm_q(self.q(x))
        k = self.norm_k(self.k(context))
        v = self.v(context)
        k_img = self.norm_k_img(self.k_img(context_clip))
        v_img = self.v_img(context_clip)

        img_out = optimized_attention(q, k_img, v_img, heads=self.num_heads, mask=mask_clip)
        txt_out = optimized_attention(q, k, v, heads=self.num_heads, mask=mask)

        return self.o(txt_out + img_out)
class Head(nn.Module):
    """Output head: modulated LayerNorm followed by a linear projection.

    The projection width is ``prod(patch_size) * out_dim``.
    """

    def __init__(self, dim, out_dim, patch_size, eps=1e-6, operation_settings={}):
        super().__init__()
        self.dim        = dim
        self.out_dim    = out_dim
        self.patch_size = patch_size
        self.eps        = eps

        ops    = operation_settings.get("operations")
        device = operation_settings.get("device")
        dtype  = operation_settings.get("dtype")

        # layers
        projected_dim = math.prod(patch_size) * out_dim
        self.norm = ops.LayerNorm(dim, eps, elementwise_affine=False, device=device, dtype=dtype)
        self.head = ops.Linear(dim, projected_dim, device=device, dtype=dtype)

        # modulation (allocated uninitialised)
        self.modulation = nn.Parameter(torch.empty(1, 2, dim, device=device, dtype=dtype))

    def forward(self, x, e):
        r"""
        Args:
            x(Tensor): Shape [B, L1, C]
            e(Tensor): Shape [B, C]
        """
        modulation = comfy.model_management.cast_to(self.modulation, dtype=x.dtype, device=x.device)
        parts = (modulation + e.unsqueeze(1)).chunk(2, dim=1)
        shift, scale = parts[0], parts[1]
        return self.head(self.norm(x) * (1 + scale) + shift)
Args: model_type (`str`, *optional*, defaults to 't2v'): Model variant - 't2v' (text-to-video) or 'i2v' (image-to-video) patch_size (`tuple`, *optional*, defaults to (1, 2, 2)): 3D patch dimensions for video embedding (t_patch, h_patch, w_patch) text_len (`int`, *optional*, defaults to 512): Fixed length for text embeddings in_dim (`int`, *optional*, defaults to 16): Input video channels (C_in) dim (`int`, *optional*, defaults to 2048): Hidden dimension of the transformer ffn_dim (`int`, *optional*, defaults to 8192): Intermediate dimension in feed-forward network freq_dim (`int`, *optional*, defaults to 256): Dimension for sinusoidal time embeddings text_dim (`int`, *optional*, defaults to 4096): Input dimension for text embeddings out_dim (`int`, *optional*, defaults to 16): Output video channels (C_out) num_heads (`int`, *optional*, defaults to 16): Number of attention heads num_layers (`int`, *optional*, defaults to 32): Number of transformer blocks window_size (`tuple`, *optional*, defaults to (-1, -1)): Window size for local attention (-1 indicates global attention) qk_norm (`bool`, *optional*, defaults to True): Enable query/key normalization cross_attn_norm (`bool`, *optional*, defaults to False): Enable cross-attention normalization eps (`float`, *optional*, defaults to 1e-6): Epsilon value for normalization layers """ super().__init__() self.dtype = dtype operation_settings = {"operations": operations, "device": device, "dtype": dtype} assert model_type in ['t2v', 'i2v'] self.model_type = model_type self.patch_size = patch_size self.text_len = text_len self.in_dim = in_dim self.dim = dim self.ffn_dim = ffn_dim self.freq_dim = freq_dim self.text_dim = text_dim self.out_dim = out_dim self.num_heads = num_heads self.num_layers = num_layers self.window_size = window_size self.qk_norm = qk_norm self.cross_attn_norm = cross_attn_norm self.eps = eps # embeddings self.patch_embedding = operations.Conv3d( in_dim, dim, kernel_size=patch_size, stride=patch_size, 
device=operation_settings.get("device"), dtype=operation_settings.get("dtype")) #dtype=torch.float32) self.text_embedding = nn.Sequential( operations.Linear(text_dim, dim, device=operation_settings.get("device"), dtype=operation_settings.get("dtype")), nn.GELU(approximate='tanh'), operations.Linear(dim, dim, device=operation_settings.get("device"), dtype=operation_settings.get("dtype"))) self.time_embedding = nn.Sequential( operations.Linear(freq_dim, dim, device=operation_settings.get("device"), dtype=operation_settings.get("dtype")), nn.SiLU(), operations.Linear(dim, dim, device=operation_settings.get("device"), dtype=operation_settings.get("dtype"))) self.time_projection = nn.Sequential(nn.SiLU(), operations.Linear(dim, dim * 6, device=operation_settings.get("device"), dtype=operation_settings.get("dtype"))) # blocks cross_attn_type = 't2v_cross_attn' if model_type == 't2v' else 'i2v_cross_attn' self.blocks = nn.ModuleList([ ReWanAttentionBlock( cross_attn_type, dim, ffn_dim, num_heads, window_size, qk_norm, cross_attn_norm, eps, operation_settings=operation_settings) for _ in range(num_layers) ]) # head self.head = Head(dim, out_dim, patch_size, eps, operation_settings=operation_settings) d = dim // num_heads self.rope_embedder = EmbedND(dim=d, theta=10000.0, axes_dim=[d - 4 * (d // 6), 2 * (d // 6), 2 * (d // 6)]) if model_type == 'i2v': self.img_emb = MLPProj(1280, dim, operation_settings=operation_settings) else: self.img_emb = None def invert_patch_embedding(self, z: torch.Tensor, original_shape: torch.Size, grid_sizes: Optional[Tuple[int,int,int]] = None) -> torch.Tensor: import torch.nn.functional as F B, C_in, D, H, W = original_shape pD, pH, pW = self.patch_size sD, sH, sW = pD, pH, pW if z.ndim == 3: # [B, S, C_out] -> reshape to [B, C_out, D', H', W'] S = z.shape[1] if grid_sizes is None: Dp = D // pD Hp = H // pH Wp = W // pW else: Dp, Hp, Wp = grid_sizes C_out = z.shape[2] z = z.transpose(1, 2).reshape(B, C_out, Dp, Hp, Wp) else: B2, C_out, Dp, Hp, 
Wp = z.shape assert B2 == B, "Batch size mismatch... ya sharked it." # kncokout bias b = self.patch_embedding.bias.view(1, C_out, 1, 1, 1) z_nobias = z - b # 2D filter -> pinv w3 = self.patch_embedding.weight # [C_out, C_in, 1, pH, pW] w2 = w3.squeeze(2) # [C_out, C_in, pH, pW] out_ch, in_ch, kH, kW = w2.shape W_flat = w2.view(out_ch, -1) # [C_out, in_ch*pH*pW] W_pinv = torch.linalg.pinv(W_flat) # [in_ch*pH*pW, C_out] # merge depth for 2D unfold wackiness z2 = z_nobias.permute(0,2,1,3,4).reshape(B*Dp, C_out, Hp, Wp) # apply pinv ... get patch vectors z_flat = z2.reshape(B*Dp, C_out, -1) # [B*Dp, C_out, L] x_patches = W_pinv @ z_flat # [B*Dp, in_ch*pH*pW, L] # fold -> spatial frames x2 = F.fold( x_patches, output_size=(H, W), kernel_size=(pH, pW), stride=(sH, sW) ) # → [B*Dp, C_in, H, W] # un-merge depth x2 = x2.reshape(B, Dp, in_ch, H, W) # [B, Dp, C_in, H, W] x_recon = x2.permute(0,2,1,3,4).contiguous() # [B, C_in, D, H, W] return x_recon def forward_orig( self, x, t, context, clip_fea = None, freqs = None, transformer_options = {}, UNCOND = False, ): r""" Forward pass through the diffusion model Args: x (Tensor): List of input video tensors with shape [B, C_in, F, H, W] t (Tensor): Diffusion timesteps tensor of shape [B] context (List[Tensor]): List of text embeddings each with shape [B, L, C] seq_len (`int`): Maximum sequence length for positional encoding clip_fea (Tensor, *optional*): CLIP image features for image-to-video mode y (List[Tensor], *optional*): Conditional video inputs for image-to-video mode, same shape as x Returns: List[Tensor]: List of denoised video tensors with original input shapes [C_out, F, H / 8, W / 8] """ """trash = x[:,16:,...] 
x_slice_flip = torch.cat([x[:,:16,...], torch.flip(trash, dims=[2])], dim=1) x_slice_flip = self.patch_embedding(x_slice_flip.float()).to(x.dtype) x = self.patch_embedding(x.float()).to(x.dtype) x = torch.cat([x[:,:,:9,...], x_slice_flip[:,:,9:,...]], dim=2)""" """x1 = self.patch_embedding(x[:,:,:8,...].float()).to(x.dtype) x_slice = torch.cat([x[:,:16,8:,...], trash[:,:,0:9, ...]], dim=1) x2 = self.patch_embedding(x_slice.float()).to(x.dtype) x = torch.cat([x1, x2], dim=2)""" y0_style_pos = transformer_options.get("y0_style_pos") y0_style_neg = transformer_options.get("y0_style_neg") SIGMA = t[0].clone() / 1000 EO = transformer_options.get("ExtraOptions", ExtraOptions("")) # embeddings #self.patch_embedding.to(self.time_embedding[0].weight.dtype) x_orig = x.clone() #x = self.patch_embedding(x.float()).to(self.time_embedding[0].weight.dtype) #next line to torch.Size([1, 5120, 17, 30, 30]) from 1,36,17,30,30 x = self.patch_embedding(x.float()).to(x.dtype) # vram jumped from ~16-16.5 up to 17.98 gained 300mb with weights at torch.float8_e4m3fn grid_sizes = x.shape[2:] x = x.flatten(2).transpose(1, 2) # x.shape 1,32400,5120 bfloat16 316.4 MB # time embeddings e = self.time_embedding( sinusoidal_embedding_1d(self.freq_dim, t).to(dtype=x[0].dtype)) e0 = self.time_projection(e).unflatten(1, (6, self.dim)) # e0.shape = 2,6,1536 tiny ( < 0.1 MB) # context context = self.text_embedding(context) context_clip = None if clip_fea is not None and self.img_emb is not None: context_clip = self.img_emb(clip_fea) # bs x 257 x dim #context = torch.concat([context_clip, context], dim=1) # arguments kwargs = dict( e = e0, freqs = freqs, # 1,32400,1,64,2,2 bfloat16 15.8 MB context = context, # 1,1536,5120 bfloat16 15.0 MB context_clip = context_clip, grid_sizes = grid_sizes) weight = transformer_options['reg_cond_weight'] if 'reg_cond_weight' in transformer_options else 0.0 floor = transformer_options['reg_cond_floor'] if 'reg_cond_floor' in transformer_options else 0.0 floor = 
min(floor, weight) if type(weight) == float or type(weight) == int: pass else: weight = weight.item() AttnMask = transformer_options.get('AttnMask') # somewhere around here, jumped to 20.6GB mask = None if AttnMask is not None and weight > 0: mask = AttnMask.get(weight=weight) #mask_obj[0](transformer_options, weight.item()) # 32400,33936 bool 1048.6 MB mask_type_bool = type(mask[0][0].item()) == bool if mask is not None else False if not mask_type_bool: mask = mask.to(x.dtype) #text_len = context.shape[1] # mask_obj[0].text_len #mask[text_len:,text_len:] = torch.clamp(mask[text_len:,text_len:], min=floor.to(mask.device)) #ORIGINAL SELF-ATTN REGION BLEED #reg_cond_mask = reg_cond_mask_expanded.unsqueeze(0).clone() if reg_cond_mask_expanded is not None else None mask_type_bool = type(mask[0][0].item()) == bool if mask is not None else False txt_len = context.shape[1] # mask_obj[0].text_len #txt_len = mask.shape[-1] - mask.shape[-2] if mask is not None else "Unlogic Condition" #what's the point of this? 
#self_attn_mask = mask[:, txt_len:] #cross_attn_mask = mask[:,:txt_len ].bool() #i = 0 #for block in self.blocks: for i, block in enumerate(self.blocks): if mask_type_bool and weight < (i / (len(self.blocks)-1)) and mask is not None: mask = mask.to(x.dtype) #if mask_type_bool and weight < (i / (len(self.blocks)-1)) and mask is not None: # mask = mask.to(x.dtype) if mask is not None: #if True: # x = block(x, self_mask=None, cross_mask=mask.bool(), **kwargs) if mask_type_bool and floor < 0 and (i / (len(self.blocks)-1)) < (-floor): # use self-attn mask until block number x = block(x, self_mask=mask[:,txt_len:], cross_mask=mask[:,:txt_len].bool(), **kwargs) elif mask_type_bool and floor > 0 and floor < (i / (len(self.blocks)-1)): # use self-attn mask after block number x = block(x, self_mask=mask[:,txt_len:], cross_mask=mask[:,:txt_len].bool(), **kwargs) #x = block(x, self_mask=None, cross_mask=mask[:,:txt_len].bool(), **kwargs) elif floor == 0: x = block(x, self_mask=mask[:,txt_len:], cross_mask=mask[:,:txt_len].bool(), **kwargs) else: #x = block(x, self_mask=mask[:,txt_len:], cross_mask=mask[:,:txt_len].bool(), **kwargs) x = block(x, self_mask=None, cross_mask=mask[:,:txt_len].bool(), **kwargs) else: x = block(x, **kwargs) #x = block(x, mask=mask, **kwargs) #i += 1 # head x = self.head(x, e) # unpatchify eps = self.unpatchify(x, grid_sizes) dtype = eps.dtype if self.style_dtype is None else self.style_dtype pinv_dtype = torch.float32 if dtype != torch.float64 else dtype W_inv = None #if eps.shape[0] == 2 or (eps.shape[0] == 1 and not UNCOND): if y0_style_pos is not None: y0_style_pos_weight = transformer_options.get("y0_style_pos_weight") y0_style_pos_synweight = transformer_options.get("y0_style_pos_synweight") y0_style_pos_synweight *= y0_style_pos_weight y0_style_pos = y0_style_pos.to(torch.float32) x = x_orig.clone().to(torch.float32) eps = eps.to(torch.float32) eps_orig = eps.clone() sigma = SIGMA #t_orig[0].to(torch.float32) / 1000 denoised = x - sigma * eps 
img = comfy.ldm.common_dit.pad_to_patch_size(denoised, self.patch_size) patch_size = self.patch_size denoised_embed = self.patch_embedding(img.float()) #.to(x.dtype) # vram jumped from ~16-16.5 up to 17.98 gained 300mb with weights at torch.float8_e4m3fn grid_sizes = denoised_embed.shape[2:] denoised_embed = denoised_embed.flatten(2).transpose(1, 2) img_y0_adain = comfy.ldm.common_dit.pad_to_patch_size(y0_style_pos, self.patch_size) patch_size = self.patch_size y0_adain_embed = self.patch_embedding(img_y0_adain.float()) #.to(x.dtype) # vram jumped from ~16-16.5 up to 17.98 gained 300mb with weights at torch.float8_e4m3fn grid_sizes = y0_adain_embed.shape[2:] y0_adain_embed = y0_adain_embed.flatten(2).transpose(1, 2) if transformer_options['y0_style_method'] == "AdaIN": denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed) for adain_iter in range(EO("style_iter", 0)): denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed) #denoised_embed = (denoised_embed - b) @ torch.linalg.pinv(W.to(pinv_dtype)).T.to(dtype) denoised_embed = self.invert_patch_embedding(denoised_embed, x_orig.shape, grid_sizes) denoised_embed = self.patch_embedding(denoised_embed.float()) #.to(x.dtype) # vram jumped from ~16-16.5 up to 17.98 gained 300mb with weights at torch.float8_e4m3fn grid_sizes = denoised_embed.shape[2:] denoised_embed = denoised_embed.flatten(2).transpose(1, 2) #denoised_embed = F.linear(denoised_embed .to(W), W, b).to(img) denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed) elif transformer_options['y0_style_method'] == "WCT": if self.y0_adain_embed is None or self.y0_adain_embed.shape != y0_adain_embed.shape or torch.norm(self.y0_adain_embed - y0_adain_embed) > 0: self.y0_adain_embed = y0_adain_embed f_s = y0_adain_embed[0].clone() self.mu_s = f_s.mean(dim=0, keepdim=True) f_s_centered = f_s - self.mu_s cov = (f_s_centered.T.double() @ f_s_centered.double()) / (f_s_centered.size(0) - 1) S_eig, U_eig = torch.linalg.eigh(cov + 1e-5 * 
torch.eye(cov.size(0), dtype=cov.dtype, device=cov.device)) S_eig_sqrt = S_eig.clamp(min=0).sqrt() # eigenvalues -> singular values whiten = U_eig @ torch.diag(S_eig_sqrt) @ U_eig.T self.y0_color = whiten.to(f_s_centered) for wct_i in range(eps.shape[0]): f_c = denoised_embed[wct_i].clone() mu_c = f_c.mean(dim=0, keepdim=True) f_c_centered = f_c - mu_c cov = (f_c_centered.T.double() @ f_c_centered.double()) / (f_c_centered.size(0) - 1) S_eig, U_eig = torch.linalg.eigh(cov + 1e-5 * torch.eye(cov.size(0), dtype=cov.dtype, device=cov.device)) inv_sqrt_eig = S_eig.clamp(min=0).rsqrt() whiten = U_eig @ torch.diag(inv_sqrt_eig) @ U_eig.T whiten = whiten.to(f_c_centered) f_c_whitened = f_c_centered @ whiten.T f_cs = f_c_whitened @ self.y0_color.T + self.mu_s denoised_embed[wct_i] = f_cs denoised_approx = self.invert_patch_embedding(denoised_embed, x_orig.shape, grid_sizes) denoised_approx = denoised_approx.to(eps) eps = (x - denoised_approx) / sigma #if eps.shape[0] == 2: # eps[1] = eps_orig[1] + y0_style_pos_weight * (eps[1] - eps_orig[1]) # eps[0] = eps_orig[0] + y0_style_pos_synweight * (eps[0] - eps_orig[0]) #else: # eps[0] = eps_orig[0] + y0_style_pos_weight * (eps[0] - eps_orig[0]) if not UNCOND: if eps.shape[0] == 2: eps[1] = eps_orig[1] + y0_style_pos_weight * (eps[1] - eps_orig[1]) eps[0] = eps_orig[0] + y0_style_pos_synweight * (eps[0] - eps_orig[0]) else: eps[0] = eps_orig[0] + y0_style_pos_weight * (eps[0] - eps_orig[0]) elif eps.shape[0] == 1 and UNCOND: eps[0] = eps_orig[0] + y0_style_pos_synweight * (eps[0] - eps_orig[0]) eps = eps.float() #if eps.shape[0] == 2 or (eps.shape[0] == 1 and UNCOND): if y0_style_neg is not None: y0_style_neg_weight = transformer_options.get("y0_style_neg_weight") y0_style_neg_synweight = transformer_options.get("y0_style_neg_synweight") y0_style_neg_synweight *= y0_style_neg_weight y0_style_neg = y0_style_neg.to(torch.float32) x = x_orig.clone().to(torch.float32) eps = eps.to(torch.float32) eps_orig = eps.clone() sigma = SIGMA 
#t_orig[0].to(torch.float32) / 1000 denoised = x - sigma * eps img = comfy.ldm.common_dit.pad_to_patch_size(denoised, self.patch_size) patch_size = self.patch_size denoised_embed = self.patch_embedding(img.float()) #.to(x.dtype) # vram jumped from ~16-16.5 up to 17.98 gained 300mb with weights at torch.float8_e4m3fn grid_sizes = denoised_embed.shape[2:] denoised_embed = denoised_embed.flatten(2).transpose(1, 2) img_y0_adain = comfy.ldm.common_dit.pad_to_patch_size(y0_style_neg, self.patch_size) patch_size = self.patch_size y0_adain_embed = self.patch_embedding(img_y0_adain.float()) #.to(x.dtype) # vram jumped from ~16-16.5 up to 17.98 gained 300mb with weights at torch.float8_e4m3fn grid_sizes = y0_adain_embed.shape[2:] y0_adain_embed = y0_adain_embed.flatten(2).transpose(1, 2) if transformer_options['y0_style_method'] == "AdaIN": denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed) for adain_iter in range(EO("style_iter", 0)): denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed) #denoised_embed = (denoised_embed - b) @ torch.linalg.pinv(W.to(pinv_dtype)).T.to(dtype) denoised_embed = self.invert_patch_embedding(denoised_embed, x_orig.shape, grid_sizes) denoised_embed = self.patch_embedding(denoised_embed.float()) #.to(x.dtype) # vram jumped from ~16-16.5 up to 17.98 gained 300mb with weights at torch.float8_e4m3fn grid_sizes = denoised_embed.shape[2:] denoised_embed = denoised_embed.flatten(2).transpose(1, 2) #denoised_embed = F.linear(denoised_embed .to(W), W, b).to(img) denoised_embed = adain_seq_inplace(denoised_embed, y0_adain_embed) elif transformer_options['y0_style_method'] == "WCT": if self.y0_adain_embed is None or self.y0_adain_embed.shape != y0_adain_embed.shape or torch.norm(self.y0_adain_embed - y0_adain_embed) > 0: self.y0_adain_embed = y0_adain_embed f_s = y0_adain_embed[0].clone() self.mu_s = f_s.mean(dim=0, keepdim=True) f_s_centered = f_s - self.mu_s cov = (f_s_centered.T.double() @ f_s_centered.double()) / 
def forward(self, x, timestep, context, clip_fea=None, transformer_options={}, **kwargs):
    """Run ``forward_orig`` once per cond/uncond entry and stack the results.

    ``transformer_options['cond_or_uncond']`` is walked entry by entry. Entry
    ``i`` selects row ``i`` of ``x``, ``timestep``, ``context`` and the rotary
    frequencies, and is treated as unconditional when it equals ``1``.

    When ``transformer_options['AttnMask']`` is set and the regional
    conditioning weight is non-zero, the context comes from
    ``transformer_options['RegContext']``: conditional entries use it directly,
    unconditional entries tile their own context to the same token length.
    ``clip_fea`` is replaced by ``RegContext.get_clip_fea()`` in that case and
    the replacement persists for later entries.

    Args:
        x: Latent input; unpacked as ``(bs, c, t, h, w)``.
        timestep: Per-row timesteps, indexed by entry.
        context: Per-row text context, indexed by entry.
        clip_fea: Optional CLIP image features forwarded to ``forward_orig``.
        transformer_options: Options dict. It is written to on every entry
            (``original_shape``, ``patch_size``, ``reg_cond_weight``,
            ``reg_cond_floor``, ``reg_cond_mask_orig``). It must contain
            ``cond_or_uncond``; the empty default raises ``KeyError``.
        **kwargs: Accepted but not forwarded.

    Returns:
        The per-entry outputs, each cropped to the input ``t, h, w``, stacked
        on dim 0 with the singleton dim 1 squeezed away.
    """
    x_orig = x.clone()
    timestep_orig = timestep.clone()
    context_orig = context.clone()

    out_list = []
    for i, cond_flag in enumerate(transformer_options['cond_or_uncond']):
        UNCOND = cond_flag == 1

        x = x_orig.clone()
        timestep = timestep_orig.clone()
        context = context_orig.clone()

        bs, c, t, h, w = x.shape
        x = comfy.ldm.common_dit.pad_to_patch_size(x, self.patch_size)
        patch_size = self.patch_size

        transformer_options['original_shape'] = x.shape
        transformer_options['patch_size'] = patch_size

        # Both the cond and the uncond path publish the same regional settings.
        transformer_options['reg_cond_weight'] = transformer_options.get("regional_conditioning_weight", 0.0)
        transformer_options['reg_cond_floor'] = transformer_options.get("regional_conditioning_floor", 0.0)
        transformer_options['reg_cond_mask_orig'] = transformer_options.get('regional_conditioning_mask_orig')

        AttnMask = transformer_options.get('AttnMask', None)
        RegContext = transformer_options.get('RegContext', None)

        context_tmp = None
        if AttnMask is not None and transformer_options['reg_cond_weight'] != 0.0:
            AttnMask.attn_mask_recast(x.dtype)
            regional_context = RegContext.get()
            clip_fea = RegContext.get_clip_fea()
            # Compare against None explicitly: the truth value of a
            # multi-element tensor raises a RuntimeError.
            clip_fea = clip_fea.to(x.dtype) if clip_fea is not None else None

            if UNCOND:
                # Only the token length of the regional context is used here:
                # the entry's own context is tiled and cut to that length.
                own_context = context[i][None, ...].clone()
                target_len = regional_context.shape[1]
                reps = (target_len // own_context.shape[1]) + 1
                context_tmp = own_context.repeat(1, reps, 1)[:, :target_len, :]
            else:
                context_tmp = regional_context

        if context_tmp is None:
            context_tmp = context[i][None, ...].clone()
        context_tmp = context_tmp.to(context.dtype)

        # Patch-grid extents, rounded to the nearest whole patch.
        t_len = ((t + (patch_size[0] // 2)) // patch_size[0])
        h_len = ((h + (patch_size[1] // 2)) // patch_size[1])
        w_len = ((w + (patch_size[2] // 2)) // patch_size[2])

        # (t, h, w) position ids for every patch, fed to the rope embedder.
        img_ids = torch.zeros((t_len, h_len, w_len, 3), device=x.device, dtype=x.dtype)
        img_ids[:, :, :, 0] = img_ids[:, :, :, 0] + torch.linspace(0, t_len - 1, steps=t_len, device=x.device, dtype=x.dtype).reshape(-1, 1, 1)
        img_ids[:, :, :, 1] = img_ids[:, :, :, 1] + torch.linspace(0, h_len - 1, steps=h_len, device=x.device, dtype=x.dtype).reshape(1, -1, 1)
        img_ids[:, :, :, 2] = img_ids[:, :, :, 2] + torch.linspace(0, w_len - 1, steps=w_len, device=x.device, dtype=x.dtype).reshape(1, 1, -1)
        img_ids = repeat(img_ids, "t h w c -> b (t h w) c", b=bs)

        freqs = self.rope_embedder(img_ids).movedim(1, 2).to(x.dtype)

        out_x = self.forward_orig(
            x[i][None, ...],
            timestep[i][None, ...],
            context_tmp,
            clip_fea=clip_fea,
            freqs=freqs[i][None, ...],
            transformer_options=transformer_options,
            UNCOND=UNCOND,
        )[:, :, :t, :h, :w]

        out_list.append(out_x)

    out_stack = torch.stack(out_list, dim=0).squeeze(dim=1)
    return out_stack
def unpatchify(self, x, grid_sizes):
    r"""Fold a batch of patch tokens back into a dense 5-D tensor.

    Args:
        x: Tensor indexed as ``[batch, token, feature]``. Only the first
            ``prod(grid_sizes)`` tokens are used, and each token is viewed as
            ``(*self.patch_size, self.out_dim)``.
        grid_sizes: The three patch-grid extents ``(F, H, W)``.

    Returns:
        Tensor of shape ``[batch, self.out_dim, F * pf, H * ph, W * pw]`` where
        ``(pf, ph, pw)`` is ``self.patch_size``.
    """
    channels = self.out_dim
    batch = x.shape[0]
    token_count = math.prod(grid_sizes)

    # (b, f, h, w, pf, ph, pw, c)
    patches = x[:, :token_count].view(batch, *grid_sizes, *self.patch_size, channels)
    # -> (b, c, f, pf, h, ph, w, pw): put each patch axis next to its grid axis
    interleaved = patches.permute(0, 7, 1, 4, 2, 5, 3, 6)

    full_extent = [grid * patch for grid, patch in zip(grid_sizes, self.patch_size)]
    return interleaved.reshape(batch, channels, *full_extent)
def adain_seq_inplace(content: torch.Tensor, style: torch.Tensor, eps: float = 1e-7) -> torch.Tensor:
    """Apply adaptive instance normalisation along dim 1, in place.

    ``content`` is shifted and scaled so that its mean and standard deviation
    over dim 1 match those of ``style``.

    Args:
        content: Tensor to restyle. It is modified in place.
        style: Tensor supplying the target statistics; it is left untouched.
        eps: Added to both standard deviations to avoid division by zero.

    Returns:
        ``content`` itself (the same tensor object), after modification.
    """
    def _seq_std(t: torch.Tensor) -> torch.Tensor:
        # The Bessel-corrected std of a single element is NaN, which would
        # poison the whole output. A one-token sequence uses the population
        # std (0) instead.
        return t.std(1, unbiased=t.shape[1] > 1, keepdim=True).add_(eps)

    mean_c = content.mean(1, keepdim=True)
    std_c = _seq_std(content)
    mean_s = style.mean(1, keepdim=True)
    std_s = _seq_std(style)

    content.sub_(mean_c).div_(std_c).mul_(std_s).add_(mean_s)
    return content
""" def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self._padding = (self.padding[2], self.padding[2], self.padding[1], self.padding[1], 2 * self.padding[0], 0) self.padding = (0, 0, 0) def forward(self, x, cache_x=None): padding = list(self._padding) if cache_x is not None and self._padding[4] > 0: cache_x = cache_x.to(x.device) x = torch.cat([cache_x, x], dim=2) padding[4] -= cache_x.shape[2] x = F.pad(x, padding) return super().forward(x) class RMS_norm(nn.Module): def __init__(self, dim, channel_first=True, images=True, bias=False): super().__init__() broadcastable_dims = (1, 1, 1) if not images else (1, 1) shape = (dim, *broadcastable_dims) if channel_first else (dim,) self.channel_first = channel_first self.scale = dim**0.5 self.gamma = nn.Parameter(torch.ones(shape)) self.bias = nn.Parameter(torch.zeros(shape)) if bias else None def forward(self, x): return F.normalize( x, dim=(1 if self.channel_first else -1)) * self.scale * self.gamma.to(x) + (self.bias.to(x) if self.bias is not None else 0) class Upsample(nn.Upsample): def forward(self, x): """ Fix bfloat16 support for nearest neighbor interpolation. 
""" return super().forward(x.float()).type_as(x) class Resample(nn.Module): def __init__(self, dim, mode): assert mode in ('none', 'upsample2d', 'upsample3d', 'downsample2d', 'downsample3d') super().__init__() self.dim = dim self.mode = mode # layers if mode == 'upsample2d': self.resample = nn.Sequential( Upsample(scale_factor=(2., 2.), mode='nearest-exact'), ops.Conv2d(dim, dim // 2, 3, padding=1)) elif mode == 'upsample3d': self.resample = nn.Sequential( Upsample(scale_factor=(2., 2.), mode='nearest-exact'), ops.Conv2d(dim, dim // 2, 3, padding=1)) self.time_conv = CausalConv3d( dim, dim * 2, (3, 1, 1), padding=(1, 0, 0)) elif mode == 'downsample2d': self.resample = nn.Sequential( nn.ZeroPad2d((0, 1, 0, 1)), ops.Conv2d(dim, dim, 3, stride=(2, 2))) elif mode == 'downsample3d': self.resample = nn.Sequential( nn.ZeroPad2d((0, 1, 0, 1)), ops.Conv2d(dim, dim, 3, stride=(2, 2))) self.time_conv = CausalConv3d( dim, dim, (3, 1, 1), stride=(2, 1, 1), padding=(0, 0, 0)) else: self.resample = nn.Identity() def forward(self, x, feat_cache=None, feat_idx=[0]): b, c, t, h, w = x.size() if self.mode == 'upsample3d': if feat_cache is not None: idx = feat_idx[0] if feat_cache[idx] is None: feat_cache[idx] = 'Rep' feat_idx[0] += 1 else: cache_x = x[:, :, -CACHE_T:, :, :].clone() if cache_x.shape[2] < 2 and feat_cache[ idx] is not None and feat_cache[idx] != 'Rep': # cache last frame of last two chunk cache_x = torch.cat([ feat_cache[idx][:, :, -1, :, :].unsqueeze(2).to( cache_x.device), cache_x ], dim=2) if cache_x.shape[2] < 2 and feat_cache[ idx] is not None and feat_cache[idx] == 'Rep': cache_x = torch.cat([ torch.zeros_like(cache_x).to(cache_x.device), cache_x ], dim=2) if feat_cache[idx] == 'Rep': x = self.time_conv(x) else: x = self.time_conv(x, feat_cache[idx]) feat_cache[idx] = cache_x feat_idx[0] += 1 x = x.reshape(b, 2, c, t, h, w) x = torch.stack((x[:, 0, :, :, :, :], x[:, 1, :, :, :, :]), 3) x = x.reshape(b, c, t * 2, h, w) t = x.shape[2] x = rearrange(x, 'b c t h w -> 
(b t) c h w') x = self.resample(x) x = rearrange(x, '(b t) c h w -> b c t h w', t=t) if self.mode == 'downsample3d': if feat_cache is not None: idx = feat_idx[0] if feat_cache[idx] is None: feat_cache[idx] = x.clone() feat_idx[0] += 1 else: cache_x = x[:, :, -1:, :, :].clone() # if cache_x.shape[2] < 2 and feat_cache[idx] is not None and feat_cache[idx]!='Rep': # # cache last frame of last two chunk # cache_x = torch.cat([feat_cache[idx][:, :, -1, :, :].unsqueeze(2).to(cache_x.device), cache_x], dim=2) x = self.time_conv( torch.cat([feat_cache[idx][:, :, -1:, :, :], x], 2)) feat_cache[idx] = cache_x feat_idx[0] += 1 return x def init_weight(self, conv): conv_weight = conv.weight nn.init.zeros_(conv_weight) c1, c2, t, h, w = conv_weight.size() one_matrix = torch.eye(c1, c2) init_matrix = one_matrix nn.init.zeros_(conv_weight) #conv_weight.data[:,:,-1,1,1] = init_matrix * 0.5 conv_weight.data[:, :, 1, 0, 0] = init_matrix #* 0.5 conv.weight.data.copy_(conv_weight) nn.init.zeros_(conv.bias.data) def init_weight2(self, conv): conv_weight = conv.weight.data nn.init.zeros_(conv_weight) c1, c2, t, h, w = conv_weight.size() init_matrix = torch.eye(c1 // 2, c2) #init_matrix = repeat(init_matrix, 'o ... 
class ResidualBlock(nn.Module):
    """Two norm/SiLU/causal-conv stages plus a shortcut connection."""

    def __init__(self, in_dim, out_dim, dropout=0.0):
        super().__init__()
        self.in_dim = in_dim
        self.out_dim = out_dim

        # layers
        self.residual = nn.Sequential(
            RMS_norm(in_dim, images=False), nn.SiLU(),
            CausalConv3d(in_dim, out_dim, 3, padding=1),
            RMS_norm(out_dim, images=False), nn.SiLU(), nn.Dropout(dropout),
            CausalConv3d(out_dim, out_dim, 3, padding=1))
        # A 1x1x1 conv matches channel counts on the shortcut when they differ.
        self.shortcut = CausalConv3d(in_dim, out_dim, 1) \
            if in_dim != out_dim else nn.Identity()

    def forward(self, x, feat_cache=None, feat_idx=None):
        """Apply the block.

        Args:
            x: Input feature map (``b, c, t, h, w``).
            feat_cache: Optional list of per-conv cache slots; updated in place.
            feat_idx: One-element list holding the next cache slot index; it is
                incremented in place. A fresh ``[0]`` is used when omitted.

        Returns:
            ``residual(x) + shortcut(x)``.
        """
        if feat_idx is None:
            # A list default would be shared, and mutated, across calls.
            feat_idx = [0]

        h = self.shortcut(x)
        for layer in self.residual:
            if isinstance(layer, CausalConv3d) and feat_cache is not None:
                idx = feat_idx[0]
                cache_x = x[:, :, -CACHE_T:, :, :].clone()
                if cache_x.shape[2] < 2 and feat_cache[idx] is not None:
                    # Single-frame chunk: prepend the last cached frame so
                    # the stored cache has two frames.
                    cache_x = torch.cat([
                        feat_cache[idx][:, :, -1, :, :].unsqueeze(2).to(
                            cache_x.device), cache_x
                    ], dim=2)
                x = layer(x, feat_cache[idx])
                feat_cache[idx] = cache_x
                feat_idx[0] += 1
            else:
                x = layer(x)
        return x + h
""" def __init__(self, dim): super().__init__() self.dim = dim # layers self.norm = RMS_norm(dim) self.to_qkv = ops.Conv2d(dim, dim * 3, 1) self.proj = ops.Conv2d(dim, dim, 1) self.optimized_attention = vae_attention() def forward(self, x): identity = x b, c, t, h, w = x.size() x = rearrange(x, 'b c t h w -> (b t) c h w') x = self.norm(x) # compute query, key, value q, k, v = self.to_qkv(x).chunk(3, dim=1) x = self.optimized_attention(q, k, v) # output x = self.proj(x) x = rearrange(x, '(b t) c h w-> b c t h w', t=t) return x + identity class Encoder3d(nn.Module): def __init__(self, dim=128, z_dim=4, dim_mult=[1, 2, 4, 4], num_res_blocks=2, attn_scales=[], temperal_downsample=[True, True, False], dropout=0.0): super().__init__() self.dim = dim self.z_dim = z_dim self.dim_mult = dim_mult self.num_res_blocks = num_res_blocks self.attn_scales = attn_scales self.temperal_downsample = temperal_downsample # dimensions dims = [dim * u for u in [1] + dim_mult] scale = 1.0 # init block self.conv1 = CausalConv3d(3, dims[0], 3, padding=1) # downsample blocks downsamples = [] for i, (in_dim, out_dim) in enumerate(zip(dims[:-1], dims[1:])): # residual (+attention) blocks for _ in range(num_res_blocks): downsamples.append(ResidualBlock(in_dim, out_dim, dropout)) if scale in attn_scales: downsamples.append(AttentionBlock(out_dim)) in_dim = out_dim # downsample block if i != len(dim_mult) - 1: mode = 'downsample3d' if temperal_downsample[ i] else 'downsample2d' downsamples.append(Resample(out_dim, mode=mode)) scale /= 2.0 self.downsamples = nn.Sequential(*downsamples) # middle blocks self.middle = nn.Sequential( ResidualBlock(out_dim, out_dim, dropout), AttentionBlock(out_dim), ResidualBlock(out_dim, out_dim, dropout)) # output blocks self.head = nn.Sequential( RMS_norm(out_dim, images=False), nn.SiLU(), CausalConv3d(out_dim, z_dim, 3, padding=1)) def forward(self, x, feat_cache=None, feat_idx=[0]): if feat_cache is not None: idx = feat_idx[0] cache_x = x[:, :, -CACHE_T:, :, 
class Decoder3d(nn.Module):
    """Causal 3-D decoder: stem conv, middle blocks, upsampling stages, head."""

    def __init__(self,
                 dim=128,
                 z_dim=4,
                 dim_mult=[1, 2, 4, 4],
                 num_res_blocks=2,
                 attn_scales=[],
                 temperal_upsample=[False, True, True],
                 dropout=0.0):
        super().__init__()
        self.dim = dim
        self.z_dim = z_dim
        self.dim_mult = dim_mult
        self.num_res_blocks = num_res_blocks
        self.attn_scales = attn_scales
        self.temperal_upsample = temperal_upsample

        # dimensions
        dims = [dim * u for u in [dim_mult[-1]] + dim_mult[::-1]]
        scale = 1.0 / 2**(len(dim_mult) - 2)

        # init block
        self.conv1 = CausalConv3d(z_dim, dims[0], 3, padding=1)

        # middle blocks
        self.middle = nn.Sequential(
            ResidualBlock(dims[0], dims[0], dropout), AttentionBlock(dims[0]),
            ResidualBlock(dims[0], dims[0], dropout))

        # upsample blocks
        upsamples = []
        for i, (in_dim, out_dim) in enumerate(zip(dims[:-1], dims[1:])):
            # residual (+attention) blocks
            if i in (1, 2, 3):
                # The preceding Resample stage halves the channel count.
                in_dim = in_dim // 2
            for _ in range(num_res_blocks + 1):
                upsamples.append(ResidualBlock(in_dim, out_dim, dropout))
                if scale in attn_scales:
                    upsamples.append(AttentionBlock(out_dim))
                in_dim = out_dim

            # upsample block (every stage except the last)
            if i != len(dim_mult) - 1:
                mode = 'upsample3d' if temperal_upsample[i] else 'upsample2d'
                upsamples.append(Resample(out_dim, mode=mode))
                scale *= 2.0
        self.upsamples = nn.Sequential(*upsamples)

        # output blocks
        self.head = nn.Sequential(
            RMS_norm(out_dim, images=False), nn.SiLU(),
            CausalConv3d(out_dim, 3, 3, padding=1))

    @staticmethod
    def _cached_conv(conv, x, feat_cache, feat_idx):
        """Run a causal conv against its cache slot, then refresh the slot."""
        idx = feat_idx[0]
        cache_x = x[:, :, -CACHE_T:, :, :].clone()
        if cache_x.shape[2] < 2 and feat_cache[idx] is not None:
            # Single-frame chunk: prepend the last cached frame so the
            # stored cache has two frames.
            cache_x = torch.cat([
                feat_cache[idx][:, :, -1, :, :].unsqueeze(2).to(
                    cache_x.device), cache_x
            ], dim=2)
        x = conv(x, feat_cache[idx])
        feat_cache[idx] = cache_x
        feat_idx[0] += 1
        return x

    def forward(self, x, feat_cache=None, feat_idx=None):
        """Decode latent chunk ``x`` (``b, z_dim, t, h, w``).

        Args:
            x: Latent chunk.
            feat_cache: Optional list of per-conv cache slots; updated in place.
            feat_idx: One-element list holding the next cache slot index; it is
                incremented in place. A fresh ``[0]`` is used when omitted.

        Returns:
            The head output with 3 channels.
        """
        if feat_idx is None:
            # A list default would be shared, and mutated, across calls.
            feat_idx = [0]
        cached = feat_cache is not None

        ## conv1
        if cached:
            x = self._cached_conv(self.conv1, x, feat_cache, feat_idx)
        else:
            x = self.conv1(x)

        ## middle
        for layer in self.middle:
            if isinstance(layer, ResidualBlock) and cached:
                x = layer(x, feat_cache, feat_idx)
            else:
                x = layer(x)

        ## upsamples
        for layer in self.upsamples:
            # AttentionBlock.forward takes only x; passing it the cache
            # arguments would raise a TypeError.
            if cached and not isinstance(layer, AttentionBlock):
                x = layer(x, feat_cache, feat_idx)
            else:
                x = layer(x)

        ## head
        for layer in self.head:
            if isinstance(layer, CausalConv3d) and cached:
                x = self._cached_conv(layer, x, feat_cache, feat_idx)
            else:
                x = layer(x)
        return x
def count_conv3d(model):
    """Return how many ``CausalConv3d`` modules ``model`` contains (itself included)."""
    return sum(1 for m in model.modules() if isinstance(m, CausalConv3d))


class WanVAE(nn.Module):
    """Video VAE that encodes and decodes in temporal chunks with per-conv feature caches."""

    def __init__(self,
                 dim=128,
                 z_dim=4,
                 dim_mult=[1, 2, 4, 4],
                 num_res_blocks=2,
                 attn_scales=[],
                 temperal_downsample=[True, True, False],
                 dropout=0.0):
        super().__init__()
        self.dim = dim
        self.z_dim = z_dim
        self.dim_mult = dim_mult
        self.num_res_blocks = num_res_blocks
        self.attn_scales = attn_scales
        self.temperal_downsample = temperal_downsample
        self.temperal_upsample = temperal_downsample[::-1]

        # modules
        self.encoder = Encoder3d(dim, z_dim * 2, dim_mult, num_res_blocks,
                                 attn_scales, self.temperal_downsample, dropout)
        self.conv1 = CausalConv3d(z_dim * 2, z_dim * 2, 1)
        self.conv2 = CausalConv3d(z_dim, z_dim, 1)
        self.decoder = Decoder3d(dim, z_dim, dim_mult, num_res_blocks,
                                 attn_scales, self.temperal_upsample, dropout)

    def forward(self, x):
        """Encode, sample a latent, decode; returns ``(x_recon, mu, log_var)``."""
        # encode() returns only mu, so the pair must come from the helper.
        mu, log_var = self._encode_moments(x)
        z = self.reparameterize(mu, log_var)
        x_recon = self.decode(z)
        return x_recon, mu, log_var

    def _encode_moments(self, x):
        """Encode ``x`` chunk by chunk and return ``(mu, log_var)``."""
        self.clear_cache()
        ## cache
        t = x.shape[2]
        iter_ = 1 + (t - 1) // 4
        # Split the encoder input along time into chunks of 1, 4, 4, 4, ...
        chunks = []
        for i in range(iter_):
            self._enc_conv_idx = [0]
            if i == 0:
                frames = x[:, :, :1, :, :]
            else:
                frames = x[:, :, 1 + 4 * (i - 1):1 + 4 * i, :, :]
            chunks.append(
                self.encoder(
                    frames,
                    feat_cache=self._enc_feat_map,
                    feat_idx=self._enc_conv_idx))
        out = torch.cat(chunks, 2)
        mu, log_var = self.conv1(out).chunk(2, dim=1)
        self.clear_cache()
        return mu, log_var

    def encode(self, x):
        """Return the latent mean ``mu`` for ``x`` (``b, c, t, h, w``)."""
        mu, _ = self._encode_moments(x)
        return mu

    def decode(self, z):
        """Decode latent ``z`` (``b, c, t, h, w``) one latent frame at a time."""
        self.clear_cache()
        # z: [b,c,t,h,w]
        iter_ = z.shape[2]
        x = self.conv2(z)
        chunks = []
        for i in range(iter_):
            self._conv_idx = [0]
            chunks.append(
                self.decoder(
                    x[:, :, i:i + 1, :, :],
                    feat_cache=self._feat_map,
                    feat_idx=self._conv_idx))
        out = torch.cat(chunks, 2)
        self.clear_cache()
        return out

    def reparameterize(self, mu, log_var):
        """Draw ``mu + eps * exp(0.5 * log_var)`` with ``eps`` standard normal."""
        std = torch.exp(0.5 * log_var)
        eps = torch.randn_like(std)
        return eps * std + mu

    def sample(self, imgs, deterministic=False):
        """Encode ``imgs`` and return ``mu`` (deterministic) or a latent sampled around it."""
        # encode() returns only mu, so the pair must come from the helper.
        mu, log_var = self._encode_moments(imgs)
        if deterministic:
            return mu
        std = torch.exp(0.5 * log_var.clamp(-30.0, 20.0))
        return mu + std * torch.randn_like(std)

    def clear_cache(self):
        """Reset the decoder and encoder feature caches: one empty slot per CausalConv3d."""
        self._conv_num = count_conv3d(self.decoder)
        self._conv_idx = [0]
        self._feat_map = [None] * self._conv_num
        #cache encode
        self._enc_conv_num = count_conv3d(self.encoder)
        self._enc_conv_idx = [0]
        self._enc_feat_map = [None] * self._enc_conv_num
// Module-level state, updated by the settings callbacks.
let RESDEBUG = false;
let TOP_CLOWNDOG = true;
let DISPLAY_CATEGORY = true;
let nodeCounter = 1;
const processedNodeMap = new WeakMap();

const originalGetNodeTypesCategories =
    typeof LiteGraph.getNodeTypesCategories === 'function'
        ? LiteGraph.getNodeTypesCategories
        : null;

// Wrap LiteGraph's category listing (when it exists) so that categories
// starting with "RES4LYF" sort ahead of all others.
if (originalGetNodeTypesCategories) {
    LiteGraph.getNodeTypesCategories = function (filter) {
        // Setting switched off: defer to the stock implementation.
        if (TOP_CLOWNDOG == false) {
            return originalGetNodeTypesCategories.call(this, filter);
        }

        try {
            const categories = originalGetNodeTypesCategories.call(this, filter);
            const rank = (name) => (name.startsWith("RES4LYF") ? 0 : 1);

            categories.sort((a, b) => {
                const byPrefix = rank(a) - rank(b);
                if (byPrefix !== 0) {
                    return byPrefix;
                }
                // Within a group, sort alphabetically only if LiteGraph's
                // auto sorting is enabled.
                return LiteGraph.auto_sort_node_types ? a.localeCompare(b) : 0;
            });
            return categories;
        } catch (error) {
            // Anything unexpected: fall back to the unmodified listing.
            return originalGetNodeTypesCategories.call(this, filter);
        }
    };
}

/**
 * Log to the console and mirror the text to the backend at /reslyf/log.
 * Output is produced only when RESDEBUG is on, or when the last argument is a
 * boolean `true`. A trailing boolean is always removed from the message.
 */
function debugLog(...args) {
    let force = false;
    const last = args[args.length - 1];
    if (typeof last === "boolean") {
        force = args.pop();
    }
    if (!(RESDEBUG || force)) {
        return;
    }

    console.log(...args);

    // Attempt to post the log text to the Python backend
    const logText = args.join(' ');
    fetch('/reslyf/log', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json'
        },
        body: JSON.stringify({ log: logText })
    }).catch(error => {
        console.error('Error posting log to backend:', error);
    });
}

const resDebugLog = debugLog;
// Adapted from essentials.DisplayAny (ComfyUI_essentials).
// Shows the "text" payload of an executed "Latent Display State Info" node in
// a read-only multiline widget on the node itself.
app.registerExtension({
    name: "Comfy.RES4LYF.DisplayInfo",
    async beforeRegisterNodeDef(nodeType, nodeData, app) {
        if (!nodeData?.category?.startsWith("RES4LYF")) {
            return;
        }
        if (nodeData.name !== "Latent Display State Info") {
            return;
        }

        const onExecuted = nodeType.prototype.onExecuted;
        nodeType.prototype.onExecuted = function (message) {
            onExecuted?.apply(this, arguments);

            // Reuse the display widget from a previous execution if present.
            // (The former cleanup branch only ran when the widget list was
            // already empty, so it never removed anything and was dropped.)
            let textWidget = this.widgets?.find(w => w.name === "displaytext");
            if (!textWidget) {
                textWidget = ComfyWidgets["STRING"](this, "displaytext", ["STRING", { multiline: true }], app).widget;
                textWidget.inputEl.readOnly = true;
                textWidget.inputEl.style.border = "none";
                textWidget.inputEl.style.backgroundColor = "transparent";
            }

            // Tolerate a missing or non-array payload instead of throwing.
            const text = message?.text;
            textWidget.value = Array.isArray(text) ? text.join("") : String(text ?? "");
        };
    },
});
/**
 * Make a node's "options" inputs grow and shrink with their connections.
 *
 * Wraps the node's onConnectionsChange and onConfigure handlers so that:
 *  - when every "options*" input is linked, a new empty one is appended;
 *  - when an "options*" input is unlinked, surplus empty inputs are removed
 *    and the numbered inputs are renamed to "options 2", "options 3", ...
 *
 * Each node is set up at most once (tracked in processedNodeMap). Nodes
 * without a base input named exactly "options" are left untouched.
 */
function setupExpandableOptions(node) {
    if (processedNodeMap.has(node)) {
        return;
    }
    processedNodeMap.set(node, ++nodeCounter);

    if (!node.inputs?.some(input => input.name === "options")) {
        return;
    }

    const isOptionsInput = (input) => input.name.startsWith("options");
    const isUnlinked = (input) => input.link === null;

    // Name for the next numbered input: one past the highest "options N".
    function nextOptionsInputName(target) {
        let highest = 0;
        for (const input of target.inputs) {
            if (!input.name.startsWith("options ")) {
                continue; // also skips the base "options" input
            }
            const match = input.name.match(/options (\d+)/);
            if (match) {
                highest = Math.max(highest, parseInt(match[1], 10) - 1);
            }
        }
        return `options ${highest + 2}`;
    }

    // Append an empty options input when all existing ones are linked.
    // Returns true when an input was added.
    function addSpareOptionsInputIfFull(target) {
        const optionsInputs = target.inputs.filter(isOptionsInput);
        if (optionsInputs.length === 0 || optionsInputs.some(isUnlinked)) {
            return false;
        }
        target.addInput(nextOptionsInputName(target), "OPTIONS");
        return true;
    }

    const originalOnConnectionsChange = node.onConnectionsChange;
    node.onConnectionsChange = function (type, index, connected, link_info) {
        if (originalOnConnectionsChange) {
            originalOnConnectionsChange.call(this, type, index, connected, link_info);
        }
        if (type !== LiteGraph.INPUT) {
            return;
        }

        const input = this.inputs[index];
        if (!input || !input.name.startsWith("options")) {
            return;
        }

        if (!connected) {
            // setTimeout to let the graph update first
            setTimeout(() => {
                cleanupOptionsInputs(this);
            }, 100);
            return;
        }

        if (link_info && addSpareOptionsInputIfFull(this)) {
            this.setDirtyCanvas(true, true);
        }
    };

    const originalOnConfigure = node.onConfigure;
    node.onConfigure = function (info) {
        if (originalOnConfigure) {
            originalOnConfigure.call(this, info);
        }
        addSpareOptionsInputIfFull(this);
    };

    // Remove surplus empty options inputs after a disconnect.
    function cleanupOptionsInputs(target) {
        const snapshot = [];
        target.inputs.forEach((input, index) => {
            if (input.name.startsWith("options")) {
                snapshot.push({
                    index,
                    name: input.name,
                    connected: input.link !== null,
                    isBase: input.name === "options",
                });
            }
        });

        const base = snapshot.find(entry => entry.isBase);
        const extras = snapshot.filter(entry => !entry.isBase);

        // Nothing is linked at all: collapse back to just the base input.
        if (base && !base.connected && extras.every(entry => !entry.connected)) {
            // Remove from the highest index down so the remaining indices stay valid.
            extras.sort((a, b) => b.index - a.index);
            for (const entry of extras) {
                target.removeInput(entry.index);
            }
            target.setDirtyCanvas(true, true);
            return;
        }

        let needsRenumbering = false;

        // Keep the empty numbered input with the highest index; drop the others.
        const idle = extras.filter(entry => !entry.connected);
        idle.sort((a, b) => b.index - a.index);
        for (const entry of idle.slice(1)) {
            target.removeInput(entry.index);
            needsRenumbering = true;
        }

        const hasConnectedOptions = snapshot.some(entry => entry.connected);
        const hasEmptyOptions = snapshot.some(entry => !entry.connected && !entry.isBase);
        if (hasConnectedOptions && !hasEmptyOptions) {
            // Placeholder name; renumbering below assigns the final one.
            target.addInput("options temp", "OPTIONS");
            needsRenumbering = true;
        }

        if (needsRenumbering) {
            renumberOptionsInputs(target);
            target.setDirtyCanvas(true, true);
        }
    }

    // Rename numbered inputs to "options 2", "options 3", ... with linked
    // inputs first, then by slot order. The base "options" input is skipped.
    function renumberOptionsInputs(target) {
        const numbered = [];
        target.inputs.forEach((input, index) => {
            if (input.name.startsWith("options") && input.name !== "options") {
                numbered.push({
                    index,
                    connected: input.link !== null,
                    name: input.name,
                });
            }
        });

        numbered.sort((a, b) => {
            if (a.connected !== b.connected) {
                return b.connected ? 1 : -1; // Connected inputs first
            }
            return a.index - b.index;
        });

        numbered.forEach((entry, position) => {
            const newName = `options ${position + 2}`;
            if (entry.name !== newName) {
                target.inputs[entry.index].name = newName;
            }
        });
    }
}
this.widgets_values.slice(+this.widgets_values.length > 1)); } }; } }, }); ================================================ FILE: web/js/res4lyf.default.json ================================================ { "name": "RES4LYF", "topClownDog": true, "enableDebugLogs": false, "displayCategory": true }