Showing preview only (1,389K chars total). Download the full file or copy to clipboard to get everything.
Repository: CASIA-LMC-Lab/FastSAM
Branch: main
Commit: b4ed20c2fed7
Files: 179
Total size: 1.3 MB
Directory structure:
gitextract_nx_m3n2d/
├── .gitignore
├── Inference.py
├── LICENSE
├── MORE_USAGES.md
├── README.md
├── app_gradio.py
├── cog.yaml
├── fastsam/
│ ├── __init__.py
│ ├── decoder.py
│ ├── model.py
│ ├── predict.py
│ ├── prompt.py
│ └── utils.py
├── predict.py
├── requirements.txt
├── segpredict.py
├── setup.py
├── ultralytics/
│ ├── .pre-commit-config.yaml
│ ├── __init__.py
│ ├── datasets/
│ │ ├── Argoverse.yaml
│ │ ├── GlobalWheat2020.yaml
│ │ ├── ImageNet.yaml
│ │ ├── Objects365.yaml
│ │ ├── SKU-110K.yaml
│ │ ├── VOC.yaml
│ │ ├── VisDrone.yaml
│ │ ├── coco-pose.yaml
│ │ ├── coco.yaml
│ │ ├── coco128-seg.yaml
│ │ ├── coco128.yaml
│ │ ├── coco8-pose.yaml
│ │ ├── coco8-seg.yaml
│ │ ├── coco8.yaml
│ │ └── xView.yaml
│ ├── hub/
│ │ ├── __init__.py
│ │ ├── auth.py
│ │ ├── session.py
│ │ └── utils.py
│ ├── models/
│ │ ├── README.md
│ │ ├── rt-detr/
│ │ │ ├── rtdetr-l.yaml
│ │ │ └── rtdetr-x.yaml
│ │ ├── v3/
│ │ │ ├── yolov3-spp.yaml
│ │ │ ├── yolov3-tiny.yaml
│ │ │ └── yolov3.yaml
│ │ ├── v5/
│ │ │ ├── yolov5-p6.yaml
│ │ │ └── yolov5.yaml
│ │ ├── v6/
│ │ │ └── yolov6.yaml
│ │ └── v8/
│ │   ├── yolov8-cls.yaml
│ │   ├── yolov8-p2.yaml
│ │   ├── yolov8-p6.yaml
│ │   ├── yolov8-pose-p6.yaml
│ │   ├── yolov8-pose.yaml
│ │   ├── yolov8-rtdetr.yaml
│ │   ├── yolov8-seg.yaml
│ │   └── yolov8.yaml
│ ├── nn/
│ │ ├── __init__.py
│ │ ├── autobackend.py
│ │ ├── autoshape.py
│ │ ├── modules/
│ │ │ ├── __init__.py
│ │ │ ├── block.py
│ │ │ ├── conv.py
│ │ │ ├── head.py
│ │ │ ├── transformer.py
│ │ │ └── utils.py
│ │ └── tasks.py
│ ├── tracker/
│ │ ├── README.md
│ │ ├── __init__.py
│ │ ├── cfg/
│ │ │ ├── botsort.yaml
│ │ │ └── bytetrack.yaml
│ │ ├── track.py
│ │ ├── trackers/
│ │ │ ├── __init__.py
│ │ │ ├── basetrack.py
│ │ │ ├── bot_sort.py
│ │ │ └── byte_tracker.py
│ │ └── utils/
│ │   ├── __init__.py
│ │   ├── gmc.py
│ │   ├── kalman_filter.py
│ │   └── matching.py
│ ├── vit/
│ │ ├── __init__.py
│ │ ├── rtdetr/
│ │ │ ├── __init__.py
│ │ │ ├── model.py
│ │ │ ├── predict.py
│ │ │ ├── train.py
│ │ │ └── val.py
│ │ ├── sam/
│ │ │ ├── __init__.py
│ │ │ ├── amg.py
│ │ │ ├── autosize.py
│ │ │ ├── build.py
│ │ │ ├── model.py
│ │ │ ├── modules/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── decoders.py
│ │ │ │ ├── encoders.py
│ │ │ │ ├── mask_generator.py
│ │ │ │ ├── prompt_predictor.py
│ │ │ │ ├── sam.py
│ │ │ │ └── transformer.py
│ │ │ └── predict.py
│ │ └── utils/
│ │   ├── __init__.py
│ │   ├── loss.py
│ │   └── ops.py
│ └── yolo/
│   ├── __init__.py
│   ├── cfg/
│   │ ├── __init__.py
│   │ └── default.yaml
│   ├── data/
│   │ ├── __init__.py
│   │ ├── annotator.py
│   │ ├── augment.py
│   │ ├── base.py
│   │ ├── build.py
│   │ ├── converter.py
│   │ ├── dataloaders/
│   │ │ ├── __init__.py
│   │ │ ├── stream_loaders.py
│   │ │ ├── v5augmentations.py
│   │ │ └── v5loader.py
│   │ ├── dataset.py
│   │ ├── dataset_wrappers.py
│   │ ├── scripts/
│   │ │ ├── download_weights.sh
│   │ │ ├── get_coco.sh
│   │ │ ├── get_coco128.sh
│   │ │ └── get_imagenet.sh
│   │ └── utils.py
│   ├── engine/
│   │ ├── __init__.py
│   │ ├── exporter.py
│   │ ├── model.py
│   │ ├── predictor.py
│   │ ├── results.py
│   │ ├── trainer.py
│   │ └── validator.py
│   ├── nas/
│   │ ├── __init__.py
│   │ ├── model.py
│   │ ├── predict.py
│   │ └── val.py
│   ├── utils/
│   │ ├── __init__.py
│   │ ├── autobatch.py
│   │ ├── benchmarks.py
│   │ ├── callbacks/
│   │ │ ├── __init__.py
│   │ │ ├── base.py
│   │ │ ├── clearml.py
│   │ │ ├── comet.py
│   │ │ ├── dvc.py
│   │ │ ├── hub.py
│   │ │ ├── mlflow.py
│   │ │ ├── neptune.py
│   │ │ ├── raytune.py
│   │ │ ├── tensorboard.py
│   │ │ └── wb.py
│   │ ├── checks.py
│   │ ├── dist.py
│   │ ├── downloads.py
│   │ ├── errors.py
│   │ ├── files.py
│   │ ├── instance.py
│   │ ├── loss.py
│   │ ├── metrics.py
│   │ ├── ops.py
│   │ ├── patches.py
│   │ ├── plotting.py
│   │ ├── tal.py
│   │ ├── torch_utils.py
│   │ └── tuner.py
│   └── v8/
│     ├── __init__.py
│     ├── classify/
│     │ ├── __init__.py
│     │ ├── predict.py
│     │ ├── train.py
│     │ └── val.py
│     ├── detect/
│     │ ├── __init__.py
│     │ ├── predict.py
│     │ ├── train.py
│     │ └── val.py
│     ├── pose/
│     │ ├── __init__.py
│     │ ├── predict.py
│     │ ├── train.py
│     │ └── val.py
│     └── segment/
│       ├── __init__.py
│       ├── predict.py
│       ├── train.py
│       └── val.py
└── utils/
  ├── __init__.py
  ├── tools.py
  └── tools_gradio.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitignore
================================================
*.pyc
*.pyo
*.pyd
.DS_Store
.idea
weights
build/
*.egg-info/
gradio_cached_examples
================================================
FILE: Inference.py
================================================
import argparse
from fastsam import FastSAM, FastSAMPrompt
import ast
import torch
from PIL import Image
from utils.tools import convert_box_xywh_to_xyxy
def _str2bool(value):
    """Convert a command-line token into a real ``bool``.

    ``argparse`` with ``type=bool`` calls ``bool()`` on the raw string, so
    every non-empty value (including ``"False"``) becomes ``True``. This
    converter interprets the usual textual spellings instead.

    Args:
        value: The raw option value (a string), or an actual ``bool``.

    Returns:
        The boolean the token stands for. The empty string maps to ``False``,
        matching what ``bool("")`` returned before.

    Raises:
        argparse.ArgumentTypeError: If the token is not a recognised spelling.
    """
    if isinstance(value, bool):
        return value
    token = value.strip().lower()
    if token in ("true", "t", "yes", "y", "1", "on"):
        return True
    if token in ("false", "f", "no", "n", "0", "off", ""):
        return False
    raise argparse.ArgumentTypeError("expected a boolean value, got %r" % (value,))


def parse_args(argv=None):
    """Build the command-line parser for the inference script and parse it.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default)
            ``argparse`` reads ``sys.argv[1:]``, exactly as before.

    Returns:
        argparse.Namespace: The parsed options. ``device`` is a
        ``torch.device`` when left at its default and a ``str`` when given on
        the command line. The four flag-like options (``randomcolor``,
        ``better_quality``, ``retina``, ``withContours``) are real booleans.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model_path", type=str, default="./weights/FastSAM.pt", help="model"
    )
    parser.add_argument(
        "--img_path", type=str, default="./images/dogs.jpg", help="path to image file"
    )
    parser.add_argument("--imgsz", type=int, default=1024, help="image size")
    parser.add_argument(
        "--iou",
        type=float,
        default=0.9,
        help="iou threshold for filtering the annotations",
    )
    parser.add_argument(
        "--text_prompt", type=str, default=None, help='use text prompt eg: "a dog"'
    )
    parser.add_argument(
        "--conf", type=float, default=0.4, help="object confidence threshold"
    )
    parser.add_argument(
        "--output", type=str, default="./output/", help="image save path"
    )
    parser.add_argument(
        "--randomcolor", type=_str2bool, default=True, help="mask random color"
    )
    parser.add_argument(
        "--point_prompt", type=str, default="[[0,0]]", help="[[x1,y1],[x2,y2]]"
    )
    parser.add_argument(
        "--point_label",
        type=str,
        default="[0]",
        help="[1,0] 0:background, 1:foreground",
    )
    parser.add_argument(
        "--box_prompt",
        type=str,
        default="[[0,0,0,0]]",
        help="[[x,y,w,h],[x2,y2,w2,h2]] support multiple boxes",
    )
    parser.add_argument(
        "--better_quality",
        # Was type=str, which turned "--better_quality False" into the truthy
        # string "False".
        type=_str2bool,
        default=False,
        help="better quality using morphologyEx",
    )

    # Pick the default device: CUDA first, then Apple MPS, then CPU.
    # ``torch.backends.mps`` is looked up defensively so a torch build
    # without that backend falls through to CPU instead of raising.
    mps_backend = getattr(torch.backends, "mps", None)
    if torch.cuda.is_available():
        device = torch.device("cuda")
    elif mps_backend is not None and mps_backend.is_available():
        device = torch.device("mps")
    else:
        device = torch.device("cpu")
    parser.add_argument(
        "--device", type=str, default=device, help="cuda:[0,1,2,3,4] or cpu"
    )
    parser.add_argument(
        "--retina",
        type=_str2bool,
        default=True,
        help="draw high-resolution segmentation masks",
    )
    parser.add_argument(
        "--withContours",
        type=_str2bool,
        default=False,
        help="draw the edges of the masks",
    )
    return parser.parse_args(argv)
def main(args):
    """Run FastSAM on one image and save a plot of the selected masks.

    The string-valued prompt options on ``args`` (``point_prompt``,
    ``box_prompt``, ``point_label``) are parsed with ``ast.literal_eval`` and
    written back onto ``args``. Exactly one prompt mode is used, checked in
    this order: box, text, point, otherwise "everything".

    Args:
        args: Namespace as produced by ``parse_args``. It is mutated in place
            (the three prompt attributes are replaced by parsed values).
    """
    # Local import so this function does not rely on a file-level ``import os``.
    import os

    # load model
    model = FastSAM(args.model_path)

    args.point_prompt = ast.literal_eval(args.point_prompt)
    args.box_prompt = convert_box_xywh_to_xyxy(ast.literal_eval(args.box_prompt))
    args.point_label = ast.literal_eval(args.point_label)

    # Close the file handle once the RGB copy has been made.
    with Image.open(args.img_path) as source_image:
        image = source_image.convert("RGB")

    everything_results = model(
        image,
        device=args.device,
        retina_masks=args.retina,
        imgsz=args.imgsz,
        conf=args.conf,
        iou=args.iou,
    )

    bboxes = None
    points = None
    point_label = None
    prompt_process = FastSAMPrompt(image, everything_results, device=args.device)

    # A box prompt counts as "given" when elements 2 and 3 of the first
    # converted box are both non-zero.
    # NOTE(review): presumably the default "[[0,0,0,0]]" stays all-zero after
    # convert_box_xywh_to_xyxy -- confirm against utils.tools.
    if args.box_prompt[0][2] != 0 and args.box_prompt[0][3] != 0:
        ann = prompt_process.box_prompt(bboxes=args.box_prompt)
        bboxes = args.box_prompt
    elif args.text_prompt is not None:
        ann = prompt_process.text_prompt(text=args.text_prompt)
    elif args.point_prompt[0] != [0, 0]:
        ann = prompt_process.point_prompt(
            points=args.point_prompt, pointlabel=args.point_label
        )
        points = args.point_prompt
        point_label = args.point_label
    else:
        ann = prompt_process.everything_prompt()

    # Join with os.path so "--output ./out" (no trailing slash) still writes
    # inside that directory, and the image name is extracted with the
    # platform's own path-separator rules.
    output_path = os.path.join(args.output, os.path.basename(args.img_path))

    # NOTE(review): args.randomcolor is parsed but not forwarded here.
    prompt_process.plot(
        annotations=ann,
        output_path=output_path,
        bboxes=bboxes,
        points=points,
        point_label=point_label,
        withContours=args.withContours,
        better_quality=args.better_quality,
    )
# Script entry point: parse the command line and hand the options to main().
if __name__ == "__main__":
    main(parse_args())
================================================
FILE: LICENSE
================================================
GNU AFFERO GENERAL PUBLIC LICENSE
Version 3, 19 November 2007
Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The GNU Affero General Public License is a free, copyleft license for
software and other kinds of works, specifically designed to ensure
cooperation with the community in the case of network server software.
The licenses for most software and other practical works are designed
to take away your freedom to share and change the works. By contrast,
our General Public Licenses are intended to guarantee your freedom to
share and change all versions of a program--to make sure it remains free
software for all its users.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
them if you wish), that you receive source code or can get it if you
want it, that you can change the software or use pieces of it in new
free programs, and that you know you can do these things.
Developers that use our General Public Licenses protect your rights
with two steps: (1) assert copyright on the software, and (2) offer
you this License which gives you legal permission to copy, distribute
and/or modify the software.
A secondary benefit of defending all users' freedom is that
improvements made in alternate versions of the program, if they
receive widespread use, become available for other developers to
incorporate. Many developers of free software are heartened and
encouraged by the resulting cooperation. However, in the case of
software used on network servers, this result may fail to come about.
The GNU General Public License permits making a modified version and
letting the public access it on a server without ever releasing its
source code to the public.
The GNU Affero General Public License is designed specifically to
ensure that, in such cases, the modified source code becomes available
to the community. It requires the operator of a network server to
provide the source code of the modified version running there to the
users of that server. Therefore, public use of a modified version, on
a publicly accessible server, gives the public access to the source
code of the modified version.
An older license, called the Affero General Public License and
published by Affero, was designed to accomplish similar goals. This is
a different license, not a version of the Affero GPL, but Affero has
released a new version of the Affero GPL which permits relicensing under
this license.
The precise terms and conditions for copying, distribution and
modification follow.
TERMS AND CONDITIONS
0. Definitions.
"This License" refers to version 3 of the GNU Affero General Public License.
"Copyright" also means copyright-like laws that apply to other kinds of
works, such as semiconductor masks.
"The Program" refers to any copyrightable work licensed under this
License. Each licensee is addressed as "you". "Licensees" and
"recipients" may be individuals or organizations.
To "modify" a work means to copy from or adapt all or part of the work
in a fashion requiring copyright permission, other than the making of an
exact copy. The resulting work is called a "modified version" of the
earlier work or a work "based on" the earlier work.
A "covered work" means either the unmodified Program or a work based
on the Program.
To "propagate" a work means to do anything with it that, without
permission, would make you directly or secondarily liable for
infringement under applicable copyright law, except executing it on a
computer or modifying a private copy. Propagation includes copying,
distribution (with or without modification), making available to the
public, and in some countries other activities as well.
To "convey" a work means any kind of propagation that enables other
parties to make or receive copies. Mere interaction with a user through
a computer network, with no transfer of a copy, is not conveying.
An interactive user interface displays "Appropriate Legal Notices"
to the extent that it includes a convenient and prominently visible
feature that (1) displays an appropriate copyright notice, and (2)
tells the user that there is no warranty for the work (except to the
extent that warranties are provided), that licensees may convey the
work under this License, and how to view a copy of this License. If
the interface presents a list of user commands or options, such as a
menu, a prominent item in the list meets this criterion.
1. Source Code.
The "source code" for a work means the preferred form of the work
for making modifications to it. "Object code" means any non-source
form of a work.
A "Standard Interface" means an interface that either is an official
standard defined by a recognized standards body, or, in the case of
interfaces specified for a particular programming language, one that
is widely used among developers working in that language.
The "System Libraries" of an executable work include anything, other
than the work as a whole, that (a) is included in the normal form of
packaging a Major Component, but which is not part of that Major
Component, and (b) serves only to enable use of the work with that
Major Component, or to implement a Standard Interface for which an
implementation is available to the public in source code form. A
"Major Component", in this context, means a major essential component
(kernel, window system, and so on) of the specific operating system
(if any) on which the executable work runs, or a compiler used to
produce the work, or an object code interpreter used to run it.
The "Corresponding Source" for a work in object code form means all
the source code needed to generate, install, and (for an executable
work) run the object code and to modify the work, including scripts to
control those activities. However, it does not include the work's
System Libraries, or general-purpose tools or generally available free
programs which are used unmodified in performing those activities but
which are not part of the work. For example, Corresponding Source
includes interface definition files associated with source files for
the work, and the source code for shared libraries and dynamically
linked subprograms that the work is specifically designed to require,
such as by intimate data communication or control flow between those
subprograms and other parts of the work.
The Corresponding Source need not include anything that users
can regenerate automatically from other parts of the Corresponding
Source.
The Corresponding Source for a work in source code form is that
same work.
2. Basic Permissions.
All rights granted under this License are granted for the term of
copyright on the Program, and are irrevocable provided the stated
conditions are met. This License explicitly affirms your unlimited
permission to run the unmodified Program. The output from running a
covered work is covered by this License only if the output, given its
content, constitutes a covered work. This License acknowledges your
rights of fair use or other equivalent, as provided by copyright law.
You may make, run and propagate covered works that you do not
convey, without conditions so long as your license otherwise remains
in force. You may convey covered works to others for the sole purpose
of having them make modifications exclusively for you, or provide you
with facilities for running those works, provided that you comply with
the terms of this License in conveying all material for which you do
not control copyright. Those thus making or running the covered works
for you must do so exclusively on your behalf, under your direction
and control, on terms that prohibit them from making any copies of
your copyrighted material outside their relationship with you.
Conveying under any other circumstances is permitted solely under
the conditions stated below. Sublicensing is not allowed; section 10
makes it unnecessary.
3. Protecting Users' Legal Rights From Anti-Circumvention Law.
No covered work shall be deemed part of an effective technological
measure under any applicable law fulfilling obligations under article
11 of the WIPO copyright treaty adopted on 20 December 1996, or
similar laws prohibiting or restricting circumvention of such
measures.
When you convey a covered work, you waive any legal power to forbid
circumvention of technological measures to the extent such circumvention
is effected by exercising rights under this License with respect to
the covered work, and you disclaim any intention to limit operation or
modification of the work as a means of enforcing, against the work's
users, your or third parties' legal rights to forbid circumvention of
technological measures.
4. Conveying Verbatim Copies.
You may convey verbatim copies of the Program's source code as you
receive it, in any medium, provided that you conspicuously and
appropriately publish on each copy an appropriate copyright notice;
keep intact all notices stating that this License and any
non-permissive terms added in accord with section 7 apply to the code;
keep intact all notices of the absence of any warranty; and give all
recipients a copy of this License along with the Program.
You may charge any price or no price for each copy that you convey,
and you may offer support or warranty protection for a fee.
5. Conveying Modified Source Versions.
You may convey a work based on the Program, or the modifications to
produce it from the Program, in the form of source code under the
terms of section 4, provided that you also meet all of these conditions:
a) The work must carry prominent notices stating that you modified
it, and giving a relevant date.
b) The work must carry prominent notices stating that it is
released under this License and any conditions added under section
7. This requirement modifies the requirement in section 4 to
"keep intact all notices".
c) You must license the entire work, as a whole, under this
License to anyone who comes into possession of a copy. This
License will therefore apply, along with any applicable section 7
additional terms, to the whole of the work, and all its parts,
regardless of how they are packaged. This License gives no
permission to license the work in any other way, but it does not
invalidate such permission if you have separately received it.
d) If the work has interactive user interfaces, each must display
Appropriate Legal Notices; however, if the Program has interactive
interfaces that do not display Appropriate Legal Notices, your
work need not make them do so.
A compilation of a covered work with other separate and independent
works, which are not by their nature extensions of the covered work,
and which are not combined with it such as to form a larger program,
in or on a volume of a storage or distribution medium, is called an
"aggregate" if the compilation and its resulting copyright are not
used to limit the access or legal rights of the compilation's users
beyond what the individual works permit. Inclusion of a covered work
in an aggregate does not cause this License to apply to the other
parts of the aggregate.
6. Conveying Non-Source Forms.
You may convey a covered work in object code form under the terms
of sections 4 and 5, provided that you also convey the
machine-readable Corresponding Source under the terms of this License,
in one of these ways:
a) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by the
Corresponding Source fixed on a durable physical medium
customarily used for software interchange.
b) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by a
written offer, valid for at least three years and valid for as
long as you offer spare parts or customer support for that product
model, to give anyone who possesses the object code either (1) a
copy of the Corresponding Source for all the software in the
product that is covered by this License, on a durable physical
medium customarily used for software interchange, for a price no
more than your reasonable cost of physically performing this
conveying of source, or (2) access to copy the
Corresponding Source from a network server at no charge.
c) Convey individual copies of the object code with a copy of the
written offer to provide the Corresponding Source. This
alternative is allowed only occasionally and noncommercially, and
only if you received the object code with such an offer, in accord
with subsection 6b.
d) Convey the object code by offering access from a designated
place (gratis or for a charge), and offer equivalent access to the
Corresponding Source in the same way through the same place at no
further charge. You need not require recipients to copy the
Corresponding Source along with the object code. If the place to
copy the object code is a network server, the Corresponding Source
may be on a different server (operated by you or a third party)
that supports equivalent copying facilities, provided you maintain
clear directions next to the object code saying where to find the
Corresponding Source. Regardless of what server hosts the
Corresponding Source, you remain obligated to ensure that it is
available for as long as needed to satisfy these requirements.
e) Convey the object code using peer-to-peer transmission, provided
you inform other peers where the object code and Corresponding
Source of the work are being offered to the general public at no
charge under subsection 6d.
A separable portion of the object code, whose source code is excluded
from the Corresponding Source as a System Library, need not be
included in conveying the object code work.
A "User Product" is either (1) a "consumer product", which means any
tangible personal property which is normally used for personal, family,
or household purposes, or (2) anything designed or sold for incorporation
into a dwelling. In determining whether a product is a consumer product,
doubtful cases shall be resolved in favor of coverage. For a particular
product received by a particular user, "normally used" refers to a
typical or common use of that class of product, regardless of the status
of the particular user or of the way in which the particular user
actually uses, or expects or is expected to use, the product. A product
is a consumer product regardless of whether the product has substantial
commercial, industrial or non-consumer uses, unless such uses represent
the only significant mode of use of the product.
"Installation Information" for a User Product means any methods,
procedures, authorization keys, or other information required to install
and execute modified versions of a covered work in that User Product from
a modified version of its Corresponding Source. The information must
suffice to ensure that the continued functioning of the modified object
code is in no case prevented or interfered with solely because
modification has been made.
If you convey an object code work under this section in, or with, or
specifically for use in, a User Product, and the conveying occurs as
part of a transaction in which the right of possession and use of the
User Product is transferred to the recipient in perpetuity or for a
fixed term (regardless of how the transaction is characterized), the
Corresponding Source conveyed under this section must be accompanied
by the Installation Information. But this requirement does not apply
if neither you nor any third party retains the ability to install
modified object code on the User Product (for example, the work has
been installed in ROM).
The requirement to provide Installation Information does not include a
requirement to continue to provide support service, warranty, or updates
for a work that has been modified or installed by the recipient, or for
the User Product in which it has been modified or installed. Access to a
network may be denied when the modification itself materially and
adversely affects the operation of the network or violates the rules and
protocols for communication across the network.
Corresponding Source conveyed, and Installation Information provided,
in accord with this section must be in a format that is publicly
documented (and with an implementation available to the public in
source code form), and must require no special password or key for
unpacking, reading or copying.
7. Additional Terms.
"Additional permissions" are terms that supplement the terms of this
License by making exceptions from one or more of its conditions.
Additional permissions that are applicable to the entire Program shall
be treated as though they were included in this License, to the extent
that they are valid under applicable law. If additional permissions
apply only to part of the Program, that part may be used separately
under those permissions, but the entire Program remains governed by
this License without regard to the additional permissions.
When you convey a copy of a covered work, you may at your option
remove any additional permissions from that copy, or from any part of
it. (Additional permissions may be written to require their own
removal in certain cases when you modify the work.) You may place
additional permissions on material, added by you to a covered work,
for which you have or can give appropriate copyright permission.
Notwithstanding any other provision of this License, for material you
add to a covered work, you may (if authorized by the copyright holders of
that material) supplement the terms of this License with terms:
a) Disclaiming warranty or limiting liability differently from the
terms of sections 15 and 16 of this License; or
b) Requiring preservation of specified reasonable legal notices or
author attributions in that material or in the Appropriate Legal
Notices displayed by works containing it; or
c) Prohibiting misrepresentation of the origin of that material, or
requiring that modified versions of such material be marked in
reasonable ways as different from the original version; or
d) Limiting the use for publicity purposes of names of licensors or
authors of the material; or
e) Declining to grant rights under trademark law for use of some
trade names, trademarks, or service marks; or
f) Requiring indemnification of licensors and authors of that
material by anyone who conveys the material (or modified versions of
it) with contractual assumptions of liability to the recipient, for
any liability that these contractual assumptions directly impose on
those licensors and authors.
All other non-permissive additional terms are considered "further
restrictions" within the meaning of section 10. If the Program as you
received it, or any part of it, contains a notice stating that it is
governed by this License along with a term that is a further
restriction, you may remove that term. If a license document contains
a further restriction but permits relicensing or conveying under this
License, you may add to a covered work material governed by the terms
of that license document, provided that the further restriction does
not survive such relicensing or conveying.
If you add terms to a covered work in accord with this section, you
must place, in the relevant source files, a statement of the
additional terms that apply to those files, or a notice indicating
where to find the applicable terms.
Additional terms, permissive or non-permissive, may be stated in the
form of a separately written license, or stated as exceptions;
the above requirements apply either way.
8. Termination.
You may not propagate or modify a covered work except as expressly
provided under this License. Any attempt otherwise to propagate or
modify it is void, and will automatically terminate your rights under
this License (including any patent licenses granted under the third
paragraph of section 11).
However, if you cease all violation of this License, then your
license from a particular copyright holder is reinstated (a)
provisionally, unless and until the copyright holder explicitly and
finally terminates your license, and (b) permanently, if the copyright
holder fails to notify you of the violation by some reasonable means
prior to 60 days after the cessation.
Moreover, your license from a particular copyright holder is
reinstated permanently if the copyright holder notifies you of the
violation by some reasonable means, this is the first time you have
received notice of violation of this License (for any work) from that
copyright holder, and you cure the violation prior to 30 days after
your receipt of the notice.
Termination of your rights under this section does not terminate the
licenses of parties who have received copies or rights from you under
this License. If your rights have been terminated and not permanently
reinstated, you do not qualify to receive new licenses for the same
material under section 10.
9. Acceptance Not Required for Having Copies.
You are not required to accept this License in order to receive or
run a copy of the Program. Ancillary propagation of a covered work
occurring solely as a consequence of using peer-to-peer transmission
to receive a copy likewise does not require acceptance. However,
nothing other than this License grants you permission to propagate or
modify any covered work. These actions infringe copyright if you do
not accept this License. Therefore, by modifying or propagating a
covered work, you indicate your acceptance of this License to do so.
10. Automatic Licensing of Downstream Recipients.
Each time you convey a covered work, the recipient automatically
receives a license from the original licensors, to run, modify and
propagate that work, subject to this License. You are not responsible
for enforcing compliance by third parties with this License.
An "entity transaction" is a transaction transferring control of an
organization, or substantially all assets of one, or subdividing an
organization, or merging organizations. If propagation of a covered
work results from an entity transaction, each party to that
transaction who receives a copy of the work also receives whatever
licenses to the work the party's predecessor in interest had or could
give under the previous paragraph, plus a right to possession of the
Corresponding Source of the work from the predecessor in interest, if
the predecessor has it or can get it with reasonable efforts.
You may not impose any further restrictions on the exercise of the
rights granted or affirmed under this License. For example, you may
not impose a license fee, royalty, or other charge for exercise of
rights granted under this License, and you may not initiate litigation
(including a cross-claim or counterclaim in a lawsuit) alleging that
any patent claim is infringed by making, using, selling, offering for
sale, or importing the Program or any portion of it.
11. Patents.
A "contributor" is a copyright holder who authorizes use under this
License of the Program or a work on which the Program is based. The
work thus licensed is called the contributor's "contributor version".
A contributor's "essential patent claims" are all patent claims
owned or controlled by the contributor, whether already acquired or
hereafter acquired, that would be infringed by some manner, permitted
by this License, of making, using, or selling its contributor version,
but do not include claims that would be infringed only as a
consequence of further modification of the contributor version. For
purposes of this definition, "control" includes the right to grant
patent sublicenses in a manner consistent with the requirements of
this License.
Each contributor grants you a non-exclusive, worldwide, royalty-free
patent license under the contributor's essential patent claims, to
make, use, sell, offer for sale, import and otherwise run, modify and
propagate the contents of its contributor version.
In the following three paragraphs, a "patent license" is any express
agreement or commitment, however denominated, not to enforce a patent
(such as an express permission to practice a patent or covenant not to
sue for patent infringement). To "grant" such a patent license to a
party means to make such an agreement or commitment not to enforce a
patent against the party.
If you convey a covered work, knowingly relying on a patent license,
and the Corresponding Source of the work is not available for anyone
to copy, free of charge and under the terms of this License, through a
publicly available network server or other readily accessible means,
then you must either (1) cause the Corresponding Source to be so
available, or (2) arrange to deprive yourself of the benefit of the
patent license for this particular work, or (3) arrange, in a manner
consistent with the requirements of this License, to extend the patent
license to downstream recipients. "Knowingly relying" means you have
actual knowledge that, but for the patent license, your conveying the
covered work in a country, or your recipient's use of the covered work
in a country, would infringe one or more identifiable patents in that
country that you have reason to believe are valid.
If, pursuant to or in connection with a single transaction or
arrangement, you convey, or propagate by procuring conveyance of, a
covered work, and grant a patent license to some of the parties
receiving the covered work authorizing them to use, propagate, modify
or convey a specific copy of the covered work, then the patent license
you grant is automatically extended to all recipients of the covered
work and works based on it.
A patent license is "discriminatory" if it does not include within
the scope of its coverage, prohibits the exercise of, or is
conditioned on the non-exercise of one or more of the rights that are
specifically granted under this License. You may not convey a covered
work if you are a party to an arrangement with a third party that is
in the business of distributing software, under which you make payment
to the third party based on the extent of your activity of conveying
the work, and under which the third party grants, to any of the
parties who would receive the covered work from you, a discriminatory
patent license (a) in connection with copies of the covered work
conveyed by you (or copies made from those copies), or (b) primarily
for and in connection with specific products or compilations that
contain the covered work, unless you entered into that arrangement,
or that patent license was granted, prior to 28 March 2007.
Nothing in this License shall be construed as excluding or limiting
any implied license or other defenses to infringement that may
otherwise be available to you under applicable patent law.
12. No Surrender of Others' Freedom.
If conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot convey a
covered work so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you may
not convey it at all. For example, if you agree to terms that obligate you
to collect a royalty for further conveying from those to whom you convey
the Program, the only way you could satisfy both those terms and this
License would be to refrain entirely from conveying the Program.
13. Remote Network Interaction; Use with the GNU General Public License.
Notwithstanding any other provision of this License, if you modify the
Program, your modified version must prominently offer all users
interacting with it remotely through a computer network (if your version
supports such interaction) an opportunity to receive the Corresponding
Source of your version by providing access to the Corresponding Source
from a network server at no charge, through some standard or customary
means of facilitating copying of software. This Corresponding Source
shall include the Corresponding Source for any work covered by version 3
of the GNU General Public License that is incorporated pursuant to the
following paragraph.
Notwithstanding any other provision of this License, you have
permission to link or combine any covered work with a work licensed
under version 3 of the GNU General Public License into a single
combined work, and to convey the resulting work. The terms of this
License will continue to apply to the part which is the covered work,
but the work with which it is combined will remain governed by version
3 of the GNU General Public License.
14. Revised Versions of this License.
The Free Software Foundation may publish revised and/or new versions of
the GNU Affero General Public License from time to time. Such new versions
will be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the
Program specifies that a certain numbered version of the GNU Affero General
Public License "or any later version" applies to it, you have the
option of following the terms and conditions either of that numbered
version or of any later version published by the Free Software
Foundation. If the Program does not specify a version number of the
GNU Affero General Public License, you may choose any version ever published
by the Free Software Foundation.
If the Program specifies that a proxy can decide which future
versions of the GNU Affero General Public License can be used, that proxy's
public statement of acceptance of a version permanently authorizes you
to choose that version for the Program.
Later license versions may give you additional or different
permissions. However, no additional obligations are imposed on any
author or copyright holder as a result of your choosing to follow a
later version.
15. Disclaimer of Warranty.
THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
16. Limitation of Liability.
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
SUCH DAMAGES.
17. Interpretation of Sections 15 and 16.
If the disclaimer of warranty and limitation of liability provided
above cannot be given local legal effect according to their terms,
reviewing courts shall apply local law that most closely approximates
an absolute waiver of all civil liability in connection with the
Program, unless a warranty or assumption of liability accompanies a
copy of the Program in return for a fee.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
state the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
<one line to give the program's name and a brief idea of what it does.>
Copyright (C) <year> <name of author>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
Also add information on how to contact you by electronic and paper mail.
If your software can interact with users remotely through a computer
network, you should also make sure that it provides a way for users to
get its source. For example, if your program is a web application, its
interface could display a "Source" link that leads users to an archive
of the code. There are many ways you could offer source, and different
solutions will be better for different programs; see section 13 for the
specific requirements.
You should also get your employer (if you work as a programmer) or school,
if any, to sign a "copyright disclaimer" for the program, if necessary.
For more information on this, and how to apply and follow the GNU AGPL, see
<https://www.gnu.org/licenses/>.
================================================
FILE: MORE_USAGES.md
================================================
# MORE_USAGES
### Everything mode
Use `--imgsz` to set the input image size.
```shell
python Inference.py --model_path ./weights/FastSAM.pt \
--img_path ./images/dogs.jpg \
--imgsz 720
```

### Use more points
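Pass several points to `--point_prompt` and one label per point to `--point_label` (1: foreground, 0: background).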
```shell
python Inference.py --model_path ./weights/FastSAM.pt \
--img_path ./images/dogs.jpg \
--point_prompt "[[520,360],[620,300],[520,300],[620,360]]" \
--point_label "[1,0,1,0]"
```

### draw mask edge
Use `--withContours True` to draw the edge of the mask.
When `--better_quality True` is set, the edge will be smoother.
```shell
python Inference.py --model_path ./weights/FastSAM.pt \
--img_path ./images/dogs.jpg \
--point_prompt "[[620,360]]" \
--point_label "[1]" \
--withContours True \
--better_quality True
```

### use box prompt
Use `--box_prompt "[[x,y,w,h]]"` to specify the bounding box of the foreground object.
```shell
python Inference.py --model_path ./weights/FastSAM.pt \
--img_path ./images/dogs.jpg \
--box_prompt "[[570,200,230,400]]"
```

### use text prompt
Use `--text_prompt "text"` to specify the text prompt
```shell
python Inference.py --model_path ./weights/FastSAM.pt \
--img_path ./images/cat.jpg \
--text_prompt "cat" \
--better_quality True \
--withContours True
```

================================================
FILE: README.md
================================================

# Fast Segment Anything
[[`📕Paper`](https://arxiv.org/pdf/2306.12156.pdf)] [[`🤗HuggingFace Demo`](https://huggingface.co/spaces/An-619/FastSAM)] [[`Colab demo`](https://colab.research.google.com/drive/1oX14f6IneGGw612WgVlAiy91UHwFAvr9?usp=sharing)] [[`Replicate demo & API`](https://replicate.com/casia-iva-lab/fastsam)] [~~[`OpenXLab Demo`](https://openxlab.org.cn/apps/detail/zxair/FastSAM)~~] [[`Model Zoo`](#model-checkpoints)] [[`BibTeX`](#citing-fastsam)] [[`Video Demo`](https://youtu.be/yHNPyqazYYU)]

The **Fast Segment Anything Model (FastSAM)** is a CNN Segment Anything Model trained using only 2% of the SA-1B dataset published by the SAM authors. FastSAM achieves performance comparable to
the SAM method at **50× higher run-time speed**.

**🍇 Updates**
- **`2024/6/25`** The edge jaggies issue has been slightly improved [#231](https://github.com/CASIA-IVA-Lab/FastSAM/pull/231), and the strategy has also been synchronized to the ultralytics project[#13939](https://github.com/ultralytics/ultralytics/pull/13939),[#13912](https://github.com/ultralytics/ultralytics/pull/13912). The [huggingface demo](https://huggingface.co/spaces/An-619/FastSAM) is updated.
- **`2023/11/28`** Recommendation: [Semantic FastSAM](https://github.com/KBH00/Semantic-Fast-SAM), which adds semantic class labels to FastSAM. Thanks to [KBH00](https://github.com/KBH00/Semantic-Fast-SAM) for this valuable contribution.
- **`2023/09/11`** Release [Training and Validation Code](https://github.com/CASIA-IVA-Lab/FastSAM/releases).
- **`2023/08/17`** Release [OpenXLab Demo](https://openxlab.org.cn/apps/detail/zxair/FastSAM). Thanks to OpenXLab Team for help.
- **`2023/07/06`** Added to [Ultralytics (YOLOv8) Model Hub](https://docs.ultralytics.com/models/fast-sam/). Thanks to [Ultralytics](https://github.com/ultralytics/ultralytics) for help 🌹.
- **`2023/06/29`** Support [text mode](https://huggingface.co/spaces/An-619/FastSAM) in HuggingFace Space. Thanks a lot to [gaoxinge](https://github.com/gaoxinge) for help 🌹.
- **`2023/06/29`** Release [FastSAM_Awesome_TensorRT](https://github.com/ChuRuaNh0/FastSam_Awsome_TensorRT). Thanks a lot to [ChuRuaNh0](https://github.com/ChuRuaNh0) for providing the TensorRT model of FastSAM 🌹.
- **`2023/06/26`** Release [FastSAM Replicate Online Demo](https://replicate.com/casia-iva-lab/fastsam). Thanks a lot to [Chenxi](https://chenxwh.github.io/) for providing this nice demo 🌹.
- **`2023/06/26`** Support [points mode](https://huggingface.co/spaces/An-619/FastSAM) in HuggingFace Space. Better and faster interaction will come soon!
- **`2023/06/24`** Thanks a lot to [Grounding-SAM](https://github.com/IDEA-Research/Grounded-Segment-Anything) for Combining Grounding-DINO with FastSAM in [Grounded-FastSAM](https://github.com/IDEA-Research/Grounded-Segment-Anything/tree/main/EfficientSAM) 🌹.
## Installation
Clone the repository locally:
```shell
git clone https://github.com/CASIA-IVA-Lab/FastSAM.git
```
Create the conda env. The code requires `python>=3.7`, as well as `pytorch>=1.7` and `torchvision>=0.8`. Please follow the instructions [here](https://pytorch.org/get-started/locally/) to install both PyTorch and TorchVision dependencies. Installing both PyTorch and TorchVision with CUDA support is strongly recommended.
```shell
conda create -n FastSAM python=3.9
conda activate FastSAM
```
Install the packages:
```shell
cd FastSAM
pip install -r requirements.txt
```
Install CLIP (required only if you want to use the text prompt):
```shell
pip install git+https://github.com/openai/CLIP.git
```
## <a name="GettingStarted"></a> Getting Started
First download a [model checkpoint](#model-checkpoints).
Then, you can run the scripts to try the everything mode and three prompt modes.
```shell
# Everything mode
python Inference.py --model_path ./weights/FastSAM.pt --img_path ./images/dogs.jpg
```
```shell
# Text prompt
python Inference.py --model_path ./weights/FastSAM.pt --img_path ./images/dogs.jpg --text_prompt "the yellow dog"
```
```shell
# Box prompt (xywh)
python Inference.py --model_path ./weights/FastSAM.pt --img_path ./images/dogs.jpg --box_prompt "[[570,200,230,400]]"
```
```shell
# Points prompt
python Inference.py --model_path ./weights/FastSAM.pt --img_path ./images/dogs.jpg --point_prompt "[[520,360],[620,300]]" --point_label "[1,0]"
```
You can use the following code to generate all masks and visualize the results.
```python
from fastsam import FastSAM, FastSAMPrompt
model = FastSAM('./weights/FastSAM.pt')
IMAGE_PATH = './images/dogs.jpg'
DEVICE = 'cpu'
everything_results = model(IMAGE_PATH, device=DEVICE, retina_masks=True, imgsz=1024, conf=0.4, iou=0.9,)
prompt_process = FastSAMPrompt(IMAGE_PATH, everything_results, device=DEVICE)
# everything prompt
ann = prompt_process.everything_prompt()
prompt_process.plot(annotations=ann,output_path='./output/dog.jpg',)
```
For point/box/text mode prompts, use:
```python
# bbox default shape [0,0,0,0] -> [x1,y1,x2,y2]
ann = prompt_process.box_prompt(bboxes=[[200, 200, 300, 300]])
# text prompt
ann = prompt_process.text_prompt(text='a photo of a dog')
# point prompt
# points default [[0,0]] [[x1,y1],[x2,y2]]
# point_label default [0] [1,0] 0:background, 1:foreground
ann = prompt_process.point_prompt(points=[[620, 360]], pointlabel=[1])
prompt_process.plot(annotations=ann,output_path='./output/dog.jpg',)
```
You are also welcome to try our Colab demo: [FastSAM_example.ipynb](https://colab.research.google.com/drive/1oX14f6IneGGw612WgVlAiy91UHwFAvr9?usp=sharing).
## Different Inference Options
We provide various options for different purposes, details are in [MORE_USAGES.md](MORE_USAGES.md).
## Training or Validation
Training from scratch or validation: [Training and Validation Code](https://github.com/CASIA-IVA-Lab/FastSAM/releases).
## Web demo
### Gradio demo
- We also provide a UI for testing our method that is built with gradio. You can upload a custom image, select the mode and set the parameters, click the segment button, and get a satisfactory segmentation result. Currently, the UI supports interaction with the 'Everything mode' and 'points mode'. We plan to add support for additional modes in the future. Running the following command in a terminal will launch the demo:
```
# Download the pre-trained model in "./weights/FastSAM.pt"
python app_gradio.py
```
- This demo is also hosted on [HuggingFace Space](https://huggingface.co/spaces/An-619/FastSAM).
 
### Replicate demo
- The [Replicate demo](https://replicate.com/casia-iva-lab/fastsam) supports all modes; you can try the points, box, and text modes there.
  
## <a name="Models"></a>Model Checkpoints
Two versions of the model are available with different sizes. Click the links below to download the checkpoint for the corresponding model type.
- **`default` or `FastSAM`: [YOLOv8x based Segment Anything Model](https://drive.google.com/file/d/1m1sjY4ihXBU1fZXdQ-Xdj-mDltW-2Rqv/view?usp=sharing) | [Baidu Cloud (pwd: 0000).](https://pan.baidu.com/s/18KzBmOTENjByoWWR17zdiQ?pwd=0000)**
- `FastSAM-s`: [YOLOv8s based Segment Anything Model.](https://drive.google.com/file/d/10XmSj6mmpmRb8NhXbtiuO9cTTBwR_9SV/view?usp=sharing)
## Results
All results were tested on a single NVIDIA GeForce RTX 3090.
### 1. Inference time
Running Speed under Different Point Prompt Numbers(ms).
| method | params | 1 | 10 | 100 | E(16x16) | E(32x32\*) | E(64x64) |
|:------------------:|:--------:|:-----:|:-----:|:-----:|:----------:|:-----------:|:----------:|
| SAM-H | 0.6G | 446 | 464 | 627 | 852 | 2099 | 6972 |
| SAM-B | 136M | 110 | 125 | 230 | 432 | 1383 | 5417 |
| FastSAM | 68M | 40 |40 | 40 | 40 | 40 | 40 |
### 2. Memory usage
| Dataset | Method | GPU Memory (MB) |
| :-------: | :-----: | :-------------: |
| COCO 2017 | FastSAM | 2608 |
| COCO 2017 | SAM-H | 7060 |
| COCO 2017 | SAM-B | 4670 |
### 3. Zero-shot Transfer Experiments
#### Edge Detection
Tested on the BSDS500 dataset.
|method | year| ODS | OIS | AP | R50 |
|:----------:|:-------:|:--------:|:--------:|:------:|:-----:|
| HED | 2015| .788 | .808 | .840 | .923 |
| SAM | 2023| .768 | .786 | .794 | .928 |
| FastSAM | 2023| .750 | .790 | .793 | .903 |
#### Object Proposals
##### COCO
| method | AR10 | AR100 | AR1000 | AUC |
| :-------: | :--: | :---: | :-----: | :--: |
| SAM-H E64 | 15.5 | 45.6 | 67.7 | 32.1 |
| SAM-H E32 | 18.5 | 49.5 | 62.5 | 33.7 |
| SAM-B E32 | 11.4 | 39.6 | 59.1 | 27.3 |
| FastSAM | 15.7 | 47.3 | 63.7 | 32.2 |
##### LVIS
bbox AR@1000
| method | all | small | med. | large |
|:---------------:|:-----:|:------:|:-----:|:------:|
| ViTDet-H | 65.0 | 53.2 | 83.3 | 91.2 |
| *zero-shot transfer methods* | | | | |
| SAM-H E64 | 52.1 | 36.6 | 75.1 | 88.2 |
| SAM-H E32 | 50.3 | 33.1 | 76.2 | 89.8 |
| SAM-B E32 | 45.0 | 29.3 | 68.7 | 80.6 |
| FastSAM | 57.1 | 44.3 | 77.1 | 85.3 |
#### Instance Segmentation On COCO 2017
| method | AP | APS | APM | APL |
| :------: | :--: | :--: | :--: | :--: |
| ViTDet-H | .510 | .320 | .543 | .689 |
| SAM | .465 | .308 | .510 | .617 |
| FastSAM | .379 | .239 | .434 | .500 |
### 4. Performance Visualization
Several segmentation results:
#### Natural Images

#### Text to Mask

### 5. Downstream tasks
The results of several downstream tasks to show the effectiveness.
#### Anomaly Detection

#### Salient Object Detection

#### Building Extracting

## License
The model is licensed under the [Apache 2.0 license](LICENSE).
## Acknowledgement
- [Segment Anything](https://segment-anything.com/) provides the SA-1B dataset and the base codes.
- [YOLOv8](https://github.com/ultralytics/ultralytics) provides codes and pre-trained models.
- [YOLACT](https://arxiv.org/abs/2112.10003) provides a powerful instance segmentation method.
- [Grounded-Segment-Anything](https://huggingface.co/spaces/yizhangliu/Grounded-Segment-Anything) provides a useful web demo template.
## Contributors
Our project wouldn't be possible without the contributions of these amazing people! Thank you all for making this project better.
<a href="https://github.com/CASIA-IVA-Lab/FastSAM/graphs/contributors">
<img src="https://contrib.rocks/image?repo=CASIA-IVA-Lab/FastSAM" />
</a>
## Citing FastSAM
If you find this project useful for your research, please consider citing the following BibTeX entry.
```
@misc{zhao2023fast,
title={Fast Segment Anything},
author={Xu Zhao and Wenchao Ding and Yongqi An and Yinglong Du and Tao Yu and Min Li and Ming Tang and Jinqiao Wang},
year={2023},
eprint={2306.12156},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
```
[](https://star-history.com/#CASIA-IVA-Lab/FastSAM&Date)
================================================
FILE: app_gradio.py
================================================
from ultralytics import YOLO
import gradio as gr
import torch
from utils.tools_gradio import fast_process
from utils.tools import format_results, box_prompt, point_prompt, text_prompt
from PIL import ImageDraw
import numpy as np
# Load the pre-trained FastSAM checkpoint through the Ultralytics YOLO wrapper.
model = YOLO('./weights/FastSAM.pt')

# Pick the inference device: CUDA first, then Apple MPS, otherwise the CPU.
# The MPS probe only runs when CUDA is unavailable, as in a chained ternary.
if torch.cuda.is_available():
    _device_name = "cuda"
elif torch.backends.mps.is_available():
    _device_name = "mps"
else:
    _device_name = "cpu"
device = torch.device(_device_name)
# Static text and assets for the demo page (Markdown/HTML rendered by Gradio).
# Page title, rendered through gr.Markdown.
title = "<center><strong><font size='8'>🏃 Fast Segment Anything 🤗</font></strong></center>"
# Changelog shown next to the title.
news = """ # 📖 News
🔥 2023/07/14: Add a "wider result" button in text mode (Thanks for [gaoxinge](https://github.com/CASIA-IVA-Lab/FastSAM/pull/95)).
🔥 2023/06/29: Support the text mode (Thanks for [gaoxinge](https://github.com/CASIA-IVA-Lab/FastSAM/pull/47)).
🔥 2023/06/26: Support the points mode. (Better and faster interaction will come soon!)
🔥 2023/06/24: Add the 'Advanced options" in Everything mode to get a more detailed adjustment.
"""
# Help text displayed in the "Everything mode" and "Text mode" tabs.
description_e = """This is a demo on Github project 🏃 [Fast Segment Anything Model](https://github.com/CASIA-IVA-Lab/FastSAM). Welcome to give a star ⭐️ to it.
🎯 Upload an Image, segment it with Fast Segment Anything (Everything mode). The other modes will come soon.
⌛️ It takes about 6~ seconds to generate segment results. The concurrency_count of queue is 1, please wait for a moment when it is crowded.
🚀 To get faster results, you can use a smaller input size and leave high_visual_quality unchecked.
📣 You can also obtain the segmentation results of any Image through this Colab: [](https://colab.research.google.com/drive/1oX14f6IneGGw612WgVlAiy91UHwFAvr9?usp=sharing)
😚 A huge thanks goes out to the @HuggingFace Team for supporting us with GPU grant.
🏠 Check out our [Model Card 🏃](https://huggingface.co/An-619/FastSAM)
"""
# Step-by-step instructions displayed in the "Points mode" tab.
description_p = """ # 🎯 Instructions for points mode
This is a demo on Github project 🏃 [Fast Segment Anything Model](https://github.com/CASIA-IVA-Lab/FastSAM). Welcome to give a star ⭐️ to it.
1. Upload an image or choose an example.
2. Choose the point label ('Add mask' means a positive point. 'Remove' Area means a negative point that is not segmented).
3. Add points one by one on the image.
4. Click the 'Segment with points prompt' button to get the segmentation results.
**5. If you get Error, click the 'Clear points' button and try again may help.**
"""
# Example images offered in every tab; each entry is a one-element row
# because gr.Examples expects one value per input component.
examples = [["examples/sa_8776.jpg"], ["examples/sa_414.jpg"], ["examples/sa_1309.jpg"], ["examples/sa_11025.jpg"],
["examples/sa_561.jpg"], ["examples/sa_192.jpg"], ["examples/sa_10039.jpg"], ["examples/sa_862.jpg"]]
# First example row; its image path pre-fills the input image components.
default_example = examples[0]
# Custom CSS handed to gr.Blocks.
css = "h1 { text-align: center } .about { text-align: justify; padding-left: 10%; padding-right: 10%; }"
def segment_everything(
    input,
    input_size=1024,
    iou_threshold=0.7,
    conf_threshold=0.25,
    better_quality=False,
    withContours=True,
    use_retina=True,
    text="",
    wider=False,
    mask_random_color=True,
):
    """Segment the whole image, optionally narrowed by a text prompt.

    The image is resized so that its longer side equals ``input_size``, run
    through the module-level ``model``, and the resulting masks are rendered
    with ``fast_process``.

    Args:
        input: Image object exposing ``.size`` and ``.resize`` (the Gradio
            components feeding this callback use ``type='pil'``).
        input_size: Target length of the longer image side; cast to ``int``.
        iou_threshold: Forwarded to the model as ``iou``.
        conf_threshold: Forwarded to the model as ``conf``.
        better_quality: Forwarded to ``fast_process``.
        withContours: Forwarded to ``fast_process``.
        use_retina: Forwarded to ``fast_process`` only; the model call itself
            always uses ``retina_masks=True``.
        text: When non-empty, masks are selected with ``text_prompt``.
        wider: Forwarded to ``text_prompt``.
        mask_random_color: Forwarded to ``fast_process``.

    Returns:
        Whatever ``fast_process`` returns for the rendered segmentation.
    """
    # Sliders may deliver a float; the model needs an integer image size.
    input_size = int(input_size)

    # Resize with the aspect ratio preserved
    # (thanks for the suggestion by hysts in HuggingFace).
    width, height = input.size
    ratio = input_size / max(width, height)
    input = input.resize((int(width * ratio), int(height * ratio)))

    predictions = model(
        input,
        device=device,
        retina_masks=True,
        iou=iou_threshold,
        conf=conf_threshold,
        imgsz=input_size,
    )

    if len(text) == 0:
        # Everything mode: keep every predicted mask.
        annotations = predictions[0].masks.data
    else:
        # Text mode: keep only the mask picked by the text prompt.
        formatted = format_results(predictions[0], 0)
        picked, _ = text_prompt(formatted, text, input, device=device, wider=wider)
        annotations = np.array([picked])

    return fast_process(
        annotations=annotations,
        image=input,
        device=device,
        scale=(1024 // input_size),
        better_quality=better_quality,
        mask_random_color=mask_random_color,
        bbox=None,
        use_retina=use_retina,
        withContours=withContours,
    )
def segment_with_points(
    input,
    input_size=1024,
    iou_threshold=0.7,
    conf_threshold=0.25,
    better_quality=False,
    withContours=True,
    use_retina=True,
    mask_random_color=True,
):
    """Segment the image using the points collected in ``global_points``.

    The image is resized so that its longer side equals ``input_size``; the
    queued click coordinates are scaled by the same factor before being
    handed to ``point_prompt``. After a successful run the queued points and
    labels are emptied.

    Args:
        input: Image object exposing ``.size`` and ``.resize``.
        input_size: Target length of the longer image side; cast to ``int``.
        iou_threshold: Forwarded to the model as ``iou``.
        conf_threshold: Forwarded to the model as ``conf``.
        better_quality: Forwarded to ``fast_process``.
        withContours: Forwarded to ``fast_process``.
        use_retina: Forwarded to ``fast_process`` only; the model call itself
            always uses ``retina_masks=True``.
        mask_random_color: Forwarded to ``fast_process``.

    Returns:
        A ``(figure, None)`` pair: the rendered segmentation and ``None`` for
        the second output the caller wires to this callback.
    """
    global global_points
    global global_point_label

    # Sliders may deliver a float; the model needs an integer image size.
    input_size = int(input_size)

    # Resize with the aspect ratio preserved
    # (thanks for the suggestion by hysts in HuggingFace).
    width, height = input.size
    scale = input_size / max(width, height)
    new_w = int(width * scale)
    new_h = int(height * scale)
    input = input.resize((new_w, new_h))

    # Clicks were recorded on the unresized image, so scale them to match.
    scaled_points = [[int(coord * scale) for coord in pt] for pt in global_points]

    predictions = model(
        input,
        device=device,
        retina_masks=True,
        iou=iou_threshold,
        conf=conf_threshold,
        imgsz=input_size,
    )
    formatted = format_results(predictions[0], 0)
    picked, _ = point_prompt(formatted, scaled_points, global_point_label, new_h, new_w)
    annotations = np.array([picked])

    fig = fast_process(
        annotations=annotations,
        image=input,
        device=device,
        scale=(1024 // input_size),
        better_quality=better_quality,
        mask_random_color=mask_random_color,
        bbox=None,
        use_retina=use_retina,
        withContours=withContours,
    )

    # Start the next interaction with an empty point queue.
    global_points = []
    global_point_label = []
    return fig, None
def get_points_with_draw(image, label, evt: gr.SelectData):
    """Record a clicked point and paint a marker for it on the image.

    Args:
        image: Image the user clicked on; it is drawn on in place.
        label: Radio value; ``'Add Mask'`` marks a positive point, any other
            value a negative one.
        evt: Gradio select event whose ``index`` holds the click ``(x, y)``.

    Returns:
        The same image object with the marker drawn on it.
    """
    x, y = evt.index[0], evt.index[1]
    is_positive = label == 'Add Mask'
    point_radius = 15
    if is_positive:
        point_color = (255, 255, 0)
    else:
        point_color = (255, 0, 255)

    # Queue the click for the next call to segment_with_points.
    global_points.append([x, y])
    global_point_label.append(1 if is_positive else 0)
    print(x, y, is_positive)

    # Create an object that can draw on the image.
    draw = ImageDraw.Draw(image)
    top_left = (x - point_radius, y - point_radius)
    bottom_right = (x + point_radius, y + point_radius)
    draw.ellipse([top_left, bottom_right], fill=point_color)
    return image
def _input_image(label, value):
    """Build a PIL image component pre-loaded with ``value``."""
    return gr.Image(label=label, value=value, type='pil')


def _output_image(label):
    """Build a non-interactive PIL image component for a result."""
    return gr.Image(label=label, interactive=False, type='pil')


# Components are created up front and placed later with .render() inside
# the gr.Blocks layout.
cond_img_e = _input_image("Input", default_example[0])
cond_img_p = _input_image("Input with points", default_example[0])
cond_img_t = _input_image("Input with text", "examples/dogs.jpg")

segm_img_e = _output_image("Segmented Image")
segm_img_p = _output_image("Segmented Image with points")
segm_img_t = _output_image("Segmented Image with text")

# Clicks queued in Points mode; filled by get_points_with_draw and consumed
# by segment_with_points.
global_points = []
global_point_label = []

# Input-size slider for the "Everything mode" tab.
input_size_slider = gr.components.Slider(
    minimum=512,
    maximum=1024,
    value=1024,
    step=64,
    label='Input_size',
    info='Our model was trained on a size of 1024',
)
with gr.Blocks(css=css, title='Fast Segment Anything') as demo:
with gr.Row():
with gr.Column(scale=1):
# Title
gr.Markdown(title)
with gr.Column(scale=1):
# News
gr.Markdown(news)
with gr.Tab("Everything mode"):
# Images
with gr.Row(variant="panel"):
with gr.Column(scale=1):
cond_img_e.render()
with gr.Column(scale=1):
segm_img_e.render()
# Submit & Clear
with gr.Row():
with gr.Column():
input_size_slider.render()
with gr.Row():
contour_check = gr.Checkbox(value=True, label='withContours', info='draw the edges of the masks')
with gr.Column():
segment_btn_e = gr.Button("Segment Everything", variant='primary')
clear_btn_e = gr.Button("Clear", variant="secondary")
gr.Markdown("Try some of the examples below ⬇️")
gr.Examples(examples=examples,
inputs=[cond_img_e],
outputs=segm_img_e,
fn=segment_everything,
cache_examples=True,
examples_per_page=4)
with gr.Column():
with gr.Accordion("Advanced options", open=False):
iou_threshold = gr.Slider(0.1, 0.9, 0.7, step=0.1, label='iou', info='iou threshold for filtering the annotations')
conf_threshold = gr.Slider(0.1, 0.9, 0.25, step=0.05, label='conf', info='object confidence threshold')
with gr.Row():
mor_check = gr.Checkbox(value=False, label='better_visual_quality', info='better quality using morphologyEx')
with gr.Column():
retina_check = gr.Checkbox(value=True, label='use_retina', info='draw high-resolution segmentation masks')
# Description
gr.Markdown(description_e)
segment_btn_e.click(segment_everything,
inputs=[
cond_img_e,
input_size_slider,
iou_threshold,
conf_threshold,
mor_check,
contour_check,
retina_check,
],
outputs=segm_img_e)
with gr.Tab("Points mode"):
# Images
with gr.Row(variant="panel"):
with gr.Column(scale=1):
cond_img_p.render()
with gr.Column(scale=1):
segm_img_p.render()
# Submit & Clear
with gr.Row():
with gr.Column():
with gr.Row():
add_or_remove = gr.Radio(["Add Mask", "Remove Area"], value="Add Mask", label="Point_label (foreground/background)")
with gr.Column():
segment_btn_p = gr.Button("Segment with points prompt", variant='primary')
clear_btn_p = gr.Button("Clear points", variant='secondary')
gr.Markdown("Try some of the examples below ⬇️")
gr.Examples(examples=examples,
inputs=[cond_img_p],
# outputs=segm_img_p,
# fn=segment_with_points,
# cache_examples=True,
examples_per_page=4)
with gr.Column():
# Description
gr.Markdown(description_p)
cond_img_p.select(get_points_with_draw, [cond_img_p, add_or_remove], cond_img_p)
segment_btn_p.click(segment_with_points,
inputs=[cond_img_p],
outputs=[segm_img_p, cond_img_p])
with gr.Tab("Text mode"):
# Images
with gr.Row(variant="panel"):
with gr.Column(scale=1):
cond_img_t.render()
with gr.Column(scale=1):
segm_img_t.render()
# Submit & Clear
with gr.Row():
with gr.Column():
input_size_slider_t = gr.components.Slider(minimum=512,
maximum=1024,
value=1024,
step=64,
label='Input_size',
info='Our model was trained on a size of 1024')
with gr.Row():
with gr.Column():
contour_check = gr.Checkbox(value=True, label='withContours', info='draw the edges of the masks')
text_box = gr.Textbox(label="text prompt", value="a black dog")
with gr.Column():
segment_btn_t = gr.Button("Segment with text", variant='primary')
clear_btn_t = gr.Button("Clear", variant="secondary")
gr.Markdown("Try some of the examples below ⬇️")
gr.Examples(examples=[["examples/dogs.jpg"]] + examples,
inputs=[cond_img_e],
# outputs=segm_img_e,
# fn=segment_everything,
# cache_examples=True,
examples_per_page=4)
with gr.Column():
with gr.Accordion("Advanced options", open=False):
iou_threshold = gr.Slider(0.1, 0.9, 0.7, step=0.1, label='iou', info='iou threshold for filtering the annotations')
conf_threshold = gr.Slider(0.1, 0.9, 0.25, step=0.05, label='conf', info='object confidence threshold')
with gr.Row():
mor_check = gr.Checkbox(value=False, label='better_visual_quality', info='better quality using morphologyEx')
retina_check = gr.Checkbox(value=True, label='use_retina', info='draw high-resolution segmentation masks')
wider_check = gr.Checkbox(value=False, label='wider', info='wider result')
# Description
gr.Markdown(description_e)
segment_btn_t.click(segment_everything,
inputs=[
cond_img_t,
input_size_slider_t,
iou_threshold,
conf_threshold,
mor_check,
contour_check,
retina_check,
text_box,
wider_check,
],
outputs=segm_img_t)
def clear():
    """Return empty values for an (input image, output image) component pair."""
    return None, None


def clear_text():
    """Return empty values for the text tab: input image, output image, text prompt."""
    return None, None, None


clear_btn_e.click(clear, outputs=[cond_img_e, segm_img_e])
clear_btn_p.click(clear, outputs=[cond_img_p, segm_img_p])
# The text tab's button resets the text tab's own images (cond_img_t / segm_img_t);
# it used to be wired to the points tab's components, leaving the text tab untouched.
clear_btn_t.click(clear_text, outputs=[cond_img_t, segm_img_t, text_box])
demo.queue()
demo.launch()
================================================
FILE: cog.yaml
================================================
# Configuration for Cog ⚙️
# Reference: https://github.com/replicate/cog/blob/main/docs/yaml.md
# Thanks to chenxwh.
build:
# set to true if your model requires a GPU
gpu: true
cuda: "11.7"
system_packages:
- "libgl1-mesa-glx"
- "libglib2.0-0"
python_version: "3.8"
python_packages:
- "matplotlib==3.7.1"
- "opencv-python==4.7.0.72"
- "Pillow==9.5.0"
- "PyYAML==6.0"
- "requests==2.31.0"
- "scipy==1.10.1"
- "torch==2.0.1"
- "torchvision==0.15.2"
- "tqdm==4.65.0"
- "pandas==2.0.2"
- "seaborn==0.12.0"
- "ultralytics==8.0.121"
- git+https://github.com/openai/CLIP.git
predict: "predict.py:Predictor"
================================================
FILE: fastsam/__init__.py
================================================
# Ultralytics YOLO 🚀, AGPL-3.0 license
from .model import FastSAM
from .predict import FastSAMPredictor
from .prompt import FastSAMPrompt
# from .val import FastSAMValidator
from .decoder import FastSAMDecoder
__all__ = 'FastSAMPredictor', 'FastSAM', 'FastSAMPrompt', 'FastSAMDecoder'
================================================
FILE: fastsam/decoder.py
================================================
from .model import FastSAM
import numpy as np
from PIL import Image
from typing import Optional, List, Tuple, Union
class FastSAMDecoder:
    """Two-step wrapper around a FastSAM model.

    ``run_encoder`` runs the model once on an image and returns the result of
    the first image; ``run_decoder`` then selects masks from such a result for
    a point or box prompt.

    Args:
        model: Callable FastSAM model.
        device: Device forwarded to the model call.
        conf: Confidence threshold forwarded to the model call.
        iou: IoU threshold forwarded to the model call.
        imgsz: Inference size forwarded to the model call.
        retina_masks: Forwarded to the model call as ``retina_masks``.
    """

    def __init__(
        self,
        model: 'FastSAM',
        device: str = 'cpu',
        conf: float = 0.4,
        iou: float = 0.9,
        imgsz: int = 1024,
        retina_masks: bool = True,
    ):
        self.model = model
        self.device = device
        self.retina_masks = retina_masks
        self.imgsz = imgsz
        self.conf = conf
        self.iou = iou
        self.image = None  # last image given to run_encoder (ndarray)
        self.image_embedding = None  # last result given to run_decoder

    @staticmethod
    def _to_numpy(data):
        """Return ``data`` as a NumPy array, moving it off torch/GPU when necessary."""
        if hasattr(data, 'detach'):
            data = data.detach()
        if hasattr(data, 'cpu'):
            data = data.cpu()
        if hasattr(data, 'numpy'):
            return data.numpy()
        return np.asarray(data)

    def _result(self):
        """Return the single-image result stored by ``run_decoder``.

        A list/tuple (the raw model output) is reduced to its first entry; any
        other object is assumed to already be the result of one image.
        """
        embedding = self.image_embedding
        if isinstance(embedding, (list, tuple)):
            return embedding[0]
        return embedding

    def run_encoder(self, image):
        """Run the model on ``image`` (path or array) and return the first result as NumPy.

        The image is remembered in ``self.image`` because the prompt methods
        need its height and width.
        """
        if isinstance(image, str):
            image = np.array(Image.open(image))
        self.image = image
        image_embedding = self.model(
            self.image,
            device=self.device,
            retina_masks=self.retina_masks,
            imgsz=self.imgsz,
            conf=self.conf,
            iou=self.iou,
        )
        # .cpu() first: converting to NumPy is only possible for host tensors.
        return image_embedding[0].cpu().numpy()

    def run_decoder(
        self,
        image_embedding,
        point_prompt: Optional[np.ndarray] = None,
        point_label: Optional[np.ndarray] = None,
        box_prompt: Optional[np.ndarray] = None,
        text_prompt: Optional[str] = None,
    ) -> Optional[np.ndarray]:
        """Select masks from ``image_embedding`` for the first prompt that is given.

        Prompts are checked in the order point, box, text.

        Returns:
            The selected mask(s) with a leading axis of length 1, or ``None``
            when no prompt was supplied.

        Raises:
            NotImplementedError: If only ``text_prompt`` is supplied.
        """
        self.image_embedding = image_embedding
        if point_prompt is not None:
            return self.point_prompt(points=point_prompt, pointlabel=point_label)
        if box_prompt is not None:
            return self.box_prompt(bbox=box_prompt)
        if text_prompt is not None:
            return self.text_prompt(text=text_prompt)
        return None

    def text_prompt(self, text):
        """Text prompts are not available on the decoder.

        Raises:
            NotImplementedError: Always; previously this path failed with an
                AttributeError because the method did not exist.
        """
        raise NotImplementedError(
            "FastSAMDecoder does not implement text prompts; use FastSAMPrompt.text_prompt instead.")

    def box_prompt(self, bbox):
        """Return the mask whose IoU with ``bbox`` ([x1, y1, x2, y2]) is highest.

        The box is given in image coordinates and is rescaled to the mask
        resolution when the two differ. The caller's ``bbox`` is not modified.
        """
        assert (bbox[2] != 0 and bbox[3] != 0)
        masks = self._to_numpy(self._result().masks.data)
        target_height = self.image.shape[0]
        target_width = self.image.shape[1]
        h = masks.shape[1]
        w = masks.shape[2]
        x1, y1, x2, y2 = bbox[0], bbox[1], bbox[2], bbox[3]
        if h != target_height or w != target_width:
            x1 = int(x1 * w / target_width)
            y1 = int(y1 * h / target_height)
            x2 = int(x2 * w / target_width)
            y2 = int(y2 * h / target_height)
        # Clamp to the mask extent; int() keeps the values usable as slice bounds.
        x1 = max(int(round(x1)), 0)
        y1 = max(int(round(y1)), 0)
        x2 = min(int(round(x2)), w)
        y2 = min(int(round(y2)), h)

        bbox_area = (y2 - y1) * (x2 - x1)
        masks_area = np.sum(masks[:, y1:y2, x1:x2], axis=(1, 2))
        orig_masks_area = np.sum(masks, axis=(1, 2))
        union = bbox_area + orig_masks_area - masks_area
        ious = masks_area / union
        best = int(np.argmax(ious))
        return np.array([masks[best]])

    def point_prompt(self, points, pointlabel):  # numpy
        """Merge the masks hit by foreground points and clear those hit by background points.

        Args:
            points: Sequence of [x, y] in image coordinates.
            pointlabel: One label per point; 1 adds the mask under the point,
                0 removes it.

        Returns:
            Boolean array with a leading axis of length 1.
        """
        masks = self._format_results(self._result(), 0)
        target_height = self.image.shape[0]
        target_width = self.image.shape[1]
        h = masks[0]['segmentation'].shape[0]
        w = masks[0]['segmentation'].shape[1]
        if h != target_height or w != target_width:
            points = [[int(point[0] * w / target_width), int(point[1] * h / target_height)] for point in points]
        onemask = np.zeros((h, w))
        # Largest masks first, so smaller masks are applied after them.
        for annotation in sorted(masks, key=lambda x: x['area'], reverse=True):
            mask = annotation['segmentation'] if isinstance(annotation, dict) else annotation
            for point_idx, point in enumerate(points):
                if mask[int(point[1]), int(point[0])] != 1:
                    continue
                if pointlabel[point_idx] == 1:
                    onemask[mask] = 1
                if pointlabel[point_idx] == 0:
                    onemask[mask] = 0
        onemask = onemask >= 1
        return np.array([onemask])

    def _format_results(self, result, filter=0):
        """Convert a result into a list of per-mask dicts.

        Each dict has the keys 'id', 'segmentation' (boolean array), 'bbox',
        'score' and 'area'. Masks whose area is below ``filter`` are skipped.
        """
        annotations = []
        masks = self._to_numpy(result.masks.data)
        for i in range(len(masks)):
            mask = masks[i] == 1.0
            area = mask.sum()
            if area < filter:
                continue
            annotations.append({
                'id': i,
                'segmentation': mask,
                'bbox': result.boxes.data[i],
                'score': result.boxes.conf[i],
                'area': area,
            })
        return annotations
================================================
FILE: fastsam/model.py
================================================
# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
FastSAM model interface.
Usage - Predict:
from ultralytics import FastSAM
model = FastSAM('last.pt')
results = model.predict('ultralytics/assets/bus.jpg')
"""
from ultralytics.yolo.cfg import get_cfg
from ultralytics.yolo.engine.exporter import Exporter
from ultralytics.yolo.engine.model import YOLO
from ultralytics.yolo.utils import DEFAULT_CFG, LOGGER, ROOT, is_git_dir
from ultralytics.yolo.utils.checks import check_imgsz
from ultralytics.yolo.utils.torch_utils import model_info, smart_inference_mode
from .predict import FastSAMPredictor
class FastSAM(YOLO):
    """FastSAM model interface built on the YOLO engine.

    Prediction runs through ``FastSAMPredictor``; training is not supported.
    """

    @smart_inference_mode()
    def predict(self, source=None, stream=False, **kwargs):
        """
        Perform prediction using the YOLO model.

        Args:
            source (str | int | PIL | np.ndarray): The source of the image to make predictions on.
                Accepts all source types accepted by the YOLO model.
            stream (bool): Whether to stream the predictions or not. Defaults to False.
            **kwargs : Additional keyword arguments passed to the predictor.
                Check the 'configuration' section in the documentation for all available options.

        Returns:
            (List[ultralytics.yolo.engine.results.Results]): The prediction results, or None
                when the predictor raised an exception (the failure is logged).
        """
        if source is None:
            source = ROOT / 'assets' if is_git_dir() else 'https://ultralytics.com/images/bus.jpg'
            LOGGER.warning(f"WARNING ⚠️ 'source' is missing. Using 'source={source}'.")
        overrides = self.overrides.copy()
        overrides['conf'] = 0.25  # default, replaced by a 'conf' in kwargs below
        overrides.update(kwargs)  # prefer kwargs
        overrides['mode'] = kwargs.get('mode', 'predict')
        assert overrides['mode'] in ['track', 'predict']
        overrides['save'] = kwargs.get('save', False)  # do not save by default if called in Python
        self.predictor = FastSAMPredictor(overrides=overrides)
        self.predictor.setup_model(model=self.model, verbose=False)
        try:
            return self.predictor(source, stream=stream)
        except Exception as e:
            # Best effort: keep returning None on failure, but say why instead of
            # discarding the error silently.
            LOGGER.warning(f'WARNING ⚠️ FastSAM prediction failed and returned None: {e!r}')
            return None

    def train(self, **kwargs):
        """Function trains models but raises an error as FastSAM models do not support training."""
        raise NotImplementedError("Currently, the training codes are on the way.")

    def val(self, **kwargs):
        """Run validation given dataset."""
        overrides = dict(task='segment', mode='val')
        overrides.update(kwargs)  # prefer kwargs
        args = get_cfg(cfg=DEFAULT_CFG, overrides=overrides)
        args.imgsz = check_imgsz(args.imgsz, max_dim=1)
        # NOTE(review): this instantiates FastSAM itself as the validator; a dedicated
        # validator class was presumably intended - confirm before relying on val().
        validator = FastSAM(args=args)
        validator(model=self.model)
        self.metrics = validator.metrics
        return validator.metrics

    @smart_inference_mode()
    def export(self, **kwargs):
        """
        Export model.

        Args:
            **kwargs : Any other args accepted by the predictors. To see all args check 'configuration' section in docs
        """
        overrides = dict(task='detect')
        overrides.update(kwargs)
        overrides['mode'] = 'export'
        args = get_cfg(cfg=DEFAULT_CFG, overrides=overrides)
        args.task = self.task
        if args.imgsz == DEFAULT_CFG.imgsz:
            args.imgsz = self.model.args['imgsz']  # use trained imgsz unless custom value is passed
        if args.batch == DEFAULT_CFG.batch:
            args.batch = 1  # default to 1 if not modified
        return Exporter(overrides=args)(model=self.model)

    def info(self, detailed=False, verbose=True):
        """
        Logs model info.

        Args:
            detailed (bool): Show detailed information about model.
            verbose (bool): Controls verbosity.
        """
        return model_info(self.model, detailed=detailed, verbose=verbose, imgsz=640)

    def __call__(self, source=None, stream=False, **kwargs):
        """Calls the 'predict' function with given arguments to perform object detection."""
        return self.predict(source, stream, **kwargs)

    def __getattr__(self, attr):
        """Raises error if object has no requested attribute."""
        name = self.__class__.__name__
        raise AttributeError(f"'{name}' object has no attribute '{attr}'. See valid attributes below.\n{self.__doc__}")
================================================
FILE: fastsam/predict.py
================================================
import torch
from ultralytics.yolo.engine.results import Results
from ultralytics.yolo.utils import DEFAULT_CFG, ops
from ultralytics.yolo.v8.detect.predict import DetectionPredictor
from .utils import bbox_iou
class FastSAMPredictor(DetectionPredictor):
    """Detection predictor whose post-processing also produces segmentation masks."""

    def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
        """Initialise the parent predictor, then force the task to 'segment'."""
        super().__init__(cfg, overrides, _callbacks)
        self.args.task = 'segment'

    def postprocess(self, preds, img, orig_imgs):
        """Convert raw predictions into a list of ``Results`` carrying boxes and masks.

        Args:
            preds: Model output; ``preds[0]`` goes through NMS and ``preds[1]``
                supplies the mask prototypes.
            img: Preprocessed input batch; ``img.shape[2:]`` is used as its (height, width).
            orig_imgs: Original image(s), either a list or a single object.

        Returns:
            One ``Results`` per entry of the NMS output, or an empty list when
            nothing was detected.

        TODO: filter by classes.
        """
        p = ops.non_max_suppression(preds[0],
                                    self.args.conf,
                                    self.args.iou,
                                    agnostic=self.args.agnostic_nms,
                                    max_det=self.args.max_det,
                                    nc=len(self.model.names),
                                    classes=self.args.classes)
        results = []
        if len(p) == 0 or len(p[0]) == 0:
            print("No object detected.")
            return results
        # Build a detection row that spans the whole network input: x2/y2 are the
        # input width/height, column 4 and columns 6+ are set to 1.0.
        full_box = torch.zeros_like(p[0][0])
        full_box[2], full_box[3], full_box[4], full_box[6:] = img.shape[3], img.shape[2], 1.0, 1.0
        full_box = full_box.view(1, -1)
        # Indices of first-image detections whose IoU with the full-image box exceeds 0.9.
        # NOTE(review): bbox_iou snaps near-border boxes in place, and p[0][:, :4] looks
        # like a view, so p[0] is presumably modified here as well - confirm.
        critical_iou_index = bbox_iou(full_box[0][:4], p[0][:, :4], iou_thres=0.9, image_shape=img.shape[2:])
        if critical_iou_index.numel() != 0:
            # Keep the matched detection's column 4 and columns 6+, but replace its
            # coordinates with the full-image box.
            full_box[0][4] = p[0][critical_iou_index][:,4]
            full_box[0][6:] = p[0][critical_iou_index][:,6:]
            p[0][critical_iou_index] = full_box
        proto = preds[1][-1] if len(preds[1]) == 3 else preds[1]  # second output is len 3 if pt, but only 1 if exported
        for i, pred in enumerate(p):
            orig_img = orig_imgs[i] if isinstance(orig_imgs, list) else orig_imgs
            path = self.batch[0]
            img_path = path[i] if isinstance(path, list) else path
            if not len(pred):  # save empty boxes
                results.append(Results(orig_img=orig_img, path=img_path, names=self.model.names, boxes=pred[:, :6]))
                continue
            if self.args.retina_masks:
                # Boxes are rescaled to the original image first, then masks are
                # produced at the original image size.
                if not isinstance(orig_imgs, torch.Tensor):
                    pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape)
                masks = ops.process_mask_native(proto[i], pred[:, 6:], pred[:, :4], orig_img.shape[:2])  # HWC
            else:
                # Masks are produced at the network input size, so the boxes are only
                # rescaled afterwards.
                masks = ops.process_mask(proto[i], pred[:, 6:], pred[:, :4], img.shape[2:], upsample=True)  # HWC
                if not isinstance(orig_imgs, torch.Tensor):
                    pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape)
            results.append(
                Results(orig_img=orig_img, path=img_path, names=self.model.names, boxes=pred[:, :6], masks=masks))
        return results
================================================
FILE: fastsam/prompt.py
================================================
import os
import sys
import cv2
import matplotlib.pyplot as plt
import numpy as np
import torch
from .utils import image_to_np_ndarray
from PIL import Image
class FastSAMPrompt:
    """Select and draw FastSAM masks for box, point, text or "everything" prompts.

    Args:
        image: Image path, PIL image or ndarray.
        results: Model output for ``image``. When it is ``None`` every prompt
            method returns an empty list.
        device: Device used to pick the plotting code path and to run CLIP.
    """

    def __init__(self, image, results, device='cuda'):
        if isinstance(image, (str, Image.Image)):
            image = image_to_np_ndarray(image)
        self.device = device
        self.results = results
        self.img = image

    def _segment_image(self, image, bbox):
        """Return an image of the same size showing only ``bbox`` on a white canvas."""
        image_array = np.array(image) if isinstance(image, Image.Image) else image
        segmented_image_array = np.zeros_like(image_array)
        x1, y1, x2, y2 = bbox
        segmented_image_array[y1:y2, x1:x2] = image_array[y1:y2, x1:x2]
        segmented_image = Image.fromarray(segmented_image_array)
        # Size is taken from the PIL crop so ndarray inputs work too (an ndarray's
        # ``.size`` is an element count, not (width, height)).
        canvas = Image.new('RGB', segmented_image.size, (255, 255, 255))
        transparency_mask = np.zeros((image_array.shape[0], image_array.shape[1]), dtype=np.uint8)
        transparency_mask[y1:y2, x1:x2] = 255
        transparency_mask_image = Image.fromarray(transparency_mask, mode='L')
        canvas.paste(segmented_image, mask=transparency_mask_image)
        return canvas

    def _format_results(self, result, filter=0):
        """Convert a result into a list of per-mask dicts.

        Each dict has the keys 'id', 'segmentation' (NumPy boolean array),
        'bbox', 'score' and 'area'. Masks with fewer than ``filter`` pixels are
        skipped.
        """
        annotations = []
        for i in range(len(result.masks.data)):
            mask = result.masks.data[i] == 1.0
            if torch.sum(mask) < filter:
                continue
            segmentation = mask.cpu().numpy()
            annotations.append({
                'id': i,
                'segmentation': segmentation,
                'bbox': result.boxes.data[i],
                'score': result.boxes.conf[i],
                'area': segmentation.sum(),
            })
        return annotations

    @staticmethod
    def filter_masks(annotations):  # filter the overlapping masks
        """Drop masks that lie mostly (> 80 %) inside a larger mask.

        ``annotations`` is sorted in place by descending area.

        Returns:
            Tuple of (kept annotations, set of removed indices into the sorted list).
        """
        annotations.sort(key=lambda x: x['area'], reverse=True)
        to_remove = set()
        for i, a in enumerate(annotations):
            for j in range(i + 1, len(annotations)):
                if j in to_remove:
                    continue
                b = annotations[j]
                if b['area'] < a['area']:
                    if (a['segmentation'] & b['segmentation']).sum() / b['segmentation'].sum() > 0.8:
                        to_remove.add(j)
        return [a for i, a in enumerate(annotations) if i not in to_remove], to_remove

    def _get_bbox_from_mask(self, mask):
        """Return [x1, y1, x2, y2] enclosing every external contour of ``mask``."""
        mask = mask.astype(np.uint8)
        contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        x1, y1, w, h = cv2.boundingRect(contours[0])
        x2, y2 = x1 + w, y1 + h
        # Merge the bounding boxes of any further contours into one.
        for contour in contours[1:]:
            x_t, y_t, w_t, h_t = cv2.boundingRect(contour)
            x1 = min(x1, x_t)
            y1 = min(y1, y_t)
            x2 = max(x2, x_t + w_t)
            y2 = max(y2, y_t + h_t)
        return [x1, y1, x2, y2]

    @staticmethod
    def _canvas_to_rgb(fig):
        """Return the rendered figure as a (rows, cols, 3) uint8 array."""
        canvas = fig.canvas
        try:
            buf = canvas.tostring_rgb()
        except AttributeError:
            canvas.draw()
            if hasattr(canvas, 'tostring_rgb'):
                buf = canvas.tostring_rgb()
            else:
                # Canvases without tostring_rgb: take the RGBA buffer and drop alpha.
                buf = np.asarray(canvas.buffer_rgba())[..., :3].tobytes()
        cols, rows = canvas.get_width_height()
        return np.frombuffer(buf, dtype=np.uint8).reshape(rows, cols, 3)

    def plot_to_result(self,
                       annotations,
                       bboxes=None,
                       points=None,
                       point_label=None,
                       mask_random_color=True,
                       better_quality=True,
                       retina=False,
                       withContours=True) -> np.ndarray:
        """Draw ``annotations`` (and optional prompts) over the image and return the picture.

        Args:
            annotations: Masks as a tensor/array stack or a list of dicts with a
                'segmentation' entry. Must not be empty.
            bboxes: Optional boxes ([x1, y1, x2, y2]) to outline.
            points: Optional prompt points to mark.
            point_label: Label per point (1 or 0), selecting the marker colour.
            mask_random_color: Use a random colour per mask instead of a fixed one.
            better_quality: Clean each mask with morphological close/open first.
            retina: Masks already have the image resolution (no resizing needed).
            withContours: Also draw the mask outlines.
        """
        if isinstance(annotations[0], dict):
            annotations = [annotation['segmentation'] for annotation in annotations]
        image = cv2.cvtColor(self.img, cv2.COLOR_BGR2RGB)
        original_h = image.shape[0]
        original_w = image.shape[1]
        if sys.platform == "darwin":
            plt.switch_backend("TkAgg")
        plt.figure(figsize=(original_w / 100, original_h / 100))
        # Add subplot with no margin.
        plt.subplots_adjust(top=1, bottom=0, right=1, left=0, hspace=0, wspace=0)
        plt.margins(0, 0)
        plt.gca().xaxis.set_major_locator(plt.NullLocator())
        plt.gca().yaxis.set_major_locator(plt.NullLocator())
        plt.imshow(image)

        if better_quality:
            if isinstance(annotations[0], torch.Tensor):
                annotations = np.array(annotations.cpu())
            for i, mask in enumerate(annotations):
                mask = cv2.morphologyEx(mask.astype(np.uint8), cv2.MORPH_CLOSE, np.ones((3, 3), np.uint8))
                annotations[i] = cv2.morphologyEx(mask.astype(np.uint8), cv2.MORPH_OPEN, np.ones((8, 8), np.uint8))

        show_options = dict(
            random_color=mask_random_color,
            bboxes=bboxes,
            points=points,
            pointlabel=point_label,
            retinamask=retina,
            target_height=original_h,
            target_width=original_w,
        )
        if self.device == 'cpu':
            annotations = np.array(annotations)
            self.fast_show_mask(annotations, plt.gca(), **show_options)
        else:
            if isinstance(annotations[0], np.ndarray):
                # np.array() first so a plain list of masks is accepted as well.
                annotations = torch.from_numpy(np.array(annotations))
            self.fast_show_mask_gpu(annotations, plt.gca(), **show_options)
        if isinstance(annotations, torch.Tensor):
            annotations = annotations.cpu().numpy()

        if withContours:
            contour_all = []
            temp = np.zeros((original_h, original_w, 1))
            for mask in annotations:
                if isinstance(mask, dict):
                    mask = mask['segmentation']
                annotation = mask.astype(np.uint8)
                if not retina:
                    annotation = cv2.resize(
                        annotation,
                        (original_w, original_h),
                        interpolation=cv2.INTER_NEAREST,
                    )
                contours, _ = cv2.findContours(annotation, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
                contour_all.extend(contours)
            cv2.drawContours(temp, contour_all, -1, (255, 255, 255), 2)
            color = np.array([0 / 255, 0 / 255, 255 / 255, 0.8])
            contour_mask = temp / 255 * color.reshape(1, 1, -1)
            plt.imshow(contour_mask)

        plt.axis('off')
        fig = plt.gcf()
        plt.draw()
        img_array = self._canvas_to_rgb(fig)
        result = cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR)
        plt.close()
        return result

    # Remark for refactoring: a function should do one thing only. Storing the image and
    # plotting should be separated, and need not be methods; standalone utility functions
    # that users chain in their scripts would give more fine-grained control.
    def plot(self,
             annotations,
             output_path,
             bboxes=None,
             points=None,
             point_label=None,
             mask_random_color=True,
             better_quality=True,
             retina=False,
             withContours=True):
        """Render ``annotations`` with ``plot_to_result`` and write the picture to ``output_path``.

        Does nothing when ``annotations`` is empty. The parent directory of
        ``output_path`` is created when missing.
        """
        if len(annotations) == 0:
            return None
        result = self.plot_to_result(
            annotations,
            bboxes,
            points,
            point_label,
            mask_random_color,
            better_quality,
            retina,
            withContours,
        )
        os.makedirs(os.path.dirname(os.path.abspath(output_path)), exist_ok=True)
        result = result[:, :, ::-1]  # reverse the channel order before writing
        cv2.imwrite(output_path, result)

    @staticmethod
    def _draw_prompts(ax, bboxes, points, pointlabel):
        """Outline ``bboxes`` in blue and mark points (label 1 yellow, label 0 magenta)."""
        if bboxes is not None:
            for bbox in bboxes:
                x1, y1, x2, y2 = bbox
                ax.add_patch(plt.Rectangle((x1, y1), x2 - x1, y2 - y1, fill=False, edgecolor='b', linewidth=1))
        if points is not None:
            for label, colour in ((1, 'y'), (0, 'm')):
                chosen = [point for i, point in enumerate(points) if pointlabel[i] == label]
                plt.scatter(
                    [point[0] for point in chosen],
                    [point[1] for point in chosen],
                    s=20,
                    c=colour,
                )

    # CPU post process
    def fast_show_mask(
        self,
        annotation,
        ax,
        random_color=False,
        bboxes=None,
        points=None,
        pointlabel=None,
        retinamask=True,
        target_height=960,
        target_width=960,
    ):
        """Composite a NumPy mask stack into one RGBA overlay and show it on ``ax``."""
        mask_count = annotation.shape[0]
        height = annotation.shape[1]
        width = annotation.shape[2]
        # Sort annotations by area (ascending) so that, where masks overlap, the
        # smallest one is the first non-zero entry and therefore the one shown.
        areas = np.sum(annotation, axis=(1, 2))
        annotation = annotation[np.argsort(areas)]
        index = (annotation != 0).argmax(axis=0)
        if random_color:
            color = np.random.random((mask_count, 1, 1, 3))
        else:
            color = np.ones((mask_count, 1, 1, 3)) * np.array([30 / 255, 144 / 255, 255 / 255])
        transparency = np.ones((mask_count, 1, 1, 1)) * 0.6
        visual = np.concatenate([color, transparency], axis=-1)
        mask_image = np.expand_dims(annotation, -1) * visual

        show = np.zeros((height, width, 4))
        h_indices, w_indices = np.meshgrid(np.arange(height), np.arange(width), indexing='ij')
        indices = (index[h_indices, w_indices], h_indices, w_indices, slice(None))
        # Use vectorized indexing to update the values of 'show'.
        show[h_indices, w_indices, :] = mask_image[indices]

        self._draw_prompts(ax, bboxes, points, pointlabel)
        if not retinamask:
            show = cv2.resize(show, (target_width, target_height), interpolation=cv2.INTER_NEAREST)
        ax.imshow(show)

    def fast_show_mask_gpu(
        self,
        annotation,
        ax,
        random_color=False,
        bboxes=None,
        points=None,
        pointlabel=None,
        retinamask=True,
        target_height=960,
        target_width=960,
    ):
        """Composite a torch mask stack into one RGBA overlay and show it on ``ax``."""
        mask_count = annotation.shape[0]
        height = annotation.shape[1]
        width = annotation.shape[2]
        areas = torch.sum(annotation, dim=(1, 2))
        annotation = annotation[torch.argsort(areas, descending=False)]
        # Find the index of the first non-zero value at each position.
        index = (annotation != 0).to(torch.long).argmax(dim=0)
        if random_color:
            color = torch.rand((mask_count, 1, 1, 3)).to(annotation.device)
        else:
            color = torch.ones((mask_count, 1, 1, 3)).to(annotation.device) * torch.tensor([
                30 / 255, 144 / 255, 255 / 255]).to(annotation.device)
        transparency = torch.ones((mask_count, 1, 1, 1)).to(annotation.device) * 0.6
        visual = torch.cat([color, transparency], dim=-1)
        mask_image = torch.unsqueeze(annotation, -1) * visual
        # Select data according to the index: it tells which mask to take at each
        # position, collapsing mask_image into a single overlay.
        show = torch.zeros((height, width, 4)).to(annotation.device)
        try:
            h_indices, w_indices = torch.meshgrid(torch.arange(height), torch.arange(width), indexing='ij')
        except TypeError:
            # torch.meshgrid without the 'indexing' keyword.
            h_indices, w_indices = torch.meshgrid(torch.arange(height), torch.arange(width))
        indices = (index[h_indices, w_indices], h_indices, w_indices, slice(None))
        # Use vectorized indexing to update the values of 'show'.
        show[h_indices, w_indices, :] = mask_image[indices]
        show_cpu = show.cpu().numpy()

        self._draw_prompts(ax, bboxes, points, pointlabel)
        if not retinamask:
            show_cpu = cv2.resize(show_cpu, (target_width, target_height), interpolation=cv2.INTER_NEAREST)
        ax.imshow(show_cpu)

    @staticmethod
    def _import_clip():
        """Import and return the ``clip`` module, asking for installation if it is missing."""
        try:
            import clip
        except (ImportError, AssertionError, AttributeError):
            from ultralytics.yolo.utils.checks import check_requirements
            check_requirements('git+https://github.com/openai/CLIP.git')
            import clip
        return clip

    # clip
    @torch.no_grad()
    def retrieve(self, model, preprocess, elements, search_text: str, device) -> torch.Tensor:
        """Score each image in ``elements`` against ``search_text`` with CLIP.

        Returns:
            1-D tensor with one softmax-normalised score per element.
        """
        clip = self._import_clip()
        preprocessed_images = [preprocess(image).to(device) for image in elements]
        tokenized_text = clip.tokenize([search_text]).to(device)
        stacked_images = torch.stack(preprocessed_images)
        image_features = model.encode_image(stacked_images)
        text_features = model.encode_text(tokenized_text)
        image_features /= image_features.norm(dim=-1, keepdim=True)
        text_features /= text_features.norm(dim=-1, keepdim=True)
        probs = 100.0 * image_features @ text_features.T
        return probs[:, 0].softmax(dim=0)

    def _crop_image(self, format_results):
        """Cut one crop per sufficiently large mask out of the image.

        Returns:
            Tuple ``(cropped_boxes, cropped_images, not_crop, filter_id, annotations)``:
            ``cropped_boxes`` holds the crop images, ``cropped_images`` holds the
            matching bounding boxes, ``not_crop`` is always empty, ``filter_id``
            lists the indices of masks skipped for covering 100 pixels or fewer.
        """
        image = Image.fromarray(cv2.cvtColor(self.img, cv2.COLOR_BGR2RGB))
        ori_w, ori_h = image.size
        annotations = format_results
        mask_h, mask_w = annotations[0]['segmentation'].shape
        if ori_w != mask_w or ori_h != mask_h:
            image = image.resize((mask_w, mask_h))
        cropped_boxes = []
        cropped_images = []
        not_crop = []
        filter_id = []
        for idx, mask in enumerate(annotations):
            if np.sum(mask['segmentation']) <= 100:
                filter_id.append(idx)
                continue
            bbox = self._get_bbox_from_mask(mask['segmentation'])  # bbox of the mask
            cropped_boxes.append(self._segment_image(image, bbox))
            cropped_images.append(bbox)  # Save the bounding box of the cropped image.
        return cropped_boxes, cropped_images, not_crop, filter_id, annotations

    def box_prompt(self, bbox=None, bboxes=None):
        """Return, for each box, the mask with the highest IoU against it.

        Args:
            bbox: One box [x1, y1, x2, y2] in image coordinates.
            bboxes: Several such boxes; takes precedence over ``bbox``.

        Returns:
            Array of the selected masks (duplicates removed, in prompt order),
            or ``[]`` when there are no results. The given boxes are not modified.
        """
        if self.results is None:
            return []
        assert bbox or bboxes
        if bboxes is None:
            bboxes = [bbox]
        masks = self.results[0].masks.data
        target_height = self.img.shape[0]
        target_width = self.img.shape[1]
        h = masks.shape[1]
        w = masks.shape[2]
        orig_masks_area = torch.sum(masks, dim=(1, 2))
        max_iou_index = []
        for box in bboxes:
            assert (box[2] != 0 and box[3] != 0)
            x1, y1, x2, y2 = box[0], box[1], box[2], box[3]
            if h != target_height or w != target_width:
                x1 = int(x1 * w / target_width)
                y1 = int(y1 * h / target_height)
                x2 = int(x2 * w / target_width)
                y2 = int(y2 * h / target_height)
            # Clamp to the mask extent.
            x1 = max(int(round(x1)), 0)
            y1 = max(int(round(y1)), 0)
            x2 = min(int(round(x2)), w)
            y2 = min(int(round(y2)), h)
            bbox_area = (y2 - y1) * (x2 - x1)
            masks_area = torch.sum(masks[:, y1:y2, x1:x2], dim=(1, 2))
            union = bbox_area + orig_masks_area - masks_area
            ious = masks_area / union
            max_iou_index.append(int(torch.argmax(ious)))
        # De-duplicate while keeping the order of the prompts.
        max_iou_index = list(dict.fromkeys(max_iou_index))
        return np.array(masks[max_iou_index].cpu().numpy())

    def point_prompt(self, points, pointlabel):  # numpy
        """Merge the masks hit by foreground points and clear those hit by background points.

        Args:
            points: Sequence of [x, y] in image coordinates.
            pointlabel: One label per point; 1 adds the mask under the point,
                0 removes it.

        Returns:
            Boolean array with a leading axis of length 1, or ``[]`` when there
            are no results.
        """
        if self.results is None:
            return []
        masks = self._format_results(self.results[0], 0)
        target_height = self.img.shape[0]
        target_width = self.img.shape[1]
        h = masks[0]['segmentation'].shape[0]
        w = masks[0]['segmentation'].shape[1]
        if h != target_height or w != target_width:
            points = [[int(point[0] * w / target_width), int(point[1] * h / target_height)] for point in points]
        onemask = np.zeros((h, w))
        # Largest masks first, so smaller masks are applied after them.
        for annotation in sorted(masks, key=lambda x: x['area'], reverse=True):
            mask = annotation['segmentation'] if isinstance(annotation, dict) else annotation
            for point_idx, point in enumerate(points):
                if mask[point[1], point[0]] == 1 and pointlabel[point_idx] == 1:
                    onemask[mask] = 1
                if mask[point[1], point[0]] == 1 and pointlabel[point_idx] == 0:
                    onemask[mask] = 0
        onemask = onemask >= 1
        return np.array([onemask])

    def text_prompt(self, text):
        """Return the mask whose crop matches ``text`` best according to CLIP.

        Returns:
            Array with a leading axis of length 1, or ``[]`` when there are no
            results or no mask is large enough to be cropped.
        """
        if self.results is None:
            return []
        format_results = self._format_results(self.results[0], 0)
        cropped_boxes, cropped_images, not_crop, filter_id, annotations = self._crop_image(format_results)
        if not cropped_boxes:
            return []
        clip = self._import_clip()
        clip_model, preprocess = clip.load('ViT-B/32', device=self.device)
        scores = self.retrieve(clip_model, preprocess, cropped_boxes, text, device=self.device)
        best_crop = int(scores.argsort()[-1])
        # Crops exist only for the masks that were not filtered out, so map the
        # crop position back to the index of the annotation it was cut from.
        skipped = set(filter_id)
        kept = [i for i in range(len(annotations)) if i not in skipped]
        return np.array([annotations[kept[best_crop]]['segmentation']])

    def everything_prompt(self):
        """Return all masks of the first result, or ``[]`` when there are no results."""
        if self.results is None:
            return []
        return self.results[0].masks.data
================================================
FILE: fastsam/utils.py
================================================
import numpy as np
import torch
from PIL import Image
def adjust_bboxes_to_image_border(boxes, image_shape, threshold=20):
    '''Snap box edges that lie within `threshold` pixels of the image border onto it.

    The tensor is edited in place and also returned.

    Args:
        boxes: (n, 4) tensor of x1, y1, x2, y2
        image_shape: (height, width)
        threshold: pixel threshold

    Returns:
        adjusted_boxes: the same tensor with near-border edges moved to the border
    '''
    height, width = image_shape

    def border(value):
        # Scalar replacement value living on the same device as the boxes.
        return torch.tensor(value, dtype=torch.float, device=boxes.device)

    # x1 and y1 snap to 0 when they are closer than `threshold` to the origin.
    for column in (0, 1):
        edge = boxes[:, column]
        boxes[:, column] = torch.where(edge < threshold, border(0), edge)

    # x2 snaps to the width and y2 to the height when close to the far border.
    for column, limit in ((2, width), (3, height)):
        edge = boxes[:, column]
        boxes[:, column] = torch.where(edge > limit - threshold, border(limit), edge)

    return boxes
def convert_box_xywh_to_xyxy(box):
    '''Convert a box given as [x, y, width, height] into corner form [x1, y1, x2, y2].'''
    left, top = box[0], box[1]
    right = left + box[2]
    bottom = top + box[3]
    return [left, top, right, bottom]
def bbox_iou(box1, boxes, iou_thres=0.9, image_shape=(640, 640), raw_output=False):
    '''Compare one box against many by Intersection-over-Union.

    `boxes` is first passed through adjust_bboxes_to_image_border, so edges
    close to the image border are snapped onto it before the comparison.

    Args:
        box1: (4, ) reference box as x1, y1, x2, y2
        boxes: (n, 4) boxes as x1, y1, x2, y2
        iou_thres: IoU a box has to exceed to be reported
        image_shape: (height, width) used for the border adjustment
        raw_output: return the IoU values instead of indices

    Returns:
        Indices of boxes with IoU > iou_thres; with raw_output the IoU tensor
        of shape (n, ), or 0 when that tensor is empty.
    '''
    boxes = adjust_bboxes_to_image_border(boxes, image_shape)

    # Corners of the overlap rectangle between box1 and every other box.
    left = torch.max(box1[0], boxes[:, 0])
    top = torch.max(box1[1], boxes[:, 1])
    right = torch.min(box1[2], boxes[:, 2])
    bottom = torch.min(box1[3], boxes[:, 3])

    # Negative extents mean "no overlap" and are clamped to zero.
    overlap_w = (right - left).clamp(0)
    overlap_h = (bottom - top).clamp(0)
    intersection = overlap_w * overlap_h

    reference_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
    other_areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    union = reference_area + other_areas - intersection

    iou = intersection / union  # Should be shape (n, )

    if not raw_output:
        return torch.nonzero(iou > iou_thres).flatten()
    return 0 if iou.numel() == 0 else iou
def image_to_np_ndarray(image):
    '''Return `image` as a NumPy array.

    Args:
        image: an ndarray (returned unchanged), a path string (opened with
            PIL) or a PIL image. Subclasses of these types are accepted too.

    Returns:
        The image as an ndarray, or None for any other kind of input.
    '''
    if isinstance(image, np.ndarray):
        return image
    if isinstance(image, str):
        # The context manager closes the file once the pixels have been copied.
        with Image.open(image) as opened:
            return np.array(opened)
    if isinstance(image, Image.Image):
        return np.array(image)
    return None
================================================
FILE: predict.py
================================================
# Prediction interface for Cog ⚙️
# https://github.com/replicate/cog/blob/main/docs/python.md
# Thanks to chenxwh.
import argparse
import cv2
import shutil
import ast
from cog import BasePredictor, Input, Path
from ultralytics import YOLO
from utils.tools import *
class Predictor(BasePredictor):
    """Cog predictor that segments an image with FastSAM and returns the rendered picture."""

    def setup(self):
        """Load both model variants once so repeated predictions reuse them."""
        self.models = {name: YOLO(f"{name}.pt") for name in ("FastSAM-s", "FastSAM-x")}

    def predict(
        self,
        input_image: Path = Input(description="Input image"),
        model_name: str = Input(
            description="choose a model",
            choices=["FastSAM-x", "FastSAM-s"],
            default="FastSAM-x",
        ),
        iou: float = Input(
            description="iou threshold for filtering the annotations", default=0.7
        ),
        text_prompt: str = Input(
            description='use text prompt eg: "a black dog"', default=None
        ),
        conf: float = Input(description="object confidence threshold", default=0.25),
        retina: bool = Input(
            description="draw high-resolution segmentation masks", default=True
        ),
        box_prompt: str = Input(default="[0,0,0,0]", description="[x,y,w,h]"),
        point_prompt: str = Input(default="[[0,0]]", description="[[x1,y1],[x2,y2]]"),
        point_label: str = Input(default="[0]", description="[1,0] 0:background, 1:foreground"),
        withContours: bool = Input(
            description="draw the edges of the masks", default=False
        ),
        better_quality: bool = Input(
            description="better quality using morphologyEx", default=False
        ),
    ) -> Path:
        """Segment ``input_image`` and return the path of the rendered result.

        The prompt is chosen in this order: box (when its width and height are
        non-zero), text (when given), point (when the first point is not
        [0, 0]); without any prompt every mask is drawn.
        """
        # Start from an empty output directory so the single file written below
        # is the only entry in it.
        out_path = "output"
        if os.path.exists(out_path):
            shutil.rmtree(out_path)
        os.makedirs(out_path, exist_ok=True)

        if torch.cuda.is_available():
            device_name = "cuda"
        elif torch.backends.mps.is_available():
            device_name = "mps"
        else:
            device_name = "cpu"
        device = torch.device(device_name)

        # The prompts arrive as strings and are parsed into Python literals.
        parsed_points = ast.literal_eval(point_prompt)
        parsed_box = ast.literal_eval(box_prompt)
        parsed_labels = ast.literal_eval(point_label)
        args = argparse.Namespace(
            better_quality=better_quality,
            box_prompt=parsed_box,
            conf=conf,
            device=device,
            img_path=str(input_image),
            imgsz=1024,
            iou=iou,
            model_path="FastSAM-x.pt",
            output=out_path,
            point_label=parsed_labels,
            point_prompt=parsed_points,
            randomcolor=True,
            retina=retina,
            text_prompt=text_prompt,
            withContours=withContours,
        )

        model = self.models[model_name]
        results = model(
            str(input_image),
            imgsz=args.imgsz,
            device=args.device,
            retina_masks=args.retina,
            iou=args.iou,
            conf=args.conf,
            max_det=100,
        )

        # Work out which masks to draw and what extra overlay to pass along.
        overlay = {}
        if args.box_prompt[2] != 0 and args.box_prompt[3] != 0:
            annotations = np.array([prompt(results, args, box=True)])
            overlay["bbox"] = convert_box_xywh_to_xyxy(args.box_prompt)
        elif args.text_prompt is not None:
            results = format_results(results[0], 0)
            annotations = np.array([prompt(results, args, text=True)])
        elif args.point_prompt[0] != [0, 0]:
            results = format_results(results[0], 0)
            # list to numpy
            annotations = np.array([prompt(results, args, point=True)])
            overlay["points"] = args.point_prompt
        else:
            annotations = results[0].masks.data
        fast_process(
            annotations=annotations,
            args=args,
            mask_random_color=args.randomcolor,
            **overlay,
        )

        out = "/tmp.out.png"
        rendered = os.path.join(out_path, os.listdir(out_path)[0])
        shutil.copy(rendered, out)
        return Path(out)
def prompt(results, args, box=None, point=None, text=None):
    """Return the mask selected by the requested prompt type.

    The flags are checked in the order ``box``, ``point``, ``text``; the
    first truthy one decides which prompt helper is called.

    Args:
        results: Model output. The box branch reads ``results[0].masks.data``;
            the point and text branches forward ``results`` unchanged to the
            helper (NOTE(review): the caller appears to pass the output of
            ``format_results`` for those two branches - confirm).
        args: Namespace providing ``img_path`` plus, depending on the branch,
            ``box_prompt``, ``point_prompt``, ``point_label``,
            ``text_prompt`` and ``device``.
        box: Truthy to select the mask by bounding box.
        point: Truthy to select the mask by point prompts.
        text: Truthy to select the mask by text prompt.

    Returns:
        The mask produced by the selected helper, or ``None`` when none of
        the three flags is truthy.

    Raises:
        OSError: If ``args.img_path`` cannot be read as an image.
    """
    ori_img = cv2.imread(args.img_path)
    # cv2.imread signals failure by returning None instead of raising, which
    # would otherwise surface as an opaque AttributeError on ``.shape``.
    if ori_img is None:
        raise OSError(f"Could not read image: {args.img_path}")
    ori_h, ori_w = ori_img.shape[:2]

    # Each helper returns a (mask, index) pair; only the mask is needed here.
    if box:
        mask, _ = box_prompt(
            results[0].masks.data,
            convert_box_xywh_to_xyxy(args.box_prompt),
            ori_h,
            ori_w,
        )
    elif point:
        mask, _ = point_prompt(
            results, args.point_prompt, args.point_label, ori_h, ori_w
        )
    elif text:
        mask, _ = text_prompt(results, args.text_prompt, args.img_path, args.device)
    else:
        return None
    return mask
================================================
FILE: requirements.txt
================================================
# Base-----------------------------------
matplotlib>=3.2.2
opencv-python>=4.6.0
Pillow>=7.1.2
PyYAML>=5.3.1
requests>=2.23.0
scipy>=1.4.1
torch>=1.7.0
torchvision>=0.8.1
tqdm>=4.64.0
pandas>=1.1.4
seaborn>=0.11.0
gradio==3.35.2
# Ultralytics-----------------------------------
# ultralytics == 8.0.120
================================================
FILE: segpredict.py
================================================
from fastsam import FastSAM, FastSAMPrompt
import torch
# Load the FastSAM checkpoint and choose the demo image.
model = FastSAM('FastSAM.pt')
IMAGE_PATH = './images/dogs.jpg'

# Device preference: CUDA first, then Apple MPS, otherwise the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
elif torch.backends.mps.is_available():
    DEVICE = torch.device("mps")
else:
    DEVICE = torch.device("cpu")

# Run the model once over the whole image; the prompts below post-process
# this result.
everything_results = model(
    IMAGE_PATH, device=DEVICE, retina_masks=True, imgsz=1024, conf=0.4, iou=0.9
)
prompt_process = FastSAMPrompt(IMAGE_PATH, everything_results, device=DEVICE)

# --- everything prompt ---
ann = prompt_process.everything_prompt()

# --- bbox prompt (alternative) ---
# bbox default shape [0,0,0,0] -> [x1,y1,x2,y2]
# bboxes default shape [[0,0,0,0]] -> [[x1,y1,x2,y2]]
# ann = prompt_process.box_prompt(bbox=[200, 200, 300, 300])
# ann = prompt_process.box_prompt(bboxes=[[200, 200, 300, 300], [500, 500, 600, 600]])

# --- text prompt (alternative) ---
# ann = prompt_process.text_prompt(text='a photo of a dog')

# --- point prompt ---
# points default [[0,0]] [[x1,y1],[x2,y2]]
# point_label default [0] [1,0] 0:background, 1:foreground
# This assignment replaces the "everything" annotations computed above.
ann = prompt_process.point_prompt(points=[[620, 360]], pointlabel=[1])

# Render the selected annotations into the output directory.
plot_options = dict(
    output='./output/',
    mask_random_color=True,
    better_quality=True,
    retina=False,
    withContours=True,
)
prompt_process.plot(annotations=ann, **plot_options)
================================================
FILE: setup.py
================================================
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
from setuptools import find_packages, setup
# Read the dependency list from requirements.txt (resolved relative to the
# current working directory). The file is opened in a context manager so the
# handle is closed, and blank lines and "#" comment lines are dropped so that
# only real requirement specifiers reach ``install_requires``.
with open("requirements.txt", encoding="utf-8") as requirements_file:
    REQUIREMENTS = [
        line.strip()
        for line in requirements_file
        if line.strip() and not line.strip().startswith("#")
    ]

# CLIP is installed straight from a pinned commit of its git repository.
REQUIREMENTS += [
    "CLIP @ git+https://github.com/openai/CLIP.git@a1d071733d7111c9c014f024669f959182114e33#egg=CLIP"
]

setup(
    name="fastsam",
    version="0.1.1",
    install_requires=REQUIREMENTS,
    packages=["fastsam", "fastsam_tools"],
    # The "fastsam_tools" package is sourced from the "utils" directory.
    package_dir={
        "fastsam": "fastsam",
        "fastsam_tools": "utils",
    },
    url="https://github.com/CASIA-IVA-Lab/FastSAM"
)
================================================
FILE: ultralytics/.pre-commit-config.yaml
================================================
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Pre-commit hooks. For more information see https://github.com/pre-commit/pre-commit-hooks/blob/main/README.md
exclude: 'docs/'
# Define bot property if installed via https://github.com/marketplace/pre-commit-ci
ci:
autofix_prs: true
autoupdate_commit_msg: '[pre-commit.ci] pre-commit suggestions'
autoupdate_schedule: monthly
# submodules: true
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.4.0
hooks:
- id: end-of-file-fixer
- id: trailing-whitespace
- id: check-case-conflict
# - id: check-yaml
- id: check-docstring-first
- id: double-quote-string-fixer
- id: detect-private-key
- repo: https://github.com/asottile/pyupgrade
rev: v3.4.0
hooks:
- id: pyupgrade
name: Upgrade code
- repo: https://github.com/PyCQA/isort
rev: 5.12.0
hooks:
- id: isort
name: Sort imports
- repo: https://github.com/google/yapf
rev: v0.33.0
hooks:
- id: yapf
name: YAPF formatting
- repo: https://github.com/executablebooks/mdformat
rev: 0.7.16
hooks:
- id: mdformat
name: MD formatting
additional_dependencies:
- mdformat-gfm
- mdformat-black
# exclude: "README.md|README.zh-CN.md|CONTRIBUTING.md"
- repo: https://github.com/PyCQA/flake8
rev: 6.0.0
hooks:
- id: flake8
name: PEP8
- repo: https://github.com/codespell-project/codespell
rev: v2.2.4
hooks:
- id: codespell
args:
- --ignore-words-list=crate,nd,strack,dota
# - repo: https://github.com/asottile/yesqa
# rev: v1.4.0
# hooks:
# - id: yesqa
# - repo: https://github.com/asottile/dead
# rev: v1.5.0
# hooks:
# - id: dead
================================================
FILE: ultralytics/__init__.py
================================================
# Ultralytics YOLO 🚀, AGPL-3.0 license
__version__ = '8.0.120'
from ultralytics.hub import start
from ultralytics.vit.rtdetr import RTDETR
from ultralytics.vit.sam import SAM
from ultralytics.yolo.engine.model import YOLO
from ultralytics.yolo.nas import NAS
from ultralytics.yolo.utils.checks import check_yolo as checks
__all__ = '__version__', 'YOLO', 'NAS', 'SAM', 'RTDETR', 'checks', 'start' # allow simpler import
================================================
FILE: ultralytics/datasets/Argoverse.yaml
================================================
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Argoverse-HD dataset (ring-front-center camera) http://www.cs.cmu.edu/~mengtial/proj/streaming/ by Argo AI
# Example usage: yolo train data=Argoverse.yaml
# parent
# ├── ultralytics
# └── datasets
# └── Argoverse ← downloads here (31.3 GB)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/Argoverse # dataset root dir
train: Argoverse-1.1/images/train/ # train images (relative to 'path') 39384 images
val: Argoverse-1.1/images/val/ # val images (relative to 'path') 15062 images
test: Argoverse-1.1/images/test/ # test images (optional) https://eval.ai/web/challenges/challenge-page/800/overview
# Classes
names:
0: person
1: bicycle
2: car
3: motorcycle
4: bus
5: truck
6: traffic_light
7: stop_sign
# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
import json
from tqdm import tqdm
from ultralytics.yolo.utils.downloads import download
from pathlib import Path
def argoverse2yolo(set):
    """Convert one Argoverse-HD annotation JSON file into YOLO label files.

    One ``.txt`` label file is written per annotated image, placed under
    ``<set.parents[2]>/Argoverse-1.1/labels/<sequence dir>/``. Each line of a
    label file has the form ``cls x_center y_center width height``.

    Args:
        set: ``pathlib.Path`` to the annotation JSON file (``set.parents`` is
            used, so a plain string will not work). The name shadows the
            builtin but is kept so existing callers are unaffected.
    """
    labels = {}
    # Context manager so the annotation file handle is closed after parsing.
    with open(set, "rb") as f:
        a = json.load(f)
    for annot in tqdm(a['annotations'], desc=f"Converting {set} to YOLOv5 format..."):
        image = a['images'][annot['image_id']]
        # Swap the last three characters of the image name for "txt".
        img_label_name = f"{image['name'][:-3]}txt"

        cls = annot['category_id']  # instance class id
        # NOTE(review): bbox is presumably (x_min, y_min, width, height) in
        # pixels of a 1920x1200 image - confirm against the dataset docs.
        x_center, y_center, width, height = annot['bbox']
        x_center = (x_center + width / 2) / 1920.0  # offset and scale
        y_center = (y_center + height / 2) / 1200.0  # offset and scale
        width /= 1920.0  # scale
        height /= 1200.0  # scale

        img_dir = set.parents[2] / 'Argoverse-1.1' / 'labels' / a['seq_dirs'][image['sid']]
        if not img_dir.exists():
            img_dir.mkdir(parents=True, exist_ok=True)

        k = str(img_dir / img_label_name)
        labels.setdefault(k, []).append(f"{cls} {x_center} {y_center} {width} {height}\n")

    # Write each image's accumulated label lines in one go.
    for k, lines in labels.items():
        with open(k, "w") as f:
            f.writelines(lines)
# Download
# NOTE(review): `yaml` is not defined in this script; presumably the parsed dataset YAML is injected by whatever executes it - confirm.
dir = Path(yaml['path']) # dataset root dir
urls = ['https://argoverse-hd.s3.us-east-2.amazonaws.com/Argoverse-HD-Full.zip']
download(urls, dir=dir)
# Convert
annotations_dir = 'Argoverse-HD/annotations/'
(dir / 'Argoverse-1.1' / 'tracking').rename(dir / 'Argoverse-1.1' / 'images') # rename 'tracking' to 'images'
# Only the train and val annotation files are converted.
for d in "train.json", "val.json":
argoverse2yolo(dir / annotations_dir / d) # convert Argoverse annotations to YOLO labels
================================================
FILE: ultralytics/datasets/GlobalWheat2020.yaml
================================================
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Global Wheat 2020 dataset http://www.global-wheat.com/ by University of Saskatchewan
# Example usage: yolo train data=GlobalWheat2020.yaml
# parent
# ├── ultralytics
# └── datasets
# └── GlobalWheat2020 ← downloads here (7.0 GB)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/GlobalWheat2020 # dataset root dir
train: # train images (relative to 'path') 3422 images
- images/arvalis_1
- images/arvalis_2
- images/arvalis_3
- images/ethz_1
- images/rres_1
- images/inrae_1
- images/usask_1
val: # val images (relative to 'path') 748 images (WARNING: train set contains ethz_1)
- images/ethz_1
test: # test images (optional) 1276 images
- images/utokyo_1
- images/utokyo_2
- images/nau_1
- images/uq_1
# Classes
names:
0: wheat_head
# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
from ultralytics.yolo.utils.downloads import download
from pathlib import Path
# Fetch the image archive and the label archive into the dataset root.
dir = Path(yaml['path'])  # dataset root dir
urls = [
    'https://zenodo.org/record/4298502/files/global-wheat-codalab-official.zip',
    'https://github.com/ultralytics/yolov5/releases/download/v1.0/GlobalWheat2020_labels.zip',
]
download(urls, dir=dir)

# Ensure the target folder layout exists.
for p in ('annotations', 'images', 'labels'):
    (dir / p).mkdir(parents=True, exist_ok=True)

# Relocate every subset folder (and its optional JSON annotation file) out of
# the extracted archive directory.
subsets = (
    'arvalis_1', 'arvalis_2', 'arvalis_3', 'ethz_1', 'rres_1', 'inrae_1', 'usask_1',
    'utokyo_1', 'utokyo_2', 'nau_1', 'uq_1',
)
extracted = dir / 'global-wheat-codalab-official'
for p in subsets:
    (extracted / p).rename(dir / 'images' / p)  # move to /images
    f = (extracted / p).with_suffix('.json')  # json file
    if f.exists():
        f.rename((dir / 'annotations' / p).with_suffix('.json'))  # move to /annotations
================================================
FILE: ultralytics/datasets/ImageNet.yaml
================================================
# Ultralytics YOLO 🚀, AGPL-3.0 license
# ImageNet-1k dataset https://www.image-net.org/index.php by Stanford University
# Simplified class names from https://github.com/anishathalye/imagenet-simple-labels
# Example usage: yolo train task=classify data=imagenet
# parent
# ├── ultralytics
# └── datasets
# └── imagenet ← downloads here (144 GB)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/imagenet # dataset root dir
train: train # train images (relative to 'path') 1281167 images
val: val # val images (relative to 'path') 50000 images
test: # test images (optional)
# Classes
names:
0: tench
1: goldfish
2: great white shark
3: tiger shark
4: hammerhead shark
5: electric ray
6: stingray
7: cock
8: hen
9: ostrich
10: brambling
11: goldfinch
12: house finch
13: junco
14: indigo bunting
15: American robin
16: bulbul
17: jay
18: magpie
19: chickadee
20: American dipper
21: kite
22: bald eagle
23: vulture
24: great grey owl
25: fire salamander
26: smooth newt
27: newt
28: spotted salamander
29: axolotl
30: American bullfrog
31: tree frog
32: tailed frog
33: loggerhead sea turtle
34: leatherback sea turtle
35: mud turtle
36: terrapin
37: box turtle
38: banded gecko
39: green iguana
40: Carolina anole
41: desert grassland whiptail lizard
42: agama
43: frilled-necked lizard
44: alligator lizard
45: Gila monster
46: European green lizard
47: chameleon
48: Komodo dragon
49: Nile crocodile
50: American alligator
51: triceratops
52: worm snake
53: ring-necked snake
54: eastern hog-nosed snake
55: smooth green snake
56: kingsnake
57: garter snake
58: water snake
59: vine snake
60: night snake
61: boa constrictor
62: African rock python
63: Indian cobra
64: green mamba
65: sea snake
66: Saharan horned viper
67: eastern diamondback rattlesnake
68: sidewinder
69: trilobite
70: harvestman
71: scorpion
72: yellow garden spider
73: barn spider
74: European garden spider
75: southern black widow
76: tarantula
77: wolf spider
78: tick
79: centipede
80: black grouse
81: ptarmigan
82: ruffed grouse
83: prairie grouse
84: peacock
85: quail
86: partridge
87: grey parrot
88: macaw
89: sulphur-crested cockatoo
90: lorikeet
91: coucal
92: bee eater
93: hornbill
94: hummingbird
95: jacamar
96: toucan
97: duck
98: red-breasted merganser
99: goose
100: black swan
101: tusker
102: echidna
103: platypus
104: wallaby
105: koala
106: wombat
107: jellyfish
108: sea anemone
109: brain coral
110: flatworm
111: nematode
112: conch
113: snail
114: slug
115: sea slug
116: chiton
117: chambered nautilus
118: Dungeness crab
119: rock crab
120: fiddler crab
121: red king crab
122: American lobster
123: spiny lobster
124: crayfish
125: hermit crab
126: isopod
127: white stork
128: black stork
129: spoonbill
130: flamingo
131: little blue heron
132: great egret
133: bittern
134: crane (bird)
135: limpkin
136: common gallinule
137: American coot
138: bustard
139: ruddy turnstone
140: dunlin
141: common redshank
142: dowitcher
143: oystercatcher
144: pelican
145: king penguin
146: albatross
147: grey whale
148: killer whale
149: dugong
150: sea lion
151: Chihuahua
152: Japanese Chin
153: Maltese
154: Pekingese
155: Shih Tzu
156: King Charles Spaniel
157: Papillon
158: toy terrier
159: Rhodesian Ridgeback
160: Afghan Hound
161: Basset Hound
162: Beagle
163: Bloodhound
164: Bluetick Coonhound
165: Black and Tan Coonhound
166: Treeing Walker Coonhound
167: English foxhound
168: Redbone Coonhound
169: borzoi
170: Irish Wolfhound
171: Italian Greyhound
172: Whippet
173: Ibizan Hound
174: Norwegian Elkhound
175: Otterhound
176: Saluki
177: Scottish Deerhound
178: Weimaraner
179: Staffordshire Bull Terrier
180: American Staffordshire Terrier
181: Bedlington Terrier
182: Border Terrier
183: Kerry Blue Terrier
184: Irish Terrier
185: Norfolk Terrier
186: Norwich Terrier
187: Yorkshire Terrier
188: Wire Fox Terrier
189: Lakeland Terrier
190: Sealyham Terrier
191: Airedale Terrier
192: Cairn Terrier
193: Australian Terrier
194: Dandie Dinmont Terrier
195: Boston Terrier
196: Miniature Schnauzer
197: Giant Schnauzer
198: Standard Schnauzer
199: Scottish Terrier
200: Tibetan Terrier
201: Australian Silky Terrier
202: Soft-coated Wheaten Terrier
203: West Highland White Terrier
204: Lhasa Apso
205: Flat-Coated Retriever
206: Curly-coated Retriever
207: Golden Retriever
208: Labrador Retriever
209: Chesapeake Bay Retriever
210: German Shorthaired Pointer
211: Vizsla
212: English Setter
213: Irish Setter
214: Gordon Setter
215: Brittany
216: Clumber Spaniel
217: English Springer Spaniel
218: Welsh Springer Spaniel
219: Cocker Spaniels
220: Sussex Spaniel
221: Irish Water Spaniel
222: Kuvasz
223: Schipperke
224: Groenendael
225: Malinois
226: Briard
227: Australian Kelpie
228: Komondor
229: Old English Sheepdog
230: Shetland Sheepdog
231: collie
232: Border Collie
233: Bouvier des Flandres
234: Rottweiler
235: German Shepherd Dog
236: Dobermann
237: Miniature Pinscher
238: Greater Swiss Mountain Dog
239: Bernese Mountain Dog
240: Appenzeller Sennenhund
241: Entlebucher Sennenhund
242: Boxer
243: Bullmastiff
244: Tibetan Mastiff
245: French Bulldog
246: Great Dane
247: St. Bernard
248: husky
249: Alaskan Malamute
250: Siberian Husky
251: Dalmatian
252: Affenpinscher
253: Basenji
254: pug
255: Leonberger
256: Newfoundland
257: Pyrenean Mountain Dog
258: Samoyed
259: Pomeranian
260: Chow Chow
261: Keeshond
262: Griffon Bruxellois
263: Pembroke Welsh Corgi
264: Cardigan Welsh Corgi
265: Toy Poodle
266: Miniature Poodle
267: Standard Poodle
268: Mexican hairless dog
269: grey wolf
270: Alaskan tundra wolf
271: red wolf
272: coyote
273: dingo
274: dhole
275: African wild dog
276: hyena
277: red fox
278: kit fox
279: Arctic fox
280: grey fox
281: tabby cat
282: tiger cat
283: Persian cat
284: Siamese cat
285: Egyptian Mau
286: cougar
287: lynx
288: leopard
289: snow leopard
290: jaguar
291: lion
292: tiger
293: cheetah
294: brown bear
295: American black bear
296: polar bear
297: sloth bear
298: mongoose
299: meerkat
300: tiger beetle
301: ladybug
302: ground beetle
303: longhorn beetle
304: leaf beetle
305: dung beetle
306: rhinoceros beetle
307: weevil
308: fly
309: bee
310: ant
311: grasshopper
312: cricket
313: stick insect
314: cockroach
315: mantis
316: cicada
317: leafhopper
318: lacewing
319: dragonfly
320: damselfly
321: red admiral
322: ringlet
323: monarch butterfly
324: small white
325: sulphur butterfly
326: gossamer-winged butterfly
327: starfish
328: sea urchin
329: sea cucumber
330: cottontail rabbit
331: hare
332: Angora rabbit
333: hamster
334: porcupine
335: fox squirrel
336: marmot
337: beaver
338: guinea pig
339: common sorrel
340: zebra
341: pig
342: wild boar
343: warthog
344: hippopotamus
345: ox
346: water buffalo
347: bison
348: ram
349: bighorn sheep
350: Alpine ibex
351: hartebeest
352: impala
353: gazelle
354: dromedary
355: llama
356: weasel
357: mink
358: European polecat
359: black-footed ferret
360: otter
361: skunk
362: badger
363: armadillo
364: three-toed sloth
365: orangutan
366: gorilla
367: chimpanzee
368: gibbon
369: siamang
370: guenon
371: patas monkey
372: baboon
373: macaque
374: langur
375: black-and-white colobus
376: proboscis monkey
377: marmoset
378: white-headed capuchin
379: howler monkey
380: titi
381: Geoffroy's spider monkey
382: common squirrel monkey
383: ring-tailed lemur
384: indri
385: Asian elephant
386: African bush elephant
387: red panda
388: giant panda
389: snoek
390: eel
391: coho salmon
392: rock beauty
393: clownfish
394: sturgeon
395: garfish
396: lionfish
397: pufferfish
398: abacus
399: abaya
400: academic gown
401: accordion
402: acoustic guitar
403: aircraft carrier
404: airliner
405: airship
406: altar
407: ambulance
408: amphibious vehicle
409: analog clock
410: apiary
411: apron
412: waste container
413: assault rifle
414: backpack
415: bakery
416: balance beam
417: balloon
418: ballpoint pen
419: Band-Aid
420: banjo
421: baluster
422: barbell
423: barber chair
424: barbershop
425: barn
426: barometer
427: barrel
428: wheelbarrow
429: baseball
430: basketball
431: bassinet
432: bassoon
433: swimming cap
434: bath towel
435: bathtub
436: station wagon
437: lighthouse
438: beaker
439: military cap
440: beer bottle
441: beer glass
442: bell-cot
443: bib
444: tandem bicycle
445: bikini
446: ring binder
447: binoculars
448: birdhouse
449: boathouse
450: bobsleigh
451: bolo tie
452: poke bonnet
453: bookcase
454: bookstore
455: bottle cap
456: bow
457: bow tie
458: brass
459: bra
460: breakwater
461: breastplate
462: broom
463: bucket
464: buckle
465: bulletproof vest
466: high-speed train
467: butcher shop
468: taxicab
469: cauldron
470: candle
471: cannon
472: canoe
473: can opener
474: cardigan
475: car mirror
476: carousel
477: tool kit
478: carton
479: car wheel
480: automated teller machine
481: cassette
482: cassette player
483: castle
484: catamaran
485: CD player
486: cello
487: mobile phone
488: chain
489: chain-link fence
490: chain mail
491: chainsaw
492: chest
493: chiffonier
494: chime
495: china cabinet
496: Christmas stocking
497: church
498: movie theater
499: cleaver
500: cliff dwelling
501: cloak
502: clogs
503: cocktail shaker
504: coffee mug
505: coffeemaker
506: coil
507: combination lock
508: computer keyboard
509: confectionery store
510: container ship
511: convertible
512: corkscrew
513: cornet
514: cowboy boot
515: cowboy hat
516: cradle
517: crane (machine)
518: crash helmet
519: crate
520: infant bed
521: Crock Pot
522: croquet ball
523: crutch
524: cuirass
525: dam
526: desk
527: desktop computer
528: rotary dial telephone
529: diaper
530: digital clock
531: digital watch
532: dining table
533: dishcloth
534: dishwasher
535: disc brake
536: dock
537: dog sled
538: dome
539: doormat
540: drilling rig
541: drum
542: drumstick
543: dumbbell
544: Dutch oven
545: electric fan
546: electric guitar
547: electric locomotive
548: entertainment center
549: envelope
550: espresso machine
551: face powder
552: feather boa
553: filing cabinet
554: fireboat
555: fire engine
556: fire screen sheet
557: flagpole
558: flute
559: folding chair
560: football helmet
561: forklift
562: fountain
563: fountain pen
564: four-poster bed
565: freight car
566: French horn
567: frying pan
568: fur coat
569: garbage truck
570: gas mask
571: gas pump
572: goblet
573: go-kart
574: golf ball
575: golf cart
576: gondola
577: gong
578: gown
579: grand piano
580: greenhouse
581: grille
582: grocery store
583: guillotine
584: barrette
585: hair spray
586: half-track
587: hammer
588: hamper
589: hair dryer
590: hand-held computer
591: handkerchief
592: hard disk drive
593: harmonica
594: harp
595: harvester
596: hatchet
597: holster
598: home theater
599: honeycomb
600: hook
601: hoop skirt
602: horizontal bar
603: horse-drawn vehicle
604: hourglass
605: iPod
606: clothes iron
607: jack-o'-lantern
608: jeans
609: jeep
610: T-shirt
611: jigsaw puzzle
612: pulled rickshaw
613: joystick
614: kimono
615: knee pad
616: knot
617: lab coat
618: ladle
619: lampshade
620: laptop computer
621: lawn mower
622: lens cap
623: paper knife
624: library
625: lifeboat
626: lighter
627: limousine
628: ocean liner
629: lipstick
630: slip-on shoe
631: lotion
632: speaker
633: loupe
634: sawmill
635: magnetic compass
636: mail bag
637: mailbox
638: tights
639: tank suit
640: manhole cover
641: maraca
642: marimba
643: mask
644: match
645: maypole
646: maze
647: measuring cup
648: medicine chest
649: megalith
650: microphone
651: microwave oven
652: military uniform
653: milk can
654: minibus
655: miniskirt
656: minivan
657: missile
658: mitten
659: mixing bowl
660: mobile home
661: Model T
662: modem
663: monastery
664: monitor
665: moped
666: mortar
667: square academic cap
668: mosque
669: mosquito net
670: scooter
671: mountain bike
672: tent
673: computer mouse
674: mousetrap
675: moving van
676: muzzle
677: nail
678: neck brace
679: necklace
680: nipple
681: notebook computer
682: obelisk
683: oboe
684: ocarina
685: odometer
686: oil filter
687: organ
688: oscilloscope
689: overskirt
690: bullock cart
691: oxygen mask
692: packet
693: paddle
694: paddle wheel
695: padlock
696: paintbrush
697: pajamas
698: palace
699: pan flute
700: paper towel
701: parachute
702: parallel bars
703: park bench
704: parking meter
705: passenger car
706: patio
707: payphone
708: pedestal
709: pencil case
710: pencil sharpener
711: perfume
712: Petri dish
713: photocopier
714: plectrum
715: Pickelhaube
716: picket fence
717: pickup truck
718: pier
719: piggy bank
720: pill bottle
721: pillow
722: ping-pong ball
723: pinwheel
724: pirate ship
725: pitcher
726: hand plane
727: planetarium
728: plastic bag
729: plate rack
730: plow
731: plunger
732: Polaroid camera
733: pole
734: police van
735: poncho
736: billiard table
737: soda bottle
738: pot
739: potter's wheel
740: power drill
741: prayer rug
742: printer
743: prison
744: projectile
745: projector
746: hockey puck
747: punching bag
748: purse
749: quill
750: quilt
751: race car
752: racket
753: radiator
754: radio
755: radio telescope
756: rain barrel
757: recreational vehicle
758: reel
759: reflex camera
760: refrigerator
761: remote control
762: restaurant
763: revolver
764: rifle
765: rocking chair
766: rotisserie
767: eraser
768: rugby ball
769: ruler
770: running shoe
771: safe
772: safety pin
773: salt shaker
774: sandal
775: sarong
776: saxophone
777: scabbard
778: weighing scale
779: school bus
780: schooner
781: scoreboard
782: CRT screen
783: screw
784: screwdriver
785: seat belt
786: sewing machine
787: shield
788: shoe store
789: shoji
790: shopping basket
791: shopping cart
792: shovel
793: shower cap
794: shower curtain
795: ski
796: ski mask
797: sleeping bag
798: slide rule
799: sliding door
800: slot machine
801: snorkel
802: snowmobile
803: snowplow
804: soap dispenser
805: soccer ball
806: sock
807: solar thermal collector
808: sombrero
809: soup bowl
810: space bar
811: space heater
812: space shuttle
813: spatula
814: motorboat
815: spider web
816: spindle
817: sports car
818: spotlight
819: stage
820: steam locomotive
821: through arch bridge
822: steel drum
823: stethoscope
824: scarf
825: stone wall
826: stopwatch
827: stove
828: strainer
829: tram
830: stretcher
831: couch
832: stupa
833: submarine
834: suit
835: sundial
836: sunglass
837: sunglasses
838: sunscreen
839: suspension bridge
840: mop
841: sweatshirt
842: swimsuit
843: swing
844: switch
845: syringe
846: table lamp
847: tank
848: tape player
849: teapot
850: teddy bear
851: television
852: tennis ball
853: thatched roof
854: front curtain
855: thimble
856: threshing machine
857: throne
858: tile roof
859: toaster
860: tobacco shop
861: toilet seat
862: torch
863: totem pole
864: tow truck
865: toy store
866: tractor
867: semi-trailer truck
868: tray
869: trench coat
870: tricycle
871: trimaran
872: tripod
873: triumphal arch
874: trolleybus
875: trombone
876: tub
877: turnstile
878: typewriter keyboard
879: umbrella
880: unicycle
881: upright piano
882: vacuum cleaner
883: vase
884: vault
885: velvet
886: vending machine
887: vestment
888: viaduct
889: violin
890: volleyball
891: waffle iron
892: wall clock
893: wallet
894: wardrobe
895: military aircraft
896: sink
897: washing machine
898: water bottle
899: water jug
900: water tower
901: whiskey jug
902: whistle
903: wig
904: window screen
905: window shade
906: Windsor tie
907: wine bottle
908: wing
909: wok
910: wooden spoon
911: wool
912: split-rail fence
913: shipwreck
914: yawl
915: yurt
916: website
917: comic book
918: crossword
919: traffic sign
920: traffic light
921: dust jacket
922: menu
923: plate
924: guacamole
925: consomme
926: hot pot
927: trifle
928: ice cream
929: ice pop
930: baguette
931: bagel
932: pretzel
933: cheeseburger
934: hot dog
935: mashed potato
936: cabbage
937: broccoli
938: cauliflower
939: zucchini
940: spaghetti squash
941: acorn squash
942: butternut squash
943: cucumber
944: artichoke
945: bell pepper
946: cardoon
947: mushroom
948: Granny Smith
949: strawberry
950: orange
951: lemon
952: fig
953: pineapple
954: banana
955: jackfruit
956: custard apple
957: pomegranate
958: hay
959: carbonara
960: chocolate syrup
961: dough
962: meatloaf
963: pizza
964: pot pie
965: burrito
966: red wine
967: espresso
968: cup
969: eggnog
970: alp
971: bubble
972: cliff
973: coral reef
974: geyser
975: lakeshore
976: promontory
977: shoal
978: seashore
979: valley
980: volcano
981: baseball player
982: bridegroom
983: scuba diver
984: rapeseed
985: daisy
986: yellow lady's slipper
987: corn
988: acorn
989: rose hip
990: horse chestnut seed
991: coral fungus
992: agaric
993: gyromitra
994: stinkhorn mushroom
995: earth star
996: hen-of-the-woods
997: bolete
998: ear
999: toilet paper
# Imagenet class codes to human-readable names
map:
n01440764: tench
n01443537: goldfish
n01484850: great_white_shark
n01491361: tiger_shark
n01494475: hammerhead
n01496331: electric_ray
n01498041: stingray
n01514668: cock
n01514859: hen
n01518878: ostrich
n01530575: brambling
n01531178: goldfinch
n01532829: house_finch
n01534433: junco
n01537544: indigo_bunting
n01558993: robin
n01560419: bulbul
n01580077: jay
n01582220: magpie
n01592084: chickadee
n01601694: water_ouzel
n01608432: kite
n01614925: bald_eagle
n01616318: vulture
n01622779: great_grey_owl
n01629819: European_fire_salamander
n01630670: common_newt
n01631663: eft
n01632458: spotted_salamander
n01632777: axolotl
n01641577: bullfrog
n01644373: tree_frog
n01644900: tailed_frog
n01664065: loggerhead
n01665541: leatherback_turtle
n01667114: mud_turtle
n01667778: terrapin
n01669191: box_turtle
n01675722: banded_gecko
n01677366: common_iguana
n01682714: American_chameleon
n01685808: whiptail
n01687978: agama
n01688243: frilled_lizard
n01689811: alligator_lizard
n01692333: Gila_monster
n01693334: green_lizard
n01694178: African_chameleon
n01695060: Komodo_dragon
n01697457: African_crocodile
n01698640: American_alligator
n01704323: triceratops
n01728572: thunder_snake
n01728920: ringneck_snake
n01729322: hognose_snake
n01729977: green_snake
n01734418: king_snake
n01735189: garter_snake
n01737021: water_snake
n01739381: vine_snake
n01740131: night_snake
n01742172: boa_constrictor
n01744401: rock_python
n01748264: Indian_cobra
n01749939: green_mamba
n01751748: sea_snake
n01753488: horned_viper
n01755581: diamondback
n01756291: sidewinder
n01768244: trilobite
n01770081: harvestman
n01770393: scorpion
n01773157: black_and_gold_garden_spider
n01773549: barn_spider
n01773797: garden_spider
n01774384: black_widow
n01774750: tarantula
n01775062: wolf_spider
n01776313: tick
n01784675: centipede
n01795545: black_grouse
n01796340: ptarmigan
n01797886: ruffed_grouse
n01798484: prairie_chicken
n01806143: peacock
n01806567: quail
n01807496: partridge
n01817953: African_grey
n01818515: macaw
n01819313: sulphur-crested_cockatoo
n01820546: lorikeet
n01824575: coucal
n01828970: bee_eater
n01829413: hornbill
n01833805: hummingbird
n01843065: jacamar
n01843383: toucan
n01847000: drake
n01855032: red-breasted_merganser
n01855672: goose
n01860187: black_swan
n01871265: tusker
n01872401: echidna
n01873310: platypus
n01877812: wallaby
n01882714: koala
n01883070: wombat
n01910747: jellyfish
n01914609: sea_anemone
n01917289: brain_coral
n01924916: flatworm
n01930112: nematode
n01943899: conch
n01944390: snail
n01945685: slug
n01950731: sea_slug
n01955084: chiton
n01968897: chambered_nautilus
n01978287: Dungeness_crab
n01978455: rock_crab
n01980166: fiddler_crab
n01981276: king_crab
n01983481: American_lobster
n01984695: spiny_lobster
n01985128: crayfish
n01986214: hermit_crab
n01990800: isopod
n02002556: white_stork
n02002724: black_stork
n02006656: spoonbill
n02007558: flamingo
n02009229: little_blue_heron
n02009912: American_egret
n02011460: bittern
n02012849: crane_(bird)
n02013706: limpkin
n02017213: European_gallinule
n02018207: American_coot
n02018795: bustard
n02025239: ruddy_turnstone
n02027492: red-backed_sandpiper
n02028035: redshank
n02033041: dowitcher
n02037110: oystercatcher
n02051845: pelican
n02056570: king_penguin
n02058221: albatross
n02066245: grey_whale
n02071294: killer_whale
n02074367: dugong
n02077923: sea_lion
n02085620: Chihuahua
n02085782: Japanese_spaniel
n02085936: Maltese_dog
n02086079: Pekinese
n02086240: Shih-Tzu
n02086646: Blenheim_spaniel
n02086910: papillon
n02087046: toy_terrier
n02087394: Rhodesian_ridgeback
n02088094: Afghan_hound
n02088238: basset
n02088364: beagle
n02088466: bloodhound
n02088632: bluetick
n02089078: black-and-tan_coonhound
n02089867: Walker_hound
n02089973: English_foxhound
n02090379: redbone
n02090622: borzoi
n02090721: Irish_wolfhound
n02091032: Italian_greyhound
n02091134: whippet
n02091244: Ibizan_hound
n02091467: Norwegian_elkhound
n02091635: otterhound
n02091831: Saluki
n02092002: Scottish_deerhound
n02092339: Weimaraner
n02093256: Staffordshire_bullterrier
n02093428: American_Staffordshire_terrier
n02093647: Bedlington_terrier
n02093754: Border_terrier
n02093859: Kerry_blue_terrier
n02093991: Irish_terrier
n02094114: Norfolk_terrier
n02094258: Norwich_terrier
n02094433: Yorkshire_terrier
n02095314: wire-haired_fox_terrier
n02095570: Lakeland_terrier
n02095889: Sealyham_terrier
n02096051: Airedale
n02096177: cairn
n02096294: Australian_terrier
n02096437: Dandie_Dinmont
n02096585: Boston_bull
n02097047: miniature_schnauzer
n02097130: giant_schnauzer
n02097209: standard_schnauzer
n02097298: Scotch_terrier
n02097474: Tibetan_terrier
n02097658: silky_terrier
n02098105: soft-coated_wheaten_terrier
n02098286: West_Highland_white_terrier
n02098413: Lhasa
n02099267: flat-coated_retriever
n02099429: curly-coated_retriever
n02099601: golden_retriever
n02099712: Labrador_retriever
n02099849: Chesapeake_Bay_retriever
n02100236: German_short-haired_pointer
n02100583: vizsla
n02100735: English_setter
n02100877: Irish_setter
n02101006: Gordon_setter
n02101388: Brittany_spaniel
n02101556: clumber
n02102040: English_springer
n02102177: Welsh_springer_spaniel
n02102318: cocker_spaniel
n02102480: Sussex_spaniel
n02102973: Irish_water_spaniel
n02104029: kuvasz
n02104365: schipperke
n02105056: groenendael
n02105162: malinois
n02105251: briard
n02105412: kelpie
n02105505: komondor
n02105641: Old_English_sheepdog
n02105855: Shetland_sheepdog
n02106030: collie
n02106166: Border_collie
n02106382: Bouvier_des_Flandres
n02106550: Rottweiler
n02106662: German_shepherd
n02107142: Doberman
n02107312: miniature_pinscher
n02107574: Greater_Swiss_Mountain_dog
n02107683: Bernese_mountain_dog
n02107908: Appenzeller
n02108000: EntleBucher
n02108089: boxer
n02108422: bull_mastiff
n02108551: Tibetan_mastiff
n02108915: French_bulldog
n02109047: Great_Dane
n02109525: Saint_Bernard
n02109961: Eskimo_dog
n02110063: malamute
n02110185: Siberian_husky
n02110341: dalmatian
n02110627: affenpinscher
n02110806: basenji
n02110958: pug
n02111129: Leonberg
n02111277: Newfoundland
n02111500: Great_Pyrenees
n02111889: Samoyed
n02112018: Pomeranian
n02112137: chow
n02112350: keeshond
n02112706: Brabancon_griffon
n02113023: Pembroke
n02113186: Cardigan
n02113624: toy_poodle
n02113712: miniature_poodle
n02113799: standard_poodle
n02113978: Mexican_hairless
n02114367: timber_wolf
n02114548: white_wolf
n02114712: red_wolf
n02114855: coyote
n02115641: dingo
n02115913: dhole
n02116738: African_hunting_dog
n02117135: hyena
n02119022: red_fox
n02119789: kit_fox
n02120079: Arctic_fox
n02120505: grey_fox
n02123045: tabby
n02123159: tiger_cat
n02123394: Persian_cat
n02123597: Siamese_cat
n02124075: Egyptian_cat
n02125311: cougar
n02127052: lynx
n02128385: leopard
n02128757: snow_leopard
n02128925: jaguar
n02129165: lion
n02129604: tiger
n02130308: cheetah
n02132136: brown_bear
n02133161: American_black_bear
n02134084: ice_bear
n02134418: sloth_bear
n02137549: mongoose
n02138441: meerkat
n02165105: tiger_beetle
n02165456: ladybug
n02167151: ground_beetle
n02168699: long-horned_beetle
n02169497: leaf_beetle
n02172182: dung_beetle
n02174001: rhinoceros_beetle
n02177972: weevil
n02190166: fly
n02206856: bee
n02219486: ant
n02226429: grasshopper
n02229544: cricket
n02231487: walking_stick
n02233338: cockroach
n02236044: mantis
n02256656: cicada
n02259212: leafhopper
n02264363: lacewing
n02268443: dragonfly
n02268853: damselfly
n02276258: admiral
n02277742: ringlet
n02279972: monarch
n02280649: cabbage_butterfly
n02281406: sulphur_butterfly
n02281787: lycaenid
n02317335: starfish
n02319095: sea_urchin
n02321529: sea_cucumber
n02325366: wood_rabbit
n02326432: hare
n02328150: Angora
n02342885: hamster
n02346627: porcupine
n02356798: fox_squirrel
n02361337: marmot
n02363005: beaver
n02364673: guinea_pig
n02389026: sorrel
n02391049: zebra
n02395406: hog
n02396427: wild_boar
n02397096: warthog
n02398521: hippopotamus
n02403003: ox
n02408429: water_buffalo
n02410509: bison
n02412080: ram
n02415577: bighorn
n02417914: ibex
n02422106: hartebeest
n02422699: impala
n02423022: gazelle
n02437312: Arabian_camel
n02437616: llama
n02441942: weasel
n02442845: mink
n02443114: polecat
n02443484: black-footed_ferret
n02444819: otter
n02445715: skunk
n02447366: badger
n02454379: armadillo
n02457408: three-toed_sloth
n02480495: orangutan
n02480855: gorilla
n02481823: chimpanzee
n02483362: gibbon
n02483708: siamang
n02484975: guenon
n02486261: patas
n02486410: baboon
n02487347: macaque
n02488291: langur
n02488702: colobus
n02489166: proboscis_monkey
n02490219: marmoset
n02492035: capuchin
n02492660: howler_monkey
n02493509: titi
n02493793: spider_monkey
n02494079: squirrel_monkey
n02497673: Madagascar_cat
n02500267: indri
n02504013: Indian_elephant
n02504458: African_elephant
n02509815: lesser_panda
n02510455: giant_panda
n02514041: barracouta
n02526121: eel
n02536864: coho
n02606052: rock_beauty
n02607072: anemone_fish
n02640242: sturgeon
n02641379: gar
n02643566: lionfish
n02655020: puffer
n02666196: abacus
n02667093: abaya
n02669723: academic_gown
n02672831: accordion
n02676566: acoustic_guitar
n02687172: aircraft_carrier
n02690373: airliner
n02692877: airship
n02699494: altar
n02701002: ambulance
n02704792: amphibian
n02708093: analog_clock
n02727426: apiary
n02730930: apron
n02747177: ashcan
n02749479: assault_rifle
n02769748: backpack
n02776631: bakery
n02777292: balance_beam
n02782093: balloon
n02783161: ballpoint
n02786058: Band_Aid
n02787622: banjo
n02788148: bannister
n02790996: barbell
n02791124: barber_chair
n02791270: barbershop
n02793495: barn
n02794156: barometer
n02795169: barrel
n02797295: barrow
n02799071: baseball
n02802426: basketball
n02804414: bassinet
n02804610: bassoon
n02807133: bathing_cap
n02808304: bath_towel
n02808440: bathtub
n02814533: beach_wagon
n02814860: beacon
n02815834: beaker
n02817516: bearskin
n02823428: beer_bottle
n02823750: beer_glass
n02825657: bell_cote
n02834397: bib
n02835271: bicycle-built-for-two
n02837789: bikini
n02840245: binder
n02841315: binoculars
n02843684: birdhouse
n02859443: boathouse
n02860847: bobsled
n02865351: bolo_tie
n02869837: bonnet
n02870880: bookcase
n02871525: bookshop
n02877765: bottlecap
n02879718: bow
n02883205: bow_tie
n02892201: brass
n02892767: brassiere
n02894605: breakwater
n02895154: breastplate
n02906734: broom
n02909870: bucket
n02910353: buckle
n02916936: bulletproof_vest
n02917067: bullet_train
n02927161: butcher_shop
n02930766: cab
n02939185: caldron
n02948072: candle
n02950826: cannon
n02951358: canoe
n02951585: can_opener
n02963159: cardigan
n02965783: car_mirror
n02966193: carousel
n02966687: carpenter's_kit
n02971356: carton
n02974003: car_wheel
n02977058: cash_machine
n02978881: cassette
n02979186: cassette_player
n02980441: castle
n02981792: catamaran
n02988304: CD_player
n02992211: cello
n02992529: cellular_telephone
n02999410: chain
n03000134: chainlink_fence
n03000247: chain_mail
n03000684: chain_saw
n03014705: chest
n03016953: chiffonier
n03017168: chime
n03018349: china_cabinet
n03026506: Christmas_stocking
n03028079: church
n03032252: cinema
n03041632: cleaver
n03042490: cliff_dwelling
n03045698: cloak
n03047690: clog
n03062245: cocktail_shaker
n03063599: coffee_mug
n03063689: coffeepot
n03065424: coil
n03075370: combination_lock
n03085013: computer_keyboard
n03089624: confectionery
n03095699: container_ship
n03100240: convertible
n03109150: corkscrew
n03110669: cornet
n03124043: cowboy_boot
n03124170: cowboy_hat
n03125729: cradle
n03126707: crane_(machine)
n03127747: crash_helmet
n03127925: crate
n03131574: crib
n03133878: Crock_Pot
n03134739: croquet_ball
n03141823: crutch
n03146219: cuirass
n03160309: dam
n03179701: desk
n03180011: desktop_computer
n03187595: dial_telephone
n03188531: diaper
n03196217: digital_clock
n03197337: digital_watch
n03201208: dining_table
n03207743: dishrag
n03207941: dishwasher
n03208938: disk_brake
n03216828: dock
n03218198: dogsled
n03220513: dome
n03223299: doormat
n03240683: drilling_platform
n03249569: drum
n03250847: drumstick
n03255030: dumbbell
n03259280: Dutch_oven
n03271574: electric_fan
n03272010: electric_guitar
n03272562: electric_locomotive
n03290653: entertainment_center
n03291819: envelope
n03297495: espresso_maker
n03314780: face_powder
n03325584: feather_boa
n03337140: file
n03344393: fireboat
n03345487: fire_engine
n03347037: fire_screen
n03355925: flagpole
n03372029: flute
n03376595: folding_chair
n03379051: football_helmet
n03384352: forklift
n03388043: fountain
n03388183: fountain_pen
n03388549: four-poster
n03393912: freight_car
n03394916: French_horn
n03400231: frying_pan
n03404251: fur_coat
n03417042: garbage_truck
n03424325: gasmask
n03425413: gas_pump
n03443371: goblet
n03444034: go-kart
n03445777: golf_ball
n03445924: golfcart
n03447447: gondola
n03447721: gong
n03450230: gown
n03452741: grand_piano
n03457902: greenhouse
n03459775: grille
n03461385: grocery_store
n03467068: guillotine
n03476684: hair_slide
n03476991: hair_spray
n03478589: half_track
n03481172: hammer
n03482405: hamper
n03483316: hand_blower
n03485407: hand-held_computer
n03485794: handkerchief
n03492542: hard_disc
n03494278: harmonica
n03495258: harp
n03496892: harvester
n03498962: hatchet
n03527444: holster
n03529860: home_theater
n03530642: honeycomb
n03532672: hook
n03534580: hoopskirt
n03535780: horizontal_bar
n03538406: horse_cart
n03544143: hourglass
n03584254: iPod
n03584829: iron
n03590841: jack-o'-lantern
n03594734: jean
n03594945: jeep
n03595614: jersey
n03598930: jigsaw_puzzle
n03599486: jinrikisha
n03602883: joystick
n03617480: kimono
n03623198: knee_pad
n03627232: knot
n03630383: lab_coat
n03633091: ladle
n03637318: lampshade
n03642806: laptop
n03649909: lawn_mower
n03657121: lens_cap
n03658185: letter_opener
n03661043: library
n03662601: lifeboat
n03666591: lighter
n03670208: limousine
n03673027: liner
n03676483: lipstick
n03680355: Loafer
n03690938: lotion
n03691459: loudspeaker
n03692522: loupe
n03697007: lumbermill
n03706229: magnetic_compass
n03709823: mailbag
n03710193: mailbox
n03710637: maillot_(tights)
n03710721: maillot_(tank_suit)
n03717622: manhole_cover
n03720891: maraca
n03721384: marimba
n03724870: mask
n03729826: matchstick
n03733131: maypole
n03733281: maze
n03733805: measuring_cup
n03742115: medicine_chest
n03743016: megalith
n03759954: microphone
n03761084: microwave
n03763968: military_uniform
n03764736: milk_can
n03769881: minibus
n03770439: miniskirt
n03770679: minivan
n03773504: missile
n03775071: mitten
n03775546: mixing_bowl
n03776460: mobile_home
n03777568: Model_T
n03777754: modem
n03781244: monastery
n03782006: monitor
n03785016: moped
n03786901: mortar
n03787032: mortarboard
n03788195: mosque
n03788365: mosquito_net
n03791053: motor_scooter
n03792782: mountain_bike
n03792972: mountain_tent
n03793489: mouse
n03794056: mousetrap
n03796401: moving_van
n03803284: muzzle
n03804744: nail
n03814639: neck_brace
n03814906: necklace
n03825788: nipple
n03832673: notebook
n03837869: obelisk
n03838899: oboe
n03840681: ocarina
n03841143: odometer
n03843555: oil_filter
n03854065: organ
n03857828: oscilloscope
n03866082: overskirt
n03868242: oxcart
n03868863: oxygen_mask
n03871628: packet
n03873416: paddle
n03874293: paddlewheel
n03874599: padlock
n03876231: paintbrush
n03877472: pajama
n03877845: palace
n03884397: panpipe
n03887697: paper_towel
n03888257: parachute
n03888605: parallel_bars
n03891251: park_bench
n03891332: parking_meter
n03895866: passenger_car
n03899768: patio
n03902125: pay-phone
n03903868: pedestal
n03908618: pencil_box
n03908714: pencil_sharpener
n03916031: perfume
n03920288: Petri_dish
n03924679: photocopier
n03929660: pick
n03929855: pickelhaube
n03930313: picket_fence
n03930630: pickup
n03933933: pier
n03935335: piggy_bank
n03937543: pill_bottle
n03938244: pillow
n03942813: ping-pong_ball
n03944341: pinwheel
n03947888: pirate
n03950228: pitcher
n03954731: plane
n03956157: planetarium
n03958227: plastic_bag
n03961711: plate_rack
n03967562: plow
n03970156: plunger
n03976467: Polaroid_camera
n03976657: pole
n03977966: police_van
n03980874: poncho
n03982430: pool_table
n03983396: pop_bottle
n03991062: pot
n03992509: potter's_wheel
n03995372: power_drill
n03998194: prayer_rug
n04004767: printer
n04005630: prison
n04008634: projectile
n04009552: projector
n04019541: puck
n04023962: punching_bag
n04026417: purse
n04033901: quill
n04033995: quilt
n04037443: racer
n04039381: racket
n04040759: radiator
n04041544: radio
n04044716: radio_telescope
n04049303: rain_barrel
n04065272: recreational_vehicle
n04067472: reel
n04069434: reflex_camera
n04070727: refrigerator
n04074963: remote_control
n04081281: restaurant
n04086273: revolver
n04090263: rifle
n04099969: rocking_chair
n04111531: rotisserie
n04116512: rubber_eraser
n04118538: rugby_ball
n04118776: rule
n04120489: running_shoe
n04125021: safe
n04127249: safety_pin
n04131690: saltshaker
n04133789: sandal
n04136333: sarong
n04141076: sax
n04141327: scabbard
n04141975: scale
n04146614: school_bus
n04147183: schooner
n04149813: scoreboard
n04152593: screen
n04153751: screw
n04154565: screwdriver
n04162706: seat_belt
n04179913: sewing_machine
n04192698: shield
n04200800: shoe_shop
n04201297: shoji
n04204238: shopping_basket
n04204347: shopping_cart
n04208210: shovel
n04209133: shower_cap
n04209239: shower_curtain
n04228054: ski
n04229816: ski_mask
n04235860: sleeping_bag
n04238763: slide_rule
n04239074: sliding_door
n04243546: slot
n04251144: snorkel
n04252077: snowmobile
n04252225: snowplow
n04254120: soap_dispenser
n04254680: soccer_ball
n04254777: sock
n04258138: solar_dish
n04259630: sombrero
n04263257: soup_bowl
n04264628: space_bar
n04265275: space_heater
n04266014: space_shuttle
n04270147: spatula
n04273569: speedboat
n04275548: spider_web
n04277352: spindle
n04285008: sports_car
n04286575: spotlight
n04296562: stage
n04310018: steam_locomotive
n04311004: steel_arch_bridge
n04311174: steel_drum
n04317175: stethoscope
n04325704: stole
n04326547: stone_wall
n04328186: stopwatch
n04330267: stove
n04332243: strainer
n04335435: streetcar
n04336792: stretcher
n04344873: studio_couch
n04346328: stupa
n04347754: submarine
n04350905: suit
n04355338: sundial
n04355933: sunglass
n04356056: sunglasses
n04357314: sunscreen
n04366367: suspension_bridge
n04367480: swab
n04370456: sweatshirt
n04371430: swimming_trunks
n04371774: swing
n04372370: switch
n04376876: syringe
n04380533: table_lamp
n04389033: tank
n04392985: tape_player
n04398044: teapot
n04399382: teddy
n04404412: television
n04409515: tennis_ball
n04417672: thatch
n04418357: theater_curtain
n04423845: thimble
n04428191: thresher
n04429376: throne
n04435653: tile_roof
n04442312: toaster
n04443257: tobacco_shop
n04447861: toilet_seat
n04456115: torch
n04458633: totem_pole
n04461696: tow_truck
n04462240: toyshop
n04465501: tractor
n04467665: trailer_truck
n04476259: tray
n04479046: trench_coat
n04482393: tricycle
n04483307: trimaran
n04485082: tripod
n04486054: triumphal_arch
n04487081: trolleybus
n04487394: trombone
n04493381: tub
n04501370: turnstile
n04505470: typewriter_keyboard
n04507155: umbrella
n04509417: unicycle
n04515003: upright
n04517823: vacuum
n04522168: vase
n04523525: vault
n04525038: velvet
n04525305: vending_machine
n04532106: vestment
n04532670: viaduct
n04536866: violin
n04540053: volleyball
n04542943: waffle_iron
n04548280: wall_clock
n04548362: wallet
n04550184: wardrobe
n04552348: warplane
n04553703: washbasin
n04554684: washer
n04557648: water_bottle
n04560804: water_jug
n04562935: water_tower
n04579145: whiskey_jug
n04579432: whistle
n04584207: wig
n04589890: window_screen
n04590129: window_shade
n04591157: Windsor_tie
n04591713: wine_bottle
n04592741: wing
n04596742: wok
n04597913: wooden_spoon
n04599235: wool
n04604644: worm_fence
n04606251: wreck
n04612504: yawl
n04613696: yurt
n06359193: web_site
n06596364: comic_book
n06785654: crossword_puzzle
n06794110: street_sign
n06874185: traffic_light
n07248320: book_jacket
n07565083: menu
n07579787: plate
n07583066: guacamole
n07584110: consomme
n07590611: hot_pot
n07613480: trifle
n07614500: ice_cream
n07615774: ice_lolly
n07684084: French_loaf
n07693725: bagel
n07695742: pretzel
n07697313: cheeseburger
n07697537: hotdog
n07711569: mashed_potato
n07714571: head_cabbage
n07714990: broccoli
n07715103: cauliflower
n07716358: zucchini
n07716906: spaghetti_squash
n07717410: acorn_squash
n07717556: butternut_squash
n07718472: cucumber
n07718747: artichoke
n07720875: bell_pepper
n07730033: cardoon
n07734744: mushroom
n07742313: Granny_Smith
n07745940: strawberry
n07747607: orange
n07749582: lemon
n07753113: fig
n07753275: pineapple
n07753592: banana
n07754684: jackfruit
n07760859: custard_apple
n07768694: pomegranate
n07802026: hay
n07831146: carbonara
n07836838: chocolate_sauce
n07860988: dough
n07871810: meat_loaf
n07873807: pizza
n07875152: potpie
n07880968: burrito
n07892512: red_wine
n07920052: espresso
n07930864: cup
n07932039: eggnog
n09193705: alp
n09229709: bubble
n09246464: cliff
n09256479: coral_reef
n09288635: geyser
n09332890: lakeside
n09399592: promontory
n09421951: sandbar
n09428293: seashore
n09468604: valley
n09472597: volcano
n09835506: ballplayer
n10148035: groom
n10565667: scuba_diver
n11879895: rapeseed
n11939491: daisy
n12057211: yellow_lady's_slipper
n12144580: corn
n12267677: acorn
n12620546: hip
n12768682: buckeye
n12985857: coral_fungus
n12998815: agaric
n13037406: gyromitra
n13040303: stinkhorn
n13044778: earthstar
n13052670: hen-of-the-woods
n13054560: bolete
n13133613: ear
n15075141: toilet_tissue
# Download script/URL (optional)
download: yolo/data/scripts/get_imagenet.sh
================================================
FILE: ultralytics/datasets/Objects365.yaml
================================================
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Objects365 dataset https://www.objects365.org/ by Megvii
# Example usage: yolo train data=Objects365.yaml
# parent
# ├── ultralytics
# └── datasets
# └── Objects365 ← downloads here (712 GB = 367G data + 345G zips)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/Objects365 # dataset root dir
train: images/train # train images (relative to 'path') 1742289 images
val: images/val # val images (relative to 'path') 80000 images
test: # test images (optional)
# Classes
names:
0: Person
1: Sneakers
2: Chair
3: Other Shoes
4: Hat
5: Car
6: Lamp
7: Glasses
8: Bottle
9: Desk
10: Cup
11: Street Lights
12: Cabinet/shelf
13: Handbag/Satchel
14: Bracelet
15: Plate
16: Picture/Frame
17: Helmet
18: Book
19: Gloves
20: Storage box
21: Boat
22: Leather Shoes
23: Flower
24: Bench
25: Potted Plant
26: Bowl/Basin
27: Flag
28: Pillow
29: Boots
30: Vase
31: Microphone
32: Necklace
33: Ring
34: SUV
35: Wine Glass
36: Belt
37: Monitor/TV
38: Backpack
39: Umbrella
40: Traffic Light
41: Speaker
42: Watch
43: Tie
44: Trash bin Can
45: Slippers
46: Bicycle
47: Stool
48: Barrel/bucket
49: Van
50: Couch
51: Sandals
52: Basket
53: Drum
54: Pen/Pencil
55: Bus
56: Wild Bird
57: High Heels
58: Motorcycle
59: Guitar
60: Carpet
61: Cell Phone
62: Bread
63: Camera
64: Canned
65: Truck
66: Traffic cone
67: Cymbal
68: Lifesaver
69: Towel
70: Stuffed Toy
71: Candle
72: Sailboat
73: Laptop
74: Awning
75: Bed
76: Faucet
77: Tent
78: Horse
79: Mirror
80: Power outlet
81: Sink
82: Apple
83: Air Conditioner
84: Knife
85: Hockey Stick
86: Paddle
87: Pickup Truck
88: Fork
89: Traffic Sign
90: Balloon
91: Tripod
92: Dog
93: Spoon
94: Clock
95: Pot
96: Cow
97: Cake
98: Dinning Table
99: Sheep
100: Hanger
101: Blackboard/Whiteboard
102: Napkin
103: Other Fish
104: Orange/Tangerine
105: Toiletry
106: Keyboard
107: Tomato
108: Lantern
109: Machinery Vehicle
110: Fan
111: Green Vegetables
112: Banana
113: Baseball Glove
114: Airplane
115: Mouse
116: Train
117: Pumpkin
118: Soccer
119: Skiboard
120: Luggage
121: Nightstand
122: Tea pot
123: Telephone
124: Trolley
125: Head Phone
126: Sports Car
127: Stop Sign
128: Dessert
129: Scooter
130: Stroller
131: Crane
132: Remote
133: Refrigerator
134: Oven
135: Lemon
136: Duck
137: Baseball Bat
138: Surveillance Camera
139: Cat
140: Jug
141: Broccoli
142: Piano
143: Pizza
144: Elephant
145: Skateboard
146: Surfboard
147: Gun
148: Skating and Skiing shoes
149: Gas stove
150: Donut
151: Bow Tie
152: Carrot
153: Toilet
154: Kite
155: Strawberry
156: Other Balls
157: Shovel
158: Pepper
159: Computer Box
160: Toilet Paper
161: Cleaning Products
162: Chopsticks
163: Microwave
164: Pigeon
165: Baseball
166: Cutting/chopping Board
167: Coffee Table
168: Side Table
169: Scissors
170: Marker
171: Pie
172: Ladder
173: Snowboard
174: Cookies
175: Radiator
176: Fire Hydrant
177: Basketball
178: Zebra
179: Grape
180: Giraffe
181: Potato
182: Sausage
183: Tricycle
184: Violin
185: Egg
186: Fire Extinguisher
187: Candy
188: Fire Truck
189: Billiards
190: Converter
191: Bathtub
192: Wheelchair
193: Golf Club
194: Briefcase
195: Cucumber
196: Cigar/Cigarette
197: Paint Brush
198: Pear
199: Heavy Truck
200: Hamburger
201: Extractor
202: Extension Cord
203: Tong
204: Tennis Racket
205: Folder
206: American Football
207: earphone
208: Mask
209: Kettle
210: Tennis
211: Ship
212: Swing
213: Coffee Machine
214: Slide
215: Carriage
216: Onion
217: Green beans
218: Projector
219: Frisbee
220: Washing Machine/Drying Machine
221: Chicken
222: Printer
223: Watermelon
224: Saxophone
225: Tissue
226: Toothbrush
227: Ice cream
228: Hot-air balloon
229: Cello
230: French Fries
231: Scale
232: Trophy
233: Cabbage
234: Hot dog
235: Blender
236: Peach
237: Rice
238: Wallet/Purse
239: Volleyball
240: Deer
241: Goose
242: Tape
243: Tablet
244: Cosmetics
245: Trumpet
246: Pineapple
247: Golf Ball
248: Ambulance
249: Parking meter
250: Mango
251: Key
252: Hurdle
253: Fishing Rod
254: Medal
255: Flute
256: Brush
257: Penguin
258: Megaphone
259: Corn
260: Lettuce
261: Garlic
262: Swan
263: Helicopter
264: Green Onion
265: Sandwich
266: Nuts
267: Speed Limit Sign
268: Induction Cooker
269: Broom
270: Trombone
271: Plum
272: Rickshaw
273: Goldfish
274: Kiwi fruit
275: Router/modem
276: Poker Card
277: Toaster
278: Shrimp
279: Sushi
280: Cheese
281: Notepaper
282: Cherry
283: Pliers
284: CD
285: Pasta
286: Hammer
287: Cue
288: Avocado
289: Hamimelon
290: Flask
291: Mushroom
292: Screwdriver
293: Soap
294: Recorder
295: Bear
296: Eggplant
297: Board Eraser
298: Coconut
299: Tape Measure/Ruler
300: Pig
301: Showerhead
302: Globe
303: Chips
304: Steak
305: Crosswalk Sign
306: Stapler
307: Camel
308: Formula 1
309: Pomegranate
310: Dishwasher
311: Crab
312: Hoverboard
313: Meat ball
314: Rice Cooker
315: Tuba
316: Calculator
317: Papaya
318: Antelope
319: Parrot
320: Seal
321: Butterfly
322: Dumbbell
323: Donkey
324: Lion
325: Urinal
326: Dolphin
327: Electric Drill
328: Hair Dryer
329: Egg tart
330: Jellyfish
331: Treadmill
332: Lighter
333: Grapefruit
334: Game board
335: Mop
336: Radish
337: Baozi
338: Target
339: French
340: Spring Rolls
341: Monkey
342: Rabbit
343: Pencil Case
344: Yak
345: Red Cabbage
346: Binoculars
347: Asparagus
348: Barbell
349: Scallop
350: Noddles
351: Comb
352: Dumpling
353: Oyster
354: Table Tennis paddle
355: Cosmetics Brush/Eyeliner Pencil
356: Chainsaw
357: Eraser
358: Lobster
359: Durian
360: Okra
361: Lipstick
362: Cosmetics Mirror
363: Curling
364: Table Tennis
# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
from tqdm import tqdm
from ultralytics.yolo.utils.checks import check_requirements
from ultralytics.yolo.utils.downloads import download
from ultralytics.yolo.utils.ops import xyxy2xywhn
import numpy as np
from pathlib import Path
# Objects365 download-and-convert script (Python source stored in this YAML block scalar).
# For each split it downloads the annotation file and image patches, moves every
# extracted .jpg directly into images/{split}, and appends YOLO-style label lines
# to labels/{split}/<image stem>.txt.
# NOTE(review): `yaml` is not defined in this script - presumably injected by whatever executes it; verify.
# NOTE(review): indentation is not preserved in this listing, so the loop nesting described
# below is inferred from variable use - confirm against the original file.
# pycocotools is imported only after the requirement check on the next line has run.
check_requirements(('pycocotools>=2.0',))
from pycocotools.coco import COCO
# Make Directories
dir = Path(yaml['path']) # dataset root dir
for p in 'images', 'labels':
(dir / p).mkdir(parents=True, exist_ok=True)
for q in 'train', 'val':
(dir / p / q).mkdir(parents=True, exist_ok=True)
# Train, Val Splits
# `patches` is an exclusive upper bound for range(): train uses patch0..patch50, val uses patch0..patch43.
for split, patches in [('train', 50 + 1), ('val', 43 + 1)]:
print(f"Processing {split} in {patches} patches ...")
images, labels = dir / 'images' / split, dir / 'labels' / split
# Download
url = f"https://dorc.ks3-cn-beijing.ksyun.com/data-set/2020Objects365%E6%95%B0%E6%8D%AE%E9%9B%86/{split}/"
if split == 'train':
# Train annotations come as a .tar.gz; the code further down expects a
# zhiyuan_objv2_train.json next to it - presumably download() extracts archives; verify.
download([f'{url}zhiyuan_objv2_{split}.tar.gz'], dir=dir) # annotations json
download([f'{url}patch{i}.tar.gz' for i in range(patches)], dir=images, curl=True, threads=8)
elif split == 'val':
download([f'{url}zhiyuan_objv2_{split}.json'], dir=dir) # annotations json
# Val patches are split across two remote folders: v1 holds patch0..15, v2 holds patch16..43.
download([f'{url}images/v1/patch{i}.tar.gz' for i in range(15 + 1)], dir=images, curl=True, threads=8)
download([f'{url}images/v2/patch{i}.tar.gz' for i in range(16, patches)], dir=images, curl=True, threads=8)
# Move
# Flattens the tree: every .jpg found recursively is renamed to images/{split}/<file name>.
for f in tqdm(images.rglob('*.jpg'), desc=f'Moving {split} images'):
f.rename(images / f.name) # move to /images/{split}
# Labels
coco = COCO(dir / f'zhiyuan_objv2_{split}.json')
names = [x["name"] for x in coco.loadCats(coco.getCatIds())]
# cid (position of the category in `names`) is the class index written to the label files.
for cid, cat in enumerate(names):
catIds = coco.getCatIds(catNms=[cat])
imgIds = coco.getImgIds(catIds=catIds)
for im in tqdm(coco.loadImgs(imgIds), desc=f'Class {cid + 1}/{len(names)} {cat}'):
width, height = im["width"], im["height"]
path = Path(im["file_name"]) # image filename
try:
# Append mode: the same image is visited once per category it contains, so each
# visit adds lines to the same label file. NOTE(review): nothing visible here clears
# existing label files, so re-running the script would add duplicate lines.
with open(labels / path.with_suffix('.txt').name, 'a') as file:
annIds = coco.getAnnIds(imgIds=im["id"], catIds=catIds, iscrowd=None)
for a in coco.loadAnns(annIds):
x, y, w, h = a['bbox'] # bounding box in xywh (xy top-left corner)
xyxy = np.array([x, y, x + w, y + h])[None] # pixels(1,4)
x, y, w, h = xyxy2xywhn(xyxy, w=width, h=height, clip=True)[0] # normalized and clipped
file.write(f"{cid} {x:.5f} {y:.5f} {w:.5f} {h:.5f}\n")
# Best effort: any exception raised in the try body is printed and not re-raised.
except Exception as e:
print(e)
================================================
FILE: ultralytics/datasets/SKU-110K.yaml
================================================
# Ultralytics YOLO 🚀, AGPL-3.0 license
# SKU-110K retail items dataset https://github.com/eg4000/SKU110K_CVPR19 by Trax Retail
# Example usage: yolo train data=SKU-110K.yaml
# parent
# ├── ultralytics
# └── datasets
# └── SKU-110K ← downloads here (13.6 GB)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/SKU-110K # dataset root dir
train: train.txt # train images (relative to 'path') 8219 images
val: val.txt # val images (relative to 'path') 588 images
test: test.txt # test images (optional) 2936 images
# Classes
names:
0: object
# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
import shutil
from pathlib import Path
import numpy as np
import pandas as pd
from tqdm import tqdm
from ultralytics.yolo.utils.downloads import download
from ultralytics.yolo.utils.ops import xyxy2xywh
# SKU-110K download-and-convert script (Python source stored in this YAML block scalar).
# It downloads the archive into the parent of the dataset root, renames the
# 'SKU110K_fixed' folder to the dataset root, then turns each annotations CSV into
# an image-list .txt plus one label file per image.
# NOTE(review): `yaml` is not defined in this script - presumably injected by whatever executes it; verify.
# NOTE(review): indentation is not preserved in this listing; nesting is inferred from variable use.
# Download
dir = Path(yaml['path']) # dataset root dir
parent = Path(dir.parent) # download dir
urls = ['http://trax-geometry.s3.amazonaws.com/cvpr_challenge/SKU110K_fixed.tar.gz']
# presumably download() also extracts the archive, producing parent/'SKU110K_fixed' - verify.
download(urls, dir=parent)
# Rename directories
# An existing dataset root is deleted before the extracted folder is renamed onto it.
if dir.exists():
shutil.rmtree(dir)
(parent / 'SKU110K_fixed').rename(dir) # rename dir
(dir / 'labels').mkdir(parents=True, exist_ok=True) # create labels dir
# Convert labels
names = 'image', 'x1', 'y1', 'x2', 'y2', 'class', 'image_width', 'image_height' # column names
for d in 'annotations_train.csv', 'annotations_val.csv', 'annotations_test.csv':
x = pd.read_csv(dir / 'annotations' / d, names=names).values # annotations
images, unique_images = x[:, 0], np.unique(x[:, 0])
# Image list path: dir/annotations_<split>.csv -> .txt suffix -> 'annotations_' removed,
# i.e. dir/<split>.txt. The replace() runs on the whole path string, so a dataset root
# whose path contains 'annotations_' would be altered too.
with open((dir / d).with_suffix('.txt').__str__().replace('annotations_', ''), 'w') as f:
f.writelines(f'./images/{s}\n' for s in unique_images)
for im in tqdm(unique_images, desc=f'Converting {dir / d}'):
cls = 0 # single-class dataset
# Label file is labels/<image name with .txt suffix>, opened in append mode.
with open((dir / 'labels' / im).with_suffix('.txt'), 'a') as f:
# One output line per CSV row belonging to this image.
for r in x[images == im]:
w, h = r[6], r[7] # image width, height
# Corner coordinates are divided by image width/height before the xyxy -> xywh conversion.
xywh = xyxy2xywh(np.array([[r[1] / w, r[2] / h, r[3] / w, r[4] / h]]))[0] # instance
f.write(f"{cls} {xywh[0]:.5f} {xywh[1]:.5f} {xywh[2]:.5f} {xywh[3]:.5f}\n") # write label
================================================
FILE: ultralytics/datasets/VOC.yaml
================================================
# Ultralytics YOLO 🚀, AGPL-3.0 license
# PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC by University of Oxford
# Example usage: yolo train data=VOC.yaml
# parent
# ├── ultralytics
# └── datasets
# └── VOC ← downloads here (2.8 GB)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/VOC
train: # train images (relative to 'path') 16551 images
- images/train2012
- images/train2007
- images/val2012
- images/val2007
val: # val images (relative to 'path') 4952 images
- images/test2007
test: # test images (optional)
- images/test2007
# Classes
names:
0: aeroplane
1: bicycle
2: bird
3: boat
4: bottle
5: bus
6: car
7: cat
8: chair
9: cow
10: diningtable
11: dog
12: horse
13: motorbike
14: person
15: pottedplant
16: sheep
17: sofa
18: train
19: tvmonitor
# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
import xml.etree.ElementTree as ET
from tqdm import tqdm
from ultralytics.yolo.utils.downloads import download
from pathlib import Path
def convert_label(path, lb_path, year, image_id):
    """Convert one PASCAL VOC XML annotation into a YOLO-format label file.

    Args:
        path: VOCdevkit root (a ``Path``); the annotation is read from
            ``path / f'VOC{year}/Annotations/{image_id}.xml'``.
        lb_path: Destination label file; it is created or truncated.
        year: Year string used in the ``VOC{year}`` folder name.
        image_id: Annotation file stem.

    Every ``<object>`` whose name appears in ``yaml['names']`` and whose
    ``difficult`` flag is not 1 is written as one line
    ``class_id x y w h``, with the box values divided by the image width/height.
    Both files are closed on return, including when parsing raises.
    """
    def convert_box(size, box):
        # size is (width, height); box is (xmin, xmax, ymin, ymax) in pixels.
        dw, dh = 1. / size[0], 1. / size[1]
        # The "- 1" presumably compensates for VOC's 1-based pixel coordinates - TODO confirm.
        x, y, w, h = (box[0] + box[1]) / 2.0 - 1, (box[2] + box[3]) / 2.0 - 1, box[1] - box[0], box[3] - box[2]
        return x * dw, y * dh, w * dw, h * dh

    # Context managers guarantee both handles are released; this function is
    # called once per image, so leaked handles would otherwise accumulate.
    with open(path / f'VOC{year}/Annotations/{image_id}.xml') as in_file, open(lb_path, 'w') as out_file:
        root = ET.parse(in_file).getroot()
        size = root.find('size')
        w = int(size.find('width').text)
        h = int(size.find('height').text)

        names = list(yaml['names'].values())  # names list
        for obj in root.iter('object'):
            cls = obj.find('name').text
            if cls in names and int(obj.find('difficult').text) != 1:
                xmlbox = obj.find('bndbox')
                bb = convert_box((w, h), [float(xmlbox.find(x).text) for x in ('xmin', 'xmax', 'ymin', 'ymax')])
                cls_id = names.index(cls)  # class id
                out_file.write(" ".join([str(a) for a in (cls_id, *bb)]) + '\n')
# PASCAL VOC download-and-convert driver (Python source stored in this YAML block scalar).
# After downloading the three archives under <root>/images, it walks each (year, image_set)
# pair, moves every listed JPEG into images/{image_set}{year} and writes the matching
# label file into labels/{image_set}{year} via convert_label().
# NOTE(review): `yaml` is not defined in this script - presumably injected by whatever executes it; verify.
# NOTE(review): indentation is not preserved in this listing; nesting is inferred from variable use.
# Download
dir = Path(yaml['path']) # dataset root dir
url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/'
urls = [f'{url}VOCtrainval_06-Nov-2007.zip', # 446MB, 5012 images
f'{url}VOCtest_06-Nov-2007.zip', # 438MB, 4953 images
f'{url}VOCtrainval_11-May-2012.zip'] # 1.95GB, 17126 images
download(urls, dir=dir / 'images', curl=True, threads=3)
# Convert
# presumably the extracted archives all land in images/VOCdevkit - verify against download().
path = dir / 'images/VOCdevkit'
for year, image_set in ('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 'test'):
imgs_path = dir / 'images' / f'{image_set}{year}'
lbs_path = dir / 'labels' / f'{image_set}{year}'
imgs_path.mkdir(exist_ok=True, parents=True)
lbs_path.mkdir(exist_ok=True, parents=True)
# Image ids are the whitespace-separated tokens of ImageSets/Main/{image_set}.txt.
with open(path / f'VOC{year}/ImageSets/Main/{image_set}.txt') as f:
image_ids = f.read().strip().split()
# Note: `id` shadows the builtin, and `f` is rebound from the file handle above to a Path below.
for id in tqdm(image_ids, desc=f'{image_set}{year}'):
f = path / f'VOC{year}/JPEGImages/{id}.jpg' # old img path
lb_path = (lbs_path / f.name).with_suffix('.txt') # new label path
f.rename(imgs_path / f.name) # move image
convert_label(path, lb_path, year, id) # convert labels to YOLO format
================================================
FILE: ultralytics/datasets/VisDrone.yaml
================================================
# Ultralytics YOLO 🚀, AGPL-3.0 license
# VisDrone2019-DET dataset https://github.com/VisDrone/VisDrone-Dataset by Tianjin University
# Example usage: yolo train data=VisDrone.yaml
# parent
# ├── ultralytics
# └── datasets
# └── VisDrone ← downloads here (2.3 GB)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/VisDrone # dataset root dir
train: VisDrone2019-DET-train/images # train images (relative to 'path') 6471 images
val: VisDrone2019-DET-val/images # val images (relative to 'path') 548 images
test: VisDrone2019-DET-test-dev/images # test images (optional) 1610 images
# Classes
names:
0: pedestrian
1: people
2: bicycle
3: car
4: van
5: truck
6: tricycle
7: awning-tricycle
8: bus
9: motor
# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
import os
from pathlib import Path
from ultralytics.yolo.utils.downloads import download
# Convert the VisDrone annotations under `dir` into label files.
# `dir` is one split folder (a Path); annotations are read from dir/'annotations'/*.txt,
# image sizes from dir/'images'/<same stem>.jpg, and label files are written to the same
# path with the 'annotations' directory component replaced by 'labels'.
# NOTE(review): indentation is not preserved in this listing; nesting is inferred from variable use.
def visdrone2yolo(dir):
from PIL import Image
from tqdm import tqdm
def convert_box(size, box):
# Convert VisDrone box to YOLO xywh box
# size is (width, height); box[0], box[1] are the box origin and box[2], box[3] its
# width and height, so the returned centre is origin + extent / 2, scaled by 1/size.
dw = 1. / size[0]
dh = 1. / size[1]
return (box[0] + box[2] / 2) * dw, (box[1] + box[3] / 2) * dh, box[2] * dw, box[3] * dh
(dir / 'labels').mkdir(parents=True, exist_ok=True) # make labels directory
pbar = tqdm((dir / 'annotations').glob('*.txt'), desc=f'Converting {dir}')
for f in pbar:
# NOTE(review): the Image returned by Image.open() is never closed explicitly here.
img_size = Image.open((dir / 'images' / f.name).with_suffix('.jpg')).size
lines = []
with open(f, 'r') as file: # read annotation.txt
# Each annotation row is a comma-separated record; the first four fields are the box.
for row in [x.split(',') for x in file.read().strip().splitlines()]:
if row[4] == '0': # VisDrone 'ignored regions' class 0
continue
# Field 5 is shifted down by one to give the class index - presumably VisDrone
# category ids start at 1; confirm against the dataset documentation.
cls = int(row[5]) - 1
box = convert_box(img_size, tuple(map(int, row[:4])))
lines.append(f"{cls} {' '.join(f'{x:.6f}' for x in box)}\n")
# NOTE(review): this listing does not show whether the write below sits inside the row loop
# (file rewritten after every kept row, none written when no row is kept) or after it - confirm.
with open(str(f).replace(f'{os.sep}annotations{os.sep}', f'{os.sep}labels{os.sep}'), 'w') as fl:
fl.writelines(lines) # write label.txt
# Fetch the four VisDrone2019-DET archives into the dataset root
root = Path(yaml['path'])  # dataset root dir
archive_urls = [
    'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-train.zip',
    'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-val.zip',
    'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-test-dev.zip',
    'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-test-challenge.zip',
]
download(archive_urls, dir=root, curl=True, threads=4)

# Convert VisDrone annotations to YOLO labels for train, val and test-dev
for split in ('VisDrone2019-DET-train', 'VisDrone2019-DET-val', 'VisDrone2019-DET-test-dev'):
    visdrone2yolo(root / split)
================================================
FILE: ultralytics/datasets/coco-pose.yaml
================================================
# Ultralytics YOLO 🚀, AGPL-3.0 license
# COCO 2017 dataset http://cocodataset.org by Microsoft
# Example usage: yolo train data=coco-pose.yaml
# parent
# ├── ultralytics
# └── datasets
# └── coco-pose ← downloads here (20.1 GB)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/coco-pose # dataset root dir
train: train2017.txt # train images (relative to 'path') 118287 images
val: val2017.txt # val images (relative to 'path') 5000 images
test: test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794
# Keypoints
kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
flip_idx: [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]
# Classes
names:
0: person
# Download script/URL (optional)
download: |
from ultralytics.yolo.utils.downloads import download
from pathlib import Path
# Fetch the pose label archive into the parent of the dataset root
root = Path(yaml['path'])  # dataset root dir
release = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/'
download([release + 'coco2017labels-pose.zip'], dir=root.parent)

# Fetch the image archives into <root>/images
image_zips = [
    'http://images.cocodataset.org/zips/train2017.zip',  # 19G, 118k images
    'http://images.cocodataset.org/zips/val2017.zip',  # 1G, 5k images
    'http://images.cocodataset.org/zips/test2017.zip',  # 7G, 41k images (optional)
]
download(image_zips, dir=root / 'images', threads=3)
================================================
FILE: ultralytics/datasets/coco.yaml
================================================
# Ultralytics YOLO 🚀, AGPL-3.0 license
# COCO 2017 dataset http://cocodataset.org by Microsoft
# Example usage: yolo train data=coco.yaml
# parent
# ├── ultralytics
# └── datasets
# └── coco ← downloads here (20.1 GB)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/coco # dataset root dir
train: train2017.txt # train images (relative to 'path') 118287 images
val: val2017.txt # val images (relative to 'path') 5000 images
test: test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794
# Classes
names:
0: person
1: bicycle
2: car
3: motorcycle
4: airplane
5: bus
6: train
7: truck
8: boat
9: traffic light
10: fire hydrant
11: stop sign
12: parking meter
13: bench
14: bird
15: cat
16: dog
17: horse
18: sheep
19: cow
20: elephant
21: bear
22: zebra
23: giraffe
24: backpack
25: umbrella
26: handbag
27: tie
28: suitcase
29: frisbee
30: skis
31: snowboard
32: sports ball
33: kite
34: baseball bat
35: baseball glove
36: skateboard
37: surfboard
38: tennis racket
39: bottle
40: wine glass
41: cup
42: fork
43: knife
44: spoon
45: bowl
46: banana
47: apple
48: sandwich
49: orange
50: broccoli
51: carrot
52: hot dog
53: pizza
54: donut
55: cake
56: chair
57: couch
58: potted plant
59: bed
60: dining table
61: toilet
62: tv
63: laptop
64: mouse
65: remote
66: keyboard
67: cell phone
68: microwave
69: oven
70: toaster
71: sink
72: refrigerator
73: book
74: clock
75: vase
76: scissors
77: teddy bear
78: hair drier
79: toothbrush
# Download script/URL (optional)
download: |
from ultralytics.yolo.utils.downloads import download
from pathlib import Path
# Fetch the label archive into the parent of the dataset root
segments = True  # segment or box labels
root = Path(yaml['path'])  # dataset root dir
release = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/'
if segments:
    labels_zip = 'coco2017labels-segments.zip'
else:
    labels_zip = 'coco2017labels.zip'
download([release + labels_zip], dir=root.parent)

# Fetch the image archives into <root>/images
image_zips = [
    'http://images.cocodataset.org/zips/train2017.zip',  # 19G, 118k images
    'http://images.cocodataset.org/zips/val2017.zip',  # 1G, 5k images
    'http://images.cocodataset.org/zips/test2017.zip',  # 7G, 41k images (optional)
]
download(image_zips, dir=root / 'images', threads=3)
================================================
FILE: ultralytics/datasets/coco128-seg.yaml
================================================
# Ultralytics YOLO 🚀, AGPL-3.0 license
# COCO128-seg dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017) by Ultralytics
# Example usage: yolo train data=coco128-seg.yaml
# parent
# ├── ultralytics
# └── datasets
# └── coco128-seg ← downloads here (7 MB)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/coco128-seg # dataset root dir
train: images/train2017 # train images (relative to 'path') 128 images
val: images/train2017 # val images (relative to 'path') 128 images
test: # test images (optional)
# Classes
names:
0: person
1: bicycle
2: car
3: motorcycle
4: airplane
5: bus
6: train
7: truck
8: boat
9: traffic light
10: fire hydrant
11: stop sign
12: parking meter
13: bench
14: bird
15: cat
16: dog
17: horse
18: sheep
19: cow
20: elephant
21: bear
22: zebra
23: giraffe
24: backpack
25: umbrella
26: handbag
27: tie
28: suitcase
29: frisbee
30: skis
31: snowboard
32: sports ball
33: kite
34: baseball bat
35: baseball glove
36: skateboard
37: surfboard
38: tennis racket
39: bottle
40: wine glass
41: cup
42: fork
43: knife
44: spoon
45: bowl
46: banana
47: apple
48: sandwich
49: orange
50: broccoli
51: carrot
52: hot dog
53: pizza
54: donut
55: cake
56: chair
57: couch
58: potted plant
59: bed
60: dining table
61: toilet
62: tv
63: laptop
64: mouse
65: remote
66: keyboard
67: cell phone
68: microwave
69: oven
70: toaster
71: sink
72: refrigerator
73: book
74: clock
75: vase
76: scissors
77: teddy bear
78: hair drier
79: toothbrush
# Download script/URL (optional)
download: https://ultralytics.com/assets/coco128-seg.zip
================================================
FILE: ultralytics/datasets/coco128.yaml
================================================
# Ultralytics YOLO 🚀, AGPL-3.0 license
# COCO128 dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017) by Ultralytics
# Example usage: yolo train data=coco128.yaml
# parent
# ├── ultralytics
# └── datasets
# └── coco128 ← downloads here (7 MB)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/coco128 # dataset root dir
train: images/train2017 # train images (relative to 'path') 128 images
val: images/train2017 # val images (relative to 'path') 128 images
test: # test images (optional)
# Classes
names:
0: person
1: bicycle
2: car
3: motorcycle
4: airplane
5: bus
6: train
7: truck
8: boat
9: traffic light
10: fire hydrant
11: stop sign
12: parking meter
13: bench
14: bird
15: cat
16: dog
17: horse
18: sheep
19: cow
20: elephant
21: bear
22: zebra
23: giraffe
24: backpack
25: umbrella
26: handbag
27: tie
28: suitcase
29: frisbee
30: skis
31: snowboard
32: sports ball
33: kite
34: baseball bat
35: baseball glove
36: skateboard
37: surfboard
38: tennis racket
39: bottle
40: wine glass
41: cup
42: fork
43: knife
44: spoon
45: bowl
46: banana
47: apple
48: sandwich
49: orange
50: broccoli
51: carrot
52: hot dog
53: pizza
54: donut
55: cake
56: chair
57: couch
58: potted plant
59: bed
60: dining table
61: toilet
62: tv
63: laptop
64: mouse
65: remote
66: keyboard
67: cell phone
68: microwave
69: oven
70: toaster
71: sink
72: refrigerator
73: book
74: clock
75: vase
76: scissors
77: teddy bear
78: hair drier
79: toothbrush
# Download script/URL (optional)
download: https://ultralytics.com/assets/coco128.zip
================================================
FILE: ultralytics/datasets/coco8-pose.yaml
================================================
# Ultralytics YOLO 🚀, AGPL-3.0 license
# COCO8-pose dataset (first 8 images from COCO train2017) by Ultralytics
# Example usage: yolo train data=coco8-pose.yaml
# parent
# ├── ultralytics
# └── datasets
# └── coco8-pose ← downloads here (1 MB)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/coco8-pose # dataset root dir
train: images/train # train images (relative to 'path') 4 images
val: images/val # val images (relative to 'path') 4 images
test: # test images (optional)
# Keypoints
kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
flip_idx: [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]
# Classes
names:
0: person
# Download script/URL (optional)
download: https://ultralytics.com/assets/coco8-pose.zip
================================================
FILE: ultralytics/datasets/coco8-seg.yaml
================================================
# Ultralytics YOLO 🚀, AGPL-3.0 license
# COCO8-seg dataset (first 8 images from COCO train2017) by Ultralytics
# Example usage: yolo train data=coco8-seg.yaml
# parent
# ├── ultralytics
# └── datasets
# └── coco8-seg ← downloads here (1 MB)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/coco8-seg # dataset root dir
train: images/train # train images (relative to 'path') 4 images
val: images/val # val images (relative to 'path') 4 images
test: # test images (optional)
# Classes
names:
0: person
1: bicycle
2: car
3: motorcycle
4: airplane
5: bus
6: train
7: truck
8: boat
9: traffic light
10: fire hydrant
11: stop sign
12: parking meter
13: bench
14: bird
15: cat
16: dog
17: horse
18: sheep
19: cow
20: elephant
21: bear
22: zebra
23: giraffe
24: backpack
25: umbrella
26: handbag
27: tie
28: suitcase
29: frisbee
30: skis
31: snowboard
32: sports ball
33: kite
34: baseball bat
35: baseball glove
36: skateboard
37: surfboard
38: tennis racket
39: bottle
40: wine glass
41: cup
42: fork
43: knife
44: spoon
45: bowl
46: banana
47: apple
48: sandwich
49: orange
50: broccoli
51: carrot
52: hot dog
53: pizza
54: donut
55: cake
56: chair
57: couch
58: potted plant
59: bed
60: dining table
61: toilet
62: tv
63: laptop
64: mouse
65: remote
66: keyboard
67: cell phone
68: microwave
69: oven
70: toaster
71: sink
72: refrigerator
73: book
74: clock
75: vase
76: scissors
77: teddy bear
78: hair drier
79: toothbrush
# Download script/URL (optional)
download: https://ultralytics.com/assets/coco8-seg.zip
================================================
FILE: ultralytics/datasets/coco8.yaml
================================================
# Ultralytics YOLO 🚀, AGPL-3.0 license
# COCO8 dataset (first 8 images from COCO train2017) by Ultralytics
# Example usage: yolo train data=coco8.yaml
# parent
# ├── ultralytics
# └── datasets
# └── coco8 ← downloads here (1 MB)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/coco8 # dataset root dir
train: images/train # train images (relative to 'path') 4 images
val: images/val # val images (relative to 'path') 4 images
test: # test images (optional)
# Classes
names:
0: person
1: bicycle
2: car
3: motorcycle
4: airplane
5: bus
6: train
7: truck
8: boat
9: traffic light
10: fire hydrant
11: stop sign
12: parking meter
13: bench
14: bird
15: cat
16: dog
17: horse
18: sheep
19: cow
20: elephant
21: bear
22: zebra
23: giraffe
24: backpack
25: umbrella
26: handbag
27: tie
28: suitcase
29: frisbee
30: skis
31: snowboard
32: sports ball
33: kite
34: baseball bat
35: baseball glove
36: skateboard
37: surfboard
38: tennis racket
39: bottle
40: wine glass
41: cup
42: fork
43: knife
44: spoon
45: bowl
46: banana
47: apple
48: sandwich
49: orange
50: broccoli
51: carrot
52: hot dog
53: pizza
54: donut
55: cake
56: chair
57: couch
58: potted plant
59: bed
60: dining table
61: toilet
62: tv
63: laptop
64: mouse
65: remote
66: keyboard
67: cell phone
68: microwave
69: oven
70: toaster
71: sink
72: refrigerator
73: book
74: clock
75: vase
76: scissors
77: teddy bear
78: hair drier
79: toothbrush
# Download script/URL (optional)
download: https://ultralytics.com/assets/coco8.zip
================================================
FILE: ultralytics/datasets/xView.yaml
================================================
# Ultralytics YOLO 🚀, AGPL-3.0 license
# DIUx xView 2018 Challenge https://challenge.xviewdataset.org by U.S. National Geospatial-Intelligence Agency (NGA)
# -------- DOWNLOAD DATA MANUALLY and jar xf val_images.zip to 'datasets/xView' before running train command! --------
# Example usage: yolo train data=xView.yaml
# parent
# ├── ultralytics
# └── datasets
# └── xView ← downloads here (20.7 GB)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/xView # dataset root dir
train: images/autosplit_train.txt # train images (relative to 'path') 90% of 847 train images
val: images/autosplit_val.txt # val images (relative to 'path') 10% of 847 train images
# Classes
names:
0: Fixed-wing Aircraft
1: Small Aircraft
2: Cargo Plane
3: Helicopter
4: Passenger Vehicle
5: Small Car
6: Bus
7: Pickup Truck
8: Utility Truck
9: Truck
10: Cargo Truck
11: Truck w/Box
12: Truck Tractor
13: Trailer
14: Truck w/Flatbed
15: Truck w/Liquid
16: Crane Truck
17: Railway Vehicle
18: Passenger Car
19: Cargo Car
20: Flat Car
21: Tank car
22: Locomotive
23: Maritime Vessel
24: Motorboat
25: Sailboat
26: Tugboat
27: Barge
28: Fishing Vessel
29: Ferry
30: Yacht
31: Container Ship
32: Oil Tanker
33: Engineering Vehicle
34: Tower crane
35: Container Crane
36: Reach Stacker
37: Straddle Carrier
38: Mobile Crane
39: Dump Truck
40: Haul Truck
41: Scraper/Tractor
42: Front loader/Bulldozer
43: Excavator
44: Cement Mixer
45: Ground Grader
46: Hut/Tent
47: Shed
48: Building
49: Aircraft Hangar
50: Damaged Building
51: Facility
52: Construction Site
53: Vehicle Lot
54: Helipad
55: Storage Tank
56: Shipping container lot
57: Shipping Container
58: Pylon
59: Tower
# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
import json
import os
from pathlib import Path
import numpy as np
from PIL import Image
from tqdm import tqdm
from ultralytics.yolo.data.dataloaders.v5loader import autosplit
from ultralytics.yolo.utils.ops import xyxy2xywhn
def convert_labels(fname=Path('xView/xView_train.geojson')):
    """Convert xView geoJSON labels to YOLO format label files.

    The `labels/train` directory next to `fname` is recreated from scratch. For every feature
    that has `bounds_imcoords` and whose image exists under `train_images/`, one row
    `cls x y w h` is appended to `labels/train/<image_id>.txt` (suffix replaced by `.txt`).
    A feature that cannot be converted is reported with a warning and skipped.

    Args:
        fname (Path): Path to the xView training geoJSON file.
    """
    import shutil  # local import: the surrounding script does not import shutil

    path = fname.parent
    with open(fname) as f:
        print(f'Loading {fname}...')
        data = json.load(f)

    # Make dirs. Rows are appended below, so stale label files must be removed first.
    labels = Path(path / 'labels' / 'train')
    shutil.rmtree(labels, ignore_errors=True)
    labels.mkdir(parents=True, exist_ok=True)

    # xView classes 11-94 to 0-59
    xview_class2index = [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, -1, 3, -1, 4, 5, 6, 7, 8, -1, 9, 10, 11,
                         12, 13, 14, 15, -1, -1, 16, 17, 18, 19, 20, 21, 22, -1, 23, 24, 25, -1, 26, 27, -1, 28, -1,
                         29, 30, 31, 32, 33, 34, 35, 36, 37, -1, 38, 39, 40, 41, 42, 43, 44, 45, -1, -1, -1, -1, 46,
                         47, 48, 49, -1, 50, 51, -1, 52, -1, -1, -1, 53, 54, -1, 55, -1, -1, 56, -1, 57, -1, 58, 59]

    shapes = {}  # image_id -> (width, height), so each image is opened only once
    for feature in tqdm(data['features'], desc=f'Converting {fname}'):
        p = feature['properties']
        if not p['bounds_imcoords']:
            continue
        image_id = p['image_id']
        file = path / 'train_images' / image_id
        if not file.exists():  # 1395.tif missing
            continue
        try:
            box = np.array([int(num) for num in p['bounds_imcoords'].split(",")])
            if box.shape[0] != 4:
                raise ValueError(f'incorrect box shape {box.shape[0]}')
            cls = xview_class2index[int(p['type_id'])]  # xView class to 0-59
            if not 59 >= cls >= 0:
                raise ValueError(f'incorrect class index {cls}')

            # Write YOLO label
            if image_id not in shapes:
                with Image.open(file) as im:  # close the file handle once the size is known
                    shapes[image_id] = im.size
            width, height = shapes[image_id]
            # np.float was removed in NumPy 1.24; np.float64 is the dtype it used to alias
            box = xyxy2xywhn(box[None].astype(np.float64), w=width, h=height, clip=True)
            with open((labels / image_id).with_suffix('.txt'), 'a') as f:
                f.write(f"{cls} {' '.join(f'{x:.6f}' for x in box[0])}\n")  # write label.txt
        except Exception as e:
            # Deliberate best-effort: one bad feature must not abort the whole conversion
            print(f'WARNING: skipping one label for {file}: {e}')
# The archives must be downloaded manually from https://challenge.xviewdataset.org
root = Path(yaml['path'])  # dataset root dir
# urls = ['https://d307kc0mrhucc3.cloudfront.net/train_labels.zip', # train labels
# 'https://d307kc0mrhucc3.cloudfront.net/train_images.zip', # 15G, 847 train images
# 'https://d307kc0mrhucc3.cloudfront.net/val_images.zip'] # 5G, 282 val images (no labels)
# download(urls, dir=dir)

# Convert the geoJSON labels
convert_labels(root / 'xView_train.geojson')

# Move the extracted image folders under <root>/images
images_dir = root / 'images'
images_dir.mkdir(parents=True, exist_ok=True)
for extracted, target in (('train_images', 'train'), ('val_images', 'val')):
    (root / extracted).rename(images_dir / target)

# Split the training images
autosplit(images_dir / 'train')
================================================
FILE: ultralytics/hub/__init__.py
================================================
# Ultralytics YOLO 🚀, AGPL-3.0 license
import requests
from ultralytics.hub.auth import Auth
from ultralytics.hub.utils import PREFIX
from ultralytics.yolo.data.utils import HUBDatasetStats
from ultralytics.yolo.utils import LOGGER, SETTINGS, USER_CONFIG_DIR, yaml_save
def login(api_key=''):
    """
    Authenticate against the Ultralytics HUB API with the given API key.

    Args:
        api_key (str, optional): Either a plain API key or an API key combined with a model ID, i.e. key_id

    Example:
        from ultralytics import hub
        hub.login('API_KEY')
    """
    # Constructing Auth performs the login; the instance itself is not kept
    Auth(api_key=api_key, verbose=True)
def logout():
    """
    Clear the stored Ultralytics HUB API key and save the settings file. Use 'yolo hub login' to log in again.

    Example:
        from ultralytics import hub
        hub.logout()
    """
    settings_file = USER_CONFIG_DIR / 'settings.yaml'
    SETTINGS['api_key'] = ''  # blank the key in memory before saving
    yaml_save(settings_file, SETTINGS)
    LOGGER.info(f"{PREFIX}logged out ✅. To log in again, use 'yolo hub login'.")
def start(key=''):
"""
Start training models with Ultralytics HUB (DEPRECATED).
Args:
key (str, optional): A string containing either the API key and model ID combination (apikey_modelid),
or the full model URL (https://hub.ultralytics.com/models/apikey_modelid).
"""
api_key, model_id = key.split('_')
LOGGER.warning(f"""
WARNING ⚠️ ultralytics.start() is deprecated after 8.0.60. Updated usage to train Ultralytics HUB models is:
from ultralytics import YOLO, hub
hub.login('{api_key}')
mod
gitextract_nx_m3n2d/
├── .gitignore
├── Inference.py
├── LICENSE
├── MORE_USAGES.md
├── README.md
├── app_gradio.py
├── cog.yaml
├── fastsam/
│ ├── __init__.py
│ ├── decoder.py
│ ├── model.py
│ ├── predict.py
│ ├── prompt.py
│ └── utils.py
├── predict.py
├── requirements.txt
├── segpredict.py
├── setup.py
├── ultralytics/
│ ├── .pre-commit-config.yaml
│ ├── __init__.py
│ ├── datasets/
│ │ ├── Argoverse.yaml
│ │ ├── GlobalWheat2020.yaml
│ │ ├── ImageNet.yaml
│ │ ├── Objects365.yaml
│ │ ├── SKU-110K.yaml
│ │ ├── VOC.yaml
│ │ ├── VisDrone.yaml
│ │ ├── coco-pose.yaml
│ │ ├── coco.yaml
│ │ ├── coco128-seg.yaml
│ │ ├── coco128.yaml
│ │ ├── coco8-pose.yaml
│ │ ├── coco8-seg.yaml
│ │ ├── coco8.yaml
│ │ └── xView.yaml
│ ├── hub/
│ │ ├── __init__.py
│ │ ├── auth.py
│ │ ├── session.py
│ │ └── utils.py
│ ├── models/
│ │ ├── README.md
│ │ ├── rt-detr/
│ │ │ ├── rtdetr-l.yaml
│ │ │ └── rtdetr-x.yaml
│ │ ├── v3/
│ │ │ ├── yolov3-spp.yaml
│ │ │ ├── yolov3-tiny.yaml
│ │ │ └── yolov3.yaml
│ │ ├── v5/
│ │ │ ├── yolov5-p6.yaml
│ │ │ └── yolov5.yaml
│ │ ├── v6/
│ │ │ └── yolov6.yaml
│ │ └── v8/
│ │ ├── yolov8-cls.yaml
│ │ ├── yolov8-p2.yaml
│ │ ├── yolov8-p6.yaml
│ │ ├── yolov8-pose-p6.yaml
│ │ ├── yolov8-pose.yaml
│ │ ├── yolov8-rtdetr.yaml
│ │ ├── yolov8-seg.yaml
│ │ └── yolov8.yaml
│ ├── nn/
│ │ ├── __init__.py
│ │ ├── autobackend.py
│ │ ├── autoshape.py
│ │ ├── modules/
│ │ │ ├── __init__.py
│ │ │ ├── block.py
│ │ │ ├── conv.py
│ │ │ ├── head.py
│ │ │ ├── transformer.py
│ │ │ └── utils.py
│ │ └── tasks.py
│ ├── tracker/
│ │ ├── README.md
│ │ ├── __init__.py
│ │ ├── cfg/
│ │ │ ├── botsort.yaml
│ │ │ └── bytetrack.yaml
│ │ ├── track.py
│ │ ├── trackers/
│ │ │ ├── __init__.py
│ │ │ ├── basetrack.py
│ │ │ ├── bot_sort.py
│ │ │ └── byte_tracker.py
│ │ └── utils/
│ │ ├── __init__.py
│ │ ├── gmc.py
│ │ ├── kalman_filter.py
│ │ └── matching.py
│ ├── vit/
│ │ ├── __init__.py
│ │ ├── rtdetr/
│ │ │ ├── __init__.py
│ │ │ ├── model.py
│ │ │ ├── predict.py
│ │ │ ├── train.py
│ │ │ └── val.py
│ │ ├── sam/
│ │ │ ├── __init__.py
│ │ │ ├── amg.py
│ │ │ ├── autosize.py
│ │ │ ├── build.py
│ │ │ ├── model.py
│ │ │ ├── modules/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── decoders.py
│ │ │ │ ├── encoders.py
│ │ │ │ ├── mask_generator.py
│ │ │ │ ├── prompt_predictor.py
│ │ │ │ ├── sam.py
│ │ │ │ └── transformer.py
│ │ │ └── predict.py
│ │ └── utils/
│ │ ├── __init__.py
│ │ ├── loss.py
│ │ └── ops.py
│ └── yolo/
│ ├── __init__.py
│ ├── cfg/
│ │ ├── __init__.py
│ │ └── default.yaml
│ ├── data/
│ │ ├── __init__.py
│ │ ├── annotator.py
│ │ ├── augment.py
│ │ ├── base.py
│ │ ├── build.py
│ │ ├── converter.py
│ │ ├── dataloaders/
│ │ │ ├── __init__.py
│ │ │ ├── stream_loaders.py
│ │ │ ├── v5augmentations.py
│ │ │ └── v5loader.py
│ │ ├── dataset.py
│ │ ├── dataset_wrappers.py
│ │ ├── scripts/
│ │ │ ├── download_weights.sh
│ │ │ ├── get_coco.sh
│ │ │ ├── get_coco128.sh
│ │ │ └── get_imagenet.sh
│ │ └── utils.py
│ ├── engine/
│ │ ├── __init__.py
│ │ ├── exporter.py
│ │ ├── model.py
│ │ ├── predictor.py
│ │ ├── results.py
│ │ ├── trainer.py
│ │ └── validator.py
│ ├── nas/
│ │ ├── __init__.py
│ │ ├── model.py
│ │ ├── predict.py
│ │ └── val.py
│ ├── utils/
│ │ ├── __init__.py
│ │ ├── autobatch.py
│ │ ├── benchmarks.py
│ │ ├── callbacks/
│ │ │ ├── __init__.py
│ │ │ ├── base.py
│ │ │ ├── clearml.py
│ │ │ ├── comet.py
│ │ │ ├── dvc.py
│ │ │ ├── hub.py
│ │ │ ├── mlflow.py
│ │ │ ├── neptune.py
│ │ │ ├── raytune.py
│ │ │ ├── tensorboard.py
│ │ │ └── wb.py
│ │ ├── checks.py
│ │ ├── dist.py
│ │ ├── downloads.py
│ │ ├── errors.py
│ │ ├── files.py
│ │ ├── instance.py
│ │ ├── loss.py
│ │ ├── metrics.py
│ │ ├── ops.py
│ │ ├── patches.py
│ │ ├── plotting.py
│ │ ├── tal.py
│ │ ├── torch_utils.py
│ │ └── tuner.py
│ └── v8/
│ ├── __init__.py
│ ├── classify/
│ │ ├── __init__.py
│ │ ├── predict.py
│ │ ├── train.py
│ │ └── val.py
│ ├── detect/
│ │ ├── __init__.py
│ │ ├── predict.py
│ │ ├── train.py
│ │ └── val.py
│ ├── pose/
│ │ ├── __init__.py
│ │ ├── predict.py
│ │ ├── train.py
│ │ └── val.py
│ └── segment/
│ ├── __init__.py
│ ├── predict.py
│ ├── train.py
│ └── val.py
└── utils/
├── __init__.py
├── tools.py
└── tools_gradio.py
SYMBOL INDEX (1574 symbols across 105 files)
FILE: Inference.py
function parse_args (line 9) | def parse_args():
function main (line 74) | def main(args):
FILE: app_gradio.py
function segment_everything (line 72) | def segment_everything(
function segment_with_points (line 118) | def segment_with_points(
function get_points_with_draw (line 167) | def get_points_with_draw(image, label, evt: gr.SelectData):
function clear (line 363) | def clear():
function clear_text (line 366) | def clear_text():
FILE: fastsam/decoder.py
class FastSAMDecoder (line 7) | class FastSAMDecoder:
method __init__ (line 8) | def __init__(
method run_encoder (line 26) | def run_encoder(self, image):
method run_decoder (line 40) | def run_decoder(
method box_prompt (line 61) | def box_prompt(self, bbox):
method point_prompt (line 91) | def point_prompt(self, points, pointlabel): # numpy
method _format_results (line 115) | def _format_results(self, result, filter=0):
FILE: fastsam/model.py
class FastSAM (line 22) | class FastSAM(YOLO):
method predict (line 25) | def predict(self, source=None, stream=False, **kwargs):
method train (line 55) | def train(self, **kwargs):
method val (line 59) | def val(self, **kwargs):
method export (line 71) | def export(self, **kwargs):
method info (line 89) | def info(self, detailed=False, verbose=True):
method __call__ (line 99) | def __call__(self, source=None, stream=False, **kwargs):
method __getattr__ (line 103) | def __getattr__(self, attr):
FILE: fastsam/predict.py
class FastSAMPredictor (line 8) | class FastSAMPredictor(DetectionPredictor):
method __init__ (line 10) | def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
method postprocess (line 14) | def postprocess(self, preds, img, orig_imgs):
FILE: fastsam/prompt.py
class FastSAMPrompt (line 11) | class FastSAMPrompt:
method __init__ (line 13) | def __init__(self, image, results, device='cuda'):
method _segment_image (line 20) | def _segment_image(self, image, bbox):
method _format_results (line 37) | def _format_results(self, result, filter=0):
method filter_masks (line 54) | def filter_masks(annotations): # filte the overlap mask
method _get_bbox_from_mask (line 69) | def _get_bbox_from_mask(self, mask):
method plot_to_result (line 86) | def plot_to_result(self,
method plot (line 183) | def plot(self,
method fast_show_mask (line 213) | def fast_show_mask(
method fast_show_mask_gpu (line 270) | def fast_show_mask_gpu(
method retrieve (line 332) | def retrieve(self, model, preprocess, elements, search_text: str, devi...
method _crop_image (line 353) | def _crop_image(self, format_results):
method box_prompt (line 378) | def box_prompt(self, bbox=None, bboxes=None):
method point_prompt (line 415) | def point_prompt(self, points, pointlabel): # numpy
method text_prompt (line 440) | def text_prompt(self, text):
method everything_prompt (line 452) | def everything_prompt(self):
FILE: fastsam/utils.py
function adjust_bboxes_to_image_border (line 6) | def adjust_bboxes_to_image_border(boxes, image_shape, threshold=20):
function convert_box_xywh_to_xyxy (line 33) | def convert_box_xywh_to_xyxy(box):
function bbox_iou (line 41) | def bbox_iou(box1, boxes, iou_thres=0.9, image_shape=(640, 640), raw_out...
function image_to_np_ndarray (line 79) | def image_to_np_ndarray(image):
FILE: predict.py
class Predictor (line 14) | class Predictor(BasePredictor):
method setup (line 15) | def setup(self):
method predict (line 19) | def predict(
function prompt (line 140) | def prompt(results, args, box=None, point=None, text=None):
FILE: ultralytics/hub/__init__.py
function login (line 11) | def login(api_key=''):
function logout (line 25) | def logout():
function start (line 38) | def start(key=''):
function reset_model (line 57) | def reset_model(model_id=''):
function export_fmts_hub (line 66) | def export_fmts_hub():
function export_model (line 72) | def export_model(model_id='', format='torchscript'):
function get_export (line 82) | def get_export(model_id='', format='torchscript'):
function check_dataset (line 94) | def check_dataset(path='', task='detect'):
FILE: ultralytics/hub/auth.py
class Auth (line 11) | class Auth:
method __init__ (line 14) | def __init__(self, api_key='', verbose=False):
method request_api_key (line 55) | def request_api_key(self, max_attempts=3):
method authenticate (line 68) | def authenticate(self) -> bool:
method auth_with_cookies (line 88) | def auth_with_cookies(self) -> bool:
method get_auth_header (line 109) | def get_auth_header(self):
method get_state (line 123) | def get_state(self) -> bool:
method set_api_key (line 132) | def set_api_key(self, key: str):
FILE: ultralytics/hub/session.py
class HUBTrainingSession (line 16) | class HUBTrainingSession:
method __init__ (line 36) | def __init__(self, url):
method _register_signal_handlers (line 78) | def _register_signal_handlers(self):
method _handle_signal (line 83) | def _handle_signal(self, signum, frame):
method _stop_heartbeat (line 93) | def _stop_heartbeat(self):
method upload_metrics (line 97) | def upload_metrics(self):
method _get_model (line 102) | def _get_model(self):
method upload_model (line 139) | def upload_model(self, epoch, weights, is_best=False, map=0.0, final=F...
method _start_heartbeat (line 176) | def _start_heartbeat(self):
FILE: ultralytics/hub/utils.py
function request_with_credentials (line 23) | def request_with_credentials(url: str) -> any:
function requests_with_progress (line 60) | def requests_with_progress(method, url, **kwargs):
function smart_request (line 88) | def smart_request(method, url, retry=3, timeout=30, thread=True, code=-1...
class Events (line 145) | class Events:
method __init__ (line 159) | def __init__(self):
method __call__ (line 181) | def __call__(self, cfg):
FILE: ultralytics/nn/autobackend.py
function check_class_names (line 24) | def check_class_names(names):
class AutoBackend (line 41) | class AutoBackend(nn.Module):
method __init__ (line 43) | def __init__(self,
method forward (line 295) | def forward(self, im, augment=False, visualize=False):
method from_numpy (line 402) | def from_numpy(self, x):
method warmup (line 414) | def warmup(self, imgsz=(1, 3, 640, 640)):
method _apply_default_class_names (line 431) | def _apply_default_class_names(data):
method _model_type (line 438) | def _model_type(p='path/to/model.pt'):
FILE: ultralytics/nn/autoshape.py
class AutoShape (line 26) | class AutoShape(nn.Module):
method __init__ (line 36) | def __init__(self, model, verbose=True):
method _apply (line 50) | def _apply(self, fn):
method forward (line 62) | def forward(self, ims, size=640, augment=False, profile=False):
class Detections (line 126) | class Detections:
method __init__ (line 129) | def __init__(self, ims, pred, files, times=(0, 0, 0), names=None, shap...
method _run (line 147) | def _run(self, pprint=False, show=False, save=False, crop=False, rende...
method show (line 193) | def show(self, labels=True):
method save (line 197) | def save(self, labels=True, save_dir='runs/detect/exp', exist_ok=False):
method crop (line 202) | def crop(self, save=True, save_dir='runs/detect/exp', exist_ok=False):
method render (line 207) | def render(self, labels=True):
method pandas (line 212) | def pandas(self):
method tolist (line 223) | def tolist(self):
method print (line 232) | def print(self):
method __len__ (line 236) | def __len__(self): # override len(results)
method __str__ (line 239) | def __str__(self): # override print(results)
method __repr__ (line 242) | def __repr__(self):
FILE: ultralytics/nn/modules/block.py
class DFL (line 17) | class DFL(nn.Module):
method __init__ (line 23) | def __init__(self, c1=16):
method forward (line 31) | def forward(self, x):
class Proto (line 38) | class Proto(nn.Module):
method __init__ (line 41) | def __init__(self, c1, c_=256, c2=32): # ch_in, number of protos, num...
method forward (line 48) | def forward(self, x):
class HGStem (line 53) | class HGStem(nn.Module):
method __init__ (line 58) | def __init__(self, c1, cm, c2):
method forward (line 67) | def forward(self, x):
class HGBlock (line 81) | class HGBlock(nn.Module):
method __init__ (line 86) | def __init__(self, c1, cm, c2, k=3, n=6, lightconv=False, shortcut=Fal...
method forward (line 94) | def forward(self, x):
class SPP (line 102) | class SPP(nn.Module):
method __init__ (line 105) | def __init__(self, c1, c2, k=(5, 9, 13)):
method forward (line 113) | def forward(self, x):
class SPPF (line 119) | class SPPF(nn.Module):
method __init__ (line 122) | def __init__(self, c1, c2, k=5): # equivalent to SPP(k=(5, 9, 13))
method forward (line 129) | def forward(self, x):
class C1 (line 137) | class C1(nn.Module):
method __init__ (line 140) | def __init__(self, c1, c2, n=1): # ch_in, ch_out, number
method forward (line 145) | def forward(self, x):
class C2 (line 151) | class C2(nn.Module):
method __init__ (line 154) | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ...
method forward (line 162) | def forward(self, x):
class C2f (line 168) | class C2f(nn.Module):
method __init__ (line 171) | def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5): # ch_in,...
method forward (line 178) | def forward(self, x):
method forward_split (line 184) | def forward_split(self, x):
class C3 (line 191) | class C3(nn.Module):
method __init__ (line 194) | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ...
method forward (line 202) | def forward(self, x):
class C3x (line 207) | class C3x(C3):
method __init__ (line 210) | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
class RepC3 (line 217) | class RepC3(nn.Module):
method __init__ (line 220) | def __init__(self, c1, c2, n=3, e=1.0):
method forward (line 228) | def forward(self, x):
class C3TR (line 233) | class C3TR(C3):
method __init__ (line 236) | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
class C3Ghost (line 243) | class C3Ghost(C3):
method __init__ (line 246) | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
class GhostBottleneck (line 253) | class GhostBottleneck(nn.Module):
method __init__ (line 256) | def __init__(self, c1, c2, k=3, s=1): # ch_in, ch_out, kernel, stride
method forward (line 266) | def forward(self, x):
class Bottleneck (line 271) | class Bottleneck(nn.Module):
method __init__ (line 274) | def __init__(self, c1, c2, shortcut=True, g=1, k=(3, 3), e=0.5): # ch...
method forward (line 281) | def forward(self, x):
class BottleneckCSP (line 286) | class BottleneckCSP(nn.Module):
method __init__ (line 289) | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ...
method forward (line 300) | def forward(self, x):
FILE: ultralytics/nn/modules/conv.py
function autopad (line 16) | def autopad(k, p=None, d=1): # kernel, padding, dilation
class Conv (line 25) | class Conv(nn.Module):
method __init__ (line 29) | def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True):
method forward (line 36) | def forward(self, x):
method forward_fuse (line 40) | def forward_fuse(self, x):
class Conv2 (line 45) | class Conv2(Conv):
method __init__ (line 48) | def __init__(self, c1, c2, k=3, s=1, p=None, g=1, d=1, act=True):
method forward (line 53) | def forward(self, x):
method fuse_convs (line 57) | def fuse_convs(self):
class LightConv (line 66) | class LightConv(nn.Module):
method __init__ (line 71) | def __init__(self, c1, c2, k=1, act=nn.ReLU()):
method forward (line 77) | def forward(self, x):
class DWConv (line 82) | class DWConv(Conv):
method __init__ (line 85) | def __init__(self, c1, c2, k=1, s=1, d=1, act=True): # ch_in, ch_out,...
class DWConvTranspose2d (line 89) | class DWConvTranspose2d(nn.ConvTranspose2d):
method __init__ (line 92) | def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0): # ch_in, ch_out, ke...
class ConvTranspose (line 96) | class ConvTranspose(nn.Module):
method __init__ (line 100) | def __init__(self, c1, c2, k=2, s=2, p=0, bn=True, act=True):
method forward (line 107) | def forward(self, x):
method forward_fuse (line 111) | def forward_fuse(self, x):
class Focus (line 116) | class Focus(nn.Module):
method __init__ (line 119) | def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in,...
method forward (line 124) | def forward(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2)
class GhostConv (line 129) | class GhostConv(nn.Module):
method __init__ (line 132) | def __init__(self, c1, c2, k=1, s=1, g=1, act=True): # ch_in, ch_out,...
method forward (line 138) | def forward(self, x):
class RepConv (line 144) | class RepConv(nn.Module):
method __init__ (line 150) | def __init__(self, c1, c2, k=3, s=1, p=1, g=1, d=1, act=True, bn=False...
method forward_fuse (line 162) | def forward_fuse(self, x):
method forward (line 166) | def forward(self, x):
method get_equivalent_kernel_bias (line 171) | def get_equivalent_kernel_bias(self):
method _avg_to_3x3_tensor (line 177) | def _avg_to_3x3_tensor(self, avgp):
method _pad_1x1_to_3x3_tensor (line 186) | def _pad_1x1_to_3x3_tensor(self, kernel1x1):
method _fuse_bn_tensor (line 192) | def _fuse_bn_tensor(self, branch):
method fuse_convs (line 219) | def fuse_convs(self):
class ChannelAttention (line 245) | class ChannelAttention(nn.Module):
method __init__ (line 248) | def __init__(self, channels: int) -> None:
method forward (line 254) | def forward(self, x: torch.Tensor) -> torch.Tensor:
class SpatialAttention (line 258) | class SpatialAttention(nn.Module):
method __init__ (line 261) | def __init__(self, kernel_size=7):
method forward (line 269) | def forward(self, x):
class CBAM (line 274) | class CBAM(nn.Module):
method __init__ (line 277) | def __init__(self, c1, kernel_size=7): # ch_in, kernels
method forward (line 282) | def forward(self, x):
class Concat (line 287) | class Concat(nn.Module):
method __init__ (line 290) | def __init__(self, dimension=1):
method forward (line 295) | def forward(self, x):
FILE: ultralytics/nn/modules/head.py
class Detect (line 22) | class Detect(nn.Module):
method __init__ (line 30) | def __init__(self, nc=80, ch=()): # detection layer
method forward (line 43) | def forward(self, x):
method bias_init (line 64) | def bias_init(self):
class Segment (line 74) | class Segment(Detect):
method __init__ (line 77) | def __init__(self, nc=80, nm=32, npr=256, ch=()):
method forward (line 88) | def forward(self, x):
class Pose (line 100) | class Pose(Detect):
method __init__ (line 103) | def __init__(self, nc=80, kpt_shape=(17, 3), ch=()):
method forward (line 113) | def forward(self, x):
method kpts_decode (line 123) | def kpts_decode(self, bs, kpts):
class Classify (line 141) | class Classify(nn.Module):
method __init__ (line 144) | def __init__(self, c1, c2, k=1, s=1, p=None, g=1): # ch_in, ch_out, k...
method forward (line 152) | def forward(self, x):
class RTDETRDecoder (line 160) | class RTDETRDecoder(nn.Module):
method __init__ (line 162) | def __init__(
method forward (line 220) | def forward(self, x, batch=None):
method _generate_anchors (line 253) | def _generate_anchors(self, shapes, grid_size=0.05, dtype=torch.float3...
method _get_encoder_input (line 272) | def _get_encoder_input(self, x):
method _get_decoder_input (line 289) | def _get_decoder_input(self, feats, shapes, dn_embed=None, dn_bbox=None):
method _reset_parameters (line 328) | def _reset_parameters(self):
FILE: ultralytics/nn/modules/transformer.py
class TransformerEncoderLayer (line 20) | class TransformerEncoderLayer(nn.Module):
method __init__ (line 23) | def __init__(self, c1, cm=2048, num_heads=8, dropout=0.0, act=nn.GELU(...
method with_pos_embed (line 39) | def with_pos_embed(self, tensor, pos=None):
method forward_post (line 43) | def forward_post(self, src, src_mask=None, src_key_padding_mask=None, ...
method forward_pre (line 53) | def forward_pre(self, src, src_mask=None, src_key_padding_mask=None, p...
method forward (line 63) | def forward(self, src, src_mask=None, src_key_padding_mask=None, pos=N...
class AIFI (line 70) | class AIFI(TransformerEncoderLayer):
method __init__ (line 72) | def __init__(self, c1, cm=2048, num_heads=8, dropout=0, act=nn.GELU(),...
method forward (line 75) | def forward(self, x):
method build_2d_sincos_position_embedding (line 83) | def build_2d_sincos_position_embedding(w, h, embed_dim=256, temperatur...
class TransformerLayer (line 100) | class TransformerLayer(nn.Module):
method __init__ (line 103) | def __init__(self, c, num_heads):
method forward (line 113) | def forward(self, x):
class TransformerBlock (line 120) | class TransformerBlock(nn.Module):
method __init__ (line 123) | def __init__(self, c1, c2, num_heads, num_layers):
method forward (line 133) | def forward(self, x):
class MLPBlock (line 142) | class MLPBlock(nn.Module):
method __init__ (line 144) | def __init__(self, embedding_dim, mlp_dim, act=nn.GELU):
method forward (line 150) | def forward(self, x: torch.Tensor) -> torch.Tensor:
class MLP (line 154) | class MLP(nn.Module):
method __init__ (line 157) | def __init__(self, input_dim, hidden_dim, output_dim, num_layers):
method forward (line 163) | def forward(self, x):
class LayerNorm2d (line 171) | class LayerNorm2d(nn.Module):
method __init__ (line 173) | def __init__(self, num_channels, eps=1e-6):
method forward (line 179) | def forward(self, x):
class MSDeformAttn (line 187) | class MSDeformAttn(nn.Module):
method __init__ (line 193) | def __init__(self, d_model=256, n_levels=4, n_heads=8, n_points=4):
method _reset_parameters (line 215) | def _reset_parameters(self):
method forward (line 232) | def forward(self, query, refer_bbox, value, value_shapes, value_mask=N...
class DeformableTransformerDecoderLayer (line 273) | class DeformableTransformerDecoderLayer(nn.Module):
method __init__ (line 279) | def __init__(self, d_model=256, n_heads=8, d_ffn=1024, dropout=0., act...
method with_pos_embed (line 301) | def with_pos_embed(tensor, pos):
method forward_ffn (line 304) | def forward_ffn(self, tgt):
method forward (line 310) | def forward(self, embed, refer_bbox, feats, shapes, padding_mask=None,...
class DeformableTransformerDecoder (line 330) | class DeformableTransformerDecoder(nn.Module):
method __init__ (line 335) | def __init__(self, hidden_dim, decoder_layer, num_layers, eval_idx=-1):
method forward (line 342) | def forward(
FILE: ultralytics/nn/modules/utils.py
function _get_clones (line 18) | def _get_clones(module, n):
function bias_init_with_prob (line 22) | def bias_init_with_prob(prior_prob=0.01):
function linear_init_ (line 27) | def linear_init_(module):
function inverse_sigmoid (line 34) | def inverse_sigmoid(x, eps=1e-5):
function multi_scale_deformable_attn_pytorch (line 41) | def multi_scale_deformable_attn_pytorch(value: torch.Tensor, value_spati...
FILE: ultralytics/nn/tasks.py
class BaseModel (line 27) | class BaseModel(nn.Module):
method forward (line 32) | def forward(self, x, *args, **kwargs):
method predict (line 47) | def predict(self, x, profile=False, visualize=False, augment=False):
method _predict_once (line 64) | def _predict_once(self, x, profile=False, visualize=False):
method _predict_augment (line 88) | def _predict_augment(self, x):
method _profile_one_layer (line 95) | def _profile_one_layer(self, m, x, dt):
method fuse (line 120) | def fuse(self, verbose=True):
method is_fused (line 147) | def is_fused(self, thresh=10):
method info (line 160) | def info(self, detailed=False, verbose=True, imgsz=640):
method _apply (line 170) | def _apply(self, fn):
method load (line 189) | def load(self, weights, verbose=True):
method loss (line 203) | def loss(self, batch, preds=None):
method init_criterion (line 217) | def init_criterion(self):
class DetectionModel (line 221) | class DetectionModel(BaseModel):
method __init__ (line 224) | def __init__(self, cfg='yolov8n.yaml', ch=3, nc=None, verbose=True): ...
method _predict_augment (line 255) | def _predict_augment(self, x):
method _descale_pred (line 271) | def _descale_pred(p, flips, scale, img_size, dim=1):
method _clip_augmented (line 281) | def _clip_augmented(self, y):
method init_criterion (line 292) | def init_criterion(self):
class SegmentationModel (line 296) | class SegmentationModel(DetectionModel):
method __init__ (line 299) | def __init__(self, cfg='yolov8n-seg.yaml', ch=3, nc=None, verbose=True):
method init_criterion (line 303) | def init_criterion(self):
method _predict_augment (line 306) | def _predict_augment(self, x):
class PoseModel (line 314) | class PoseModel(DetectionModel):
method __init__ (line 317) | def __init__(self, cfg='yolov8n-pose.yaml', ch=3, nc=None, data_kpt_sh...
method init_criterion (line 326) | def init_criterion(self):
method _predict_augment (line 329) | def _predict_augment(self, x):
class ClassificationModel (line 337) | class ClassificationModel(BaseModel):
method __init__ (line 340) | def __init__(self,
method _from_detection_model (line 350) | def _from_detection_model(self, model, nc=1000, cutoff=10):
method _from_yaml (line 366) | def _from_yaml(self, cfg, ch, nc, verbose):
method reshape_outputs (line 383) | def reshape_outputs(model, nc):
method init_criterion (line 403) | def init_criterion(self):
class RTDETRDetectionModel (line 408) | class RTDETRDetectionModel(DetectionModel):
method __init__ (line 410) | def __init__(self, cfg='rtdetr-l.yaml', ch=3, nc=None, verbose=True):
method init_criterion (line 413) | def init_criterion(self):
method loss (line 419) | def loss(self, batch, preds=None):
method predict (line 454) | def predict(self, x, profile=False, visualize=False, batch=None, augme...
class Ensemble (line 482) | class Ensemble(nn.ModuleList):
method __init__ (line 485) | def __init__(self):
method forward (line 489) | def forward(self, x, augment=False, profile=False, visualize=False):
function torch_safe_load (line 501) | def torch_safe_load(weight):
function attempt_load_weights (line 536) | def attempt_load_weights(weights, device=None, inplace=True, fuse=False):
function attempt_load_one_weight (line 576) | def attempt_load_one_weight(weight, device=None, inplace=True, fuse=False):
function parse_model (line 603) | def parse_model(d, ch, verbose=True): # model_dict, input_channels(3)
function yaml_model_load (line 679) | def yaml_model_load(path):
function guess_model_scale (line 697) | def guess_model_scale(model_path):
function guess_model_task (line 715) | def guess_model_task(model):
FILE: ultralytics/tracker/track.py
function on_predict_start (line 15) | def on_predict_start(predictor, persist=False):
function on_predict_postprocess_end (line 39) | def on_predict_postprocess_end(predictor):
function register_tracker (line 55) | def register_tracker(model, persist):
FILE: ultralytics/tracker/trackers/basetrack.py
class TrackState (line 8) | class TrackState:
class BaseTrack (line 17) | class BaseTrack:
method end_frame (line 38) | def end_frame(self):
method next_id (line 43) | def next_id():
method activate (line 48) | def activate(self, *args):
method predict (line 52) | def predict(self):
method update (line 56) | def update(self, *args, **kwargs):
method mark_lost (line 60) | def mark_lost(self):
method mark_removed (line 64) | def mark_removed(self):
method reset_id (line 69) | def reset_id():
FILE: ultralytics/tracker/trackers/bot_sort.py
class BOTrack (line 14) | class BOTrack(STrack):
method __init__ (line 17) | def __init__(self, tlwh, score, cls, feat=None, feat_history=50):
method update_features (line 28) | def update_features(self, feat):
method predict (line 39) | def predict(self):
method re_activate (line 48) | def re_activate(self, new_track, frame_id, new_id=False):
method update (line 54) | def update(self, new_track, frame_id):
method tlwh (line 61) | def tlwh(self):
method multi_predict (line 72) | def multi_predict(stracks):
method convert_coords (line 87) | def convert_coords(self, tlwh):
method tlwh_to_xywh (line 92) | def tlwh_to_xywh(tlwh):
class BOTSORT (line 101) | class BOTSORT(BYTETracker):
method __init__ (line 103) | def __init__(self, args, frame_rate=30):
method get_kalmanfilter (line 116) | def get_kalmanfilter(self):
method init_track (line 120) | def init_track(self, dets, scores, cls, img=None):
method get_dists (line 130) | def get_dists(self, tracks, detections):
method multi_predict (line 146) | def multi_predict(self, tracks):
FILE: ultralytics/tracker/trackers/byte_tracker.py
class STrack (line 10) | class STrack(BaseTrack):
method __init__ (line 13) | def __init__(self, tlwh, score, cls):
method predict (line 25) | def predict(self):
method multi_predict (line 33) | def multi_predict(stracks):
method multi_gmc (line 48) | def multi_gmc(stracks, H=np.eye(2, 3)):
method activate (line 66) | def activate(self, kalman_filter, frame_id):
method re_activate (line 79) | def re_activate(self, new_track, frame_id, new_id=False):
method update (line 93) | def update(self, new_track, frame_id):
method convert_coords (line 113) | def convert_coords(self, tlwh):
method tlwh (line 118) | def tlwh(self):
method tlbr (line 130) | def tlbr(self):
method tlwh_to_xyah (line 139) | def tlwh_to_xyah(tlwh):
method tlbr_to_tlwh (line 149) | def tlbr_to_tlwh(tlbr):
method tlwh_to_tlbr (line 156) | def tlwh_to_tlbr(tlwh):
method __repr__ (line 162) | def __repr__(self):
class BYTETracker (line 167) | class BYTETracker:
method __init__ (line 169) | def __init__(self, args, frame_rate=30):
method update (line 181) | def update(self, results, img=None):
method get_kalmanfilter (line 297) | def get_kalmanfilter(self):
method init_track (line 301) | def init_track(self, dets, scores, cls, img=None):
method get_dists (line 305) | def get_dists(self, tracks, detections):
method multi_predict (line 313) | def multi_predict(self, tracks):
method reset_id (line 317) | def reset_id(self):
method joint_stracks (line 322) | def joint_stracks(tlista, tlistb):
method sub_stracks (line 337) | def sub_stracks(tlista, tlistb):
method remove_duplicate_stracks (line 350) | def remove_duplicate_stracks(stracksa, stracksb):
FILE: ultralytics/tracker/utils/gmc.py
class GMC (line 11) | class GMC:
method __init__ (line 13) | def __init__(self, method='sparseOptFlow', downscale=2, verbose=None):
method apply (line 72) | def apply(self, raw_frame, detections=None):
method applyEcc (line 87) | def applyEcc(self, raw_frame, detections=None):
method applyFeatures (line 119) | def applyFeatures(self, raw_frame, detections=None):
method applySparseOptFlow (line 245) | def applySparseOptFlow(self, raw_frame, detections=None):
method applyFile (line 307) | def applyFile(self, raw_frame, detections=None):
FILE: ultralytics/tracker/utils/kalman_filter.py
class KalmanFilterXYAH (line 11) | class KalmanFilterXYAH:
method __init__ (line 29) | def __init__(self):
method initiate (line 45) | def initiate(self, measurement):
method predict (line 73) | def predict(self, mean, covariance):
method project (line 106) | def project(self, mean, covariance):
method multi_predict (line 132) | def multi_predict(self, mean, covariance):
method update (line 165) | def update(self, mean, covariance, measurement):
method gating_distance (line 197) | def gating_distance(self, mean, covariance, measurements, only_positio...
class KalmanFilterXYWH (line 238) | class KalmanFilterXYWH:
method __init__ (line 256) | def __init__(self):
method initiate (line 272) | def initiate(self, measurement):
method predict (line 301) | def predict(self, mean, covariance):
method project (line 333) | def project(self, mean, covariance):
method multi_predict (line 359) | def multi_predict(self, mean, covariance):
method update (line 392) | def update(self, mean, covariance, measurement):
method gating_distance (line 424) | def gating_distance(self, mean, covariance, measurements, only_positio...
FILE: ultralytics/tracker/utils/matching.py
function merge_matches (line 20) | def merge_matches(m1, m2, shape):
function _indices_to_matches (line 38) | def _indices_to_matches(cost_matrix, indices, thresh):
function linear_assignment (line 50) | def linear_assignment(cost_matrix, thresh, use_lap=True):
function ious (line 73) | def ious(atlbrs, btlbrs):
function iou_distance (line 89) | def iou_distance(atracks, btracks):
function v_iou_distance (line 109) | def v_iou_distance(atracks, btracks):
function embedding_distance (line 129) | def embedding_distance(tracks, detections, metric='cosine'):
function gate_cost_matrix (line 148) | def gate_cost_matrix(kf, cost_matrix, tracks, detections, only_position=...
function fuse_motion (line 161) | def fuse_motion(kf, cost_matrix, tracks, detections, only_position=False...
function fuse_iou (line 175) | def fuse_iou(cost_matrix, tracks, detections):
function fuse_score (line 188) | def fuse_score(cost_matrix, detections):
function bbox_ious (line 199) | def bbox_ious(box1, box2, eps=1e-7):
FILE: ultralytics/vit/rtdetr/model.py
class RTDETR (line 22) | class RTDETR:
method __init__ (line 24) | def __init__(self, model='rtdetr-l.pt') -> None:
method _new (line 36) | def _new(self, cfg: str, verbose=True):
method _load (line 47) | def _load(self, weights: str):
method load (line 53) | def load(self, weights='yolov8n.pt'):
method predict (line 63) | def predict(self, source=None, stream=False, **kwargs):
method train (line 89) | def train(self, **kwargs):
method val (line 115) | def val(self, **kwargs):
method info (line 126) | def info(self, verbose=True):
method _check_is_pytorch_model (line 130) | def _check_is_pytorch_model(self):
method fuse (line 142) | def fuse(self):
method export (line 148) | def export(self, **kwargs):
method __call__ (line 166) | def __call__(self, source=None, stream=False, **kwargs):
method __getattr__ (line 170) | def __getattr__(self, attr):
FILE: ultralytics/vit/rtdetr/predict.py
class RTDETRPredictor (line 11) | class RTDETRPredictor(BasePredictor):
method postprocess (line 13) | def postprocess(self, preds, img, orig_imgs):
method pre_transform (line 35) | def pre_transform(self, im):
FILE: ultralytics/vit/rtdetr/train.py
class RTDETRTrainer (line 14) | class RTDETRTrainer(DetectionTrainer):
method get_model (line 16) | def get_model(self, cfg=None, weights=None, verbose=True):
method build_dataset (line 23) | def build_dataset(self, img_path, mode='val', batch=None):
method get_validator (line 42) | def get_validator(self):
method preprocess_batch (line 47) | def preprocess_batch(self, batch):
function train (line 59) | def train(cfg=DEFAULT_CFG, use_python=False):
FILE: ultralytics/vit/rtdetr/val.py
class RTDETRDataset (line 18) | class RTDETRDataset(YOLODataset):
method __init__ (line 20) | def __init__(self, *args, data=None, **kwargs):
method load_image (line 24) | def load_image(self, i):
method build_transforms (line 49) | def build_transforms(self, hyp=None):
class RTDETRValidator (line 69) | class RTDETRValidator(DetectionValidator):
method build_dataset (line 71) | def build_dataset(self, img_path, mode='val', batch=None):
method postprocess (line 90) | def postprocess(self, preds):
method update_metrics (line 108) | def update_metrics(self, preds, batch):
FILE: ultralytics/vit/sam/amg.py
class MaskData (line 12) | class MaskData:
method __init__ (line 18) | def __init__(self, **kwargs) -> None:
method __setitem__ (line 25) | def __setitem__(self, key: str, item: Any) -> None:
method __delitem__ (line 31) | def __delitem__(self, key: str) -> None:
method __getitem__ (line 35) | def __getitem__(self, key: str) -> Any:
method items (line 39) | def items(self) -> ItemsView[str, Any]:
method filter (line 43) | def filter(self, keep: torch.Tensor) -> None:
method cat (line 59) | def cat(self, new_stats: 'MaskData') -> None:
method to_numpy (line 73) | def to_numpy(self) -> None:
function is_box_near_crop_edge (line 80) | def is_box_near_crop_edge(boxes: torch.Tensor,
function box_xyxy_to_xywh (line 94) | def box_xyxy_to_xywh(box_xyxy: torch.Tensor) -> torch.Tensor:
function batch_iterator (line 102) | def batch_iterator(batch_size: int, *args) -> Generator[List[Any], None,...
function mask_to_rle_pytorch (line 110) | def mask_to_rle_pytorch(tensor: torch.Tensor) -> List[Dict[str, Any]]:
function rle_to_mask (line 135) | def rle_to_mask(rle: Dict[str, Any]) -> np.ndarray:
function area_from_rle (line 149) | def area_from_rle(rle: Dict[str, Any]) -> int:
function calculate_stability_score (line 154) | def calculate_stability_score(masks: torch.Tensor, mask_threshold: float...
function build_point_grid (line 168) | def build_point_grid(n_per_side: int) -> np.ndarray:
function build_all_layer_point_grids (line 177) | def build_all_layer_point_grids(n_per_side: int, n_layers: int, scale_pe...
function generate_crop_boxes (line 182) | def generate_crop_boxes(im_size: Tuple[int, ...], n_layers: int,
function uncrop_boxes_xyxy (line 216) | def uncrop_boxes_xyxy(boxes: torch.Tensor, crop_box: List[int]) -> torch...
function uncrop_points (line 226) | def uncrop_points(points: torch.Tensor, crop_box: List[int]) -> torch.Te...
function uncrop_masks (line 236) | def uncrop_masks(masks: torch.Tensor, crop_box: List[int], orig_h: int, ...
function remove_small_regions (line 247) | def remove_small_regions(mask: np.ndarray, area_thresh: float, mode: str...
function coco_encode_rle (line 267) | def coco_encode_rle(uncompressed_rle: Dict[str, Any]) -> Dict[str, Any]:
function batched_mask_to_box (line 277) | def batched_mask_to_box(masks: torch.Tensor) -> torch.Tensor:
FILE: ultralytics/vit/sam/autosize.py
class ResizeLongestSide (line 18) | class ResizeLongestSide:
method __init__ (line 25) | def __init__(self, target_length: int) -> None:
method apply_image (line 28) | def apply_image(self, image: np.ndarray) -> np.ndarray:
method apply_coords (line 35) | def apply_coords(self, coords: np.ndarray, original_size: Tuple[int, ....
method apply_boxes (line 47) | def apply_boxes(self, boxes: np.ndarray, original_size: Tuple[int, ......
method apply_image_torch (line 55) | def apply_image_torch(self, image: torch.Tensor) -> torch.Tensor:
method apply_coords_torch (line 65) | def apply_coords_torch(self, coords: torch.Tensor, original_size: Tupl...
method apply_boxes_torch (line 77) | def apply_boxes_torch(self, boxes: torch.Tensor, original_size: Tuple[...
method get_preprocess_shape (line 86) | def get_preprocess_shape(oldh: int, oldw: int, long_side_length: int) ...
FILE: ultralytics/vit/sam/build.py
function build_sam_vit_h (line 20) | def build_sam_vit_h(checkpoint=None):
function build_sam_vit_l (line 31) | def build_sam_vit_l(checkpoint=None):
function build_sam_vit_b (line 42) | def build_sam_vit_b(checkpoint=None):
function _build_sam (line 53) | def _build_sam(
function build_sam (line 117) | def build_sam(ckpt='sam_b.pt'):
FILE: ultralytics/vit/sam/model.py
class SAM (line 13) | class SAM:
method __init__ (line 15) | def __init__(self, model='sam_b.pt') -> None:
method predict (line 23) | def predict(self, source, stream=False, **kwargs):
method train (line 34) | def train(self, **kwargs):
method val (line 38) | def val(self, **kwargs):
method __call__ (line 42) | def __call__(self, source=None, stream=False, **kwargs):
method __getattr__ (line 46) | def __getattr__(self, attr):
method info (line 51) | def info(self, detailed=False, verbose=True):
FILE: ultralytics/vit/sam/modules/decoders.py
class MaskDecoder (line 12) | class MaskDecoder(nn.Module):
method __init__ (line 14) | def __init__(
method forward (line 57) | def forward(
method predict_masks (line 94) | def predict_masks(
class MLP (line 133) | class MLP(nn.Module):
method __init__ (line 139) | def __init__(
method forward (line 153) | def forward(self, x):
FILE: ultralytics/vit/sam/modules/encoders.py
class ImageEncoderViT (line 14) | class ImageEncoderViT(nn.Module):
method __init__ (line 16) | def __init__(
method forward (line 102) | def forward(self, x: torch.Tensor) -> torch.Tensor:
class PromptEncoder (line 115) | class PromptEncoder(nn.Module):
method __init__ (line 117) | def __init__(
method get_dense_pe (line 162) | def get_dense_pe(self) -> torch.Tensor:
method _embed_points (line 173) | def _embed_points(
method _embed_boxes (line 193) | def _embed_boxes(self, boxes: torch.Tensor) -> torch.Tensor:
method _embed_masks (line 202) | def _embed_masks(self, masks: torch.Tensor) -> torch.Tensor:
method _get_batch_size (line 206) | def _get_batch_size(
method _get_device (line 224) | def _get_device(self) -> torch.device:
method forward (line 227) | def forward(
class PositionEmbeddingRandom (line 270) | class PositionEmbeddingRandom(nn.Module):
method __init__ (line 275) | def __init__(self, num_pos_feats: int = 64, scale: Optional[float] = N...
method _pe_encoding (line 284) | def _pe_encoding(self, coords: torch.Tensor) -> torch.Tensor:
method forward (line 293) | def forward(self, size: Tuple[int, int]) -> torch.Tensor:
method forward_with_coords (line 306) | def forward_with_coords(self, coords_input: torch.Tensor, image_size: ...
class Block (line 314) | class Block(nn.Module):
method __init__ (line 317) | def __init__(
method forward (line 361) | def forward(self, x: torch.Tensor) -> torch.Tensor:
class Attention (line 380) | class Attention(nn.Module):
method __init__ (line 383) | def __init__(
method forward (line 416) | def forward(self, x: torch.Tensor) -> torch.Tensor:
function window_partition (line 435) | def window_partition(x: torch.Tensor, window_size: int) -> Tuple[torch.T...
function window_unpartition (line 459) | def window_unpartition(windows: torch.Tensor, window_size: int, pad_hw: ...
function get_rel_pos (line 483) | def get_rel_pos(q_size: int, k_size: int, rel_pos: torch.Tensor) -> torc...
function add_decomposed_rel_pos (line 516) | def add_decomposed_rel_pos(
class PatchEmbed (line 554) | class PatchEmbed(nn.Module):
method __init__ (line 559) | def __init__(
method forward (line 579) | def forward(self, x: torch.Tensor) -> torch.Tensor:
FILE: ultralytics/vit/sam/modules/mask_generator.py
class SamAutomaticMaskGenerator (line 23) | class SamAutomaticMaskGenerator:
method __init__ (line 25) | def __init__(
method __call__ (line 121) | def __call__(self, image: np.ndarray, augment=False, visualize=False) ...
method generate (line 125) | def generate(self, image: np.ndarray) -> List[Dict[str, Any]]:
method _generate_masks (line 183) | def _generate_masks(self, image: np.ndarray) -> MaskData:
method _process_crop (line 209) | def _process_crop(
method _process_batch (line 250) | def _process_batch(
method postprocess_small_regions (line 307) | def postprocess_small_regions(mask_data: MaskData, min_area: int, nms_...
FILE: ultralytics/vit/sam/modules/prompt_predictor.py
class PromptPredictor (line 12) | class PromptPredictor:
method __init__ (line 14) | def __init__(self, sam_model: Sam) -> None:
method set_image (line 27) | def set_image(self, image: np.ndarray, image_format: str = 'RGB') -> N...
method set_torch_image (line 49) | def set_torch_image(self, transformed_image: torch.Tensor, original_im...
method predict (line 73) | def predict(
method predict_torch (line 148) | def predict_torch(
method get_image_embedding (line 220) | def get_image_embedding(self) -> torch.Tensor:
method device (line 232) | def device(self) -> torch.device:
method reset_image (line 235) | def reset_image(self) -> None:
FILE: ultralytics/vit/sam/modules/sam.py
class Sam (line 19) | class Sam(nn.Module):
method __init__ (line 23) | def __init__(self,
method device (line 53) | def device(self) -> Any:
method forward (line 57) | def forward(
method postprocess_masks (line 133) | def postprocess_masks(
method preprocess (line 164) | def preprocess(self, x: torch.Tensor) -> torch.Tensor:
FILE: ultralytics/vit/sam/modules/transformer.py
class TwoWayTransformer (line 12) | class TwoWayTransformer(nn.Module):
method __init__ (line 14) | def __init__(
method forward (line 56) | def forward(
class TwoWayAttentionBlock (line 103) | class TwoWayAttentionBlock(nn.Module):
method __init__ (line 105) | def __init__(
method forward (line 142) | def forward(self, queries: Tensor, keys: Tensor, query_pe: Tensor, key...
class Attention (line 176) | class Attention(nn.Module):
method __init__ (line 182) | def __init__(
method _separate_heads (line 199) | def _separate_heads(self, x: Tensor, num_heads: int) -> Tensor:
method _recombine_heads (line 205) | def _recombine_heads(self, x: Tensor) -> Tensor:
method forward (line 211) | def forward(self, q: Tensor, k: Tensor, v: Tensor) -> Tensor:
FILE: ultralytics/vit/sam/predict.py
class Predictor (line 13) | class Predictor(BasePredictor):
method preprocess (line 15) | def preprocess(self, im):
method setup_model (line 23) | def setup_model(self, model):
method postprocess (line 38) | def postprocess(self, preds, path, orig_imgs):
FILE: ultralytics/vit/utils/loss.py
class DETRLoss (line 12) | class DETRLoss(nn.Module):
method __init__ (line 14) | def __init__(self,
method _get_loss_class (line 48) | def _get_loss_class(self, pred_scores, targets, gt_scores, num_gts, po...
method _get_loss_bbox (line 69) | def _get_loss_bbox(self, pred_bboxes, gt_bboxes, postfix=''):
method _get_loss_mask (line 87) | def _get_loss_mask(self, masks, gt_mask, match_indices, postfix=''):
method _dice_loss (line 107) | def _dice_loss(self, inputs, targets, num_gts):
method _get_loss_aux (line 116) | def _get_loss_aux(self,
method _get_index (line 165) | def _get_index(self, match_indices):
method _get_assigned_bboxes (line 171) | def _get_assigned_bboxes(self, pred_bboxes, gt_bboxes, match_indices):
method _get_loss (line 180) | def _get_loss(self,
method forward (line 218) | def forward(self, pred_bboxes, pred_scores, batch, postfix='', **kwargs):
class RTDETRDetectionLoss (line 249) | class RTDETRDetectionLoss(DETRLoss):
method forward (line 251) | def forward(self, preds, batch, dn_bboxes=None, dn_scores=None, dn_met...
method get_dn_match_indices (line 271) | def get_dn_match_indices(dn_pos_idx, dn_num_group, gt_groups):
FILE: ultralytics/vit/utils/ops.py
class HungarianMatcher (line 12) | class HungarianMatcher(nn.Module):
method __init__ (line 34) | def __init__(self, cost_gain=None, use_fl=True, with_mask=False, num_s...
method forward (line 45) | def forward(self, pred_bboxes, pred_scores, gt_bboxes, gt_cls, gt_grou...
method _cost_mask (line 113) | def _cost_mask(self, bs, num_gts, masks=None, gt_mask=None):
function get_cdn_group (line 143) | def get_cdn_group(batch,
function inverse_sigmoid (line 257) | def inverse_sigmoid(x, eps=1e-6):
FILE: ultralytics/yolo/cfg/__init__.py
function cfg2dict (line 79) | def cfg2dict(cfg):
function get_cfg (line 96) | def get_cfg(cfg: Union[str, Path, Dict, SimpleNamespace] = DEFAULT_CFG_D...
function _handle_deprecation (line 147) | def _handle_deprecation(custom):
function check_cfg_mismatch (line 166) | def check_cfg_mismatch(base: Dict, custom: Dict, e=None):
function merge_equals_args (line 188) | def merge_equals_args(args: List[str]) -> List[str]:
function handle_yolo_hub (line 215) | def handle_yolo_hub(args: List[str]) -> None:
function handle_yolo_settings (line 239) | def handle_yolo_settings(args: List[str]) -> None:
function entrypoint (line 260) | def entrypoint(debug=''):
function copy_default_cfg (line 409) | def copy_default_cfg():
FILE: ultralytics/yolo/data/annotator.py
function auto_annotate (line 8) | def auto_annotate(data, det_model='yolov8x.pt', sam_model='sam_b.pt', de...
FILE: ultralytics/yolo/data/augment.py
class BaseTransform (line 23) | class BaseTransform:
method __init__ (line 25) | def __init__(self) -> None:
method apply_image (line 28) | def apply_image(self, labels):
method apply_instances (line 32) | def apply_instances(self, labels):
method apply_semantic (line 36) | def apply_semantic(self, labels):
method __call__ (line 40) | def __call__(self, labels):
class Compose (line 47) | class Compose:
method __init__ (line 49) | def __init__(self, transforms):
method __call__ (line 53) | def __call__(self, data):
method append (line 59) | def append(self, transform):
method tolist (line 63) | def tolist(self):
method __repr__ (line 67) | def __repr__(self):
class BaseMixTransform (line 77) | class BaseMixTransform:
method __init__ (line 80) | def __init__(self, dataset, pre_transform=None, p=0.0) -> None:
method __call__ (line 85) | def __call__(self, labels):
method _mix_transform (line 108) | def _mix_transform(self, labels):
method get_indexes (line 112) | def get_indexes(self):
class Mosaic (line 117) | class Mosaic(BaseMixTransform):
method __init__ (line 131) | def __init__(self, dataset, imgsz=640, p=1.0, n=4):
method get_indexes (line 141) | def get_indexes(self, buffer=True):
method _mix_transform (line 148) | def _mix_transform(self, labels):
method _mosaic4 (line 154) | def _mosaic4(self, labels):
method _mosaic9 (line 190) | def _mosaic9(self, labels):
method _update_labels (line 239) | def _update_labels(labels, padw, padh):
method _cat_labels (line 247) | def _cat_labels(self, mosaic_labels):
class MixUp (line 270) | class MixUp(BaseMixTransform):
method __init__ (line 272) | def __init__(self, dataset, pre_transform=None, p=0.0) -> None:
method get_indexes (line 275) | def get_indexes(self):
method _mix_transform (line 279) | def _mix_transform(self, labels):
class RandomPerspective (line 289) | class RandomPerspective:
method __init__ (line 291) | def __init__(self,
method affine_transform (line 308) | def affine_transform(self, img, border):
method apply_bboxes (line 348) | def apply_bboxes(self, bboxes, M):
method apply_segments (line 373) | def apply_segments(self, segments, M):
method apply_keypoints (line 398) | def apply_keypoints(self, keypoints, M):
method __call__ (line 421) | def __call__(self, labels):
method box_candidates (line 471) | def box_candidates(self, box1, box2, wh_thr=2, ar_thr=100, area_thr=0....
class RandomHSV (line 479) | class RandomHSV:
method __init__ (line 481) | def __init__(self, hgain=0.5, sgain=0.5, vgain=0.5) -> None:
method __call__ (line 486) | def __call__(self, labels):
class RandomFlip (line 504) | class RandomFlip:
method __init__ (line 506) | def __init__(self, p=0.5, direction='horizontal', flip_idx=None) -> None:
method __call__ (line 514) | def __call__(self, labels):
class LetterBox (line 538) | class LetterBox:
method __init__ (line 541) | def __init__(self, new_shape=(640, 640), auto=False, scaleFill=False, ...
method __call__ (line 549) | def __call__(self, labels=None, image=None):
method _update_labels (line 595) | def _update_labels(self, labels, ratio, padw, padh):
class CopyPaste (line 604) | class CopyPaste:
method __init__ (line 606) | def __init__(self, p=0.5) -> None:
method __call__ (line 609) | def __call__(self, labels):
class Albumentations (line 644) | class Albumentations:
method __init__ (line 646) | def __init__(self, p=1.0):
method __call__ (line 672) | def __call__(self, labels):
class Format (line 692) | class Format:
method __init__ (line 694) | def __init__(self,
method __call__ (line 710) | def __call__(self, labels):
method _format_img (line 740) | def _format_img(self, img):
method _format_segments (line 748) | def _format_segments(self, instances, cls, w, h):
function v8_transforms (line 762) | def v8_transforms(dataset, imgsz, hyp, stretch=False):
function classify_transforms (line 794) | def classify_transforms(size=224, mean=(0.0, 0.0, 0.0), std=(1.0, 1.0, 1...
function hsv2colorjitter (line 804) | def hsv2colorjitter(h, s, v):
function classify_albumentations (line 809) | def classify_albumentations(
class ClassifyLetterBox (line 853) | class ClassifyLetterBox:
method __init__ (line 855) | def __init__(self, size=(640, 640), auto=False, stride=32):
method __call__ (line 862) | def __call__(self, im): # im = np.array HWC
class CenterCrop (line 873) | class CenterCrop:
method __init__ (line 875) | def __init__(self, size=640):
method __call__ (line 880) | def __call__(self, im): # im = np.array HWC
class ToTensor (line 887) | class ToTensor:
method __init__ (line 889) | def __init__(self, half=False):
method __call__ (line 894) | def __call__(self, im): # im = np.array HWC in BGR order
FILE: ultralytics/yolo/data/base.py
class BaseDataset (line 22) | class BaseDataset(Dataset):
method __init__ (line 50) | def __init__(self,
method get_img_files (line 98) | def get_img_files(self, img_path):
method update_labels (line 124) | def update_labels(self, include_class: Optional[list]):
method load_image (line 143) | def load_image(self, i):
method cache_images (line 172) | def cache_images(self, cache):
method cache_images_to_disk (line 188) | def cache_images_to_disk(self, i):
method check_cache_ram (line 194) | def check_cache_ram(self, safety_margin=0.5):
method set_rectangle (line 212) | def set_rectangle(self):
method __getitem__ (line 237) | def __getitem__(self, index):
method get_image_and_label (line 241) | def get_image_and_label(self, index):
method __len__ (line 252) | def __len__(self):
method update_labels_info (line 256) | def update_labels_info(self, label):
method build_transforms (line 260) | def build_transforms(self, hyp=None):
method get_labels (line 272) | def get_labels(self):
FILE: ultralytics/yolo/data/build.py
class InfiniteDataLoader (line 22) | class InfiniteDataLoader(dataloader.DataLoader):
method __init__ (line 25) | def __init__(self, *args, **kwargs):
method __len__ (line 31) | def __len__(self):
method __iter__ (line 35) | def __iter__(self):
method reset (line 40) | def reset(self):
class _RepeatSampler (line 47) | class _RepeatSampler:
method __init__ (line 55) | def __init__(self, sampler):
method __iter__ (line 59) | def __iter__(self):
function seed_worker (line 65) | def seed_worker(worker_id): # noqa
function build_yolo_dataset (line 72) | def build_yolo_dataset(cfg, img_path, batch, data, mode='train', rect=Fa...
function build_dataloader (line 93) | def build_dataloader(dataset, batch, workers, shuffle=True, rank=-1):
function check_source (line 112) | def check_source(source):
function load_inference_source (line 138) | def load_inference_source(source=None, imgsz=640, vid_stride=1):
FILE: ultralytics/yolo/data/converter.py
function coco91_to_coco80_class (line 13) | def coco91_to_coco80_class():
function convert_coco (line 28) | def convert_coco(labels_dir='../coco/annotations/', use_segments=False, ...
function rle2polygon (line 118) | def rle2polygon(segmentation):
function min_index (line 146) | def min_index(arr1, arr2):
function merge_multi_segment (line 161) | def merge_multi_segment(segments):
function delete_dsstore (line 212) | def delete_dsstore(path='../datasets'):
FILE: ultralytics/yolo/data/dataloaders/stream_loaders.py
class SourceTypes (line 24) | class SourceTypes:
class LoadStreams (line 31) | class LoadStreams:
method __init__ (line 33) | def __init__(self, sources='file.streams', imgsz=640, vid_stride=1):
method update (line 73) | def update(self, i, cap, stream):
method __iter__ (line 89) | def __iter__(self):
method __next__ (line 94) | def __next__(self):
method __len__ (line 104) | def __len__(self):
class LoadScreenshots (line 109) | class LoadScreenshots:
method __init__ (line 111) | def __init__(self, source, imgsz=640):
method __iter__ (line 138) | def __iter__(self):
method __next__ (line 142) | def __next__(self):
class LoadImages (line 151) | class LoadImages:
method __init__ (line 153) | def __init__(self, path, imgsz=640, vid_stride=1):
method __iter__ (line 189) | def __iter__(self):
method __next__ (line 194) | def __next__(self):
method _new_video (line 229) | def _new_video(self, path):
method _cv2_rotate (line 239) | def _cv2_rotate(self, im):
method __len__ (line 249) | def __len__(self):
class LoadPilAndNumpy (line 254) | class LoadPilAndNumpy:
method __init__ (line 256) | def __init__(self, im0, imgsz=640):
method _single_check (line 268) | def _single_check(im):
method __len__ (line 278) | def __len__(self):
method __next__ (line 282) | def __next__(self):
method __iter__ (line 289) | def __iter__(self):
class LoadTensor (line 295) | class LoadTensor:
method __init__ (line 297) | def __init__(self, imgs) -> None:
method __iter__ (line 302) | def __iter__(self):
method __next__ (line 307) | def __next__(self):
method __len__ (line 314) | def __len__(self):
function autocast_list (line 319) | def autocast_list(source):
function get_best_youtube_url (line 339) | def get_best_youtube_url(url, use_pafy=True):
FILE: ultralytics/yolo/data/dataloaders/v5augmentations.py
class Albumentations (line 24) | class Albumentations:
method __init__ (line 26) | def __init__(self, size=640):
method __call__ (line 51) | def __call__(self, im, labels, p=1.0):
function normalize (line 59) | def normalize(x, mean=IMAGENET_MEAN, std=IMAGENET_STD, inplace=False):
function denormalize (line 64) | def denormalize(x, mean=IMAGENET_MEAN, std=IMAGENET_STD):
function augment_hsv (line 71) | def augment_hsv(im, hgain=0.5, sgain=0.5, vgain=0.5):
function hist_equalize (line 87) | def hist_equalize(im, clahe=True, bgr=False):
function replicate (line 98) | def replicate(im, labels):
function letterbox (line 115) | def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True...
function random_perspective (line 148) | def random_perspective(im,
function copy_paste (line 244) | def copy_paste(im, labels, segments, p=0.5):
function cutout (line 269) | def cutout(im, labels, p=0.5):
function mixup (line 296) | def mixup(im, labels, im2, labels2):
function box_candidates (line 304) | def box_candidates(box1, box2, wh_thr=2, ar_thr=100, area_thr=0.1, eps=1...
function classify_albumentations (line 312) | def classify_albumentations(
function classify_transforms (line 354) | def classify_transforms(size=224):
class LetterBox (line 361) | class LetterBox:
method __init__ (line 363) | def __init__(self, size=(640, 640), auto=False, stride=32):
method __call__ (line 370) | def __call__(self, im): # im = np.array HWC
class CenterCrop (line 381) | class CenterCrop:
method __init__ (line 383) | def __init__(self, size=640):
method __call__ (line 388) | def __call__(self, im): # im = np.array HWC
class ToTensor (line 395) | class ToTensor:
method __init__ (line 397) | def __init__(self, half=False):
method __call__ (line 402) | def __call__(self, im): # im = np.array HWC in BGR order
FILE: ultralytics/yolo/data/dataloaders/v5loader.py
function get_hash (line 52) | def get_hash(paths):
function exif_size (line 60) | def exif_size(img):
function exif_transpose (line 70) | def exif_transpose(image):
function seed_worker (line 96) | def seed_worker(worker_id):
function create_dataloader (line 103) | def create_dataloader(path,
class InfiniteDataLoader (line 158) | class InfiniteDataLoader(dataloader.DataLoader):
method __init__ (line 164) | def __init__(self, *args, **kwargs):
method __len__ (line 170) | def __len__(self):
method __iter__ (line 174) | def __iter__(self):
class _RepeatSampler (line 180) | class _RepeatSampler:
method __init__ (line 187) | def __init__(self, sampler):
method __iter__ (line 191) | def __iter__(self):
class LoadScreenshots (line 197) | class LoadScreenshots:
method __init__ (line 199) | def __init__(self, source, img_size=640, stride=32, auto=True, transfo...
method __iter__ (line 228) | def __iter__(self):
method __next__ (line 232) | def __next__(self):
class LoadImages (line 247) | class LoadImages:
method __init__ (line 249) | def __init__(self, path, img_size=640, stride=32, auto=True, transform...
method __iter__ (line 285) | def __iter__(self):
method __next__ (line 290) | def __next__(self):
method _new_video (line 331) | def _new_video(self, path):
method _cv2_rotate (line 339) | def _cv2_rotate(self, im):
method __len__ (line 349) | def __len__(self):
class LoadStreams (line 354) | class LoadStreams:
method __init__ (line 356) | def __init__(self, sources='file.streams', img_size=640, stride=32, au...
method update (line 401) | def update(self, i, cap, stream):
method __iter__ (line 417) | def __iter__(self):
method __next__ (line 422) | def __next__(self):
method __len__ (line 439) | def __len__(self):
function img2label_paths (line 444) | def img2label_paths(img_paths):
class LoadImagesAndLabels (line 450) | class LoadImagesAndLabels(Dataset):
method __init__ (line 455) | def __init__(self,
method check_cache_ram (line 606) | def check_cache_ram(self, safety_margin=0.1, prefix=''):
method cache_labels (line 623) | def cache_labels(self, path=Path('./labels.cache'), prefix=''):
method __len__ (line 663) | def __len__(self):
method __getitem__ (line 667) | def __getitem__(self, index):
method load_image (line 742) | def load_image(self, i):
method cache_images_to_disk (line 759) | def cache_images_to_disk(self, i):
method load_mosaic (line 765) | def load_mosaic(self, index):
method load_mosaic9 (line 823) | def load_mosaic9(self, index):
method collate_fn (line 901) | def collate_fn(batch):
method collate_fn_old (line 917) | def collate_fn_old(batch):
function flatten_recursive (line 926) | def flatten_recursive(path=DATASETS_DIR / 'coco128'):
function extract_boxes (line 936) | def extract_boxes(path=DATASETS_DIR / 'coco128'): # from utils.dataload...
function autosplit (line 970) | def autosplit(path=DATASETS_DIR / 'coco128/images', weights=(0.9, 0.1, 0...
function verify_image_label (line 996) | def verify_image_label(args):
class ClassificationDataset (line 1049) | class ClassificationDataset(torchvision.datasets.ImageFolder):
method __init__ (line 1058) | def __init__(self, root, augment, imgsz, cache=False):
method __getitem__ (line 1067) | def __getitem__(self, i):
function create_classification_dataloader (line 1085) | def create_classification_dataloader(path,
FILE: ultralytics/yolo/data/dataset.py
class YOLODataset (line 19) | class YOLODataset(BaseDataset):
method __init__ (line 34) | def __init__(self, *args, data=None, use_segments=False, use_keypoints...
method cache_labels (line 41) | def cache_labels(self, path=Path('./labels.cache')):
method get_labels (line 101) | def get_labels(self):
method build_transforms (line 145) | def build_transforms(self, hyp=None):
method close_mosaic (line 163) | def close_mosaic(self, hyp):
method update_labels_info (line 170) | def update_labels_info(self, label):
method collate_fn (line 183) | def collate_fn(batch):
class ClassificationDataset (line 203) | class ClassificationDataset(torchvision.datasets.ImageFolder):
method __init__ (line 218) | def __init__(self, root, args, augment=False, cache=False):
method __getitem__ (line 248) | def __getitem__(self, i):
method __len__ (line 265) | def __len__(self) -> int:
class SemanticDataset (line 270) | class SemanticDataset(BaseDataset):
method __init__ (line 272) | def __init__(self):
FILE: ultralytics/yolo/data/dataset_wrappers.py
class MixAndRectDataset (line 9) | class MixAndRectDataset:
method __init__ (line 18) | def __init__(self, dataset):
method __len__ (line 26) | def __len__(self):
method __getitem__ (line 30) | def __getitem__(self, index):
FILE: ultralytics/yolo/data/utils.py
function img2label_paths (line 39) | def img2label_paths(img_paths):
function get_hash (line 45) | def get_hash(paths):
function exif_size (line 53) | def exif_size(img):
function verify_image_label (line 63) | def verify_image_label(args):
function polygon2mask (line 137) | def polygon2mask(imgsz, polygons, color=1, downsample_ratio=1):
function polygons2masks (line 158) | def polygons2masks(imgsz, polygons, color, downsample_ratio=1):
function polygons2masks_overlap (line 173) | def polygons2masks_overlap(imgsz, segments, downsample_ratio=1):
function check_det_dataset (line 193) | def check_det_dataset(dataset, autodownload=True):
function check_cls_dataset (line 269) | def check_cls_dataset(dataset: str, split=''):
class HUBDatasetStats (line 313) | class HUBDatasetStats():
method __init__ (line 331) | def __init__(self, path='coco128.yaml', task='detect', autodownload=Fa...
method _find_yaml (line 351) | def _find_yaml(dir):
method _unzip (line 361) | def _unzip(self, path):
method _hub_ops (line 370) | def _hub_ops(self, f):
method get_json (line 374) | def get_json(self, save=False, verbose=False):
method process_images (line 425) | def process_images(self):
function compress_one_image (line 440) | def compress_one_image(f, f_new=None, max_dim=1920, quality=50):
function delete_dsstore (line 474) | def delete_dsstore(path):
function zip_directory (line 496) | def zip_directory(dir, use_zipfile_library=True):
FILE: ultralytics/yolo/engine/exporter.py
function export_formats (line 75) | def export_formats():
function gd_outputs (line 94) | def gd_outputs(gd):
function try_export (line 103) | def try_export(inner_func):
class Exporter (line 122) | class Exporter:
method __init__ (line 131) | def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
method __call__ (line 145) | def __call__(self, model=None):
method export_torchscript (line 278) | def export_torchscript(self, prefix=colorstr('TorchScript:')):
method export_onnx (line 294) | def export_onnx(self, prefix=colorstr('ONNX:')):
method export_openvino (line 352) | def export_openvino(self, prefix=colorstr('OpenVINO:')):
method export_paddle (line 384) | def export_paddle(self, prefix=colorstr('PaddlePaddle:')):
method export_coreml (line 398) | def export_coreml(self, prefix=colorstr('CoreML:')):
method export_engine (line 440) | def export_engine(self, prefix=colorstr('TensorRT:')):
method export_saved_model (line 505) | def export_saved_model(self, prefix=colorstr('TensorFlow SavedModel:')):
method export_pb (line 551) | def export_pb(self, keras_model, prefix=colorstr('TensorFlow GraphDef:...
method export_tflite (line 567) | def export_tflite(self, keras_model, nms, agnostic_nms, prefix=colorst...
method export_edgetpu (line 582) | def export_edgetpu(self, tflite_model='', prefix=colorstr('Edge TPU:')):
method export_tfjs (line 609) | def export_tfjs(self, prefix=colorstr('TensorFlow.js:')):
method _add_tflite_metadata (line 645) | def _add_tflite_metadata(self, file):
method _pipeline_coreml (line 703) | def _pipeline_coreml(self, model, prefix=colorstr('CoreML Pipeline:')):
method add_callback (line 820) | def add_callback(self, event: str, callback):
method run_callbacks (line 826) | def run_callbacks(self, event: str):
class iOSDetectModel (line 832) | class iOSDetectModel(torch.nn.Module):
method __init__ (line 835) | def __init__(self, model, im):
method forward (line 846) | def forward(self, x):
function export (line 852) | def export(cfg=DEFAULT_CFG):
FILE: ultralytics/yolo/engine/model.py
class YOLO (line 32) | class YOLO:
method __init__ (line 73) | def __init__(self, model: Union[str, Path] = 'yolov8n.pt', task=None) ...
method __call__ (line 109) | def __call__(self, source=None, stream=False, **kwargs):
method __getattr__ (line 113) | def __getattr__(self, attr):
method is_hub_model (line 119) | def is_hub_model(model):
method _new (line 126) | def _new(self, cfg: str, task=None, verbose=True):
method _load (line 146) | def _load(self, weights: str, task=None):
method _check_is_pytorch_model (line 168) | def _check_is_pytorch_model(self):
method reset_weights (line 181) | def reset_weights(self):
method load (line 194) | def load(self, weights='yolov8n.pt'):
method info (line 204) | def info(self, detailed=False, verbose=True):
method fuse (line 215) | def fuse(self):
method predict (line 221) | def predict(self, source=None, stream=False, **kwargs):
method track (line 257) | def track(self, source=None, stream=False, persist=False, **kwargs):
method val (line 281) | def val(self, data=None, **kwargs):
method benchmark (line 310) | def benchmark(self, **kwargs):
method export (line 325) | def export(self, **kwargs):
method train (line 344) | def train(self, **kwargs):
method to (line 380) | def to(self, device):
method tune (line 390) | def tune(self,
method names (line 476) | def names(self):
method device (line 481) | def device(self):
method transforms (line 486) | def transforms(self):
method add_callback (line 490) | def add_callback(self, event: str, func):
method clear_callback (line 494) | def clear_callback(self, event: str):
method _reset_ckpt_args (line 499) | def _reset_ckpt_args(args):
method _reset_callbacks (line 504) | def _reset_callbacks(self):
FILE: ultralytics/yolo/engine/predictor.py
class BasePredictor (line 59) | class BasePredictor:
method __init__ (line 78) | def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
method get_save_dir (line 110) | def get_save_dir(self):
method preprocess (line 115) | def preprocess(self, im):
method pre_transform (line 132) | def pre_transform(self, im):
method write_results (line 144) | def write_results(self, idx, results, batch):
method postprocess (line 178) | def postprocess(self, preds, img, orig_imgs):
method __call__ (line 182) | def __call__(self, source=None, model=None, stream=False):
method predict_cli (line 190) | def predict_cli(self, source=None, model=None):
method setup_source (line 196) | def setup_source(self, source):
method stream_inference (line 210) | def stream_inference(self, source=None, model=None):
method setup_model (line 295) | def setup_model(self, model, verbose=True):
method show (line 310) | def show(self, p):
method save_preds (line 320) | def save_preds(self, vid_cap, idx, save_path):
method run_callbacks (line 341) | def run_callbacks(self, event: str):
method add_callback (line 346) | def add_callback(self, event: str, func):
FILE: ultralytics/yolo/engine/results.py
class BaseTensor (line 20) | class BaseTensor(SimpleClass):
method __init__ (line 25) | def __init__(self, data, orig_shape) -> None:
method shape (line 37) | def shape(self):
method cpu (line 41) | def cpu(self):
method numpy (line 45) | def numpy(self):
method cuda (line 49) | def cuda(self):
method to (line 53) | def to(self, *args, **kwargs):
method __len__ (line 57) | def __len__(self): # override len(results)
method __getitem__ (line 61) | def __getitem__(self, idx):
class Results (line 66) | class Results(SimpleClass):
method __init__ (line 93) | def __init__(self, orig_img, path, names, boxes=None, masks=None, prob...
method __getitem__ (line 107) | def __getitem__(self, idx):
method update (line 114) | def update(self, boxes=None, masks=None, probs=None):
method cpu (line 123) | def cpu(self):
method numpy (line 130) | def numpy(self):
method cuda (line 137) | def cuda(self):
method to (line 144) | def to(self, *args, **kwargs):
method __len__ (line 151) | def __len__(self):
method new (line 156) | def new(self):
method keys (line 161) | def keys(self):
method plot (line 165) | def plot(
method verbose (line 248) | def verbose(self):
method save_txt (line 265) | def save_txt(self, txt_file, save_conf=False):
method save_crop (line 299) | def save_crop(self, save_dir, file_name=Path('im.jpg')):
method pandas (line 320) | def pandas(self):
method tojson (line 324) | def tojson(self, normalize=False):
class Boxes (line 354) | class Boxes(BaseTensor):
method __init__ (line 386) | def __init__(self, boxes, orig_shape) -> None:
method xyxy (line 397) | def xyxy(self):
method conf (line 402) | def conf(self):
method cls (line 407) | def cls(self):
method id (line 412) | def id(self):
method xywh (line 418) | def xywh(self):
method xyxyn (line 424) | def xyxyn(self):
method xywhn (line 433) | def xywhn(self):
method boxes (line 441) | def boxes(self):
class Masks (line 447) | class Masks(BaseTensor):
method __init__ (line 470) | def __init__(self, masks, orig_shape) -> None:
method segments (line 478) | def segments(self):
method xyn (line 486) | def xyn(self):
method xy (line 494) | def xy(self):
method masks (line 501) | def masks(self):
method pandas (line 506) | def pandas(self):
class Keypoints (line 511) | class Keypoints(BaseTensor):
method __init__ (line 534) | def __init__(self, keypoints, orig_shape) -> None:
method xy (line 542) | def xy(self):
method xyn (line 547) | def xyn(self):
method conf (line 555) | def conf(self):
class Probs (line 559) | class Probs(BaseTensor):
method __init__ (line 580) | def __init__(self, probs, orig_shape=None) -> None:
method top5 (line 585) | def top5(self):
method top1 (line 591) | def top1(self):
method top5conf (line 597) | def top5conf(self):
method top1conf (line 603) | def top1conf(self):
FILE: ultralytics/yolo/engine/trainer.py
class BaseTrainer (line 37) | class BaseTrainer:
method __init__ (line 74) | def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
method add_callback (line 150) | def add_callback(self, event: str, callback):
method set_callback (line 156) | def set_callback(self, event: str, callback):
method run_callbacks (line 162) | def run_callbacks(self, event: str):
method train (line 167) | def train(self):
method _setup_ddp (line 194) | def _setup_ddp(self, world_size):
method _setup_train (line 205) | def _setup_train(self, world_size):
method _do_train (line 270) | def _do_train(self, world_size=1):
method save_model (line 406) | def save_model(self):
method get_dataset (line 434) | def get_dataset(data):
method setup_model (line 440) | def setup_model(self):
method optimizer_step (line 457) | def optimizer_step(self):
method preprocess_batch (line 467) | def preprocess_batch(self, batch):
method validate (line 473) | def validate(self):
method get_model (line 483) | def get_model(self, cfg=None, weights=None, verbose=True):
method get_validator (line 487) | def get_validator(self):
method get_dataloader (line 491) | def get_dataloader(self, dataset_path, batch_size=16, rank=0, mode='tr...
method build_dataset (line 497) | def build_dataset(self, img_path, mode='train', batch=None):
method label_loss_items (line 501) | def label_loss_items(self, loss_items=None, prefix='train'):
method set_model_attributes (line 508) | def set_model_attributes(self):
method build_targets (line 514) | def build_targets(self, preds, targets):
method progress_string (line 518) | def progress_string(self):
method plot_training_samples (line 523) | def plot_training_samples(self, batch, ni):
method plot_training_labels (line 527) | def plot_training_labels(self):
method save_metrics (line 531) | def save_metrics(self, metrics):
method plot_metrics (line 539) | def plot_metrics(self):
method on_plot (line 543) | def on_plot(self, name, data=None):
method final_eval (line 547) | def final_eval(self):
method check_resume (line 558) | def check_resume(self):
method resume_training (line 578) | def resume_training(self, ckpt):
method build_optimizer (line 609) | def build_optimizer(self, model, name='auto', lr=0.001, momentum=0.9, ...
FILE: ultralytics/yolo/engine/validator.py
class BaseValidator (line 38) | class BaseValidator:
method __init__ (line 58) | def __init__(self, dataloader=None, save_dir=None, pbar=None, args=Non...
method __call__ (line 92) | def __call__(self, trainer=None, model=None):
method add_callback (line 199) | def add_callback(self, event: str, callback):
method run_callbacks (line 203) | def run_callbacks(self, event: str):
method get_dataloader (line 208) | def get_dataloader(self, dataset_path, batch_size):
method build_dataset (line 212) | def build_dataset(self, img_path):
method preprocess (line 216) | def preprocess(self, batch):
method postprocess (line 220) | def postprocess(self, preds):
method init_metrics (line 224) | def init_metrics(self, model):
method update_metrics (line 228) | def update_metrics(self, preds, batch):
method finalize_metrics (line 232) | def finalize_metrics(self, *args, **kwargs):
method get_stats (line 236) | def get_stats(self):
method check_stats (line 240) | def check_stats(self, stats):
method print_results (line 244) | def print_results(self):
method get_desc (line 248) | def get_desc(self):
method metric_keys (line 253) | def metric_keys(self):
method on_plot (line 257) | def on_plot(self, name, data=None):
method plot_val_samples (line 262) | def plot_val_samples(self, batch, ni):
method plot_predictions (line 266) | def plot_predictions(self, batch, preds, ni):
method pred_to_json (line 270) | def pred_to_json(self, preds, batch):
method eval_json (line 274) | def eval_json(self, stats):
FILE: ultralytics/yolo/nas/model.py
class NAS (line 26) | class NAS:
method __init__ (line 28) | def __init__(self, model='yolo_nas_s.pt') -> None:
method _load (line 52) | def _load(self, weights: str):
method predict (line 56) | def predict(self, source=None, stream=False, **kwargs):
method train (line 82) | def train(self, **kwargs):
method val (line 86) | def val(self, **kwargs):
method export (line 98) | def export(self, **kwargs):
method info (line 116) | def info(self, detailed=False, verbose=True):
method __call__ (line 126) | def __call__(self, source=None, stream=False, **kwargs):
method __getattr__ (line 130) | def __getattr__(self, attr):
FILE: ultralytics/yolo/nas/predict.py
class NASPredictor (line 11) | class NASPredictor(BasePredictor):
method postprocess (line 13) | def postprocess(self, preds_in, img, orig_imgs):
FILE: ultralytics/yolo/nas/val.py
class NASValidator (line 12) | class NASValidator(DetectionValidator):
method postprocess (line 14) | def postprocess(self, preds_in):
FILE: ultralytics/yolo/utils/__init__.py
class SimpleClass (line 108) | class SimpleClass:
method __str__ (line 114) | def __str__(self):
method __repr__ (line 128) | def __repr__(self):
method __getattr__ (line 132) | def __getattr__(self, attr):
class IterableSimpleNamespace (line 138) | class IterableSimpleNamespace(SimpleNamespace):
method __iter__ (line 144) | def __iter__(self):
method __str__ (line 148) | def __str__(self):
method __getattr__ (line 152) | def __getattr__(self, attr):
method get (line 162) | def get(self, key, default=None):
function plt_settings (line 167) | def plt_settings(rcparams=None, backend='Agg'):
function set_logging (line 206) | def set_logging(name=LOGGING_NAME, verbose=True):
function emojis (line 228) | def emojis(string=''):
class EmojiFilter (line 233) | class EmojiFilter(logging.Filter):
method filter (line 241) | def filter(self, record):
function yaml_save (line 254) | def yaml_save(file='data.yaml', data=None):
function yaml_load (line 282) | def yaml_load(file='data.yaml', append_filename=False):
function yaml_print (line 304) | def yaml_print(yaml_file: Union[str, Path, dict]) -> None:
function is_colab (line 328) | def is_colab():
function is_kaggle (line 338) | def is_kaggle():
function is_jupyter (line 348) | def is_jupyter():
function is_docker (line 362) | def is_docker() -> bool:
function is_online (line 377) | def is_online() -> bool:
function is_pip_package (line 401) | def is_pip_package(filepath: str = __name__) -> bool:
function is_dir_writeable (line 420) | def is_dir_writeable(dir_path: Union[str, Path]) -> bool:
function is_pytest_running (line 433) | def is_pytest_running():
function is_github_actions_ci (line 443) | def is_github_actions_ci() -> bool:
function is_git_dir (line 453) | def is_git_dir():
function get_git_dir (line 464) | def get_git_dir():
function get_git_origin_url (line 478) | def get_git_origin_url():
function get_git_branch (line 492) | def get_git_branch():
function get_default_args (line 506) | def get_default_args(func):
function get_user_config_dir (line 519) | def get_user_config_dir(sub_dir='Ultralytics'):
function colorstr (line 554) | def colorstr(*input):
class TryExcept (line 580) | class TryExcept(contextlib.ContextDecorator):
method __init__ (line 583) | def __init__(self, msg='', verbose=True):
method __enter__ (line 588) | def __enter__(self):
method __exit__ (line 592) | def __exit__(self, exc_type, value, traceback):
function threaded (line 599) | def threaded(func):
function set_sentry (line 611) | def set_sentry():
function get_settings (line 685) | def get_settings(file=SETTINGS_YAML, version='0.0.3'):
function set_settings (line 734) | def set_settings(kwargs, file=SETTINGS_YAML):
function deprecation_warn (line 743) | def deprecation_warn(arg, new_arg, version=None):
function clean_url (line 751) | def clean_url(url):
function url2file (line 757) | def url2file(url):
FILE: ultralytics/yolo/utils/autobatch.py
function check_train_batch_size (line 15) | def check_train_batch_size(model, imgsz=640, amp=True):
function autobatch (line 32) | def autobatch(model, imgsz=640, fraction=0.67, batch_size=DEFAULT_CFG.ba...
FILE: ultralytics/yolo/utils/benchmarks.py
function benchmark (line 44) | def benchmark(model=Path(SETTINGS['weights_dir']) / 'yolov8n.pt',
class ProfileModels (line 149) | class ProfileModels:
method __init__ (line 167) | def __init__(self,
method profile (line 183) | def profile(self):
method get_files (line 224) | def get_files(self):
method get_onnx_model_info (line 239) | def get_onnx_model_info(self, onnx_file: str):
method iterative_sigma_clipping (line 243) | def iterative_sigma_clipping(self, data, sigma=2, max_iters=3):
method profile_tensorrt_model (line 253) | def profile_tensorrt_model(self, engine_file: str):
method profile_onnx_model (line 281) | def profile_onnx_model(self, onnx_file: str):
method generate_table_row (line 333) | def generate_table_row(self, model_name, t_onnx, t_engine, model_info):
method generate_results_dict (line 337) | def generate_results_dict(self, model_name, t_onnx, t_engine, model_in...
method print_table (line 346) | def print_table(self, table_rows):
FILE: ultralytics/yolo/utils/callbacks/base.py
function on_pretrain_routine_start (line 12) | def on_pretrain_routine_start(trainer):
function on_pretrain_routine_end (line 17) | def on_pretrain_routine_end(trainer):
function on_train_start (line 22) | def on_train_start(trainer):
function on_train_epoch_start (line 27) | def on_train_epoch_start(trainer):
function on_train_batch_start (line 32) | def on_train_batch_start(trainer):
function optimizer_step (line 37) | def optimizer_step(trainer):
function on_before_zero_grad (line 42) | def on_before_zero_grad(trainer):
function on_train_batch_end (line 47) | def on_train_batch_end(trainer):
function on_train_epoch_end (line 52) | def on_train_epoch_end(trainer):
function on_fit_epoch_end (line 57) | def on_fit_epoch_end(trainer):
function on_model_save (line 62) | def on_model_save(trainer):
function on_train_end (line 67) | def on_train_end(trainer):
function on_params_update (line 72) | def on_params_update(trainer):
function teardown (line 77) | def teardown(trainer):
function on_val_start (line 85) | def on_val_start(validator):
function on_val_batch_start (line 90) | def on_val_batch_start(validator):
function on_val_batch_end (line 95) | def on_val_batch_end(validator):
function on_val_end (line 100) | def on_val_end(validator):
function on_predict_start (line 108) | def on_predict_start(predictor):
function on_predict_batch_start (line 113) | def on_predict_batch_start(predictor):
function on_predict_batch_end (line 118) | def on_predict_batch_end(predictor):
function on_predict_postprocess_end (line 123) | def on_predict_postprocess_end(predictor):
function on_predict_end (line 128) | def on_predict_end(predictor):
function on_export_start (line 136) | def on_export_start(exporter):
function on_export_end (line 141) | def on_export_end(exporter):
function get_default_callbacks (line 181) | def get_default_callbacks():
function add_integration_callbacks (line 191) | def add_integration_callbacks(instance):
FILE: ultralytics/yolo/utils/callbacks/clearml.py
function _log_debug_samples (line 23) | def _log_debug_samples(files, title='Debug Samples') -> None:
function _log_plot (line 43) | def _log_plot(title, plot_path) -> None:
function on_pretrain_routine_start (line 62) | def on_pretrain_routine_start(trainer):
function on_train_epoch_end (line 87) | def on_train_epoch_end(trainer):
function on_fit_epoch_end (line 99) | def on_fit_epoch_end(trainer):
function on_val_end (line 113) | def on_val_end(validator):
function on_train_end (line 120) | def on_train_end(trainer):
FILE: ultralytics/yolo/utils/callbacks/comet.py
function _get_comet_mode (line 27) | def _get_comet_mode():
function _get_comet_model_name (line 31) | def _get_comet_model_name():
function _get_eval_batch_logging_interval (line 35) | def _get_eval_batch_logging_interval():
function _get_max_image_predictions_to_log (line 39) | def _get_max_image_predictions_to_log():
function _scale_confidence_score (line 43) | def _scale_confidence_score(score):
function _should_log_confusion_matrix (line 48) | def _should_log_confusion_matrix():
function _should_log_image_predictions (line 52) | def _should_log_image_predictions():
function _get_experiment_type (line 56) | def _get_experiment_type(mode, project_name):
function _create_experiment (line 64) | def _create_experiment(args):
function _fetch_trainer_metadata (line 84) | def _fetch_trainer_metadata(trainer):
function _scale_bounding_box_to_original_image_shape (line 105) | def _scale_bounding_box_to_original_image_shape(box, resized_image_shape...
function _format_ground_truth_annotations_for_detection (line 126) | def _format_ground_truth_annotations_for_detection(img_idx, image_path, ...
function _format_prediction_annotations_for_detection (line 153) | def _format_prediction_annotations_for_detection(image_path, metadata, c...
function _fetch_annotations (line 176) | def _fetch_annotations(img_idx, image_path, batch, prediction_metadata_m...
function _create_prediction_metadata_map (line 188) | def _create_prediction_metadata_map(model_predictions):
function _log_confusion_matrix (line 198) | def _log_confusion_matrix(experiment, trainer, curr_step, curr_epoch):
function _log_images (line 211) | def _log_images(experiment, image_paths, curr_step, annotations=None):
function _log_image_predictions (line 222) | def _log_image_predictions(experiment, validator, curr_step):
function _log_plots (line 267) | def _log_plots(experiment, trainer):
function _log_model (line 276) | def _log_model(experiment, trainer):
function on_pretrain_routine_start (line 287) | def on_pretrain_routine_start(trainer):
function on_train_epoch_end (line 295) | def on_train_epoch_end(trainer):
function on_fit_epoch_end (line 315) | def on_fit_epoch_end(trainer):
function on_train_end (line 341) | def on_train_end(trainer):
FILE: ultralytics/yolo/utils/callbacks/dvc.py
function _logger_disabled (line 33) | def _logger_disabled():
function _log_images (line 37) | def _log_images(image_path, prefix=''):
function _log_plots (line 42) | def _log_plots(plots, prefix=''):
function _log_confusion_matrix (line 50) | def _log_confusion_matrix(validator):
function on_pretrain_routine_start (line 66) | def on_pretrain_routine_start(trainer):
function on_pretrain_routine_end (line 81) | def on_pretrain_routine_end(trainer):
function on_train_start (line 85) | def on_train_start(trainer):
function on_train_epoch_start (line 90) | def on_train_epoch_start(trainer):
function on_fit_epoch_end (line 95) | def on_fit_epoch_end(trainer):
function on_train_end (line 113) | def on_train_end(trainer):
FILE: ultralytics/yolo/utils/callbacks/hub.py
function on_pretrain_routine_end (line 11) | def on_pretrain_routine_end(trainer):
function on_fit_epoch_end (line 20) | def on_fit_epoch_end(trainer):
function on_model_save (line 35) | def on_model_save(trainer):
function on_train_end (line 47) | def on_train_end(trainer):
function on_train_start (line 59) | def on_train_start(trainer):
function on_val_start (line 64) | def on_val_start(validator):
function on_predict_start (line 69) | def on_predict_start(predictor):
function on_export_start (line 74) | def on_export_start(exporter):
FILE: ultralytics/yolo/utils/callbacks/mlflow.py
function on_pretrain_routine_end (line 18) | def on_pretrain_routine_end(trainer):
function on_fit_epoch_end (line 48) | def on_fit_epoch_end(trainer):
function on_train_end (line 55) | def on_train_end(trainer):
FILE: ultralytics/yolo/utils/callbacks/neptune.py
function _log_scalars (line 21) | def _log_scalars(scalars, step=0):
function _log_images (line 28) | def _log_images(imgs_dict, group=''):
function _log_plot (line 35) | def _log_plot(title, plot_path):
function on_pretrain_routine_start (line 51) | def on_pretrain_routine_start(trainer):
function on_train_epoch_end (line 61) | def on_train_epoch_end(trainer):
function on_fit_epoch_end (line 69) | def on_fit_epoch_end(trainer):
function on_val_end (line 76) | def on_val_end(validator):
function on_train_end (line 83) | def on_train_end(trainer):
FILE: ultralytics/yolo/utils/callbacks/raytune.py
function on_fit_epoch_end (line 11) | def on_fit_epoch_end(trainer):
FILE: ultralytics/yolo/utils/callbacks/tensorboard.py
function _log_scalars (line 15) | def _log_scalars(scalars, step=0):
function on_pretrain_routine_start (line 22) | def on_pretrain_routine_start(trainer):
function on_batch_end (line 34) | def on_batch_end(trainer):
function on_fit_epoch_end (line 39) | def on_fit_epoch_end(trainer):
FILE: ultralytics/yolo/utils/callbacks/wb.py
function _log_plots (line 16) | def _log_plots(plots, step):
function on_pretrain_routine_start (line 24) | def on_pretrain_routine_start(trainer):
function on_fit_epoch_end (line 29) | def on_fit_epoch_end(trainer):
function on_train_epoch_end (line 38) | def on_train_epoch_end(trainer):
function on_train_end (line 46) | def on_train_end(trainer):
FILE: ultralytics/yolo/utils/checks.py
function is_ascii (line 27) | def is_ascii(s) -> bool:
function check_imgsz (line 44) | def check_imgsz(imgsz, stride=32, min_dim=1, max_dim=2, floor=0):
function check_version (line 91) | def check_version(current: str = '0.0.0',
function check_latest_pypi_version (line 121) | def check_latest_pypi_version(package_name='ultralytics'):
function check_pip_update_available (line 139) | def check_pip_update_available():
function check_font (line 157) | def check_font(font='Arial.ttf'):
function check_python (line 186) | def check_python(minimum: str = '3.7.0') -> bool:
function check_requirements (line 200) | def check_requirements(requirements=ROOT.parent / 'requirements.txt', ex...
function check_suffix (line 253) | def check_suffix(file='yolov8n.pt', suffix='.pt', msg=''):
function check_yolov5u_filename (line 264) | def check_yolov5u_filename(file: str, verbose: bool = True):
function check_file (line 278) | def check_file(file, suffix='', download=True, hard=True):
function check_yaml (line 304) | def check_yaml(file, suffix=('.yaml', '.yml'), hard=True):
function check_imshow (line 309) | def check_imshow(warn=False):
function check_yolo (line 324) | def check_yolo(verbose=True, device=''):
function check_amp (line 350) | def check_amp(model):
function git_describe (line 399) | def git_describe(path=ROOT): # path must be a directory
function print_args (line 408) | def print_args(args: Optional[dict] = None, show_file=True, show_func=Fa...
FILE: ultralytics/yolo/utils/dist.py
function find_free_network_port (line 15) | def find_free_network_port() -> int:
function generate_ddp_file (line 26) | def generate_ddp_file(trainer):
function generate_ddp_command (line 49) | def generate_ddp_command(world_size, trainer):
function ddp_cleanup (line 64) | def ddp_cleanup(trainer, file):
FILE: ultralytics/yolo/utils/downloads.py
function is_url (line 26) | def is_url(url, check=True):
function unzip_file (line 39) | def unzip_file(file, path=None, exclude=('.DS_Store', '__MACOSX')):
function check_disk_space (line 76) | def check_disk_space(url='https://ultralytics.com/assets/coco128.zip', s...
function safe_download (line 108) | def safe_download(url,
function attempt_download_asset (line 193) | def attempt_download_asset(file, repo='ultralytics/assets', release='v0....
function download (line 244) | def download(url, dir=Path.cwd(), unzip=True, delete=False, curl=False, ...
FILE: ultralytics/yolo/utils/errors.py
class HUBModelError (line 6) | class HUBModelError(Exception):
method __init__ (line 8) | def __init__(self, message='Model not found. Please check model URL an...
FILE: ultralytics/yolo/utils/files.py
class WorkingDirectory (line 11) | class WorkingDirectory(contextlib.ContextDecorator):
method __init__ (line 14) | def __init__(self, new_dir):
method __enter__ (line 19) | def __enter__(self):
method __exit__ (line 23) | def __exit__(self, exc_type, exc_val, exc_tb):
function increment_path (line 28) | def increment_path(path, exist_ok=False, sep='', mkdir=False):
function file_age (line 63) | def file_age(path=__file__):
function file_date (line 69) | def file_date(path=__file__):
function file_size (line 75) | def file_size(path):
function get_latest_run (line 87) | def get_latest_run(search_dir='.'):
function make_dirs (line 93) | def make_dirs(dir='new_dir/'):
FILE: ultralytics/yolo/utils/instance.py
function _ntuple (line 13) | def _ntuple(n):
class Bboxes (line 33) | class Bboxes:
method __init__ (line 36) | def __init__(self, bboxes, format='xyxy') -> None:
method convert (line 67) | def convert(self, format):
method areas (line 81) | def areas(self):
method mul (line 102) | def mul(self, scale):
method add (line 116) | def add(self, offset):
method __len__ (line 130) | def __len__(self):
method concatenate (line 135) | def concatenate(cls, boxes_list: List['Bboxes'], axis=0) -> 'Bboxes':
method __getitem__ (line 159) | def __getitem__(self, index) -> 'Bboxes':
class Instances (line 184) | class Instances:
method __init__ (line 186) | def __init__(self, bboxes, segments=None, keypoints=None, bbox_format=...
method convert_bbox (line 208) | def convert_bbox(self, format):
method bbox_areas (line 213) | def bbox_areas(self):
method scale (line 217) | def scale(self, scale_w, scale_h, bbox_only=False):
method denormalize (line 228) | def denormalize(self, w, h):
method normalize (line 240) | def normalize(self, w, h):
method add_padding (line 252) | def add_padding(self, padw, padh):
method __getitem__ (line 262) | def __getitem__(self, index) -> 'Instances':
method flipud (line 290) | def flipud(self, h):
method fliplr (line 303) | def fliplr(self, w):
method clip (line 316) | def clip(self, w, h):
method remove_zero_area_boxes (line 330) | def remove_zero_area_boxes(self):
method update (line 341) | def update(self, bboxes, segments=None, keypoints=None):
method __len__ (line 349) | def __len__(self):
method concatenate (line 354) | def concatenate(cls, instances_list: List['Instances'], axis=0) -> 'In...
method bboxes (line 389) | def bboxes(self):
FILE: ultralytics/yolo/utils/loss.py
class VarifocalLoss (line 15) | class VarifocalLoss(nn.Module):
method __init__ (line 18) | def __init__(self):
method forward (line 22) | def forward(self, pred_score, gt_score, label, alpha=0.75, gamma=2.0):
class FocalLoss (line 32) | class FocalLoss(nn.Module):
method __init__ (line 35) | def __init__(self, ):
method forward (line 38) | def forward(self, pred, label, gamma=1.5, alpha=0.25):
class BboxLoss (line 55) | class BboxLoss(nn.Module):
method __init__ (line 57) | def __init__(self, reg_max, use_dfl=False):
method forward (line 63) | def forward(self, pred_dist, pred_bboxes, anchor_points, target_bboxes...
method _df_loss (line 80) | def _df_loss(pred_dist, target):
class KeypointLoss (line 91) | class KeypointLoss(nn.Module):
method __init__ (line 93) | def __init__(self, sigmas) -> None:
method forward (line 97) | def forward(self, pred_kpts, gt_kpts, kpt_mask, area):
class v8DetectionLoss (line 107) | class v8DetectionLoss:
method __init__ (line 109) | def __init__(self, model): # model must be de-paralleled
method preprocess (line 129) | def preprocess(self, targets, batch_size, scale_tensor):
method bbox_decode (line 146) | def bbox_decode(self, anchor_points, pred_dist):
method __call__ (line 155) | def __call__(self, preds, batch):
class v8SegmentationLoss (line 203) | class v8SegmentationLoss(v8DetectionLoss):
method __init__ (line 205) | def __init__(self, model): # model must be de-paralleled
method __call__ (line 210) | def __call__(self, preds, batch):
method single_mask_loss (line 290) | def single_mask_loss(self, gt_mask, pred, proto, xyxy, area):
class v8PoseLoss (line 298) | class v8PoseLoss(v8DetectionLoss):
method __init__ (line 300) | def __init__(self, model): # model must be de-paralleled
method __call__ (line 309) | def __call__(self, preds, batch):
method kpts_decode (line 377) | def kpts_decode(self, anchor_points, pred_kpts):
class v8ClassificationLoss (line 386) | class v8ClassificationLoss:
method __call__ (line 388) | def __call__(self, preds, batch):
FILE: ultralytics/yolo/utils/metrics.py
function box_area (line 19) | def box_area(box):
function bbox_ioa (line 24) | def bbox_ioa(box1, box2, eps=1e-7):
function box_iou (line 52) | def box_iou(box1, box2, eps=1e-7):
function bbox_iou (line 75) | def bbox_iou(box1, box2, xywh=True, GIoU=False, DIoU=False, CIoU=False, ...
function mask_iou (line 131) | def mask_iou(mask1, mask2, eps=1e-7):
function kpt_iou (line 150) | def kpt_iou(kpt1, kpt2, area, sigma, eps=1e-7):
function smooth_BCE (line 172) | def smooth_BCE(eps=0.1): # https://github.com/ultralytics/yolov3/issues...
class ConfusionMatrix (line 177) | class ConfusionMatrix:
method __init__ (line 189) | def __init__(self, nc, conf=0.25, iou_thres=0.45, task='detect'):
method process_cls_preds (line 197) | def process_cls_preds(self, preds, targets):
method process_batch (line 209) | def process_batch(self, detections, labels):
method matrix (line 255) | def matrix(self):
method tp_fp (line 259) | def tp_fp(self):
method plot (line 268) | def plot(self, normalize=True, save_dir='', names=(), on_plot=None):
method print (line 311) | def print(self):
function smooth (line 319) | def smooth(y, f=0.05):
function plot_pr_curve (line 328) | def plot_pr_curve(px, py, ap, save_dir=Path('pr_curve.png'), names=(), o...
function plot_mc_curve (line 353) | def plot_mc_curve(px, py, save_dir=Path('mc_curve.png'), names=(), xlabe...
function compute_ap (line 377) | def compute_ap(recall, precision):
function ap_per_class (line 410) | def ap_per_class(tp,
class Metric (line 500) | class Metric(SimpleClass):
method __init__ (line 528) | def __init__(self) -> None:
method ap50 (line 537) | def ap50(self):
method ap (line 547) | def ap(self):
method mp (line 557) | def mp(self):
method mr (line 567) | def mr(self):
method map50 (line 577) | def map50(self):
method map75 (line 587) | def map75(self):
method map (line 597) | def map(self):
method mean_results (line 606) | def mean_results(self):
method class_result (line 610) | def class_result(self, i):
method maps (line 615) | def maps(self):
method fitness (line 622) | def fitness(self):
method update (line 627) | def update(self, results):
class DetMetrics (line 635) | class DetMetrics(SimpleClass):
method __init__ (line 665) | def __init__(self, save_dir=Path('.'), plot=False, on_plot=None, names...
method process (line 673) | def process(self, tp, conf, pred_cls, target_cls):
method keys (line 687) | def keys(self):
method mean_results (line 691) | def mean_results(self):
method class_result (line 695) | def class_result(self, i):
method maps (line 700) | def maps(self):
method fitness (line 705) | def fitness(self):
method ap_class_index (line 710) | def ap_class_index(self):
method results_dict (line 715) | def results_dict(self):
class SegmentMetrics (line 720) | class SegmentMetrics(SimpleClass):
method __init__ (line 749) | def __init__(self, save_dir=Path('.'), plot=False, on_plot=None, names...
method process (line 758) | def process(self, tp_b, tp_m, conf, pred_cls, target_cls):
method keys (line 794) | def keys(self):
method mean_results (line 800) | def mean_results(self):
method class_result (line 804) | def class_result(self, i):
method maps (line 809) | def maps(self):
method fitness (line 814) | def fitness(self):
method ap_class_index (line 819) | def ap_class_index(self):
method results_dict (line 824) | def results_dict(self):
class PoseMetrics (line 829) | class PoseMetrics(SegmentMetrics):
method __init__ (line 858) | def __init__(self, save_dir=Path('.'), plot=False, on_plot=None, names...
method __getattr__ (line 868) | def __getattr__(self, attr):
method process (line 873) | def process(self, tp_b, tp_p, conf, pred_cls, target_cls):
method keys (line 909) | def keys(self):
method mean_results (line 915) | def mean_results(self):
method class_result (line 919) | def class_result(self, i):
method maps (line 924) | def maps(self):
method fitness (line 929) | def fitness(self):
class ClassifyMetrics (line 934) | class ClassifyMetrics(SimpleClass):
method __init__ (line 952) | def __init__(self) -> None:
method process (line 957) | def process(self, targets, pred):
method fitness (line 965) | def fitness(self):
method results_dict (line 970) | def results_dict(self):
method keys (line 975) | def keys(self):
FILE: ultralytics/yolo/utils/ops.py
class Profile (line 19) | class Profile(contextlib.ContextDecorator):
method __init__ (line 25) | def __init__(self, t=0.0):
method __enter__ (line 35) | def __enter__(self):
method __exit__ (line 42) | def __exit__(self, type, value, traceback):
method time (line 49) | def time(self):
function coco80_to_coco91_class (line 58) | def coco80_to_coco91_class(): # converts 80-index (val2014) to 91-index...
function segment2box (line 70) | def segment2box(segment, width=640, height=640):
function scale_boxes (line 90) | def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None):
function make_divisible (line 120) | def make_divisible(x, divisor):
function non_max_suppression (line 136) | def non_max_suppression(
function clip_boxes (line 269) | def clip_boxes(boxes, shape):
function clip_coords (line 288) | def clip_coords(coords, shape):
function scale_image (line 307) | def scale_image(masks, im0_shape, ratio_pad=None):
function xyxy2xywh (line 345) | def xyxy2xywh(x):
function xywh2xyxy (line 362) | def xywh2xyxy(x):
function xywhn2xyxy (line 380) | def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0):
function xyxy2xywhn (line 402) | def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0):
function xyn2xy (line 426) | def xyn2xy(x, w=640, h=640, padw=0, padh=0):
function xywh2ltwh (line 445) | def xywh2ltwh(x):
function xyxy2ltwh (line 460) | def xyxy2ltwh(x):
function ltwh2xywh (line 475) | def ltwh2xywh(x):
function ltwh2xyxy (line 488) | def ltwh2xyxy(x):
function segments2boxes (line 504) | def segments2boxes(segments):
function resample_segments (line 521) | def resample_segments(segments, n=1000):
function crop_mask (line 541) | def crop_mask(masks, boxes):
function process_mask_upsample (line 560) | def process_mask_upsample(protos, masks_in, bboxes, shape):
function process_mask (line 581) | def process_mask(protos, masks_in, bboxes, shape, upsample=False):
function process_mask_native (line 613) | def process_mask_native(protos, masks_in, bboxes, shape):
function scale_coords (line 639) | def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None, normali...
function masks2segments (line 671) | def masks2segments(masks, strategy='largest'):
function clean_str (line 696) | def clean_str(s):
FILE: ultralytics/yolo/utils/patches.py
function imread (line 16) | def imread(filename, flags=cv2.IMREAD_COLOR):
function imwrite (line 20) | def imwrite(filename, img):
function imshow (line 28) | def imshow(path, im):
function torch_save (line 36) | def torch_save(*args, **kwargs):
FILE: ultralytics/yolo/utils/plotting.py
class Colors (line 22) | class Colors:
method __init__ (line 24) | def __init__(self):
method __call__ (line 36) | def __call__(self, i, bgr=False):
method hex2rgb (line 42) | def hex2rgb(h): # rgb order (PIL)
class Annotator (line 49) | class Annotator:
method __init__ (line 51) | def __init__(self, im, line_width=None, font_size=None, font='Arial.tt...
method box_label (line 78) | def box_label(self, box, label='', color=(128, 128, 128), txt_color=(2...
method masks (line 111) | def masks(self, masks, colors, im_gpu, alpha=0.5, retina_masks=False):
method kpts (line 144) | def kpts(self, kpts, shape=(640, 640), radius=5, kpt_line=True):
method rectangle (line 191) | def rectangle(self, xy, fill=None, outline=None, width=1):
method text (line 195) | def text(self, xy, text, txt_color=(255, 255, 255), anchor='top', box_...
method fromarray (line 219) | def fromarray(self, im):
method result (line 224) | def result(self):
function plot_labels (line 231) | def plot_labels(boxes, cls, names=(), save_dir=Path(''), on_plot=None):
function save_one_box (line 281) | def save_one_box(xyxy, im, file=Path('im.jpg'), gain=1.02, pad=10, squar...
function plot_images (line 299) | def plot_images(images,
function plot_results (line 431) | def plot_results(file='path/to/results.csv', dir='', segment=False, pose...
function output_to_target (line 473) | def output_to_target(output, max_det=300):
function feature_visualization (line 484) | def feature_visualization(x, module_type, stage, n=32, save_dir=Path('ru...
FILE: ultralytics/yolo/utils/tal.py
function select_candidates_in_gts (line 12) | def select_candidates_in_gts(xy_centers, gt_bboxes, eps=1e-9):
function select_highest_overlaps (line 29) | def select_highest_overlaps(mask_pos, overlaps, n_max_boxes):
class TaskAlignedAssigner (line 57) | class TaskAlignedAssigner(nn.Module):
method __init__ (line 72) | def __init__(self, topk=13, num_classes=80, alpha=1.0, beta=6.0, eps=1...
method forward (line 83) | def forward(self, pd_scores, pd_bboxes, anc_points, gt_labels, gt_bbox...
method get_pos_mask (line 129) | def get_pos_mask(self, pd_scores, pd_bboxes, gt_labels, gt_bboxes, anc...
method get_box_metrics (line 141) | def get_box_metrics(self, pd_scores, pd_bboxes, gt_labels, gt_bboxes, ...
method select_topk_candidates (line 162) | def select_topk_candidates(self, metrics, largest=True, topk_mask=None):
method get_targets (line 198) | def get_targets(self, gt_labels, gt_bboxes, target_gt_idx, fg_mask):
function make_anchors (line 246) | def make_anchors(feats, strides, grid_cell_offset=0.5):
function dist2bbox (line 261) | def dist2bbox(distance, anchor_points, xywh=True, dim=-1):
function bbox2dist (line 273) | def bbox2dist(anchor_points, bbox, reg_max):
FILE: ultralytics/yolo/utils/torch_utils.py
function torch_distributed_zero_first (line 36) | def torch_distributed_zero_first(local_rank: int):
function smart_inference_mode (line 46) | def smart_inference_mode():
function select_device (line 56) | def select_device(device='', batch=0, newline=False, verbose=True):
function time_sync (line 107) | def time_sync():
function fuse_conv_and_bn (line 114) | def fuse_conv_and_bn(conv, bn):
function fuse_deconv_and_bn (line 138) | def fuse_deconv_and_bn(deconv, bn):
function model_info (line 163) | def model_info(model, detailed=False, verbose=True, imgsz=640):
function get_num_params (line 187) | def get_num_params(model):
function get_num_gradients (line 192) | def get_num_gradients(model):
function model_info_for_loggers (line 197) | def model_info_for_loggers(trainer):
function get_flops (line 220) | def get_flops(model, imgsz=640):
function get_flops_with_torch_profiler (line 234) | def get_flops_with_torch_profiler(model, imgsz=640):
function initialize_weights (line 248) | def initialize_weights(model):
function scale_img (line 261) | def scale_img(img, ratio=1.0, same_shape=False, gs=32): # img(16,3,256,...
function make_divisible (line 273) | def make_divisible(x, divisor):
function copy_attr (line 280) | def copy_attr(a, b, include=(), exclude=()):
function get_latest_opset (line 289) | def get_latest_opset():
function intersect_dicts (line 294) | def intersect_dicts(da, db, exclude=()):
function is_parallel (line 299) | def is_parallel(model):
function de_parallel (line 304) | def de_parallel(model):
function one_cycle (line 309) | def one_cycle(y1=0.0, y2=1.0, steps=100):
function init_seeds (line 314) | def init_seeds(seed=0, deterministic=False):
class ModelEMA (line 335) | class ModelEMA:
method __init__ (line 342) | def __init__(self, model, decay=0.9999, tau=2000, updates=0):
method update (line 351) | def update(self, model):
method update_attr (line 364) | def update_attr(self, model, include=(), exclude=('process_group', 're...
function strip_optimizer (line 370) | def strip_optimizer(f: Union[str, Path] = 'best.pt', s: str = '') -> None:
function profile (line 410) | def profile(input, ops, n=10, device=None):
class EarlyStopping (line 463) | class EarlyStopping:
method __init__ (line 468) | def __init__(self, patience=50):
method __call__ (line 480) | def __call__(self, epoch, fitness):
FILE: ultralytics/yolo/v8/classify/predict.py
class ClassificationPredictor (line 10) | class ClassificationPredictor(BasePredictor):
method __init__ (line 12) | def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
method preprocess (line 16) | def preprocess(self, img):
method postprocess (line 23) | def postprocess(self, preds, img, orig_imgs):
function predict (line 35) | def predict(cfg=DEFAULT_CFG, use_python=False):
FILE: ultralytics/yolo/v8/classify/train.py
class ClassificationTrainer (line 15) | class ClassificationTrainer(BaseTrainer):
method __init__ (line 17) | def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
method set_model_attributes (line 26) | def set_model_attributes(self):
method get_model (line 30) | def get_model(self, cfg=None, weights=None, verbose=True):
method setup_model (line 45) | def setup_model(self):
method build_dataset (line 70) | def build_dataset(self, img_path, mode='train', batch=None):
method get_dataloader (line 73) | def get_dataloader(self, dataset_path, batch_size=16, rank=0, mode='tr...
method preprocess_batch (line 87) | def preprocess_batch(self, batch):
method progress_string (line 93) | def progress_string(self):
method get_validator (line 98) | def get_validator(self):
method label_loss_items (line 103) | def label_loss_items(self, loss_items=None, prefix='train'):
method resume_training (line 114) | def resume_training(self, ckpt):
method plot_metrics (line 118) | def plot_metrics(self):
method final_eval (line 122) | def final_eval(self):
method plot_training_samples (line 136) | def plot_training_samples(self, batch, ni):
function train (line 145) | def train(cfg=DEFAULT_CFG, use_python=False):
FILE: ultralytics/yolo/v8/classify/val.py
class ClassificationValidator (line 12) | class ClassificationValidator(BaseValidator):
method __init__ (line 14) | def __init__(self, dataloader=None, save_dir=None, pbar=None, args=Non...
method get_desc (line 20) | def get_desc(self):
method init_metrics (line 24) | def init_metrics(self, model):
method preprocess (line 32) | def preprocess(self, batch):
method update_metrics (line 39) | def update_metrics(self, preds, batch):
method finalize_metrics (line 45) | def finalize_metrics(self, *args, **kwargs):
method get_stats (line 57) | def get_stats(self):
method build_dataset (line 62) | def build_dataset(self, img_path):
method get_dataloader (line 65) | def get_dataloader(self, dataset_path, batch_size):
method print_results (line 70) | def print_results(self):
method plot_val_samples (line 75) | def plot_val_samples(self, batch, ni):
method plot_predictions (line 84) | def plot_predictions(self, batch, preds, ni):
function val (line 94) | def val(cfg=DEFAULT_CFG, use_python=False):
FILE: ultralytics/yolo/v8/detect/predict.py
class DetectionPredictor (line 10) | class DetectionPredictor(BasePredictor):
method postprocess (line 12) | def postprocess(self, preds, img, orig_imgs):
function predict (line 32) | def predict(cfg=DEFAULT_CFG, use_python=False):
FILE: ultralytics/yolo/v8/detect/train.py
class DetectionTrainer (line 17) | class DetectionTrainer(BaseTrainer):
method build_dataset (line 19) | def build_dataset(self, img_path, mode='train', batch=None):
method get_dataloader (line 30) | def get_dataloader(self, dataset_path, batch_size=16, rank=0, mode='tr...
method preprocess_batch (line 62) | def preprocess_batch(self, batch):
method set_model_attributes (line 67) | def set_model_attributes(self):
method get_model (line 77) | def get_model(self, cfg=None, weights=None, verbose=True):
method get_validator (line 84) | def get_validator(self):
method label_loss_items (line 89) | def label_loss_items(self, loss_items=None, prefix='train'):
method progress_string (line 101) | def progress_string(self):
method plot_training_samples (line 106) | def plot_training_samples(self, batch, ni):
method plot_metrics (line 116) | def plot_metrics(self):
method plot_training_labels (line 120) | def plot_training_labels(self):
function train (line 127) | def train(cfg=DEFAULT_CFG, use_python=False):
FILE: ultralytics/yolo/v8/detect/val.py
class DetectionValidator (line 19) | class DetectionValidator(BaseValidator):
method __init__ (line 21) | def __init__(self, dataloader=None, save_dir=None, pbar=None, args=Non...
method preprocess (line 31) | def preprocess(self, batch):
method init_metrics (line 44) | def init_metrics(self, model):
method get_desc (line 59) | def get_desc(self):
method postprocess (line 63) | def postprocess(self, preds):
method update_metrics (line 73) | def update_metrics(self, preds, batch):
method finalize_metrics (line 119) | def finalize_metrics(self, *args, **kwargs):
method get_stats (line 124) | def get_stats(self):
method print_results (line 132) | def print_results(self):
method _process_batch (line 152) | def _process_batch(self, detections, labels):
method build_dataset (line 177) | def build_dataset(self, img_path, mode='val', batch=None):
method get_dataloader (line 188) | def get_dataloader(self, dataset_path, batch_size):
method plot_val_samples (line 212) | def plot_val_samples(self, batch, ni):
method plot_predictions (line 223) | def plot_predictions(self, batch, preds, ni):
method save_one_txt (line 232) | def save_one_txt(self, predn, save_conf, shape, file):
method pred_to_json (line 241) | def pred_to_json(self, predn, filename):
method eval_json (line 254) | def eval_json(self, stats):
function val (line 281) | def val(cfg=DEFAULT_CFG, use_python=False):
FILE: ultralytics/yolo/v8/pose/predict.py
class PosePredictor (line 8) | class PosePredictor(DetectionPredictor):
method __init__ (line 10) | def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
method postprocess (line 14) | def postprocess(self, preds, img, orig_imgs):
function predict (line 42) | def predict(cfg=DEFAULT_CFG, use_python=False):
FILE: ultralytics/yolo/v8/pose/train.py
class PoseTrainer (line 12) | class PoseTrainer(v8.detect.DetectionTrainer):
method __init__ (line 14) | def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
method get_model (line 21) | def get_model(self, cfg=None, weights=None, verbose=True):
method set_model_attributes (line 29) | def set_model_attributes(self):
method get_validator (line 34) | def get_validator(self):
method plot_training_samples (line 39) | def plot_training_samples(self, batch, ni):
method plot_metrics (line 56) | def plot_metrics(self):
function train (line 61) | def train(cfg=DEFAULT_CFG, use_python=False):
FILE: ultralytics/yolo/v8/pose/val.py
class PoseValidator (line 15) | class PoseValidator(DetectionValidator):
method __init__ (line 17) | def __init__(self, dataloader=None, save_dir=None, pbar=None, args=Non...
method preprocess (line 23) | def preprocess(self, batch):
method get_desc (line 29) | def get_desc(self):
method postprocess (line 34) | def postprocess(self, preds):
method init_metrics (line 45) | def init_metrics(self, model):
method update_metrics (line 53) | def update_metrics(self, preds, batch):
method _process_batch (line 110) | def _process_batch(self, detections, labels, pred_kpts=None, gt_kpts=N...
method plot_val_samples (line 143) | def plot_val_samples(self, batch, ni):
method plot_predictions (line 155) | def plot_predictions(self, batch, preds, ni):
method pred_to_json (line 166) | def pred_to_json(self, predn, filename):
method eval_json (line 180) | def eval_json(self, stats):
function val (line 209) | def val(cfg=DEFAULT_CFG, use_python=False):
FILE: ultralytics/yolo/v8/segment/predict.py
class SegmentationPredictor (line 10) | class SegmentationPredictor(DetectionPredictor):
method __init__ (line 12) | def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
method postprocess (line 16) | def postprocess(self, preds, img, orig_imgs):
function predict (line 47) | def predict(cfg=DEFAULT_CFG, use_python=False):
FILE: ultralytics/yolo/v8/segment/train.py
class SegmentationTrainer (line 11) | class SegmentationTrainer(v8.detect.DetectionTrainer):
method __init__ (line 13) | def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
method get_model (line 20) | def get_model(self, cfg=None, weights=None, verbose=True):
method get_validator (line 28) | def get_validator(self):
method plot_training_samples (line 33) | def plot_training_samples(self, batch, ni):
method plot_metrics (line 44) | def plot_metrics(self):
function train (line 49) | def train(cfg=DEFAULT_CFG, use_python=False):
FILE: ultralytics/yolo/v8/segment/val.py
class SegmentationValidator (line 17) | class SegmentationValidator(DetectionValidator):
method __init__ (line 19) | def __init__(self, dataloader=None, save_dir=None, pbar=None, args=Non...
method preprocess (line 25) | def preprocess(self, batch):
method init_metrics (line 31) | def init_metrics(self, model):
method get_desc (line 41) | def get_desc(self):
method postprocess (line 46) | def postprocess(self, preds):
method update_metrics (line 59) | def update_metrics(self, preds, batch):
method finalize_metrics (line 126) | def finalize_metrics(self, *args, **kwargs):
method _process_batch (line 131) | def _process_batch(self, detections, labels, pred_masks=None, gt_masks...
method plot_val_samples (line 168) | def plot_val_samples(self, batch, ni):
method plot_predictions (line 180) | def plot_predictions(self, batch, preds, ni):
method pred_to_json (line 192) | def pred_to_json(self, predn, filename, pred_masks):
method eval_json (line 218) | def eval_json(self, stats):
function val (line 247) | def val(cfg=DEFAULT_CFG, use_python=False):
FILE: utils/tools.py
function convert_box_xywh_to_xyxy (line 10) | def convert_box_xywh_to_xyxy(box):
function segment_image (line 21) | def segment_image(image, bbox):
function format_results (line 38) | def format_results(result, filter=0):
function filter_masks (line 56) | def filter_masks(annotations): # filter the overlap mask
function get_bbox_from_mask (line 74) | def get_bbox_from_mask(mask):
function fast_process (line 94) | def fast_process(
function fast_show_mask (line 194) | def fast_show_mask(
function fast_show_mask_gpu (line 260) | def fast_show_mask_gpu(
function retriev (line 327) | def retriev(
function crop_image (line 342) | def crop_image(annotations, image_like):
function box_prompt (line 366) | def box_prompt(masks, bbox, target_height, target_width):
function point_prompt (line 394) | def point_prompt(masks, points, point_label, target_height, target_width...
function text_prompt (line 418) | def text_prompt(annotations, text, img_path, device, wider=False, thresh...
FILE: utils/tools_gradio.py
function fast_process (line 8) | def fast_process(
function fast_show_mask (line 88) | def fast_show_mask(
function fast_show_mask_gpu (line 130) | def fast_show_mask_gpu(
Condensed preview — 179 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (1,413K chars).
[
{
"path": ".gitignore",
"chars": 83,
"preview": "*.pyc\n*.pyo\n*.pyd\n.DS_Store\n.idea\nweights\nbuild/\n*.egg-info/\ngradio_cached_examples"
},
{
"path": "Inference.py",
"chars": 3934,
"preview": "import argparse\r\nfrom fastsam import FastSAM, FastSAMPrompt \r\nimport ast\r\nimport torch\r\nfrom PIL import Image\r\nfrom util"
},
{
"path": "LICENSE",
"chars": 34523,
"preview": " GNU AFFERO GENERAL PUBLIC LICENSE\n Version 3, 19 November 2007\n\n Copyright (C)"
},
{
"path": "MORE_USAGES.md",
"chars": 1811,
"preview": "# MORE_USAGES\n\n\n\n### Everything mode\nUse --imgsz to change different input sizes.\n\n```shell\npython Inference.py --model_"
},
{
"path": "README.md",
"chars": 11577,
"preview": "\r\n\r\n# Fast Segment Anything\r\n\r\n[[`📕Paper`](https://arxiv.org/pdf/2306.12156.pdf)] [[`🤗HuggingFace De"
},
{
"path": "app_gradio.py",
"chars": 15079,
"preview": "from ultralytics import YOLO\nimport gradio as gr\nimport torch\nfrom utils.tools_gradio import fast_process\nfrom utils.too"
},
{
"path": "cog.yaml",
"chars": 674,
"preview": "# Configuration for Cog ⚙️\n# Reference: https://github.com/replicate/cog/blob/main/docs/yaml.md\n# Thanks for chenxwh.\n\nb"
},
{
"path": "fastsam/__init__.py",
"chars": 287,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nfrom .model import FastSAM\nfrom .predict import FastSAMPredictor\nfrom .prompt im"
},
{
"path": "fastsam/decoder.py",
"chars": 4711,
"preview": "from .model import FastSAM\nimport numpy as np\nfrom PIL import Image\nfrom typing import Optional, List, Tuple, Union\n\n\ncl"
},
{
"path": "fastsam/model.py",
"chars": 4356,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\"\"\"\nFastSAM model interface.\n\nUsage - Predict:\n from ultralytics import FastSA"
},
{
"path": "fastsam/predict.py",
"chars": 2852,
"preview": "import torch\n\nfrom ultralytics.yolo.engine.results import Results\nfrom ultralytics.yolo.utils import DEFAULT_CFG, ops\nfr"
},
{
"path": "fastsam/prompt.py",
"chars": 18949,
"preview": "import os\nimport sys\nimport cv2\nimport matplotlib.pyplot as plt\nimport numpy as np\nimport torch\nfrom .utils import image"
},
{
"path": "fastsam/utils.py",
"chars": 2686,
"preview": "import numpy as np\nimport torch\nfrom PIL import Image\n\n\ndef adjust_bboxes_to_image_border(boxes, image_shape, threshold="
},
{
"path": "predict.py",
"chars": 5251,
"preview": "# Prediction interface for Cog ⚙️\n# https://github.com/replicate/cog/blob/main/docs/python.md\n# Thanks for chenxwh.\n\nimp"
},
{
"path": "requirements.txt",
"chars": 326,
"preview": "# Base-----------------------------------\r\nmatplotlib>=3.2.2\r\nopencv-python>=4.6.0\r\nPillow>=7.1.2\r\nPyYAML>=5.3.1\r\nreques"
},
{
"path": "segpredict.py",
"chars": 1429,
"preview": "from fastsam import FastSAM, FastSAMPrompt\nimport torch \n\nmodel = FastSAM('FastSAM.pt')\nIMAGE_PATH = './images/dogs.jpg'"
},
{
"path": "setup.py",
"chars": 631,
"preview": "# This source code is licensed under the license found in the\n# LICENSE file in the root directory of this source tree.\n"
},
{
"path": "ultralytics/.pre-commit-config.yaml",
"chars": 1811,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n# Pre-commit hooks. For more information see https://github.com/pre-commit/pre-co"
},
{
"path": "ultralytics/__init__.py",
"chars": 424,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\n__version__ = '8.0.120'\n\nfrom ultralytics.hub import start\nfrom ultralytics.vit."
},
{
"path": "ultralytics/datasets/Argoverse.yaml",
"chars": 2728,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n# Argoverse-HD dataset (ring-front-center camera) http://www.cs.cmu.edu/~mengtial"
},
{
"path": "ultralytics/datasets/GlobalWheat2020.yaml",
"chars": 1963,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n# Global Wheat 2020 dataset http://www.global-wheat.com/ by University of Saskatc"
},
{
"path": "ultralytics/datasets/ImageNet.yaml",
"chars": 42416,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n# ImageNet-1k dataset https://www.image-net.org/index.php by Stanford University\n"
},
{
"path": "ultralytics/datasets/Objects365.yaml",
"chars": 9247,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n# Objects365 dataset https://www.objects365.org/ by Megvii\n# Example usage: yolo "
},
{
"path": "ultralytics/datasets/SKU-110K.yaml",
"chars": 2414,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n# SKU-110K retail items dataset https://github.com/eg4000/SKU110K_CVPR19 by Trax "
},
{
"path": "ultralytics/datasets/VOC.yaml",
"chars": 3489,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n# PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC by University of Oxfo"
},
{
"path": "ultralytics/datasets/VisDrone.yaml",
"chars": 2989,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n# VisDrone2019-DET dataset https://github.com/VisDrone/VisDrone-Dataset by Tianji"
},
{
"path": "ultralytics/datasets/coco-pose.yaml",
"chars": 1524,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n# COCO 2017 dataset http://cocodataset.org by Microsoft\n# Example usage: yolo tra"
},
{
"path": "ultralytics/datasets/coco.yaml",
"chars": 2503,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n# COCO 2017 dataset http://cocodataset.org by Microsoft\n# Example usage: yolo tra"
},
{
"path": "ultralytics/datasets/coco128-seg.yaml",
"chars": 1839,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n# COCO128-seg dataset https://www.kaggle.com/ultralytics/coco128 (first 128 image"
},
{
"path": "ultralytics/datasets/coco128.yaml",
"chars": 1823,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n# COCO128 dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images fr"
},
{
"path": "ultralytics/datasets/coco8-pose.yaml",
"chars": 872,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n# COCO8-pose dataset (first 8 images from COCO train2017) by Ultralytics\n# Exampl"
},
{
"path": "ultralytics/datasets/coco8-seg.yaml",
"chars": 1774,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n# COCO8-seg dataset (first 8 images from COCO train2017) by Ultralytics\n# Example"
},
{
"path": "ultralytics/datasets/coco8.yaml",
"chars": 1754,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n# COCO8 dataset (first 8 images from COCO train2017) by Ultralytics\n# Example usa"
},
{
"path": "ultralytics/datasets/xView.yaml",
"chars": 5155,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n# DIUx xView 2018 Challenge https://challenge.xviewdataset.org by U.S. National G"
},
{
"path": "ultralytics/hub/__init__.py",
"chars": 4395,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nimport requests\n\nfrom ultralytics.hub.auth import Auth\nfrom ultralytics.hub.util"
},
{
"path": "ultralytics/hub/auth.py",
"chars": 5196,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nimport requests\n\nfrom ultralytics.hub.utils import HUB_API_ROOT, PREFIX, request"
},
{
"path": "ultralytics/hub/session.py",
"chars": 8464,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\nimport signal\nimport sys\nfrom pathlib import Path\nfrom time import sleep\n\nimport "
},
{
"path": "ultralytics/hub/utils.py",
"chars": 9154,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nimport os\nimport platform\nimport random\nimport sys\nimport threading\nimport time\n"
},
{
"path": "ultralytics/models/README.md",
"chars": 2524,
"preview": "## Models\n\nWelcome to the Ultralytics Models directory! Here you will find a wide variety of pre-configured model config"
},
{
"path": "ultralytics/models/rt-detr/rtdetr-l.yaml",
"chars": 1967,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n# RT-DETR-l object detection model with P3-P5 outputs. For details see https://do"
},
{
"path": "ultralytics/models/rt-detr/rtdetr-x.yaml",
"chars": 2174,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n# RT-DETR-x object detection model with P3-P5 outputs. For details see https://do"
},
{
"path": "ultralytics/models/v3/yolov3-spp.yaml",
"chars": 1547,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n# YOLOv3-SPP object detection model with P3-P5 outputs. For details see https://d"
},
{
"path": "ultralytics/models/v3/yolov3-tiny.yaml",
"chars": 1249,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n# YOLOv3-tiny object detection model with P4-P5 outputs. For details see https://"
},
{
"path": "ultralytics/models/v3/yolov3.yaml",
"chars": 1534,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n# YOLOv3 object detection model with P3-P5 outputs. For details see https://docs."
},
{
"path": "ultralytics/models/v5/yolov5-p6.yaml",
"chars": 1920,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n# YOLOv5 object detection model with P3-P6 outputs. For details see https://docs."
},
{
"path": "ultralytics/models/v5/yolov5.yaml",
"chars": 1547,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n# YOLOv5 object detection model with P3-P5 outputs. For details see https://docs."
},
{
"path": "ultralytics/models/v6/yolov6.yaml",
"chars": 1732,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n# YOLOv6 object detection model with P3-P5 outputs. For Usage examples see https:"
},
{
"path": "ultralytics/models/v8/yolov8-cls.yaml",
"chars": 917,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n# YOLOv8-cls image classification model. For Usage examples see https://docs.ultr"
},
{
"path": "ultralytics/models/v8/yolov8-p2.yaml",
"chars": 1748,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n# YOLOv8 object detection model with P2-P5 outputs. For Usage examples see https:"
},
{
"path": "ultralytics/models/v8/yolov8-p6.yaml",
"chars": 1853,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n# YOLOv8 object detection model with P3-P6 outputs. For Usage examples see https:"
},
{
"path": "ultralytics/models/v8/yolov8-pose-p6.yaml",
"chars": 1943,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n# YOLOv8-pose keypoints/pose estimation model. For Usage examples see https://doc"
},
{
"path": "ultralytics/models/v8/yolov8-pose.yaml",
"chars": 1577,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n# YOLOv8-pose keypoints/pose estimation model. For Usage examples see https://doc"
},
{
"path": "ultralytics/models/v8/yolov8-rtdetr.yaml",
"chars": 1917,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n# YOLOv8 object detection model with P3-P5 outputs. For Usage examples see https:"
},
{
"path": "ultralytics/models/v8/yolov8-seg.yaml",
"chars": 1487,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n# YOLOv8-seg instance segmentation model. For Usage examples see https://docs.ult"
},
{
"path": "ultralytics/models/v8/yolov8.yaml",
"chars": 1910,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n# YOLOv8 object detection model with P3-P5 outputs. For Usage examples see https:"
},
{
"path": "ultralytics/nn/__init__.py",
"chars": 552,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nfrom .tasks import (BaseModel, ClassificationModel, DetectionModel, Segmentation"
},
{
"path": "ultralytics/nn/autobackend.py",
"chars": 24246,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nimport ast\nimport contextlib\nimport json\nimport platform\nimport zipfile\nfrom col"
},
{
"path": "ultralytics/nn/autoshape.py",
"chars": 12512,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\"\"\"\nCommon modules\n\"\"\"\n\nfrom copy import copy\nfrom pathlib import Path\n\nimport cv"
},
{
"path": "ultralytics/nn/modules/__init__.py",
"chars": 1584,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\"\"\"\nUltralytics modules. Visualize with:\n\nfrom ultralytics.nn.modules import *\nim"
},
{
"path": "ultralytics/nn/modules/block.py",
"chars": 11813,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\"\"\"\nBlock modules\n\"\"\"\n\nimport torch\nimport torch.nn as nn\nimport torch.nn.functio"
},
{
"path": "ultralytics/nn/modules/conv.py",
"chars": 11644,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\"\"\"\nConvolution modules\n\"\"\"\n\nimport math\n\nimport numpy as np\nimport torch\nimport "
},
{
"path": "ultralytics/nn/modules/head.py",
"chars": 15320,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\"\"\"\nModel head modules\n\"\"\"\n\nimport math\n\nimport torch\nimport torch.nn as nn\nfrom "
},
{
"path": "ultralytics/nn/modules/transformer.py",
"chars": 15931,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\"\"\"\nTransformer modules\n\"\"\"\n\nimport math\n\nimport torch\nimport torch.nn as nn\nimpo"
},
{
"path": "ultralytics/nn/modules/utils.py",
"chars": 3241,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\"\"\"\nModule utils\n\"\"\"\n\nimport copy\nimport math\n\nimport numpy as np\nimport torch\nim"
},
{
"path": "ultralytics/nn/tasks.py",
"chars": 34133,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nimport contextlib\nfrom copy import deepcopy\nfrom pathlib import Path\n\nimport tor"
},
{
"path": "ultralytics/tracker/README.md",
"chars": 2540,
"preview": "# Tracker\n\n## Supported Trackers\n\n- [x] ByteTracker\n- [x] BoT-SORT\n\n## Usage\n\n### python interface:\n\nYou can use the Pyt"
},
{
"path": "ultralytics/tracker/__init__.py",
"chars": 199,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nfrom .track import register_tracker\nfrom .trackers import BOTSORT, BYTETracker\n\n"
},
{
"path": "ultralytics/tracker/cfg/botsort.yaml",
"chars": 887,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n# Default YOLO tracker settings for BoT-SORT tracker https://github.com/NirAharon"
},
{
"path": "ultralytics/tracker/cfg/bytetrack.yaml",
"chars": 691,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n# Default YOLO tracker settings for ByteTrack tracker https://github.com/ifzhang/"
},
{
"path": "ultralytics/tracker/track.py",
"chars": 2306,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nfrom functools import partial\n\nimport torch\n\nfrom ultralytics.yolo.utils import "
},
{
"path": "ultralytics/tracker/trackers/__init__.py",
"chars": 168,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nfrom .bot_sort import BOTSORT\nfrom .byte_tracker import BYTETracker\n\n__all__ = '"
},
{
"path": "ultralytics/tracker/trackers/basetrack.py",
"chars": 1606,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nfrom collections import OrderedDict\n\nimport numpy as np\n\n\nclass TrackState:\n "
},
{
"path": "ultralytics/tracker/trackers/bot_sort.py",
"chars": 5681,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nfrom collections import deque\n\nimport numpy as np\n\nfrom ..utils import matching\n"
},
{
"path": "ultralytics/tracker/trackers/byte_tracker.py",
"chars": 14445,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nimport numpy as np\n\nfrom ..utils import matching\nfrom ..utils.kalman_filter impo"
},
{
"path": "ultralytics/tracker/utils/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "ultralytics/tracker/utils/gmc.py",
"chars": 12211,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nimport copy\n\nimport cv2\nimport numpy as np\n\nfrom ultralytics.yolo.utils import L"
},
{
"path": "ultralytics/tracker/utils/kalman_filter.py",
"chars": 18417,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nimport numpy as np\nimport scipy.linalg\n\n# Table for the 0.95 quantile of the chi"
},
{
"path": "ultralytics/tracker/utils/matching.py",
"chars": 8696,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nimport numpy as np\nimport scipy\nfrom scipy.spatial.distance import cdist\n\nfrom ."
},
{
"path": "ultralytics/vit/__init__.py",
"chars": 139,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nfrom .rtdetr import RTDETR\nfrom .sam import SAM\n\n__all__ = 'RTDETR', 'SAM' # al"
},
{
"path": "ultralytics/vit/rtdetr/__init__.py",
"chars": 194,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nfrom .model import RTDETR\nfrom .predict import RTDETRPredictor\nfrom .val import "
},
{
"path": "ultralytics/vit/rtdetr/model.py",
"chars": 7407,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\"\"\"\nRT-DETR model interface\n\"\"\"\n\nfrom pathlib import Path\n\nimport torch.nn as nn\n"
},
{
"path": "ultralytics/vit/rtdetr/predict.py",
"chars": 1899,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nimport torch\n\nfrom ultralytics.yolo.data.augment import LetterBox\nfrom ultralyti"
},
{
"path": "ultralytics/vit/rtdetr/train.py",
"chars": 2947,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nfrom copy import copy\n\nimport torch\n\nfrom ultralytics.nn.tasks import RTDETRDete"
},
{
"path": "ultralytics/vit/rtdetr/val.py",
"chars": 6580,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nfrom pathlib import Path\n\nimport cv2\nimport numpy as np\nimport torch\n\nfrom ultra"
},
{
"path": "ultralytics/vit/sam/__init__.py",
"chars": 170,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nfrom .build import build_sam # noqa\nfrom .model import SAM # noqa\nfrom .module"
},
{
"path": "ultralytics/vit/sam/amg.py",
"chars": 13293,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nimport math\nfrom copy import deepcopy\nfrom itertools import product\nfrom typing "
},
{
"path": "ultralytics/vit/sam/autosize.py",
"chars": 3918,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\n# Copyright (c) Meta Platforms, Inc. and affiliates.\n# All rights reserved.\n\n# T"
},
{
"path": "ultralytics/vit/sam/build.py",
"chars": 3821,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\n# Copyright (c) Meta Platforms, Inc. and affiliates.\n# All rights reserved.\n\n# T"
},
{
"path": "ultralytics/vit/sam/model.py",
"chars": 2361,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\"\"\"\nSAM model interface\n\"\"\"\n\nfrom ultralytics.yolo.cfg import get_cfg\n\nfrom ...yo"
},
{
"path": "ultralytics/vit/sam/modules/__init__.py",
"chars": 39,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n"
},
{
"path": "ultralytics/vit/sam/modules/decoders.py",
"chars": 6369,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nfrom typing import List, Tuple, Type\n\nimport torch\nfrom torch import nn\nfrom tor"
},
{
"path": "ultralytics/vit/sam/modules/encoders.py",
"chars": 22542,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nfrom typing import Any, Optional, Tuple, Type\n\nimport numpy as np\nimport torch\ni"
},
{
"path": "ultralytics/vit/sam/modules/mask_generator.py",
"chars": 15290,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\n# Copyright (c) Meta Platforms, Inc. and affiliates.\n# All rights reserved.\n\n# T"
},
{
"path": "ultralytics/vit/sam/modules/prompt_predictor.py",
"chars": 11241,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nfrom typing import Optional, Tuple\n\nimport numpy as np\nimport torch\n\nfrom ..auto"
},
{
"path": "ultralytics/vit/sam/modules/sam.py",
"chars": 7306,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\n# Copyright (c) Meta Platforms, Inc. and affiliates.\n# All rights reserved.\n\n# T"
},
{
"path": "ultralytics/vit/sam/modules/transformer.py",
"chars": 8529,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nimport math\nfrom typing import Tuple, Type\n\nimport torch\nfrom torch import Tenso"
},
{
"path": "ultralytics/vit/sam/predict.py",
"chars": 2179,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nimport numpy as np\nimport torch\n\nfrom ultralytics.yolo.engine.predictor import B"
},
{
"path": "ultralytics/vit/utils/__init__.py",
"chars": 39,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n"
},
{
"path": "ultralytics/vit/utils/loss.py",
"chars": 13187,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\n\nfrom ultraly"
},
{
"path": "ultralytics/vit/utils/ops.py",
"chars": 13001,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom scipy.op"
},
{
"path": "ultralytics/yolo/__init__.py",
"chars": 91,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nfrom . import v8\n\n__all__ = 'v8', # tuple or list\n"
},
{
"path": "ultralytics/yolo/cfg/__init__.py",
"chars": 18224,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nimport contextlib\nimport re\nimport shutil\nimport sys\nfrom difflib import get_clo"
},
{
"path": "ultralytics/yolo/cfg/default.yaml",
"chars": 7299,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n# Default training settings and hyperparameters for medium-augmentation COCO trai"
},
{
"path": "ultralytics/yolo/data/__init__.py",
"chars": 455,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nfrom .base import BaseDataset\nfrom .build import build_dataloader, build_yolo_da"
},
{
"path": "ultralytics/yolo/data/annotator.py",
"chars": 2341,
"preview": "from pathlib import Path\n\nfrom ultralytics import YOLO\nfrom ultralytics.vit.sam import PromptPredictor, build_sam\nfrom u"
},
{
"path": "ultralytics/yolo/data/augment.py",
"chars": 37192,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nimport math\nimport random\nfrom copy import deepcopy\n\nimport cv2\nimport numpy as "
},
{
"path": "ultralytics/yolo/data/base.py",
"chars": 12640,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nimport glob\nimport math\nimport os\nimport random\nfrom copy import deepcopy\nfrom m"
},
{
"path": "ultralytics/yolo/data/build.py",
"chars": 6571,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nimport os\nimport random\nfrom pathlib import Path\n\nimport numpy as np\nimport torc"
},
{
"path": "ultralytics/yolo/data/converter.py",
"chars": 9190,
"preview": "import json\nfrom collections import defaultdict\nfrom pathlib import Path\n\nimport cv2\nimport numpy as np\nfrom tqdm import"
},
{
"path": "ultralytics/yolo/data/dataloaders/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "ultralytics/yolo/data/dataloaders/stream_loaders.py",
"chars": 14896,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nimport glob\nimport math\nimport os\nimport time\nfrom dataclasses import dataclass\n"
},
{
"path": "ultralytics/yolo/data/dataloaders/v5augmentations.py",
"chars": 17639,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\"\"\"\nImage augmentation functions\n\"\"\"\n\nimport math\nimport random\n\nimport cv2\nimpor"
},
{
"path": "ultralytics/yolo/data/dataloaders/v5loader.py",
"chars": 51219,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\"\"\"\nDataloaders and dataset utils\n\"\"\"\n\nimport contextlib\nimport glob\nimport hashl"
},
{
"path": "ultralytics/yolo/data/dataset.py",
"chars": 13352,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nfrom itertools import repeat\nfrom multiprocessing.pool import ThreadPool\nfrom pa"
},
{
"path": "ultralytics/yolo/data/dataset_wrappers.py",
"chars": 1773,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nimport collections\nfrom copy import deepcopy\n\nfrom .augment import LetterBox\n\n\nc"
},
{
"path": "ultralytics/yolo/data/scripts/download_weights.sh",
"chars": 540,
"preview": "#!/bin/bash\n# Ultralytics YOLO 🚀, AGPL-3.0 license\n# Download latest models from https://github.com/ultralytics/assets/r"
},
{
"path": "ultralytics/yolo/data/scripts/get_coco.sh",
"chars": 1703,
"preview": "#!/bin/bash\n# Ultralytics YOLO 🚀, AGPL-3.0 license\n# Download COCO 2017 dataset http://cocodataset.org\n# Example usage: "
},
{
"path": "ultralytics/yolo/data/scripts/get_coco128.sh",
"chars": 596,
"preview": "#!/bin/bash\n# Ultralytics YOLO 🚀, AGPL-3.0 license\n# Download COCO128 dataset https://www.kaggle.com/ultralytics/coco128"
},
{
"path": "ultralytics/yolo/data/scripts/get_imagenet.sh",
"chars": 1649,
"preview": "#!/bin/bash\n# Ultralytics YOLO 🚀, AGPL-3.0 license\n# Download ILSVRC2012 ImageNet dataset https://image-net.org\n# Exampl"
},
{
"path": "ultralytics/yolo/data/utils.py",
"chars": 24007,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nimport contextlib\nimport hashlib\nimport json\nimport os\nimport subprocess\nimport "
},
{
"path": "ultralytics/yolo/engine/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "ultralytics/yolo/engine/exporter.py",
"chars": 40971,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\"\"\"\nExport a YOLOv8 PyTorch model to other formats. TensorFlow exports authored b"
},
{
"path": "ultralytics/yolo/engine/model.py",
"chars": 22252,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nimport sys\nfrom pathlib import Path\nfrom typing import Union\n\nfrom ultralytics i"
},
{
"path": "ultralytics/yolo/engine/predictor.py",
"chars": 16386,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\"\"\"\nRun prediction on images, videos, directories, globs, YouTube, webcam, stream"
},
{
"path": "ultralytics/yolo/engine/results.py",
"chars": 24263,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\"\"\"\nUltralytics Results, Boxes and Masks classes for handling inference results\n\n"
},
{
"path": "ultralytics/yolo/engine/trainer.py",
"chars": 31289,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\"\"\"\nTrain a model on a dataset\n\nUsage:\n $ yolo mode=train model=yolov8n.pt dat"
},
{
"path": "ultralytics/yolo/engine/validator.py",
"chars": 11710,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\"\"\"\nCheck a model's accuracy on a test or val split of a dataset\n\nUsage:\n $ yo"
},
{
"path": "ultralytics/yolo/nas/__init__.py",
"chars": 176,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nfrom .model import NAS\nfrom .predict import NASPredictor\nfrom .val import NASVal"
},
{
"path": "ultralytics/yolo/nas/model.py",
"chars": 5220,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\"\"\"\nYOLO-NAS model interface.\n\nUsage - Predict:\n from ultralytics import NAS\n\n"
},
{
"path": "ultralytics/yolo/nas/predict.py",
"chars": 1462,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nimport torch\n\nfrom ultralytics.yolo.engine.predictor import BasePredictor\nfrom u"
},
{
"path": "ultralytics/yolo/nas/val.py",
"chars": 950,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nimport torch\n\nfrom ultralytics.yolo.utils import ops\nfrom ultralytics.yolo.utils"
},
{
"path": "ultralytics/yolo/utils/__init__.py",
"chars": 28096,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nimport contextlib\nimport inspect\nimport logging.config\nimport os\nimport platform"
},
{
"path": "ultralytics/yolo/utils/autobatch.py",
"chars": 3857,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\"\"\"\nFunctions for estimating the best YOLO batch size to use a fraction of the av"
},
{
"path": "ultralytics/yolo/utils/benchmarks.py",
"chars": 15861,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\"\"\"\nBenchmark a YOLO model formats for speed and accuracy\n\nUsage:\n from ultral"
},
{
"path": "ultralytics/yolo/utils/callbacks/__init__.py",
"chars": 211,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nfrom .base import add_integration_callbacks, default_callbacks, get_default_call"
},
{
"path": "ultralytics/yolo/utils/callbacks/base.py",
"chars": 5590,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\"\"\"\nBase callbacks\n\"\"\"\n\nfrom collections import defaultdict\nfrom copy import deep"
},
{
"path": "ultralytics/yolo/utils/callbacks/clearml.py",
"chars": 5895,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nimport re\n\nimport matplotlib.image as mpimg\nimport matplotlib.pyplot as plt\n\nfro"
},
{
"path": "ultralytics/yolo/utils/callbacks/comet.py",
"chars": 13038,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nimport os\nfrom pathlib import Path\n\nfrom ultralytics.yolo.utils import LOGGER, R"
},
{
"path": "ultralytics/yolo/utils/callbacks/dvc.py",
"chars": 4294,
"preview": "# Ultralytics YOLO 🚀, GPL-3.0 license\nimport os\n\nimport pkg_resources as pkg\n\nfrom ultralytics.yolo.utils import LOGGER,"
},
{
"path": "ultralytics/yolo/utils/callbacks/hub.py",
"chars": 3299,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nimport json\nfrom time import time\n\nfrom ultralytics.hub.utils import PREFIX, eve"
},
{
"path": "ultralytics/yolo/utils/callbacks/mlflow.py",
"chars": 2529,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nimport os\nimport re\nfrom pathlib import Path\n\nfrom ultralytics.yolo.utils import"
},
{
"path": "ultralytics/yolo/utils/callbacks/neptune.py",
"chars": 3672,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nimport matplotlib.image as mpimg\nimport matplotlib.pyplot as plt\n\nfrom ultralyti"
},
{
"path": "ultralytics/yolo/utils/callbacks/raytune.py",
"chars": 492,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\ntry:\n import ray\n from ray import tune\n from ray.air import session\nexc"
},
{
"path": "ultralytics/yolo/utils/callbacks/tensorboard.py",
"chars": 1518,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nfrom ultralytics.yolo.utils import LOGGER, TESTS_RUNNING, colorstr\n\ntry:\n fro"
},
{
"path": "ultralytics/yolo/utils/callbacks/wb.py",
"chars": 2167,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\nfrom ultralytics.yolo.utils import TESTS_RUNNING\nfrom ultralytics.yolo.utils.torc"
},
{
"path": "ultralytics/yolo/utils/checks.py",
"chars": 17548,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\nimport contextlib\nimport glob\nimport inspect\nimport math\nimport os\nimport platfor"
},
{
"path": "ultralytics/yolo/utils/dist.py",
"chars": 2593,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nimport os\nimport re\nimport shutil\nimport socket\nimport sys\nimport tempfile\nfrom "
},
{
"path": "ultralytics/yolo/utils/downloads.py",
"chars": 12099,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nimport contextlib\nimport shutil\nimport subprocess\nfrom itertools import repeat\nf"
},
{
"path": "ultralytics/yolo/utils/errors.py",
"chars": 314,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nfrom ultralytics.yolo.utils import emojis\n\n\nclass HUBModelError(Exception):\n\n "
},
{
"path": "ultralytics/yolo/utils/files.py",
"chars": 3585,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nimport contextlib\nimport glob\nimport os\nimport shutil\nfrom datetime import datet"
},
{
"path": "ultralytics/yolo/utils/instance.py",
"chars": 14619,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nfrom collections import abc\nfrom itertools import repeat\nfrom numbers import Num"
},
{
"path": "ultralytics/yolo/utils/loss.py",
"chars": 19154,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\n\nfrom ultraly"
},
{
"path": "ultralytics/yolo/utils/metrics.py",
"chars": 42285,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\"\"\"\nModel validation metrics\n\"\"\"\nimport math\nimport warnings\nfrom pathlib import "
},
{
"path": "ultralytics/yolo/utils/ops.py",
"chars": 28168,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nimport contextlib\nimport math\nimport re\nimport time\n\nimport cv2\nimport numpy as "
},
{
"path": "ultralytics/yolo/utils/patches.py",
"chars": 1238,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\"\"\"\nMonkey patches to update/extend functionality of existing functions\n\"\"\"\n\nfrom"
},
{
"path": "ultralytics/yolo/utils/plotting.py",
"chars": 24382,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nimport contextlib\nimport math\nfrom pathlib import Path\n\nimport cv2\nimport matplo"
},
{
"path": "ultralytics/yolo/utils/tal.py",
"chars": 13642,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nimport torch\nimport torch.nn as nn\n\nfrom .checks import check_version\nfrom .metr"
},
{
"path": "ultralytics/yolo/utils/torch_utils.py",
"chars": 22332,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nimport math\nimport os\nimport platform\nimport random\nimport time\nfrom contextlib "
},
{
"path": "ultralytics/yolo/utils/tuner.py",
"chars": 2297,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nfrom ultralytics.yolo.utils import LOGGER\n\ntry:\n from ray import tune\n fro"
},
{
"path": "ultralytics/yolo/v8/__init__.py",
"chars": 155,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nfrom ultralytics.yolo.v8 import classify, detect, pose, segment\n\n__all__ = 'clas"
},
{
"path": "ultralytics/yolo/v8/classify/__init__.py",
"chars": 388,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nfrom ultralytics.yolo.v8.classify.predict import ClassificationPredictor, predic"
},
{
"path": "ultralytics/yolo/v8/classify/predict.py",
"chars": 1926,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nimport torch\n\nfrom ultralytics.yolo.engine.predictor import BasePredictor\nfrom u"
},
{
"path": "ultralytics/yolo/v8/classify/train.py",
"chars": 6872,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nimport torch\nimport torchvision\n\nfrom ultralytics.nn.tasks import Classification"
},
{
"path": "ultralytics/yolo/v8/classify/val.py",
"chars": 4673,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nimport torch\n\nfrom ultralytics.yolo.data import ClassificationDataset, build_dat"
},
{
"path": "ultralytics/yolo/v8/detect/__init__.py",
"chars": 274,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nfrom .predict import DetectionPredictor, predict\nfrom .train import DetectionTra"
},
{
"path": "ultralytics/yolo/v8/detect/predict.py",
"chars": 1851,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nimport torch\n\nfrom ultralytics.yolo.engine.predictor import BasePredictor\nfrom u"
},
{
"path": "ultralytics/yolo/v8/detect/train.py",
"chars": 7188,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\nfrom copy import copy\n\nimport numpy as np\n\nfrom ultralytics.nn.tasks import Detec"
},
{
"path": "ultralytics/yolo/v8/detect/val.py",
"chars": 14719,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nimport os\nfrom pathlib import Path\n\nimport numpy as np\nimport torch\n\nfrom ultral"
},
{
"path": "ultralytics/yolo/v8/pose/__init__.py",
"chars": 244,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nfrom .predict import PosePredictor, predict\nfrom .train import PoseTrainer, trai"
},
{
"path": "ultralytics/yolo/v8/pose/predict.py",
"chars": 2372,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nfrom ultralytics.yolo.engine.results import Results\nfrom ultralytics.yolo.utils "
},
{
"path": "ultralytics/yolo/v8/pose/train.py",
"chars": 2787,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nfrom copy import copy\n\nfrom ultralytics.nn.tasks import PoseModel\nfrom ultralyti"
},
{
"path": "ultralytics/yolo/v8/pose/val.py",
"chars": 10924,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nfrom pathlib import Path\n\nimport numpy as np\nimport torch\n\nfrom ultralytics.yolo"
},
{
"path": "ultralytics/yolo/v8/segment/__init__.py",
"chars": 292,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nfrom .predict import SegmentationPredictor, predict\nfrom .train import Segmentat"
},
{
"path": "ultralytics/yolo/v8/segment/predict.py",
"chars": 2836,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nimport torch\n\nfrom ultralytics.yolo.engine.results import Results\nfrom ultralyti"
},
{
"path": "ultralytics/yolo/v8/segment/train.py",
"chars": 2496,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\nfrom copy import copy\n\nfrom ultralytics.nn.tasks import SegmentationModel\nfrom ul"
},
{
"path": "ultralytics/yolo/v8/segment/val.py",
"chars": 12903,
"preview": "# Ultralytics YOLO 🚀, AGPL-3.0 license\n\nfrom multiprocessing.pool import ThreadPool\nfrom pathlib import Path\n\nimport num"
},
{
"path": "utils/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "utils/tools.py",
"chars": 16107,
"preview": "import numpy as np\r\nfrom PIL import Image\r\nimport matplotlib.pyplot as plt\r\nimport cv2\r\nimport torch\r\nimport os\r\nimport "
},
{
"path": "utils/tools_gradio.py",
"chars": 6187,
"preview": "import numpy as np\r\nfrom PIL import Image\r\nimport matplotlib.pyplot as plt\r\nimport cv2\r\nimport torch\r\n\r\n\r\ndef fast_proce"
}
]
About this extraction
This page contains the full source code of the CASIA-LMC-Lab/FastSAM GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 179 files (1.3 MB), approximately 360.7k tokens, and a symbol index with 1574 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.