Repository: ArmindoFlores/ao3_api
Branch: master
Commit: 02e349985d92
Files: 36
Total size: 166.2 KB
Directory structure:
gitextract_gr15a5tc/
├── AO3/
│ ├── __init__.py
│ ├── chapters.py
│ ├── comments.py
│ ├── common.py
│ ├── extra.py
│ ├── requester.py
│ ├── search.py
│ ├── series.py
│ ├── session.py
│ ├── threadable.py
│ ├── users.py
│ ├── utils.py
│ └── works.py
├── LICENSE
├── README.md
├── dist/
│ ├── ao3_api-2.0.0-py3-none-any.whl
│ ├── ao3_api-2.0.1-py3-none-any.whl
│ ├── ao3_api-2.0.2-py3-none-any.whl
│ ├── ao3_api-2.0.3-py3-none-any.whl
│ ├── ao3_api-2.0.4-py3-none-any.whl
│ ├── ao3_api-2.0.5-py3-none-any.whl
│ ├── ao3_api-2.0.6-py3-none-any.whl
│ ├── ao3_api-2.0.7-py3-none-any.whl
│ ├── ao3_api-2.0.8-py3-none-any.whl
│ ├── ao3_api-2.1.0-py3-none-any.whl
│ ├── ao3_api-2.1.1-py3-none-any.whl
│ ├── ao3_api-2.1.2-py3-none-any.whl
│ ├── ao3_api-2.2.0-py3-none-any.whl
│ ├── ao3_api-2.2.1-py3-none-any.whl
│ ├── ao3_api-2.3.0-py3-none-any.whl
│ └── ao3_api-2.3.1-py3-none-any.whl
├── docs/
│ ├── index.md
│ ├── install.md
│ └── use.md
├── mkdocs.yml
└── pyproject.toml
================================================
FILE CONTENTS
================================================
================================================
FILE: AO3/__init__.py
================================================
from . import extra, utils
from .chapters import Chapter
from .comments import Comment
from .search import Search
from .series import Series
from .session import GuestSession, Session
from .users import User
from .works import Work
# Package version string.
# NOTE(review): dist/ contains ao3_api-2.3.1 wheels -- confirm this should be "2.3.1".
VERSION = "2.3.0"
================================================
FILE: AO3/chapters.py
================================================
from functools import cached_property
import bs4
from bs4 import BeautifulSoup
from . import threadable, utils
from .comments import Comment
from .requester import requester
from .users import User
class Chapter:
    """
    AO3 chapter object
    """

    def __init__(self, chapterid, work, session=None, load=True):
        """Creates a new chapter object.

        Args:
            chapterid (int/None): Chapter ID; None represents a oneshot (single-chapter work)
            work (Work): Work this chapter belongs to (may be None; reload() can discover it)
            session (AO3.Session/AO3.GuestSession, optional): Session used for requests. Defaults to None.
            load (bool, optional): If True, load the chapter on initialization. Defaults to True.
        """
        self._session = session
        self._work = work
        self.id = chapterid
        # Parsed HTML for this chapter; None until loaded
        self._soup = None
        if load:
            self.reload()

    def __repr__(self):
        # NOTE(review): the oneshot branch omits the surrounding <> used by the
        # other branches -- confirm whether that is intentional
        if self.id is None:
            return f"Chapter [ONESHOT] from [{self.work}]"
        try:
            return f"<Chapter [{self.title} ({self.number})] from [{self.work}]>"
        except:
            # title/number are cached properties that fail when the chapter
            # isn't loaded; fall back to the raw ID
            return f"<Chapter [{self.id}] from [{self.work}]>"

    def __eq__(self, other):
        # Chapters are considered equal when their IDs match
        return isinstance(other, __class__) and other.id == self.id

    def __getstate__(self):
        # Pickling support: BeautifulSoup objects can't be pickled directly,
        # so encode them to bytes and tag each attribute with an "is soup" flag
        d = {}
        for attr in self.__dict__:
            if isinstance(self.__dict__[attr], BeautifulSoup):
                d[attr] = (self.__dict__[attr].encode(), True)
            else:
                d[attr] = (self.__dict__[attr], False)
        return d

    def __setstate__(self, d):
        # Unpickling: re-parse encoded soups, restore everything else as-is
        for attr in d:
            value, issoup = d[attr]
            if issoup:
                self.__dict__[attr] = BeautifulSoup(value, "lxml")
            else:
                self.__dict__[attr] = value

    def set_session(self, session):
        """Sets the session used to make requests for this chapter

        Args:
            session (AO3.Session/AO3.GuestSession): session object
        """
        self._session = session

    @threadable.threadable
    def reload(self):
        """
        Loads information about this chapter.
        This function is threadable.
        """
        from .works import Work  # local import avoids a circular dependency

        # Drop every previously cached property so it is recomputed on access
        for attr in self.__class__.__dict__:
            if isinstance(getattr(self.__class__, attr), cached_property):
                if attr in self.__dict__:
                    delattr(self, attr)
        if self.work is None:
            # Only the chapter ID is known: fetch the chapter page to
            # discover the work it belongs to
            soup = self.request(f"https://archiveofourown.org/chapters/{self.id}?view_adult=true")
            workid = soup.find("li", {"class": "chapter entire"})
            if workid is None:
                raise utils.InvalidIdError("Cannot find work")
            # NOTE(review): this branch sets _work but never sets _soup from
            # the page it just fetched -- confirm Work() loading covers it
            self._work = Work(utils.workid_from_url(workid.a["href"]))
        else:
            # Reload the parent work and adopt the matching chapter's soup
            self.work.reload()
            for chapter in self.work.chapters:
                if chapter == self:
                    self._soup = chapter._soup

    @threadable.threadable
    def comment(self, comment_text, email="", name="", pseud=None):
        """Leaves a comment on this chapter.
        This function is threadable.

        Args:
            comment_text (str): Comment text

        Raises:
            utils.UnloadedError: Couldn't load chapters
            utils.AuthError: Invalid session

        Returns:
            requests.models.Response: Response object
        """
        # Oneshots have no chapter ID: delegate to the parent work
        if self.id is None:
            return self._work.comment(comment_text, email, name, pseud)
        if not self.loaded:
            raise utils.UnloadedError("Chapter isn't loaded. Have you tried calling Chapter.reload()?")
        if self._session is None:
            raise utils.AuthError("Invalid session")
        if self.id is not None:
            return utils.comment(self, comment_text, self._session, False, email=email, name=name, pseud=pseud)

    def get_comments(self, maximum=None):
        """Returns a list of all threads of comments in the chapter. This operation can take a very long time.
        Because of that, it is recomended that you set a maximum number of comments.
        Duration: ~ (0.13 * n_comments) seconds or 2.9 seconds per comment page

        Args:
            maximum (int, optional): Maximum number of comments to be returned. None -> No maximum

        Raises:
            ValueError: Invalid chapter number
            IndexError: Invalid chapter number
            utils.UnloadedError: Chapter isn't loaded

        Returns:
            list: List of comments
        """
        # Oneshots delegate to the parent work
        if self.id is None:
            return self._work.get_comments(maximum=maximum)
        if not self.loaded:
            raise utils.UnloadedError("Chapter isn't loaded. Have you tried calling Chapter.reload()?")
        url = f"https://archiveofourown.org/chapters/{self.id}?page=%d&show_comments=true&view_adult=true"
        soup = self.request(url%1)
        # Determine how many comment pages exist from the pagination widget
        pages = 0
        div = soup.find("div", {"id": "comments_placeholder"})
        ol = div.find("ol", {"class": "pagination actions"})
        if ol is None:
            pages = 1
        else:
            # The last numeric entry in the pagination list is the page count
            for li in ol.findAll("li"):
                if li.getText().isdigit():
                    pages = int(li.getText())
        comments = []
        for page in range(pages):
            if page != 0:
                soup = self.request(url%(page+1))
            ol = soup.find("ol", {"class": "thread"})
            # Only top-level comments carry role="article"; replies are nested
            for li in ol.findAll("li", {"role": "article"}, recursive=False):
                if maximum is not None and len(comments) >= maximum:
                    return comments
                # The li id is of the form "comment_<id>"; strip the prefix
                id_ = int(li.attrs["id"][8:])
                header = li.find("h4", {"class": ("heading", "byline")})
                if header is None:
                    author = None
                else:
                    author = User(str(header.a.text), self._session, False)
                if li.blockquote is not None:
                    text = li.blockquote.getText()
                else:
                    text = ""
                # Build a lightweight, unloaded comment pre-populated with
                # the data already present in the page
                comment = Comment(id_, self, session=self._session, load=False)
                setattr(comment, "authenticity_token", self.authenticity_token)
                setattr(comment, "author", author)
                setattr(comment, "text", text)
                comment._thread = None
                comments.append(comment)
        return comments

    def get_images(self):
        """Gets all images from this work

        Raises:
            utils.UnloadedError: Raises this error if the chapter isn't loaded

        Returns:
            tuple: Pairs of image urls and the paragraph number
        """
        div = self._soup.find("div", {"class": "userstuff"})
        images = []
        # line counts paragraphs, not source lines
        line = 0
        for p in div.findAll("p"):
            line += 1
            for img in p.findAll("img"):
                if "src" in img.attrs:
                    images.append((img.attrs["src"], line))
        return tuple(images)

    @property
    def loaded(self):
        """Returns True if this chapter has been loaded"""
        return self._soup is not None

    @property
    def authenticity_token(self):
        """Token used to take actions that involve this work"""
        return self.work.authenticity_token

    @property
    def work(self):
        """Work this chapter is a part of"""
        return self._work

    @cached_property
    def text(self):
        """This chapter's text"""
        text = ""
        if self.id is not None:
            div = self._soup.find("div", {"role": "article"})
        else:
            # Oneshots: the stored soup already is the chapter body
            div = self._soup
        for p in div.findAll(("p", "center")):
            text += p.getText().replace("\n", "") + "\n"
            # Preserve loose text sitting between paragraph tags
            if isinstance(p.next_sibling, bs4.element.NavigableString):
                text += str(p.next_sibling)
        return text

    @cached_property
    def title(self):
        """This chapter's title"""
        if self.id is None:
            return self.work.title
        preface_group = self._soup.find("div", {"class": ("chapter", "preface", "group")})
        if preface_group is None:
            return str(self.number)
        title = preface_group.find("h3", {"class": "title"})
        if title is None:
            return str(self.number)
        # The heading reads "Chapter N: Title"; the last string fragment is
        # ": Title", so drop the leading ": " -- TODO confirm against AO3 markup
        return tuple(title.strings)[-1].strip()[2:]

    @cached_property
    def number(self):
        """This chapter's number"""
        if self.id is None:
            return 1
        # The chapter div id ends with "-<number>"
        return int(self._soup["id"].split("-")[-1])

    @cached_property
    def words(self):
        """Number of words from this chapter"""
        return utils.word_count(self.text)

    @cached_property
    def summary(self):
        """Text from this chapter's summary"""
        notes = self._soup.find("div", {"id": "summary"})
        if notes is None:
            return ""
        text = ""
        for p in notes.findAll("p"):
            text += p.getText() + "\n"
        return text

    @cached_property
    def start_notes(self):
        """Text from this chapter's start notes"""
        notes = self._soup.find("div", {"id": "notes"})
        if notes is None:
            return ""
        text = ""
        for p in notes.findAll("p"):
            text += p.getText().strip() + "\n"
        return text

    @cached_property
    def end_notes(self):
        """Text from this chapter's end notes"""
        notes = self._soup.find("div", {"id": f"chapter_{self.number}_endnotes"})
        if notes is None:
            return ""
        text = ""
        for p in notes.findAll("p"):
            text += p.getText() + "\n"
        return text

    @cached_property
    def url(self):
        """Returns the URL to this chapter

        Returns:
            str: chapter URL
        """
        return f"https://archiveofourown.org/works/{self._work.id}/chapters/{self.id}"

    def request(self, url):
        """Request a web page and return a BeautifulSoup object.

        Args:
            url (str): Url to request

        Returns:
            bs4.BeautifulSoup: BeautifulSoup object representing the requested page's html
        """
        req = self.get(url)
        soup = BeautifulSoup(req.content, "lxml")
        return soup

    def get(self, *args, **kwargs):
        """Request a web page and return a Response object"""
        if self._session is None:
            req = requester.request("get", *args, **kwargs)
        else:
            req = requester.request("get", *args, **kwargs, session=self._session.session)
        if req.status_code == 429:
            raise utils.HTTPError("We are being rate-limited. Try again in a while or reduce the number of requests")
        return req
================================================
FILE: AO3/comments.py
================================================
from functools import cached_property
from bs4 import BeautifulSoup
from . import threadable, utils
from .requester import requester
from .users import User
class Comment:
    """
    AO3 comment object
    """

    def __init__(self, comment_id, parent=None, parent_comment=None, session=None, load=True):
        """Creates a new AO3 comment object

        Args:
            comment_id (int/str): Comment ID
            parent (Work/Chapter, optional): Parent object (where the comment is posted). Defaults to None.
            parent_comment (Comment, optional): Parent comment. Defaults to None.
            session (Session/GuestSession, optional): Session object
            load (boolean, optional): If true, the comment is loaded on initialization. Defaults to True.
        """
        self.id = comment_id
        self.parent = parent
        self.parent_comment = parent_comment
        self.authenticity_token = None
        # Cached list of replies; None means "not fetched yet"
        self._thread = None
        self._session = session
        # Privately held soup; see the _soup property for the fallback logic
        self.__soup = None
        if load:
            self.reload()

    def __repr__(self):
        return f"<Comment [{self.id}] on [{self.parent}]>"

    @property
    def _soup(self):
        # Replies created by _get_thread are never loaded directly; they share
        # the page soup of the comment that was actually fetched
        if self.__soup is None:
            if self.parent_comment is None:
                return None
            return self.parent_comment._soup
        return self.__soup

    @property
    def first_parent_comment(self):
        """Root comment of this comment's reply chain (itself if top-level)."""
        if self.parent_comment is None:
            return self
        else:
            return self.parent_comment.first_parent_comment

    @property
    def fullwork(self):
        """True if the parent is a whole Work, False if a Chapter, None if unset."""
        from .works import Work
        if self.parent is None:
            return None
        return isinstance(self.parent, Work)

    @cached_property
    def author(self):
        """Comment author"""
        li = self._soup.find("li", {"id": f"comment_{self.id}"})
        header = li.find("h4", {"class": ("heading", "byline")})
        if header is None:
            author = None
        else:
            author = User(str(header.a.text), self._session, False)
        return author

    @cached_property
    def text(self):
        """Comment text"""
        li = self._soup.find("li", {"id": f"comment_{self.id}"})
        if li.blockquote is not None:
            text = li.blockquote.getText()
        else:
            text = ""
        return text

    def get_thread(self):
        """Returns all the replies to this comment, and all subsequent replies recursively.
        Also loads any parent comments this comment might have.

        Raises:
            utils.InvalidIdError: The specified comment_id was invalid

        Returns:
            list: Thread
        """
        if self._thread is not None:
            return self._thread
        else:
            if self._soup is None:
                self.reload()
            nav = self._soup.find("ul", {"id": f"navigation_for_comment_{self.id}"})
            for li in nav.findAll("li"):
                # A "Parent Thread" link means this comment is a reply: load
                # the root, find ourselves in it, and splice this object in
                # place of the placeholder the root created for us
                if li.getText() == "\nParent Thread\n":
                    id_ = int(li.a["href"].split("/")[-1])
                    parent = Comment(id_, session=self._session)
                    for comment in parent.get_thread_iterator():
                        if comment.id == self.id:
                            index = comment.parent_comment._thread.index(comment)
                            comment.parent_comment._thread.pop(index)
                            comment.parent_comment._thread.insert(index, self)
                            self._thread = comment._thread
                            self.parent_comment = comment.parent_comment
                            del comment
                            return self._thread
            thread = self._soup.find("ol", {"class": "thread"})
            if thread is None:
                self._thread = []
                return self._thread
            self._get_thread(None, thread)
            if self._thread is None:
                self._thread = []
            return self._thread

    def _get_thread(self, parent, soup):
        """Recursively builds the reply tree from the thread <ol> markup.

        When parent is None the first <li> is this comment itself; nested
        <ol> elements are recursed into with the preceding comment as parent.
        """
        comments = soup.findAll("li", recursive=False)
        # l[0] is the comment owning the sub-thread currently being parsed
        l = [self] if parent is None else []
        for comment in comments:
            if "role" in comment.attrs:
                # An actual comment entry (li id is "comment_<id>")
                id_ = int(comment.attrs["id"][8:])
                c = Comment(id_, self.parent, session=self._session, load=False)
                c.authenticity_token = self.authenticity_token
                c._thread = []
                if parent is not None:
                    c.parent_comment = parent
                    if comment.blockquote is not None:
                        text = comment.blockquote.getText()
                    else:
                        text = ""
                    if comment.a is not None:
                        author = User(comment.a.getText(), load=False)
                    else:
                        author = None
                    # Pre-fill the cached properties so no extra request is needed
                    setattr(c, "text", text)
                    setattr(c, "author", author)
                    l.append(c)
                else:
                    # Top level: this entry describes self, not a new comment
                    c.parent_comment = self
                    if comment.blockquote is not None:
                        text = comment.blockquote.getText()
                    else:
                        text = ""
                    if comment.a is not None:
                        author = User(comment.a.getText(), load=False)
                    else:
                        author = None
                    setattr(l[0], "text", text)
                    setattr(l[0], "author", author)
            else:
                # A nested <ol>: replies to the most recently parsed comment
                self._get_thread(l[-1], comment.ol)
        if parent is not None:
            parent._thread = l

    def get_thread_iterator(self):
        """Returns a generator that allows you to iterate through the entire thread

        Returns:
            generator: The generator object
        """
        return threadIterator(self)

    @threadable.threadable
    def reply(self, comment_text, email="", name=""):
        """Replies to a comment.
        This function is threadable.

        Args:
            comment_text (str): Comment text
            email (str, optional): Email. Defaults to "".
            name (str, optional): Name. Defaults to "".

        Raises:
            utils.InvalidIdError: Invalid ID
            utils.UnexpectedResponseError: Unknown error
            utils.PseudoError: Couldn't find a valid pseudonym to post under
            utils.DuplicateCommentError: The comment you're trying to post was already posted
            ValueError: Invalid name/email
            ValueError: self.parent cannot be None

        Returns:
            requests.models.Response: Response object
        """
        if self.parent is None:
            raise ValueError("self.parent cannot be 'None'")
        return utils.comment(self.parent, comment_text, self._session, self.fullwork, self.id, email, name)

    @threadable.threadable
    def reload(self):
        """Loads all comment properties
        This function is threadable.
        """
        from .works import Work  # local import avoids a circular dependency

        # Invalidate previously cached property values
        for attr in self.__class__.__dict__:
            if isinstance(getattr(self.__class__, attr), cached_property):
                if attr in self.__dict__:
                    delattr(self, attr)
        req = self.get(f"https://archiveofourown.org/comments/{self.id}")
        self.__soup = BeautifulSoup(req.content, features="lxml")
        token = self.__soup.find("meta", {"name": "csrf-token"})
        self.authenticity_token = token["content"]
        self._thread = None
        li = self._soup.find("li", {"id": f"comment_{self.id}"})
        reply_link = li.find("li", {"id": f"add_comment_reply_link_{self.id}"})
        if self.parent is None:
            if reply_link is not None:
                # Recover the chapter this comment belongs to from the reply
                # link's query string
                # NOTE(review): this stores a bare chapter id (int) in
                # self.parent, not a Chapter object -- confirm downstream use
                fields = [field.split("=") for field in reply_link.a["href"].split("?")[-1].split("&")]
                for key, value in fields:
                    if key == "chapter_id":
                        self.parent = int(value)
                        break
        self.parent_comment = None

    @threadable.threadable
    def delete(self):
        """Deletes this comment.
        This function is threadable.

        Raises:
            PermissionError: You don't have permission to delete the comment
            utils.AuthError: Invalid auth token
            utils.UnexpectedResponseError: Unknown error
        """
        utils.delete_comment(self, self._session)

    def get(self, *args, **kwargs):
        """Request a web page and return a Response object"""
        if self._session is None:
            req = requester.request("get", *args, **kwargs)
        else:
            req = requester.request("get", *args, **kwargs, session=self._session.session)
        if req.status_code == 429:
            raise utils.HTTPError("We are being rate-limited. Try again in a while or reduce the number of requests")
        return req
def threadIterator(comment):
    """Yields every comment in *comment*'s thread, depth-first.

    A comment with no replies yields itself; otherwise each reply is yielded
    followed by its own sub-replies (the root itself is not yielded).

    Args:
        comment (Comment): Comment whose thread should be walked

    Yields:
        Comment: Each comment in the thread
    """
    # Hoisted: the original called comment.get_thread() up to three times
    thread = comment.get_thread()
    if not thread:  # covers both None and an empty list
        yield comment
    else:
        for reply in thread:
            yield reply
            for sub in threadIterator(reply):
                # A leaf yields itself from the recursive call; skip that
                # duplicate so each comment appears exactly once
                if reply != sub:
                    yield sub
================================================
FILE: AO3/common.py
================================================
import datetime
from . import utils
def __setifnotnone(obj, attr, value):
    """Assign *value* to obj.<attr>, skipping the assignment when value is None."""
    if value is None:
        return
    setattr(obj, attr, value)
def get_work_from_banner(work):
    """Builds an unloaded Work object from a search/listing banner <li>.

    Every field that can be scraped from the banner markup (authors, fandoms,
    tags, stats, ...) is pre-filled on the returned Work so that no extra
    request is needed for it.

    Args:
        work (bs4.element.Tag): The banner element for one work

    Returns:
        Work: Unloaded work populated with the banner's data
    """
    #* These imports need to be here to prevent circular imports
    #* (series.py would require common.py and vice-versa)
    from .series import Series
    from .users import User
    from .works import Work

    # Authors and the work link both live in the banner's <h4> heading
    authors = []
    try:
        for a in work.h4.find_all("a"):
            if 'rel' in a.attrs.keys():
                if "author" in a['rel']:
                    authors.append(User(a.string, load=False))
            elif a.attrs["href"].startswith("/works"):
                workname = a.string
                workid = utils.workid_from_url(a['href'])
    except AttributeError:
        # NOTE(review): if the h4 is missing, workid/workname are never bound
        # and the Work(...) call below raises NameError -- confirm banners
        # always carry an h4 with a /works link
        pass
    new = Work(workid, load=False)

    fandoms = []
    try:
        for a in work.find("h5", {"class": "fandoms"}).find_all("a"):
            fandoms.append(a.string)
    except AttributeError:
        pass

    # Tag list entries are classified by their CSS class
    warnings = []
    relationships = []
    characters = []
    freeforms = []
    try:
        for a in work.find(attrs={"class": "tags"}).find_all("li"):
            if "warnings" in a['class']:
                warnings.append(a.text)
            elif "relationships" in a['class']:
                relationships.append(a.text)
            elif "characters" in a['class']:
                characters.append(a.text)
            elif "freeforms" in a['class']:
                freeforms.append(a.text)
    except AttributeError:
        pass

    # Rating and categories come from the "required tags" block
    reqtags = work.find(attrs={"class": "required-tags"})
    if reqtags is not None:
        rating = reqtags.find(attrs={"class": "rating"})
        if rating is not None:
            rating = rating.text
        categories = reqtags.find(attrs={"class": "category"})
        if categories is not None:
            categories = categories.text.split(", ")
    else:
        rating = categories = None

    summary = work.find(attrs={"class": "userstuff summary"})
    if summary is not None:
        summary = summary.text

    # Series the work belongs to (name + id scraped from the series links)
    series = []
    series_list = work.find(attrs={"class": "series"})
    if series_list is not None:
        for a in series_list.find_all("a"):
            seriesid = int(a.attrs['href'].split("/")[-1])
            seriesname = a.text
            s = Series(seriesid, load=False)
            setattr(s, "name", seriesname)
            series.append(s)

    # Numeric stats: strip thousands separators, keep None when unparsable
    stats = work.find(attrs={"class": "stats"})
    if stats is not None:
        language = stats.find("dd", {"class": "language"})
        if language is not None:
            language = language.text
        words = stats.find("dd", {"class": "words"})
        if words is not None:
            words = words.text.replace(",", "")
            if words.isdigit(): words = int(words)
            else: words = None
        bookmarks = stats.find("dd", {"class": "bookmarks"})
        if bookmarks is not None:
            bookmarks = bookmarks.text.replace(",", "")
            if bookmarks.isdigit(): bookmarks = int(bookmarks)
            else: bookmarks = None
        # Chapter counter is "published/expected" (expected may be "?")
        chapters = stats.find("dd", {"class": "chapters"})
        if chapters is not None:
            chapters = chapters.text.split('/')[0].replace(",", "")
            if chapters.isdigit(): chapters = int(chapters)
            else: chapters = None
        expected_chapters = stats.find("dd", {"class": "chapters"})
        if expected_chapters is not None:
            expected_chapters = expected_chapters.text.split('/')[-1].replace(",", "")
            if expected_chapters.isdigit(): expected_chapters = int(expected_chapters)
            else: expected_chapters = None
        hits = stats.find("dd", {"class": "hits"})
        if hits is not None:
            hits = hits.text.replace(",", "")
            if hits.isdigit(): hits = int(hits)
            else: hits = None
        kudos = stats.find("dd", {"class": "kudos"})
        if kudos is not None:
            kudos = kudos.text.replace(",", "")
            if kudos.isdigit(): kudos = int(kudos)
            else: kudos = None
        comments = stats.find("dd", {"class": "comments"})
        if comments is not None:
            comments = comments.text.replace(",", "")
            if comments.isdigit(): comments = int(comments)
            else: comments = None
        restricted = work.find("img", {"title": "Restricted"}) is not None
        # A work is complete when the published count reaches the expected one
        if chapters is None:
            complete = None
        else:
            complete = chapters == expected_chapters
    else:
        language = words = bookmarks = chapters = expected_chapters = hits = restricted = complete = None

    date = work.find("p", {"class": "datetime"})
    if date is None:
        date_updated = None
    else:
        date_updated = datetime.datetime.strptime(date.getText(), "%d %b %Y")

    # Only overwrite Work attributes for which the banner actually had data
    __setifnotnone(new, "authors", authors)
    __setifnotnone(new, "bookmarks", bookmarks)
    __setifnotnone(new, "categories", categories)
    __setifnotnone(new, "nchapters", chapters)
    __setifnotnone(new, "characters", characters)
    __setifnotnone(new, "complete", complete)
    __setifnotnone(new, "date_updated", date_updated)
    __setifnotnone(new, "expected_chapters", expected_chapters)
    __setifnotnone(new, "fandoms", fandoms)
    __setifnotnone(new, "hits", hits)
    __setifnotnone(new, "comments", comments)
    __setifnotnone(new, "kudos", kudos)
    __setifnotnone(new, "language", language)
    __setifnotnone(new, "rating", rating)
    __setifnotnone(new, "relationships", relationships)
    __setifnotnone(new, "restricted", restricted)
    __setifnotnone(new, "series", series)
    __setifnotnone(new, "summary", summary)
    __setifnotnone(new, "tags", freeforms)
    __setifnotnone(new, "title", workname)
    __setifnotnone(new, "warnings", warnings)
    __setifnotnone(new, "words", words)
    return new
def url_join(base, *args):
    """Joins *base* with any number of path segments.

    Exactly one '/' separates consecutive non-empty parts; a single leading
    slash on a segment is dropped rather than doubled.
    """
    joined = base
    for segment in args:
        if joined and not joined.endswith("/"):
            joined += "/"
        joined += segment[1:] if segment.startswith("/") else segment
    return joined
================================================
FILE: AO3/extra.py
================================================
import functools
import os
import pathlib
import pickle
from bs4 import BeautifulSoup
from . import threadable, utils
from .requester import requester
def _download_languages():
    """Downloads and caches the list of languages available on AO3.

    The result (a list of (name, alias) tuples) is pickled to
    AO3/resources/languages/languages.pkl.

    Raises:
        utils.UnexpectedResponseError: The page didn't have the expected layout
    """
    path = os.path.dirname(__file__)
    languages = []
    try:
        language_path = os.path.join(path, "resources", "languages")
        # makedirs with exist_ok avoids the check-then-create race of the
        # previous isdir()/mkdir() pairs and creates both levels at once
        os.makedirs(language_path, exist_ok=True)
        url = "https://archiveofourown.org/languages"
        print(f"Downloading from {url}")
        req = requester.request("get", url)
        soup = BeautifulSoup(req.content, "lxml")
        for dt in soup.find("dl", {"class": "language index group"}).findAll("dt"):
            # The alias is the last path component of the language's link
            if dt.a is not None:
                alias = dt.a.attrs["href"].split("/")[-1]
            else:
                alias = None
            languages.append((dt.getText(), alias))
        with open(f"{os.path.join(language_path, 'languages')}.pkl", "wb") as file:
            pickle.dump(languages, file)
    except AttributeError:
        # A missing <dl> means AO3 changed its markup
        raise utils.UnexpectedResponseError("Couldn't download the desired resource. Do you have the latest version of ao3-api?")
    print(f"Download complete ({len(languages)} languages)")
def _download_fandom(fandom_key, name):
    """Downloads and caches one AO3 fandom category.

    The fandom names are pickled to AO3/resources/fandoms/<name>.pkl.

    Args:
        fandom_key (str): URL-encoded media category key (e.g. "Movies")
        name (str): File name to cache the list under

    Raises:
        utils.UnexpectedResponseError: The page didn't have the expected layout
    """
    path = os.path.dirname(__file__)
    fandoms = []
    try:
        fandom_path = os.path.join(path, "resources", "fandoms")
        # makedirs with exist_ok avoids the check-then-create race of the
        # previous isdir()/mkdir() pairs and creates both levels at once
        os.makedirs(fandom_path, exist_ok=True)
        url = f"https://archiveofourown.org/media/{fandom_key}/fandoms"
        print(f"Downloading from {url}")
        req = requester.request("get", url)
        soup = BeautifulSoup(req.content, "lxml")
        for fandom in soup.find("ol", {"class": "alphabet fandom index group"}).findAll("a", {"class": "tag"}):
            fandoms.append(fandom.getText())
        with open(f"{os.path.join(fandom_path, name)}.pkl", "wb") as file:
            pickle.dump(fandoms, file)
    except AttributeError:
        # A missing <ol> means AO3 changed its markup
        raise utils.UnexpectedResponseError("Couldn't download the desired resource. Do you have the latest version of ao3-api?")
    print(f"Download complete ({len(fandoms)} fandoms)")
# Registry of downloadable fandom resources: maps a resource name to a
# zero-argument callable that downloads it. The media-category keys are
# URL-encoded AO3 paths ("*a*" is AO3's encoding for "&").
_FANDOM_RESOURCES = {
    "anime_manga_fandoms": functools.partial(
        _download_fandom,
        "Anime%20*a*%20Manga",
        "anime_manga_fandoms"),
    "books_literature_fandoms": functools.partial(
        _download_fandom,
        "Books%20*a*%20Literature",
        "books_literature_fandoms"),
    "cartoons_comics_graphicnovels_fandoms": functools.partial(
        _download_fandom,
        "Cartoons%20*a*%20Comics%20*a*%20Graphic%20Novels",
        "cartoons_comics_graphicnovels_fandoms"),
    "celebrities_real_people_fandoms": functools.partial(
        _download_fandom,
        "Celebrities%20*a*%20Real%20People",
        "celebrities_real_people_fandoms"),
    "movies_fandoms": functools.partial(
        _download_fandom,
        "Movies",
        "movies_fandoms"),
    "music_bands_fandoms": functools.partial(
        _download_fandom,
        "Music%20*a*%20Bands",
        "music_bands_fandoms"),
    "other_media_fandoms": functools.partial(
        _download_fandom,
        "Other%20Media",
        "other_media_fandoms"),
    "theater_fandoms": functools.partial(
        _download_fandom,
        "Theater",
        "theater_fandoms"),
    "tvshows_fandoms": functools.partial(
        _download_fandom,
        "TV%20Shows",
        "tvshows_fandoms"),
    "videogames_fandoms": functools.partial(
        _download_fandom,
        "Video%20Games",
        "videogames_fandoms"),
    "uncategorized_fandoms": functools.partial(
        _download_fandom,
        "Uncategorized%20Fandoms",
        "uncategorized_fandoms")
}
# Registry of downloadable language resources
_LANGUAGE_RESOURCES = {
    "languages": _download_languages
}
# (category name, registry) pairs walked by download()/get_resources()
_RESOURCE_DICTS = [("fandoms", _FANDOM_RESOURCES),
                   ("languages", _LANGUAGE_RESOURCES)]
@threadable.threadable
def download(resource):
    """Downloads the specified resource.
    This function is threadable.

    Args:
        resource (str): Resource name

    Raises:
        KeyError: Invalid resource
    """
    for _, registry in _RESOURCE_DICTS:
        downloader = registry.get(resource)
        if downloader is not None:
            downloader()
            return
    raise KeyError(f"'{resource}' is not a valid resource")
def get_resources():
    """Returns a dict mapping each resource category to the names available for download."""
    return {name: list(registry) for name, registry in _RESOURCE_DICTS}
def has_resource(resource):
    """Returns True if *resource* has already been downloaded, False otherwise."""
    resource_root = pathlib.Path(os.path.dirname(__file__), "resources")
    for _ in resource_root.rglob(f"{resource}.pkl"):
        return True
    return False
@threadable.threadable
def download_all(redownload=False):
    """Downloads every available resource, sequentially.
    This function is threadable."""
    for resource_names in get_resources().values():
        for resource in resource_names:
            if redownload or not has_resource(resource):
                download(resource)
@threadable.threadable
def download_all_threaded(redownload=False):
    """Downloads every available resource in parallel (about ~3.7x faster).
    This function is threadable."""
    pending = []
    for resource_names in get_resources().values():
        for resource in resource_names:
            if redownload or not has_resource(resource):
                # threaded=True makes download() return its worker thread
                pending.append(download(resource, threaded=True))
    for thread in pending:
        thread.join()
================================================
FILE: AO3/requester.py
================================================
import threading
import time
import requests
class Requester:
    """Rate-limited HTTP requester shared by the whole package."""

    def __init__(self, rqtw=-1, timew=60):
        """Limits the request rate to prevent HTTP 429 (rate limiting) responses.
        12 request per minute seems to be the limit.

        Args:
            rqtw (int, optional): Maximum requests per time window (-1 -> no limit). Defaults to -1.
            timew (int, optional): Time window (seconds). Defaults to 60.
        """
        # Timestamps of requests made inside the current window
        self._requests = []
        self._rqtw = rqtw
        self._timew = timew
        self._lock = threading.Lock()
        # Total number of requests ever issued through this instance
        self.total = 0

    def setRQTW(self, value):
        """Sets the maximum number of requests per time window (-1 disables limiting)."""
        self._rqtw = value

    def setTimeW(self, value):
        """Sets the length of the rate-limiting time window, in seconds."""
        self._timew = value

    def request(self, *args, **kwargs):
        """Requests a web page once enough time has passed since the last request

        Args:
            session(requests.Session, optional): Session object to request with

        Returns:
            requests.Response: Response object
        """
        # We've made a bunch of requests, time to rate limit?
        if self._rqtw != -1:
            with self._lock:
                if len(self._requests) >= self._rqtw:
                    t = time.time()
                    # Reduce list to only requests made within the current time window
                    while len(self._requests):
                        if t-self._requests[0] >= self._timew:
                            self._requests.pop(0) # Older than window, forget about it
                        else:
                            break # Inside window, the rest of them must be too
                    # Have we used up all available requests within our window?
                    if len(self._requests) >= self._rqtw: # Yes
                        # Wait until the oldest request exits the window, giving us a slot for the new one
                        # (note: the lock is held while sleeping, serializing concurrent callers)
                        time.sleep(self._requests[0] + self._timew - t)
                        # Now outside window, drop it
                        self._requests.pop(0)
        # NOTE(review): this append runs outside the lock -- concurrent callers
        # could interleave here; confirm whether that race is acceptable
        if self._rqtw != -1:
            self._requests.append(time.time())
        self.total += 1
        # A caller-supplied session takes precedence over the bare requests API
        if "session" in kwargs:
            sess = kwargs["session"]
            del kwargs["session"]
            req = sess.request(*args, **kwargs)
        else:
            req = requests.request(*args, **kwargs)
        return req
# Module-wide singleton: every AO3 object routes its HTTP requests through
# this shared (and optionally rate-limited) instance.
requester = Requester()
================================================
FILE: AO3/search.py
================================================
from math import ceil
from bs4 import BeautifulSoup
from . import threadable, utils
from .common import get_work_from_banner
from .requester import requester
from .series import Series
from .users import User
from .works import Work
# Sort-column values accepted by AO3's work-search form
DEFAULT = "_score"
BEST_MATCH = "_score"
AUTHOR = "authors_to_sort_on"
TITLE = "title_to_sort_on"
DATE_POSTED = "created_at"
DATE_UPDATED = "revised_at"
WORD_COUNT = "word_count"
RATING = "rating_ids"
HITS = "hits"
BOOKMARKS = "bookmarks_count"
COMMENTS = "comments_count"
KUDOS = "kudos_count"
# Sort directions
DESCENDING = "desc"
ASCENDING = "asc"
class Search:
    """Holds the parameters and (after update()) the results of an AO3 work search."""

    def __init__(
        self,
        any_field="",
        title="",
        author="",
        single_chapter=False,
        word_count=None,
        language="",
        fandoms="",
        rating=None,
        hits=None,
        kudos=None,
        crossovers=None,
        bookmarks=None,
        excluded_tags="",
        comments=None,
        completion_status=None,
        page=1,
        sort_column="",
        sort_direction="",
        revised_at="",
        characters="",
        relationships="",
        tags="",
        session=None):
        # Search parameters; see the module-level search() function for the
        # meaning of each field
        self.any_field = any_field
        self.title = title
        self.author = author
        self.single_chapter = single_chapter
        self.word_count = word_count
        self.language = language
        self.fandoms = fandoms
        self.characters = characters
        self.relationships = relationships
        self.tags = tags
        self.rating = rating
        self.hits = hits
        self.kudos = kudos
        self.crossovers = crossovers
        self.bookmarks = bookmarks
        self.excluded_tags = excluded_tags
        self.comments = comments
        self.completion_status = completion_status
        self.page = page
        self.sort_column = sort_column
        self.sort_direction = sort_direction
        self.revised_at = revised_at
        self.session = session
        # Populated by update(): list of Work objects, page count, hit count
        self.results = None
        self.pages = 0
        self.total_results = 0

    @threadable.threadable
    def update(self):
        """Sends a request to the AO3 website with the defined search parameters, and updates all info.
        This function is threadable.
        """
        soup = search(
            self.any_field, self.title, self.author, self.single_chapter,
            self.word_count, self.language, self.fandoms, self.rating, self.hits,
            self.kudos, self.crossovers, self.bookmarks, self.excluded_tags, self.comments, self.completion_status, self.page,
            self.sort_column, self.sort_direction, self.revised_at, self.session,
            self.characters, self.relationships, self.tags)
        results = soup.find("ol", {"class": ("work", "index", "group")})
        # No result list + the "no results" message means an empty search,
        # not a parse failure
        if results is None and soup.find("p", text="No results found. You may want to edit your search to make it less specific.") is not None:
            self.results = []
            self.total_results = 0
            self.pages = 0
            return
        works = []
        for work in results.find_all("li", {"role": "article"}):
            # Entries without an h4 heading aren't work banners; skip them
            if work.h4 is None:
                continue
            new = get_work_from_banner(work)
            new._session = self.session
            works.append(new)
        self.results = works
        # The heading reads e.g. "1,234 Found"; parse the leading number
        maindiv = soup.find("div", {"class": "works-search region", "id": "main"})
        self.total_results = int(maindiv.find("h3", {"class": "heading"}).getText().replace(',','').replace('.','').strip().split(" ")[0])
        # AO3 lists 20 works per page
        self.pages = ceil(self.total_results / 20)
def search(
        any_field="",
        title="",
        author="",
        single_chapter=False,
        word_count=None,
        language="",
        fandoms="",
        rating=None,
        hits=None,
        kudos=None,
        crossovers=None,
        bookmarks=None,
        excluded_tags="",
        comments=None,
        completion_status=None,
        page=1,
        sort_column="",
        sort_direction="",
        revised_at="",
        session=None,
        characters="",
        relationships="",
        tags=""):
    """Returns the results page for the search as a Soup object

    Args:
        any_field (str, optional): Generic search. Defaults to "".
        title (str, optional): Title of the work. Defaults to "".
        author (str, optional): Authors of the work. Defaults to "".
        single_chapter (bool, optional): Only include one-shots. Defaults to False.
        word_count (AO3.utils.Constraint, optional): Word count. Defaults to None.
        language (str, optional): Work language. Defaults to "".
        fandoms (str, optional): Fandoms included in the work. Defaults to "".
        characters (str, optional): Characters included in the work. Defaults to "".
        relationships (str, optional): Relationships included in the work. Defaults to "".
        tags (str, optional): Additional tags applied to the work. Defaults to "".
        rating (int, optional): Rating for the work. 9 for Not Rated, 10 for General Audiences, 11 for Teen And Up Audiences, 12 for Mature, 13 for Explicit. Defaults to None.
        hits (AO3.utils.Constraint, optional): Number of hits. Defaults to None.
        kudos (AO3.utils.Constraint, optional): Number of kudos. Defaults to None.
        crossovers (bool, optional): If specified, if false, exclude crossovers, if true, include only crossovers
        bookmarks (AO3.utils.Constraint, optional): Number of bookmarks. Defaults to None.
        excluded_tags (str, optional): Tags to exclude. Defaults to "".
        comments (AO3.utils.Constraint, optional): Number of comments. Defaults to None.
        completion_status (bool, optional): If specified, if true, only complete works, if false, only incomplete works. Defaults to None.
        page (int, optional): Page number. Defaults to 1.
        sort_column (str, optional): Which column to sort on. Defaults to "".
        sort_direction (str, optional): Which direction to sort. Defaults to "".
        revised_at (str, optional): Show works older / more recent than this date. Defaults to "".
        session (AO3.Session, optional): Session object. Defaults to None.

    Returns:
        bs4.BeautifulSoup: Search result's soup
    """
    query = utils.Query()
    # The generic query field is always sent; AO3 expects at least a space
    query.add_field(f"work_search[query]={any_field if any_field != '' else ' '}")
    # (include?, query fragment) pairs, kept in the order the website emits them
    optional_fields = (
        (page != 1, f"page={page}"),
        (title != "", f"work_search[title]={title}"),
        (author != "", f"work_search[creators]={author}"),
        (bool(single_chapter), "work_search[single_chapter]=1"),
        (word_count is not None, f"work_search[word_count]={word_count}"),
        (language != "", f"work_search[language_id]={language}"),
        (fandoms != "", f"work_search[fandom_names]={fandoms}"),
        (characters != "", f"work_search[character_names]={characters}"),
        (relationships != "", f"work_search[relationship_names]={relationships}"),
        (tags != "", f"work_search[freeform_names]={tags}"),
        (rating is not None, f"work_search[rating_ids]={rating}"),
        (hits is not None, f"work_search[hits]={hits}"),
        (kudos is not None, f"work_search[kudos_count]={kudos}"),
        (crossovers is not None, f"work_search[crossover]={'T' if crossovers else 'F'}"),
        (bookmarks is not None, f"work_search[bookmarks_count]={bookmarks}"),
        (excluded_tags != "", f"work_search[excluded_tag_names]={excluded_tags}"),
        (comments is not None, f"work_search[comments_count]={comments}"),
        (completion_status is not None, f"work_search[complete]={'T' if completion_status else 'F'}"),
        (sort_column != "", f"work_search[sort_column]={sort_column}"),
        (sort_direction != "", f"work_search[sort_direction]={sort_direction}"),
        (revised_at != "", f"work_search[revised_at]={revised_at}"),
    )
    for wanted, field in optional_fields:
        if wanted:
            query.add_field(field)

    url = f"https://archiveofourown.org/works/search?{query.string}"
    req = requester.request("get", url) if session is None else session.get(url)
    if req.status_code == 429:
        raise utils.HTTPError("We are being rate-limited. Try again in a while or reduce the number of requests")
    return BeautifulSoup(req.content, features="lxml")
================================================
FILE: AO3/series.py
================================================
from datetime import date
from functools import cached_property
from bs4 import BeautifulSoup
from . import threadable, utils
from .common import get_work_from_banner
from .requester import requester
from .users import User
from .works import Work
class Series:
    """AO3 series object."""

    def __init__(self, seriesid, session=None, load=True):
        """Creates a new series object

        Args:
            seriesid (int/str): ID of the series
            session (AO3.Session, optional): Session object. Defaults to None.
            load (bool, optional): If true, the series is loaded on initialization. Defaults to True.

        Raises:
            utils.InvalidIdError: Invalid series ID
        """
        self.id = seriesid
        self._session = session
        self._soup = None  # BeautifulSoup of the series page; set by reload()
        if load:
            self.reload()

    def __eq__(self, other):
        return isinstance(other, __class__) and other.id == self.id

    def __repr__(self):
        try:
            return f"<Series [{self.name}]>"
        except:
            # Name is unavailable before the series is loaded; fall back to the ID
            return f"<Series [{self.id}]>"

    def __getstate__(self):
        # BeautifulSoup objects cannot be pickled directly, so store their
        # encoded HTML with a flag that tells __setstate__ to re-parse them.
        d = {}
        for attr in self.__dict__:
            if isinstance(self.__dict__[attr], BeautifulSoup):
                d[attr] = (self.__dict__[attr].encode(), True)
            else:
                d[attr] = (self.__dict__[attr], False)
        return d

    def __setstate__(self, d):
        for attr in d:
            value, issoup = d[attr]
            if issoup:
                self.__dict__[attr] = BeautifulSoup(value, "lxml")
            else:
                self.__dict__[attr] = value

    def set_session(self, session):
        """Sets the session used to make requests for this series

        Args:
            session (AO3.Session/AO3.GuestSession): session object
        """
        self._session = session

    @threadable.threadable
    def reload(self):
        """
        Loads information about this series.
        This function is threadable.

        Raises:
            utils.InvalidIdError: Invalid series ID
        """
        # Invalidate every cached_property so it is recomputed from the new soup
        for attr in self.__class__.__dict__:
            if isinstance(getattr(self.__class__, attr), cached_property):
                if attr in self.__dict__:
                    delattr(self, attr)

        self._soup = self.request(f"https://archiveofourown.org/series/{self.id}")
        if "Error 404" in self._soup.text:
            raise utils.InvalidIdError("Cannot find series")

    @threadable.threadable
    def subscribe(self):
        """Subscribes to this series.
        This function is threadable.

        Raises:
            utils.AuthError: Invalid session
        """
        if self._session is None or not self._session.is_authed:
            raise utils.AuthError("You can only subscribe to a series using an authenticated session")
        utils.subscribe(self, "Series", self._session)

    @threadable.threadable
    def unsubscribe(self):
        """Unubscribes from this series.
        This function is threadable.

        Raises:
            utils.AuthError: Invalid session
        """
        if not self.is_subscribed:
            raise Exception("You are not subscribed to this series")
        if self._session is None or not self._session.is_authed:
            raise utils.AuthError("You can only unsubscribe from a series using an authenticated session")
        utils.subscribe(self, "Series", self._session, True, self._sub_id)

    @threadable.threadable
    def bookmark(self, notes="", tags=None, collections=None, private=False, recommend=False, pseud=None):
        """Bookmarks this series
        This function is threadable

        Args:
            notes (str, optional): Bookmark notes. Defaults to "".
            tags (list, optional): What tags to add. Defaults to None.
            collections (list, optional): What collections to add this bookmark to. Defaults to None.
            private (bool, optional): Whether this bookmark should be private. Defaults to False.
            recommend (bool, optional): Whether to recommend this bookmark. Defaults to False.
            pseud (str, optional): What pseud to add the bookmark under. Defaults to default pseud.

        Raises:
            utils.UnloadedError: Series isn't loaded
            utils.AuthError: Invalid session
        """
        if not self.loaded:
            raise utils.UnloadedError("Series isn't loaded. Have you tried calling Series.reload()?")
        if self._session is None:
            raise utils.AuthError("Invalid session")
        utils.bookmark(self, self._session, notes, tags, collections, private, recommend, pseud)

    @threadable.threadable
    def delete_bookmark(self):
        """Removes a bookmark from this series
        This function is threadable

        Raises:
            utils.UnloadedError: Series isn't loaded
            utils.AuthError: Invalid session
            utils.BookmarkError: No bookmark found
        """
        if not self.loaded:
            raise utils.UnloadedError("Series isn't loaded. Have you tried calling Series.reload()?")
        if self._session is None:
            raise utils.AuthError("Invalid session")
        if self._bookmarkid is None:
            raise utils.BookmarkError("You don't have a bookmark here")
        utils.delete_bookmark(self._bookmarkid, self._session, self.authenticity_token)

    @cached_property
    def _bookmarkid(self):
        """ID of the logged-in user's bookmark of this series, or None."""
        form_div = self._soup.find("div", {"id": "bookmark-form"})
        if form_div is None or form_div.form is None:
            return None
        # NOTE: 'in' on a bs4 Tag tests its *children*, not its attributes,
        # so the previous check ("action" in form_div.form) never matched.
        # The attribute lookup must go through Tag.attrs.
        action = form_div.form.attrs.get("action", "")
        if action.startswith("/bookmark"):
            text = action.split("/")[-1]
            if text.isdigit():
                return int(text)
        return None

    @cached_property
    def url(self):
        """Returns the URL to this series

        Returns:
            str: series URL
        """
        return f"https://archiveofourown.org/series/{self.id}"

    @property
    def loaded(self):
        """Returns True if this series has been loaded"""
        return self._soup is not None

    @cached_property
    def authenticity_token(self):
        """Token used to take actions that involve this work"""
        if not self.loaded:
            return None
        token = self._soup.find("meta", {"name": "csrf-token"})
        return token["content"]

    @cached_property
    def is_subscribed(self):
        """True if you're subscribed to this series"""
        if self._session is None or not self._session.is_authed:
            raise utils.AuthError("You can only get a series ID using an authenticated session")
        form = self._soup.find("form", {"data-create-value": "Subscribe"})
        # If the form offers an "Unsubscribe" button, we are subscribed
        input_ = form.find("input", {"name": "commit", "value": "Unsubscribe"})
        return input_ is not None

    @cached_property
    def _sub_id(self):
        """Returns the subscription ID. Used for unsubscribing"""
        if not self.is_subscribed:
            raise Exception("You are not subscribed to this series")
        form = self._soup.find("form", {"data-create-value": "Subscribe"})
        id_ = form.attrs["action"].split("/")[-1]
        return int(id_)

    def _meta_dd(self, label, default=None):
        """Returns the text of the <dd> that follows the <dt> whose text is
        `label` in the series metadata block, or `default` if absent.

        Args:
            label (str): Exact <dt> text to look for (e.g. "Notes:")
            default: Value returned when the field isn't present
        """
        dl = self._soup.find("dl", {"class": "series meta group"})
        last_dt = None
        for field in dl.findAll(("dd", "dt")):
            if field.name == "dt":
                last_dt = field.getText().strip()
            elif last_dt == label:
                return field.getText().strip()
        return default

    def _stat_dd(self, label, default=None):
        """Like _meta_dd, but looks inside the nested statistics <dl>."""
        dl = self._soup.find("dl", {"class": "series meta group"})
        stats = dl.find("dl", {"class": "stats"})
        last_dt = None
        for field in stats.findAll(("dd", "dt")):
            if field.name == "dt":
                last_dt = field.getText().strip()
            elif last_dt == label:
                return field.getText().strip()
        return default

    @cached_property
    def name(self):
        """Name of the series."""
        div = self._soup.find("div", {"class": "series-show region"})
        return div.h2.getText().replace("\t", "").replace("\n", "")

    @cached_property
    def creators(self):
        """List of series creators (unloaded User objects)."""
        dl = self._soup.find("dl", {"class": "series meta group"})
        return [User(author.getText(), load=False) for author in dl.findAll("a", {"rel": "author"})]

    @cached_property
    def series_begun(self):
        """Date the series was begun (datetime.date)."""
        # The field is formatted as YYYY-MM-DD
        date_str = self._meta_dd("Series Begun:")
        return date(*map(int, date_str.split("-")))

    @cached_property
    def series_updated(self):
        """Date the series was last updated (datetime.date)."""
        date_str = self._meta_dd("Series Updated:")
        return date(*map(int, date_str.split("-")))

    @cached_property
    def words(self):
        """Total word count across the series (int)."""
        return int(self._stat_dd("Words:").replace(",", ""))

    @cached_property
    def nworks(self):
        """Number of works in the series (int)."""
        return int(self._stat_dd("Works:").replace(",", ""))

    @cached_property
    def complete(self):
        """True if the series is marked complete."""
        return self._stat_dd("Complete:") == "Yes"

    @cached_property
    def description(self):
        """Series description, or "" if there is none."""
        return self._meta_dd("Description:", "")

    @cached_property
    def notes(self):
        """Series notes, or "" if there are none."""
        return self._meta_dd("Notes:", "")

    @cached_property
    def nbookmarks(self):
        """Number of bookmarks of the series (int)."""
        return int(self._stat_dd("Bookmarks:", "0").replace(",", ""))

    @cached_property
    def work_list(self):
        """List of works in the series (unloaded Work objects)."""
        ul = self._soup.find("ul", {"class": "series work index group"})
        works = []
        for work in ul.find_all("li", {"role": "article"}):
            if work.h4 is None:
                continue
            works.append(get_work_from_banner(work))
        return works

    def get(self, *args, **kwargs):
        """Request a web page and return a Response object"""
        if self._session is None:
            req = requester.request("get", *args, **kwargs)
        else:
            req = requester.request("get", *args, **kwargs, session=self._session.session)
        if req.status_code == 429:
            raise utils.HTTPError("We are being rate-limited. Try again in a while or reduce the number of requests")
        return req

    def request(self, url):
        """Request a web page and return a BeautifulSoup object.

        Args:
            url (str): Url to request

        Returns:
            bs4.BeautifulSoup: BeautifulSoup object representing the requested page's html
        """
        req = self.get(url)
        soup = BeautifulSoup(req.content, "lxml")
        return soup
================================================
FILE: AO3/session.py
================================================
import datetime
import re
import time
from functools import cached_property
import requests
from bs4 import BeautifulSoup
from . import threadable, utils
from .requester import requester
from .series import Series
from .users import User
from .works import Work
class GuestSession:
    """
    AO3 guest session object
    """
    def __init__(self):
        self.is_authed = False
        self.authenticity_token = None
        self.username = ""
        self.session = requests.Session()

    @property
    def user(self):
        """User object for this session's username (not loaded)."""
        return User(self.username, self, False)

    @staticmethod
    def _check_rate_limit(req):
        """Raises utils.HTTPError if AO3 responded with HTTP 429.

        Centralizes the rate-limit check that was previously duplicated in
        refresh_auth_token, get and post.
        """
        if req.status_code == 429:
            raise utils.HTTPError("We are being rate-limited. Try again in a while or reduce the number of requests")

    @threadable.threadable
    def comment(self, commentable, comment_text, oneshot=False, commentid=None):
        """Leaves a comment on a specific work.
        This function is threadable.

        Args:
            commentable (Work/Chapter): Commentable object
            comment_text (str): Comment text (must have between 1 and 10000 characters)
            oneshot (bool): Should be True if the work has only one chapter. In this case, chapterid becomes workid
            commentid (str/int): If specified, the comment is posted as a reply to this one. Defaults to None.

        Raises:
            utils.InvalidIdError: Invalid ID
            utils.UnexpectedResponseError: Unknown error
            utils.PseudoError: Couldn't find a valid pseudonym to post under
            utils.DuplicateCommentError: The comment you're trying to post was already posted
            ValueError: Invalid name/email

        Returns:
            requests.models.Response: Response object
        """
        response = utils.comment(commentable, comment_text, self, oneshot, commentid)
        return response

    @threadable.threadable
    def kudos(self, work):
        """Leave a 'kudos' in a specific work.
        This function is threadable.

        Args:
            work (Work): Work object to leave kudos on

        Raises:
            utils.UnexpectedResponseError: Unexpected response received
            utils.InvalidIdError: Invalid ID (work doesn't exist)

        Returns:
            bool: True if successful, False if you already left kudos there
        """
        return utils.kudos(work, self)

    @threadable.threadable
    def refresh_auth_token(self):
        """Refreshes the authenticity token.
        This function is threadable.

        Raises:
            utils.UnexpectedResponseError: Couldn't refresh the token
        """
        # For some reason, the auth token in the root path only works if you're
        # unauthenticated. To get around that, we check if this is an authed
        # session and, if so, get the token from the profile page.
        if self.is_authed:
            req = self.session.get(f"https://archiveofourown.org/users/{self.username}")
        else:
            req = self.session.get("https://archiveofourown.org")
        self._check_rate_limit(req)
        soup = BeautifulSoup(req.content, "lxml")
        token = soup.find("input", {"name": "authenticity_token"})
        if token is None:
            raise utils.UnexpectedResponseError("Couldn't refresh token")
        self.authenticity_token = token.attrs["value"]

    def get(self, *args, **kwargs):
        """Request a web page and return a Response object"""
        if self.session is None:
            req = requester.request("get", *args, **kwargs)
        else:
            req = requester.request("get", *args, **kwargs, session=self.session)
        self._check_rate_limit(req)
        return req

    def request(self, url):
        """Request a web page and return a BeautifulSoup object.

        Args:
            url (str): Url to request

        Returns:
            bs4.BeautifulSoup: BeautifulSoup object representing the requested page's html
        """
        req = self.get(url)
        soup = BeautifulSoup(req.content, "lxml")
        return soup

    def post(self, *args, **kwargs):
        """Make a post request with the current session

        Returns:
            requests.Request
        """
        req = self.session.post(*args, **kwargs)
        self._check_rate_limit(req)
        return req

    def __del__(self):
        # Guard against a partially-constructed object: if __init__ raised
        # before creating self.session, the old code crashed here with
        # AttributeError during interpreter teardown.
        session = getattr(self, "session", None)
        if session is not None:
            session.close()
class Session(GuestSession):
    """
    AO3 session object
    """
    def __init__(self, username, password):
        """Creates a new AO3 session object

        Args:
            username (str): AO3 username
            password (str): AO3 password

        Raises:
            utils.LoginError: Login was unsucessful (wrong username or password)
        """
        super().__init__()
        self.is_authed = True
        self.username = username
        self.url = "https://archiveofourown.org/users/%s"%self.username

        self.session = requests.Session()

        # The login form requires a CSRF token scraped from the login page
        soup = self.request("https://archiveofourown.org/users/login")
        self.authenticity_token = soup.find("input", {"name": 'authenticity_token'})["value"]
        payload = {'user[login]': username,
                   'user[password]': password,
                   'authenticity_token': self.authenticity_token}
        # A successful login answers with a 302 redirect; anything else means
        # the credentials were rejected
        post = self.post("https://archiveofourown.org/users/login", params=payload, allow_redirects=False)
        if not post.status_code == 302:
            raise utils.LoginError("Invalid username or password")

        self._subscriptions_url = "https://archiveofourown.org/users/{0}/subscriptions?page={1:d}"
        self._bookmarks_url = "https://archiveofourown.org/users/{0}/bookmarks?page={1:d}"
        self._history_url = "https://archiveofourown.org/users/{0}/readings?page={1:d}"

        # Lazy caches filled by get_subscriptions / get_bookmarks / get_history
        self._bookmarks = None
        self._subscriptions = None
        self._history = None

    def __getstate__(self):
        # BeautifulSoup attributes can't be pickled; store their encoded HTML
        # with a flag so __setstate__ can re-parse them
        d = {}
        for attr in self.__dict__:
            if isinstance(self.__dict__[attr], BeautifulSoup):
                d[attr] = (self.__dict__[attr].encode(), True)
            else:
                d[attr] = (self.__dict__[attr], False)
        return d

    def __setstate__(self, d):
        for attr in d:
            value, issoup = d[attr]
            if issoup:
                self.__dict__[attr] = BeautifulSoup(value, "lxml")
            else:
                self.__dict__[attr] = value

    def clear_cache(self):
        """Discards cached properties and the subscription/bookmark caches so
        they are fetched again on next access."""
        for attr in self.__class__.__dict__:
            if isinstance(getattr(self.__class__, attr), cached_property):
                if attr in self.__dict__:
                    delattr(self, attr)
        self._bookmarks = None
        self._subscriptions = None

    @cached_property
    def _subscription_pages(self):
        """Number of pages in this user's subscription listing."""
        url = self._subscriptions_url.format(self.username, 1)
        soup = self.request(url)
        pages = soup.find("ol", {"title": "pagination"})
        if pages is None:
            return 1
        n = 1
        # The largest numeric entry in the pagination list is the page count
        for li in pages.findAll("li"):
            text = li.getText()
            if text.isdigit():
                n = int(text)
        return n

    def get_work_subscriptions(self, use_threading=False):
        """
        Get subscribed works. Loads them if they haven't been previously

        Returns:
            list: List of work subscriptions
        """
        subs = self.get_subscriptions(use_threading)
        return list(filter(lambda obj: isinstance(obj, Work), subs))

    def get_series_subscriptions(self, use_threading=False):
        """
        Get subscribed series. Loads them if they haven't been previously

        Returns:
            list: List of series subscriptions
        """
        subs = self.get_subscriptions(use_threading)
        return list(filter(lambda obj: isinstance(obj, Series), subs))

    def get_user_subscriptions(self, use_threading=False):
        """
        Get subscribed users. Loads them if they haven't been previously

        Returns:
            list: List of users subscriptions
        """
        subs = self.get_subscriptions(use_threading)
        return list(filter(lambda obj: isinstance(obj, User), subs))

    def get_subscriptions(self, use_threading=False):
        """
        Get user's subscriptions. Loads them if they haven't been previously

        Returns:
            list: List of subscriptions
        """
        if self._subscriptions is None:
            if use_threading:
                self.load_subscriptions_threaded()
            else:
                self._subscriptions = []
                for page in range(self._subscription_pages):
                    self._load_subscriptions(page=page+1)
        return self._subscriptions

    @threadable.threadable
    def load_subscriptions_threaded(self):
        """
        Get subscribed works using threads.
        This function is threadable.
        """
        threads = []
        self._subscriptions = []
        for page in range(self._subscription_pages):
            threads.append(self._load_subscriptions(page=page+1, threaded=True))
        for thread in threads:
            thread.join()

    @threadable.threadable
    def _load_subscriptions(self, page=1):
        # Loads one page of subscriptions into self._subscriptions.
        # Each <dt> describes one subscription; its links tell us whether it
        # is a work, a user, or a series.
        url = self._subscriptions_url.format(self.username, page)
        soup = self.request(url)
        subscriptions = soup.find("dl", {"class": "subscription index group"})
        for sub in subscriptions.find_all("dt"):
            type_ = "work"
            user = None
            series = None
            workid = None
            workname = None
            authors = []
            for a in sub.find_all("a"):
                if "rel" in a.attrs.keys():
                    # rel="author" links identify the creators
                    if "author" in a["rel"]:
                        authors.append(User(str(a.string), load=False))
                elif a["href"].startswith("/works"):
                    workname = str(a.string)
                    workid = utils.workid_from_url(a["href"])
                elif a["href"].startswith("/users"):
                    type_ = "user"
                    user = User(str(a.string), load=False)
                else:
                    # Any other link is assumed to point at a series
                    type_ = "series"
                    workname = str(a.string)
                    series = int(a["href"].split("/")[-1])
            if type_ == "work":
                new = Work(workid, load=False)
                setattr(new, "title", workname)
                setattr(new, "authors", authors)
                self._subscriptions.append(new)
            elif type_ == "user":
                self._subscriptions.append(user)
            elif type_ == "series":
                new = Series(series, load=False)
                setattr(new, "name", workname)
                setattr(new, "authors", authors)
                self._subscriptions.append(new)

    @cached_property
    def _history_pages(self):
        """Number of pages in this user's reading history."""
        url = self._history_url.format(self.username, 1)
        soup = self.request(url)
        pages = soup.find("ol", {"title": "pagination"})
        if pages is None:
            return 1
        n = 1
        # The largest numeric entry in the pagination list is the page count
        for li in pages.findAll("li"):
            text = li.getText()
            if text.isdigit():
                n = int(text)
        return n

    def get_history(self, hist_sleep=3, start_page=0, max_pages=None, timeout_sleep=60):
        """
        Get history works. Loads them if they haven't been previously.

        Arguments:
            hist_sleep (int to sleep between requests)
            start_page (int for page to start on, zero-indexed)
            max_pages  (int for page to end on, zero-indexed)
            timeout_sleep (int, if set will attempt to recovery from http errors, likely timeouts, if set to None will just attempt to load)

        takes two arguments the first hist_sleep is an int and is a sleep to run between pages of history to load to avoid hitting the rate limiter, the second is an int of the maximum number of pages of history to load, by default this is None so loads them all.

        Returns:
            list: List of tuples (Work, number-of-visits, datetime-last-visited)
        """
        if self._history is None:
            self._history = []
            for page in range(start_page, self._history_pages):
                # If we are attempting to recover from errors then
                # catch and loop, otherwise just call and go
                if timeout_sleep is None:
                    self._load_history(page=page+1)
                else:
                    loaded=False
                    while loaded == False:
                        try:
                            self._load_history(page=page+1)
                            # print(f"Read history page {page+1}")
                            loaded = True
                        except utils.HTTPError:
                            # Rate-limited: back off before retrying this page
                            # print(f"History being rate limited, sleeping for {timeout_sleep} seconds")
                            time.sleep(timeout_sleep)
                # Check for maximum history page load
                if max_pages is not None and page >= max_pages:
                    return self._history
                # Again attempt to avoid rate limiter, sleep for a few
                # seconds between page requests.
                if hist_sleep is not None and hist_sleep > 0:
                    time.sleep(hist_sleep)
        return self._history

    def _load_history(self, page=1):
        # Loads one page of reading history into self._history.
        # Each entry becomes [Work, visit-count, last-visited datetime].
        url = self._history_url.format(self.username, page)
        soup = self.request(url)
        history = soup.find("ol", {"class": "reading work index group"})
        for item in history.find_all("li", {"role": "article"}):
            # authors = []
            workname = None
            workid = None
            for a in item.h4.find_all("a"):
                if a.attrs["href"].startswith("/works"):
                    workname = str(a.string)
                    workid = utils.workid_from_url(a["href"])
            visited_date = None
            visited_num = 1
            # Visit date/count are embedded in the "viewed heading" markup,
            # e.g. "<span>Last visited:</span> 01 Jan 2021 ... Visited 3 times"
            for viewed in item.find_all("h4", {"class": "viewed heading" }):
                data_string = str(viewed)
                date_str = re.search('<span>Last visited:</span> (\d{2} .+ \d{4})', data_string)
                if date_str is not None:
                    raw_date = date_str.group(1)
                    date_time_obj = datetime.datetime.strptime(date_str.group(1), '%d %b %Y')
                    visited_date = date_time_obj
                visited_str = re.search('Visited (\d+) times', data_string)
                if visited_str is not None:
                    visited_num = int(visited_str.group(1))

            if workname != None and workid != None:
                new = Work(workid, load=False)
                setattr(new, "title", workname)
                # setattr(new, "authors", authors)
                hist_item = [ new, visited_num, visited_date ]
                # print(hist_item)
                if new not in self._history:
                    self._history.append(hist_item)

    @cached_property
    def _bookmark_pages(self):
        """Number of pages in this user's bookmark listing."""
        url = self._bookmarks_url.format(self.username, 1)
        soup = self.request(url)
        pages = soup.find("ol", {"title": "pagination"})
        if pages is None:
            return 1
        n = 1
        # The largest numeric entry in the pagination list is the page count
        for li in pages.findAll("li"):
            text = li.getText()
            if text.isdigit():
                n = int(text)
        return n

    def get_bookmarks(self, use_threading=False):
        """
        Get bookmarked works. Loads them if they haven't been previously

        Returns:
            list: List of tuples (workid, workname, authors)
        """
        if self._bookmarks is None:
            if use_threading:
                self.load_bookmarks_threaded()
            else:
                self._bookmarks = []
                for page in range(self._bookmark_pages):
                    self._load_bookmarks(page=page+1)
        return self._bookmarks

    @threadable.threadable
    def load_bookmarks_threaded(self):
        """
        Get bookmarked works using threads.
        This function is threadable.
        """
        threads = []
        self._bookmarks = []
        for page in range(self._bookmark_pages):
            threads.append(self._load_bookmarks(page=page+1, threaded=True))
        for thread in threads:
            thread.join()

    @threadable.threadable
    def _load_bookmarks(self, page=1):
        # Loads one page of bookmarks into self._bookmarks as unloaded Work
        # objects annotated with title, authors and the "recommended" flag.
        url = self._bookmarks_url.format(self.username, page)
        soup = self.request(url)
        bookmarks = soup.find("ol", {"class": "bookmark index group"})
        for bookm in bookmarks.find_all("li", {"class": ["bookmark", "index", "group"]}):
            authors = []
            recommended = False
            workid = -1
            if bookm.h4 is not None:
                for a in bookm.h4.find_all("a"):
                    if "rel" in a.attrs.keys():
                        if "author" in a["rel"]:
                            authors.append(User(str(a.string), load=False))
                    elif a.attrs["href"].startswith("/works"):
                        workname = str(a.string)
                        workid = utils.workid_from_url(a["href"])

                # Get whether the bookmark is recommended
                for span in bookm.p.find_all("span"):
                    if "title" in span.attrs.keys():
                        if span["title"] == "Rec":
                            recommended = True

            if workid != -1:
                new = Work(workid, load=False)
                setattr(new, "title", workname)
                setattr(new, "authors", authors)
                setattr(new, "recommended", recommended)
                if new not in self._bookmarks:
                    self._bookmarks.append(new)

    @cached_property
    def bookmarks(self):
        """Get the number of your bookmarks.
        Must be logged in to use.

        Returns:
            int: Number of bookmarks
        """
        url = self._bookmarks_url.format(self.username, 1)
        soup = self.request(url)
        div = soup.find("div", {"class": "bookmarks-index dashboard filtered region"})
        # Heading looks like "1 - 20 of 123 Bookmarks"; the 5th token is the total
        h2 = div.h2.text.split()
        return int(h2[4].replace(',', ''))

    def get_statistics(self, year=None):
        """Returns this user's work statistics for a given year.

        Args:
            year (int, optional): Year to fetch. Defaults to all years.

        Returns:
            dict: Mapping of statistic name (snake_case) to int value
        """
        year = "All+Years" if year is None else str(year)
        url = f"https://archiveofourown.org/users/{self.username}/stats?year={year}"
        soup = self.request(url)
        stats = {}
        dt = soup.find("dl", {"class": "statistics meta group"})
        if dt is not None:
            for field in dt.findAll("dt"):
                # "Kudos: " -> "kudos"; the matching value is two siblings away
                name = field.getText()[:-1].lower().replace(" ", "_")
                if field.next_sibling is not None and field.next_sibling.next_sibling is not None:
                    value = field.next_sibling.next_sibling.getText().replace(",", "")
                    if value.isdigit():
                        stats[name] = int(value)

        return stats

    @staticmethod
    def str_format(string):
        """Formats a given string

        Args:
            string (str): String to format

        Returns:
            str: Formatted string (thousands separators removed)
        """
        return string.replace(",", "")

    def get_marked_for_later(self, sleep=1, timeout_sleep=60):
        """
        Gets every marked for later work

        Arguments:
            sleep (int): The time to wait between page requests
            timeout_sleep (int): The time to wait after the rate limit is hit

        Returns:
            works (list): All marked for later works
        """
        # The second-to-last pagination entry is the highest page number
        pageRaw = self.request(f"https://archiveofourown.org/users/{self.username}/readings?page=1&show=to-read").find("ol", {"class": "pagination actions"}).find_all("li")
        maxPage = int(pageRaw[len(pageRaw)-2].text)
        works = []
        for page in range(maxPage):
            grabbed = False
            # Retry the page until it loads without hitting the rate limiter
            while grabbed == False:
                try:
                    workPage = self.request(f"https://archiveofourown.org/users/{self.username}/readings?page={page+1}&show=to-read")
                    worksRaw = workPage.find_all("li", {"role": "article"})
                    for work in worksRaw:
                        try:
                            workId = int(work.h4.a.get("href").split("/")[2])
                            works.append(Work(workId, session=self, load=False))
                        except AttributeError:
                            # Banner without a work link (e.g. deleted work)
                            pass
                    grabbed = True
                except utils.HTTPError:
                    time.sleep(timeout_sleep)
            time.sleep(sleep)
        return works
================================================
FILE: AO3/threadable.py
================================================
import threading
def threadable(func):
    """Decorator: allows the function to be ran as a thread using the
    'threaded' keyword argument.

    When called with threaded=True, func runs in a new thread and the started
    threading.Thread object is returned instead of func's result. Otherwise
    func is called synchronously and its result returned.
    """
    # Local import keeps the module's top-level dependencies unchanged
    from functools import wraps

    @wraps(func)  # preserves __name__, __doc__, __module__, __qualname__, ...
    def new(*args, threaded=False, **kwargs):
        if threaded:
            thread = threading.Thread(target=func, args=args, kwargs=kwargs)
            thread.start()
            return thread
        return func(*args, **kwargs)

    # Marker so other code can detect decorated functions
    new._threadable = True
    return new
class ThreadPool:
    """Runs queued 'threadable' tasks, at most `maximum` at a time."""

    def __init__(self, maximum=None):
        """Creates a new thread pool

        Args:
            maximum (int, optional): Maximum number of concurrently running
                tasks. None means unlimited. Defaults to None.
        """
        self.maximum = maximum
        self._tasks = []    # queued callables, started in FIFO order
        self._threads = []  # currently running threads

    def add_task(self, task):
        """Queues a threadable callable to be started by start()."""
        self._tasks.append(task)

    @threadable
    def start(self):
        """Runs every queued task, keeping at most `maximum` alive at once,
        and returns when all tasks have finished.
        This function is threadable.
        """
        while len(self._threads) != 0 or len(self._tasks) != 0:
            # Drop threads that have finished
            self._threads[:] = filter(lambda thread: thread.is_alive(), self._threads)
            if self.maximum is None:
                # Unlimited pool: start everything that's queued.
                # (Previously the documented default maximum=None crashed
                # below with TypeError: None - int.)
                available = len(self._tasks)
            else:
                available = min(self.maximum - len(self._threads), len(self._tasks))
            for _ in range(available):
                self._threads.append(self._tasks.pop(0)(threaded=True))
================================================
FILE: AO3/users.py
================================================
import datetime
from functools import cached_property
import requests
from bs4 import BeautifulSoup
from . import threadable, utils
from .common import get_work_from_banner
from .requester import requester
class User:
"""
AO3 user object
"""
def __init__(self, username, session=None, load=True):
    """Creates a new AO3 user object

    Args:
        username (str): AO3 username
        session (AO3.Session, optional): Used to access additional info
        load (bool, optional): If true, the user is loaded on initialization. Defaults to True.
    """
    self.username = username
    self._session = session
    # Page soups for the works / profile / bookmarks pages; set by reload()
    self._soup_works = None
    self._soup_profile = None
    self._soup_bookmarks = None
    # Lazily-built lists; None until first requested
    self._works = None
    self._bookmarks = None
    if load:
        self.reload()
def __repr__(self):
    """Unambiguous representation, e.g. "<User [someone]>"."""
    return "<User [{}]>".format(self.username)
def __eq__(self, other):
return isinstance(other, __class__) and other.username == self.username
def __getstate__(self):
d = {}
for attr in self.__dict__:
if isinstance(self.__dict__[attr], BeautifulSoup):
d[attr] = (self.__dict__[attr].encode(), True)
else:
d[attr] = (self.__dict__[attr], False)
return d
def __setstate__(self, d):
for attr in d:
value, issoup = d[attr]
if issoup:
self.__dict__[attr] = BeautifulSoup(value, "lxml")
else:
self.__dict__[attr] = value
    def set_session(self, session):
        """Sets the session used to make requests for this user

        Args:
            session (AO3.Session/AO3.GuestSession): session object
        """
        self._session = session
    @threadable.threadable
    def reload(self):
        """
        Loads information about this user.
        This function is threadable.
        """
        # Drop cached_property values so they are recomputed from the new soups
        for attr in self.__class__.__dict__:
            if isinstance(getattr(self.__class__, attr), cached_property):
                if attr in self.__dict__:
                    delattr(self, attr)

        # The three pages are fetched concurrently; each also refreshes the
        # CSRF token (last writer wins — all pages should carry a valid token)
        @threadable.threadable
        def req_works(username):
            self._soup_works = self.request(f"https://archiveofourown.org/users/{username}/works")
            token = self._soup_works.find("meta", {"name": "csrf-token"})
            setattr(self, "authenticity_token", token["content"])

        @threadable.threadable
        def req_profile(username):
            self._soup_profile = self.request(f"https://archiveofourown.org/users/{username}/profile")
            token = self._soup_profile.find("meta", {"name": "csrf-token"})
            setattr(self, "authenticity_token", token["content"])

        @threadable.threadable
        def req_bookmarks(username):
            self._soup_bookmarks = self.request(f"https://archiveofourown.org/users/{username}/bookmarks")
            token = self._soup_bookmarks.find("meta", {"name": "csrf-token"})
            setattr(self, "authenticity_token", token["content"])

        rs = [req_works(self.username, threaded=True),
              req_profile(self.username, threaded=True),
              req_bookmarks(self.username, threaded=True)]
        for r in rs:
            r.join()

        # Invalidate the lazily-built lists; get_works()/get_bookmarks() rebuild them
        self._works = None
        self._bookmarks = None
def get_avatar(self):
"""Returns a tuple containing the name of the file and its data
Returns:
tuple: (name: str, img: bytes)
"""
icon = self._soup_profile.find("p", {"class": "icon"})
src = icon.img.attrs["src"]
name = src.split("/")[-1].split("?")[0]
img = self.get(src).content
return name, img
@threadable.threadable
def subscribe(self):
"""Subscribes to this user.
This function is threadable.
Raises:
utils.AuthError: Invalid session
"""
if self._session is None or not self._session.is_authed:
raise utils.AuthError("You can only subscribe to a user using an authenticated session")
utils.subscribe(self, "User", self._session)
@threadable.threadable
def unsubscribe(self):
"""Unubscribes from this user.
This function is threadable.
Raises:
utils.AuthError: Invalid session
"""
if not self.is_subscribed:
raise Exception("You are not subscribed to this user")
if self._session is None or not self._session.is_authed:
raise utils.AuthError("You can only unsubscribe from a user using an authenticated session")
utils.subscribe(self, "User", self._session, True, self._sub_id)
@property
def id(self):
id_ = self._soup_profile.find("input", {"id": "subscription_subscribable_id"})
return int(id_["value"]) if id_ is not None else None
@cached_property
def is_subscribed(self):
"""True if you're subscribed to this user"""
if self._session is None or not self._session.is_authed:
raise utils.AuthError("You can only get a user ID using an authenticated session")
header = self._soup_profile.find("div", {"class": "primary header module"})
input_ = header.find("input", {"name": "commit", "value": "Unsubscribe"})
return input_ is not None
    @property
    def loaded(self):
        """Returns True if this user has been loaded"""
        # The profile soup is the load marker; reload() always sets it
        return self._soup_profile is not None
# @cached_property
# def authenticity_token(self):
# """Token used to take actions that involve this user"""
# if not self.loaded:
# return None
# token = self._soup_profile.find("meta", {"name": "csrf-token"})
# return token["content"]
@cached_property
def user_id(self):
if self._session is None or not self._session.is_authed:
raise utils.AuthError("You can only get a user ID using an authenticated session")
header = self._soup_profile.find("div", {"class": "primary header module"})
input_ = header.find("input", {"name": "subscription[subscribable_id]"})
if input_ is None:
raise utils.UnexpectedResponseError("Couldn't fetch user ID")
return int(input_.attrs["value"])
@cached_property
def _sub_id(self):
"""Returns the subscription ID. Used for unsubscribing"""
if not self.is_subscribed:
raise Exception("You are not subscribed to this user")
header = self._soup_profile.find("div", {"class": "primary header module"})
id_ = header.form.attrs["action"].split("/")[-1]
return int(id_)
    @cached_property
    def works(self):
        """Returns the number of works authored by this user

        Returns:
            int: Number of works
        """
        div = self._soup_works.find("div", {"class": "works-index dashboard filtered region"})
        h2 = div.h2.text.split()
        # The count is taken as the 5th whitespace token of the listing heading
        # (e.g. "1 - 20 of 345 Works ..."), commas stripped.
        # NOTE(review): assumes the English-locale heading layout — confirm
        return int(h2[4].replace(',', ''))
@cached_property
def _works_pages(self):
pages = self._soup_works.find("ol", {"title": "pagination"})
if pages is None:
return 1
n = 1
for li in pages.findAll("li"):
text = li.getText()
if text.isdigit():
n = int(text)
return n
def get_works(self, use_threading=False):
"""
Get works authored by this user.
Returns:
list: List of works
"""
if self._works is None:
if use_threading:
self.load_works_threaded()
else:
self._works = []
for page in range(self._works_pages):
self._load_works(page=page+1)
return self._works
@threadable.threadable
def load_works_threaded(self):
"""
Get the user's works using threads.
This function is threadable.
"""
threads = []
self._works = []
for page in range(self._works_pages):
threads.append(self._load_works(page=page+1, threaded=True))
for thread in threads:
thread.join()
@threadable.threadable
def _load_works(self, page=1):
from .works import Work
self._soup_works = self.request(f"https://archiveofourown.org/users/{self.username}/works?page={page}")
ol = self._soup_works.find("ol", {"class": "work index group"})
for work in ol.find_all("li", {"role": "article"}):
if work.h4 is None:
continue
self._works.append(get_work_from_banner(work))
    @cached_property
    def bookmarks(self):
        """Returns the number of works user has bookmarked

        Returns:
            int: Number of bookmarks
        """
        div = self._soup_bookmarks.find("div", {"class": "bookmarks-index dashboard filtered region"})
        h2 = div.h2.text.split()
        # 5th whitespace token of the "1 - 20 of N Bookmarks" heading, commas stripped.
        # NOTE(review): assumes the English-locale heading layout — confirm
        return int(h2[4].replace(',', ''))
@cached_property
def _bookmarks_pages(self):
pages = self._soup_bookmarks.find("ol", {"title": "pagination"})
if pages is None:
return 1
n = 1
for li in pages.findAll("li"):
text = li.getText()
if text.isdigit():
n = int(text)
return n
def get_bookmarks(self, use_threading=False):
"""
Get this user's bookmarked works. Loads them if they haven't been previously
Returns:
list: List of works
"""
if self._bookmarks is None:
if use_threading:
self.load_bookmarks_threaded()
else:
self._bookmarks = []
for page in range(self._bookmarks_pages):
self._load_bookmarks(page=page+1)
return self._bookmarks
@threadable.threadable
def load_bookmarks_threaded(self):
"""
Get the user's bookmarks using threads.
This function is threadable.
"""
threads = []
self._bookmarks = []
for page in range(self._bookmarks_pages):
threads.append(self._load_bookmarks(page=page+1, threaded=True))
for thread in threads:
thread.join()
@threadable.threadable
def _load_bookmarks(self, page=1):
from .works import Work
self._soup_bookmarks = self.request(f"https://archiveofourown.org/users/{self.username}/bookmarks?page={page}")
ol = self._soup_bookmarks.find("ol", {"class": "bookmark index group"})
for work in ol.find_all("li", {"role": "article"}):
authors = []
if work.h4 is None:
continue
self._bookmarks.append(get_work_from_banner(work))
@cached_property
def bio(self):
"""Returns the user's bio
Returns:
str: User's bio
"""
div = self._soup_profile.find("div", {"class": "bio module"})
if div is None:
return ""
blockquote = div.find("blockquote", {"class": "userstuff"})
return blockquote.getText() if blockquote is not None else ""
@cached_property
def url(self):
"""Returns the URL to the user's profile
Returns:
str: user profile URL
"""
return "https://archiveofourown.org/users/%s"%self.username
def get(self, *args, **kwargs):
"""Request a web page and return a Response object"""
if self._session is None:
req = requester.request("get", *args, **kwargs)
else:
req = requester.request("get", *args, **kwargs, session=self._session.session)
if req.status_code == 429:
raise utils.HTTPError("We are being rate-limited. Try again in a while or reduce the number of requests")
return req
def request(self, url):
"""Request a web page and return a BeautifulSoup object.
Args:
url (str): Url to request
Returns:
bs4.BeautifulSoup: BeautifulSoup object representing the requested page's html
"""
req = self.get(url)
soup = BeautifulSoup(req.content, "lxml")
return soup
@staticmethod
def str_format(string):
"""Formats a given string
Args:
string (str): String to format
Returns:
str: Formatted string
"""
return string.replace(",", "")
    @property
    def work_pages(self):
        """
        Returns how many pages of works a user has

        Returns:
            int: Amount of pages
        """
        # Public alias of the cached _works_pages value
        return self._works_pages
================================================
FILE: AO3/utils.py
================================================
import os
import pickle
import re
from bs4 import BeautifulSoup
from .requester import requester
from .common import url_join
# Fandom/language caches; populated by load_fandoms()/load_languages()
_FANDOMS = None
_LANGUAGES = None

# AO3 redirects (HTTP 302) to this URL when an authenticity token is rejected
AO3_AUTH_ERROR_URL = "https://archiveofourown.org/auth_error"
# Package exception types. Bug fix for all of them: the mutable default
# argument `errors=[]` was a single list shared by every instance; a None
# sentinel is used instead so each instance gets its own list.

class LoginError(Exception):
    """Raised when logging in to AO3 fails."""
    def __init__(self, message, errors=None):
        super().__init__(message)
        self.errors = [] if errors is None else errors

class UnloadedError(Exception):
    """Raised when an operation requires data that hasn't been loaded."""
    def __init__(self, message, errors=None):
        super().__init__(message)
        self.errors = [] if errors is None else errors

class UnexpectedResponseError(Exception):
    """Raised when AO3 returns a response this library doesn't understand."""
    def __init__(self, message, errors=None):
        super().__init__(message)
        self.errors = [] if errors is None else errors

class InvalidIdError(Exception):
    """Raised when a work/chapter/subscription ID is invalid."""
    def __init__(self, message, errors=None):
        super().__init__(message)
        self.errors = [] if errors is None else errors

class DownloadError(Exception):
    """Raised when downloading a work fails."""
    def __init__(self, message, errors=None):
        super().__init__(message)
        self.errors = [] if errors is None else errors

class AuthError(Exception):
    """Raised on authentication problems (bad session or CSRF token)."""
    def __init__(self, message, errors=None):
        super().__init__(message)
        self.errors = [] if errors is None else errors

class DuplicateCommentError(Exception):
    """Raised when posting a comment identical to an existing one."""
    def __init__(self, message, errors=None):
        super().__init__(message)
        self.errors = [] if errors is None else errors

class PseudError(Exception):
    """Raised when a valid pseudonym to post under can't be found."""
    def __init__(self, message, errors=None):
        super().__init__(message)
        self.errors = [] if errors is None else errors

class HTTPError(Exception):
    """Raised when AO3 rate-limits us (HTTP 429)."""
    def __init__(self, message, errors=None):
        super().__init__(message)
        self.errors = [] if errors is None else errors

class BookmarkError(Exception):
    """Raised when creating or deleting a bookmark fails."""
    def __init__(self, message, errors=None):
        super().__init__(message)
        self.errors = [] if errors is None else errors

class CollectError(Exception):
    """Raised when adding a work to a collection fails."""
    def __init__(self, message, errors=None):
        super().__init__(message)
        self.errors = [] if errors is None else errors
class Query:
    """Accumulates query-string fields and joins them with '&'."""

    def __init__(self):
        self.fields = []

    def add_field(self, text):
        """Appends one already-encoded 'key=value' field."""
        self.fields.append(text)

    @property
    def string(self):
        """The assembled query string."""
        return "&".join(self.fields)
class Constraint:
    """Represents a bounding box of a value
    """

    def __init__(self, lowerbound=0, upperbound=None):
        """Creates a new Constraint object

        Args:
            lowerbound (int, optional): Constraint lowerbound. Defaults to 0.
            upperbound (int, optional): Constraint upperbound. Defaults to None.
        """
        self._lb = lowerbound
        self._ub = upperbound

    @property
    def string(self):
        """Returns the string representation of this constraint

        Returns:
            str: string representation
        """
        # Bug fix: the unbounded check must come first — previously
        # Constraint(0, None) (the defaults) rendered as "<None" instead of ">0"
        if self._ub is None:
            return f">{self._lb}"
        if self._lb == 0:
            return f"<{self._ub}"
        if self._ub == self._lb:
            return str(self._lb)
        return f"{self._lb}-{self._ub}"

    def __str__(self):
        return self.string
def word_count(text):
    """Counts words in *text*, treating single spaces, newlines and tabs as
    separators (empty tokens from consecutive separators are ignored)."""
    tokens = re.split(" |\n|\t", text)
    return sum(1 for token in tokens if token != "")
def set_rqtw(value):
    """Sets the requests per time window parameter for the AO3 requester

    Args:
        value (int): Maximum requests per window (-1 disables limiting,
            mirroring limit_requests())
    """
    requester.setRQTW(value)
def set_timew(value):
    """Sets the time window parameter for the AO3 requester

    Args:
        value (int): Length of the rate-limiting time window
    """
    requester.setTimeW(value)
def limit_requests(limit=True):
    """Toggles request limiting

    Args:
        limit (bool, optional): True caps requests at 12 per time window,
            False lifts the cap. Defaults to True.
    """
    requester.setRQTW(12 if limit else -1)
def load_fandoms():
    """Loads fandoms into memory

    Raises:
        FileNotFoundError: No resource was found
    """
    global _FANDOMS
    fandom_path = os.path.join(os.path.dirname(__file__), "resources", "fandoms")
    if not os.path.isdir(fandom_path):
        raise FileNotFoundError("No fandom resources have been downloaded. Try AO3.extra.download()")
    _FANDOMS = []
    # Each resource file is a pickled list of fandom names
    for filename in os.listdir(fandom_path):
        with open(os.path.join(fandom_path, filename), "rb") as fp:
            _FANDOMS.extend(pickle.load(fp))
def load_languages():
    """Loads languages into memory

    Raises:
        FileNotFoundError: No resource was found
    """
    global _LANGUAGES
    language_path = os.path.join(os.path.dirname(__file__), "resources", "languages")
    if not os.path.isdir(language_path):
        raise FileNotFoundError("No language resources have been downloaded. Try AO3.extra.download()")
    _LANGUAGES = []
    # Each resource file is a pickled list of languages
    for filename in os.listdir(language_path):
        with open(os.path.join(language_path, filename), "rb") as fp:
            _LANGUAGES.extend(pickle.load(fp))
def get_languages():
    """Returns all available languages

    Raises:
        UnloadedError: load_languages() wasn't called

    Returns:
        list: Copy of the loaded language list
    """
    # Bug fix: previously slicing None raised a bare TypeError when the
    # languages hadn't been loaded; raise UnloadedError like search_fandom does
    if _LANGUAGES is None:
        raise UnloadedError("Did you forget to call AO3.utils.load_languages()?")
    return _LANGUAGES[:]
def search_fandom(fandom_string):
    """Searches for a fandom that matches the given string

    Args:
        fandom_string (str): query string

    Raises:
        UnloadedError: load_fandoms() wasn't called
        UnloadedError: No resources were downloaded

    Returns:
        list: All results matching 'fandom_string'
    """
    if _FANDOMS is None:
        raise UnloadedError("Did you forget to call AO3.utils.load_fandoms()?")
    if _FANDOMS == []:
        raise UnloadedError("Did you forget to download the required resources with AO3.extra.download()?")
    needle = fandom_string.lower()
    return [fandom for fandom in _FANDOMS if needle in fandom.lower()]
def workid_from_url(url):
    """Get the workid from an archiveofourown.org website url

    Args:
        url (str): Work URL

    Returns:
        int: Work ID, or None if one couldn't be extracted
    """
    split_url = url.split("/")
    try:
        index = split_url.index("works")
    except ValueError:
        return None
    # Bug fix: the old check `len(split_url) >= index+1` is always true when
    # "works" was found, so URLs ending in ".../works" raised IndexError.
    # The segment AFTER "works" must exist.
    if len(split_url) > index + 1:
        workid = split_url[index + 1].split("?")[0]
        if workid.isdigit():
            return int(workid)
    return None
def comment(commentable, comment_text, session, fullwork=False, commentid=None, email="", name="", pseud=None):
    """Leaves a comment on a specific work

    Args:
        commentable (Work/Chapter): Chapter/Work object
        comment_text (str): Comment text (must have between 1 and 10000 characters)
        fullwork (bool): Should be True if the work has only one chapter or if the comment is to be posted on the full work.
        session (AO3.Session/AO3.GuestSession): Session object to request with.
        commentid (str/int): If specified, the comment is posted as a reply to this comment. Defaults to None.
        email (str): Email to post with. Only used if sess is None. Defaults to "".
        name (str): Name that will appear on the comment. Only used if sess is None. Defaults to "".
        pseud (str, optional): What pseud to add the comment under. Defaults to default pseud.

    Raises:
        utils.InvalidIdError: Invalid ID
        utils.UnexpectedResponseError: Unknown error
        utils.PseudError: Couldn't find a valid pseudonym to post under
        utils.DuplicateCommentError: The comment you're trying to post was already posted
        ValueError: Invalid name/email

    Returns:
        requests.models.Response: Response object
    """
    # Prefer the target object's own CSRF token; fall back to the session's
    if commentable.authenticity_token is not None:
        at = commentable.authenticity_token
    else:
        at = session.authenticity_token

    headers = {
        "x-requested-with": "XMLHttpRequest",
        "x-newrelic-id": "VQcCWV9RGwIJVFFRAw==",
        "x-csrf-token": at
    }

    data = {}
    # `commentable.id` is a chapter ID unless fullwork is True
    if fullwork:
        data["work_id"] = str(commentable.id)
    else:
        data["chapter_id"] = str(commentable.id)
    if commentid is not None:
        data["comment_id"] = commentid

    if session.is_authed:
        # NOTE(review): `referer` is computed but never sent with the request —
        # confirm whether it should be added to `headers`
        if fullwork:
            referer = f"https://archiveofourown.org/works/{commentable.id}"
        else:
            referer = f"https://archiveofourown.org/chapters/{commentable.id}"

        pseud_id = get_pseud_id(commentable, session, pseud)
        if pseud_id is None:
            raise PseudError("Couldn't find your pseud's id")

        data.update({
            "authenticity_token": at,
            "comment[pseud_id]": pseud_id,
            "comment[comment_content]": comment_text,
        })
    else:
        # Guest comments require both a display name and an email address
        if email == "" or name == "":
            raise ValueError("You need to specify both an email and a name!")

        data.update({
            "authenticity_token": at,
            "comment[email]": email,
            "comment[name]": name,
            "comment[comment_content]": comment_text,
        })

    response = session.post(f"https://archiveofourown.org/comments.js", headers=headers, data=data)
    if response.status_code == 429:
        raise HTTPError("We are being rate-limited. Try again in a while or reduce the number of requests")

    if response.status_code == 404:
        # NOTE(review): a 404 with a non-empty body appears to be treated as a
        # successful post — confirm against AO3's comments.js behavior
        if len(response.content) > 0:
            return response
        else:
            raise InvalidIdError(f"Invalid {'work ID' if fullwork else 'chapter ID'}")

    if response.status_code == 422:
        json = response.json()
        if "errors" in json:
            if "auth_error" in json["errors"]:
                raise AuthError("Invalid authentication token. Try calling session.refresh_auth_token()")
        raise UnexpectedResponseError(f"Unexpected json received:\n{str(json)}")
    elif response.status_code == 200:
        # AO3 returns 200 when the exact same comment already exists
        raise DuplicateCommentError("You have already left this comment here")

    raise UnexpectedResponseError(f"Unexpected HTTP status code received ({response.status_code})")
def delete_comment(comment, session):
    """Deletes the specified comment

    Args:
        comment (AO3.Comment): Comment object
        session (AO3.Session): Session object

    Raises:
        PermissionError: You don't have permission to delete the comment
        utils.AuthError: Invalid auth token
        utils.UnexpectedResponseError: Unknown error
    """
    if session is None or not session.is_authed:
        raise PermissionError("You don't have permission to do this")

    # Prefer the comment's own CSRF token; fall back to the session's
    if comment.authenticity_token is not None:
        at = comment.authenticity_token
    else:
        at = session.authenticity_token

    data = {
        "authenticity_token": at,
        "_method": "delete"
    }

    req = session.post(f"https://archiveofourown.org/comments/{comment.id}", data=data)
    if req.status_code == 429:
        raise HTTPError("We are being rate-limited. Try again in a while or reduce the number of requests")
    else:
        # Success is inferred from the absence of error markers in the response page
        soup = BeautifulSoup(req.content, "lxml")
        if "auth error" in soup.title.getText().lower():
            raise AuthError("Invalid authentication token. Try calling session.refresh_auth_token()")
        else:
            error = soup.find("div", {"id": "main"}).getText()
            if "you don't have permission" in error.lower():
                raise PermissionError("You don't have permission to do this")
def kudos(work, session):
    """Leave a 'kudos' in a specific work

    Args:
        work (Work): Work object
        session (AO3.Session/AO3.GuestSession): Session object

    Raises:
        utils.UnexpectedResponseError: Unexpected response received
        utils.InvalidIdError: Invalid ID (work doesn't exist)
        utils.AuthError: Invalid authenticity token

    Returns:
        bool: True if successful, False if you already left kudos there
    """
    # Prefer the work's own CSRF token; fall back to the session's
    if work.authenticity_token is not None:
        at = work.authenticity_token
    else:
        at = session.authenticity_token

    data = {
        "authenticity_token": at,
        "kudo[commentable_id]": work.id,
        "kudo[commentable_type]": "Work"
    }
    headers = {
        # Bug fix: use `at` (which falls back to the session token) —
        # work.authenticity_token may be None here
        "x-csrf-token": at,
        "x-requested-with": "XMLHttpRequest",
        # Bug fix: AO3 work pages live under /works/<id>, not /work/<id>
        "referer": f"https://archiveofourown.org/works/{work.id}"
    }

    response = session.post("https://archiveofourown.org/kudos.js", headers=headers, data=data)
    if response.status_code == 429:
        raise HTTPError("We are being rate-limited. Try again in a while or reduce the number of requests")
    if response.status_code == 201:
        return True  # Success
    if response.status_code == 422:
        json = response.json()
        if "errors" in json:
            if "auth_error" in json["errors"]:
                raise AuthError("Invalid authentication token. Try calling session.refresh_auth_token()")
            elif "user_id" in json["errors"] or "ip_address" in json["errors"]:
                return False  # User has already left kudos
            elif "no_commentable" in json["errors"]:
                raise InvalidIdError("Invalid ID")
        raise UnexpectedResponseError("Unexpected json received:\n" + str(json))
    raise UnexpectedResponseError(f"Unexpected HTTP status code received ({response.status_code})")
def subscribe(subscribable, worktype, session, unsubscribe=False, subid=None):
    """Subscribes to a work. Be careful, you can subscribe to a work multiple times

    Args:
        subscribable (Work/Series/User): AO3 object
        worktype (str): Type of the work (Series/Work/User)
        session (AO3.Session): Session object
        unsubscribe (bool, optional): Unsubscribe instead of subscribing. Defaults to False.
        subid (str/int, optional): Subscription ID, used when unsubscribing. Defaults to None.

    Raises:
        AuthError: Invalid auth token
        AuthError: Invalid session
        InvalidIdError: Invalid ID / worktype
        InvalidIdError: Invalid subid
    """
    if session is None: session = subscribable.session
    if session is None or not session.is_authed:
        raise AuthError("Invalid session")

    # Prefer the object's own CSRF token; fall back to the session's
    if subscribable.authenticity_token is not None:
        at = subscribable.authenticity_token
    else:
        at = session.authenticity_token

    data = {
        "authenticity_token": at,
        "subscription[subscribable_id]": subscribable.id,
        "subscription[subscribable_type]": worktype.capitalize()
    }

    url = f"https://archiveofourown.org/users/{session.username}/subscriptions"
    if unsubscribe:
        if subid is None:
            raise InvalidIdError("When unsubscribing, subid cannot be None")
        url += f"/{subid}"
        data["_method"] = "delete"

    # Redirects are disabled so the 302 status and Location header can be inspected
    req = session.session.post(url, data=data, allow_redirects=False)
    if unsubscribe:
        return req
    if req.status_code == 302:
        # A 302 to the auth-error page means the token was rejected; any other
        # redirect target is treated as success (falls through, returns None)
        if req.headers["Location"] == AO3_AUTH_ERROR_URL:
            raise AuthError("Invalid authentication token. Try calling session.refresh_auth_token()")
    else:
        raise InvalidIdError(f"Invalid ID / worktype")
def bookmark(bookmarkable, session=None, notes="", tags=None, collections=None, private=False, recommend=False, pseud=None):
    """Adds a bookmark to a work/series. Be careful, you can bookmark a work multiple times

    Args:
        bookmarkable (Work/Series): AO3 object
        session (AO3.Session): Session object
        notes (str, optional): Bookmark notes. Defaults to "".
        tags (list, optional): What tags to add. Defaults to None.
        collections (list, optional): What collections to add this bookmark to. Defaults to None.
        private (bool, optional): Whether this bookmark should be private. Defaults to False.
        recommend (bool, optional): Whether to recommend this bookmark. Defaults to False.
        pseud (str, optional): What pseud to add the bookmark under. Defaults to default pseud.
    """
    if session is None:
        session = bookmarkable.session
    if session is None or not session.is_authed:
        raise AuthError("Invalid session")

    # Prefer the object's own CSRF token; fall back to the session's
    if bookmarkable.authenticity_token is not None:
        at = bookmarkable.authenticity_token
    else:
        at = session.authenticity_token

    pseud_id = get_pseud_id(bookmarkable, session, pseud)
    if pseud_id is None:
        raise PseudError("Couldn't find your pseud's id")

    data = {
        "authenticity_token": at,
        "bookmark[pseud_id]": pseud_id,
        "bookmark[tag_string]": ",".join([] if tags is None else tags),
        "bookmark[collection_names]": ",".join([] if collections is None else collections),
        "bookmark[private]": int(private),
        "bookmark[rec]": int(recommend),
        "commit": "Create"
    }
    if notes != "":
        data["bookmark[bookmarker_notes]"] = notes

    url = url_join(bookmarkable.url, "bookmarks")
    req = session.session.post(url, data=data, allow_redirects=False)
    handle_bookmark_errors(req)
def delete_bookmark(bookmarkid, session, auth_token=None):
    """Remove a bookmark from the work/series

    Args:
        bookmarkid (Work/Series): AO3 object
        session (AO3.Session): Session object
        auth_token (str, optional): Authenticity token. Defaults to None.
    """
    if session is None or not session.is_authed:
        raise AuthError("Invalid session")

    token = session.authenticity_token if auth_token is None else auth_token
    payload = {
        "authenticity_token": token,
        "_method": "delete"
    }
    req = session.session.post(f"https://archiveofourown.org/bookmarks/{bookmarkid}",
                               data=payload, allow_redirects=False)
    handle_bookmark_errors(req)
def handle_bookmark_errors(request):
    """Raises the appropriate exception for a failed bookmark request.

    A 302 redirect to a page other than the auth-error URL is the success
    path (the function returns silently).

    Raises:
        AuthError: The authenticity token was rejected
        BookmarkError: AO3 reported bookmark validation errors
        UnexpectedResponseError: Any other response
    """
    if request.status_code == 302:
        if request.headers["Location"] == AO3_AUTH_ERROR_URL:
            raise AuthError("Invalid authentication token. Try calling session.refresh_auth_token()")
    else:
        if request.status_code == 200:
            # A 200 means the form was re-rendered with an error box
            soup = BeautifulSoup(request.content, "lxml")
            error_div = soup.find("div", {"id": "error", "class": "error"})
            if error_div is None:
                raise UnexpectedResponseError("An unknown error occurred")
            errors = [item.getText() for item in error_div.findAll("li")]
            if len(errors) == 0:
                raise BookmarkError("An unknown error occurred")
            raise BookmarkError("Error(s) creating bookmark:" + " ".join(errors))
        raise UnexpectedResponseError(f"Unexpected HTTP status code received ({request.status_code})")
def get_pseud_id(ao3object, session=None, specified_pseud=None):
    """Scrapes the pseud_id form value from the given object's page.

    Args:
        ao3object (Work/Series/User): AO3 object whose page carries the form
        session (AO3.Session, optional): Session to request with. Defaults to the object's.
        specified_pseud (str, optional): Pseud name to select; defaults to the
            form's pre-selected pseud.

    Raises:
        AuthError: Invalid session

    Returns:
        str: pseud id value, or None if no pseud field was found
    """
    if session is None:
        session = ao3object.session
    if session is None or not session.is_authed:
        raise AuthError("Invalid session")

    soup = session.request(ao3object.url)
    pseud = soup.find("input", {"name": re.compile(".+\\[pseud_id\\]")})
    if pseud is None:
        # Users with multiple pseuds get a <select> instead of a hidden input
        pseud = soup.find("select", {"name": re.compile(".+\\[pseud_id\\]")})
        if pseud is None:
            return None
        pseud_id = None
        if specified_pseud:
            # Match the requested pseud by its option label
            for option in pseud.findAll("option"):
                if option.string == specified_pseud:
                    pseud_id = option.attrs["value"]
                    break
        else:
            # Fall back to the option AO3 pre-selected (the default pseud)
            for option in pseud.findAll("option"):
                if "selected" in option.attrs and option.attrs["selected"] == "selected":
                    pseud_id = option.attrs["value"]
                    break
    else:
        pseud_id = pseud.attrs["value"]
    return pseud_id
def collect(collectable, session, collections):
    """Invites a work to a collection. Be careful, you can collect a work multiple times

    Args:
        work (Work): Work object
        session (AO3.Session): Session object
        collections (list, optional): What collections to add this work to. Defaults to None.
    """
    if session is None: session = collectable.session
    if session == None or not session.is_authed:
        raise AuthError("Invalid session")

    # Prefer the object's own CSRF token; fall back to the session's
    if collectable.authenticity_token is not None:
        at = collectable.authenticity_token
    else:
        at = session.authenticity_token

    if collections is None: collections = []

    data = {
        "authenticity_token": at,
        "collection_names": ",".join(collections),
        "commit": "Add"
    }

    url = url_join(collectable.url, "collection_items")
    req = session.session.post(url, data=data, allow_redirects=True)
    if req.status_code == 302:
        if req.headers["Location"] == AO3_AUTH_ERROR_URL:
            raise AuthError("Invalid authentication token. Try calling session.refresh_auth_token()")
    elif req.status_code == 200:
        # AO3 re-renders the page; a notice div means success, an error div failure
        soup = BeautifulSoup(req.content, "lxml")
        notice_div = soup.find("div", {"class": "notice"})
        error_div = soup.find("div", {"class": "error"})
        if error_div is None and notice_div is None:
            raise UnexpectedResponseError("An unknown error occurred")
        if error_div is not None:
            errors = [item.getText() for item in error_div.findAll("ul")]
            if len(errors) == 0:
                raise CollectError("An unknown error occurred")
            raise CollectError("We couldn't add your submission to the following collection(s): " + " ".join(errors))
    else:
        raise UnexpectedResponseError(f"Unexpected HTTP status code received ({req.status_code})")
================================================
FILE: AO3/works.py
================================================
import warnings
from datetime import datetime
from functools import cached_property
from bs4 import BeautifulSoup
from . import threadable, utils
from .chapters import Chapter
from .comments import Comment
from .requester import requester
from .users import User
class Work:
    """
    AO3 work object
    """

    def __init__(self, workid, session=None, load=True, load_chapters=True):
        """Creates a new AO3 work object

        Args:
            workid (int): AO3 work ID
            session (AO3.Session, optional): Used to access restricted works
            load (bool, optional): If true, the work is loaded on initialization. Defaults to True.
            load_chapters (bool, optional): If false, chapter text won't be parsed, and Work.load_chapters() will have to be called. Defaults to True.

        Raises:
            utils.InvalidIdError: Raised if the work wasn't found
        """
        self.id = workid
        self.chapters = []
        self._session = session
        self._soup = None  # set by reload()
        if load:
            self.reload(load_chapters)
def __repr__(self):
try:
return f"<Work [{self.title}]>"
except:
return f"<Work [{self.id}]>"
def __eq__(self, other):
return isinstance(other, __class__) and other.id == self.id
def __getstate__(self):
d = {}
for attr in self.__dict__:
if isinstance(self.__dict__[attr], BeautifulSoup):
d[attr] = (self.__dict__[attr].encode(), True)
else:
d[attr] = (self.__dict__[attr], False)
return d
def __setstate__(self, d):
for attr in d:
value, issoup = d[attr]
if issoup:
self.__dict__[attr] = BeautifulSoup(value, "lxml")
else:
self.__dict__[attr] = value
@threadable.threadable
def reload(self, load_chapters=True):
"""
Loads information about this work.
This function is threadable.
Args:
load_chapters (bool, optional): If false, chapter text won't be parsed, and Work.load_chapters() will have to be called. Defaults to True.
"""
for attr in self.__class__.__dict__:
if isinstance(getattr(self.__class__, attr), cached_property):
if attr in self.__dict__:
delattr(self, attr)
self._soup = self.request(f"https://archiveofourown.org/works/{self.id}?view_adult=true&view_full_work=true")
if "Error 404" in self._soup.find("h2", {"class", "heading"}).text:
raise utils.InvalidIdError("Cannot find work")
if load_chapters:
self.load_chapters()
def set_session(self, session):
"""Sets the session used to make requests for this work
Args:
session (AO3.Session/AO3.GuestSession): session object
"""
self._session = session
    def load_chapters(self):
        """Loads chapter objects for each one of this work's chapters
        """
        self.chapters = []
        chapters_div = self._soup.find(attrs={"id": "chapters"})
        if chapters_div is None:
            return

        if self.nchapters > 1:
            # Multi-chapter works: each chapter lives in div#chapter-<n>
            for n in range(1, self.nchapters+1):
                chapter = chapters_div.find("div", {"id": f"chapter-{n}"})
                if chapter is None:
                    continue
                # Detach from the full-work soup so each Chapter owns its subtree
                chapter.extract()
                preface_group = chapter.find("div", {"class": ("chapter", "preface", "group")})
                if preface_group is None:
                    continue
                title = preface_group.find("h3", {"class": "title"})
                if title is None:
                    continue
                # The chapter id is the last path segment of the title link
                id_ = int(title.a["href"].split("/")[-1])
                c = Chapter(id_, self, self._session, False)
                c._soup = chapter
                self.chapters.append(c)
        else:
            # Single-chapter works have no chapter id of their own
            c = Chapter(None, self, self._session, False)
            c._soup = chapters_div
            self.chapters.append(c)
def get_images(self):
"""Gets all images from this work
Raises:
utils.UnloadedError: Raises this error if the work isn't loaded
Returns:
dict: key = chapter_n; value = chapter.get_images()
"""
if not self.loaded:
raise utils.UnloadedError("Work isn't loaded. Have you tried calling Work.reload()?")
chapters = {}
for chapter in self.chapters:
images = chapter.get_images()
if len(images) != 0:
chapters[chapter.number] = images
return chapters
    def download(self, filetype="PDF"):
        """Downloads this work

        Args:
            filetype (str, optional): Desired filetype. Defaults to "PDF".
            Known filetypes are: AZW3, EPUB, HTML, MOBI, PDF.

        Raises:
            utils.DownloadError: Raised if there was an error with the download
            utils.UnexpectedResponseError: Raised if the filetype is not available for download

        Returns:
            bytes: File content
        """
        if not self.loaded:
            raise utils.UnloadedError("Work isn't loaded. Have you tried calling Work.reload()?")

        download_btn = self._soup.find("li", {"class": "download"})
        # The download menu lists one <li><a> per available format; match the
        # link text (comparison is effectively case-insensitive via .upper())
        for download_type in download_btn.findAll("li"):
            if download_type.a.getText() == filetype.upper():
                url = f"https://archiveofourown.org/{download_type.a.attrs['href']}"
                req = self.get(url)
                if req.status_code == 429:
                    raise utils.HTTPError("We are being rate-limited. Try again in a while or reduce the number of requests")
                if not req.ok:
                    raise utils.DownloadError("An error occurred while downloading the work")
                return req.content
        raise utils.UnexpectedResponseError(f"Filetype '{filetype}' is not available for download")
@threadable.threadable
def download_to_file(self, filename, filetype="PDF"):
"""Downloads this work and saves it in the specified file.
This function is threadable.
Args:
filename (str): Name of the resulting file
filetype (str, optional): Desired filetype. Defaults to "PDF".
Known filetypes are: AZW3, EPUB, HTML, MOBI, PDF.
Raises:
utils.DownloadError: Raised if there was an error with the download
utils.UnexpectedResponseError: Raised if the filetype is not available for download
"""
with open(filename, "wb") as file:
file.write(self.download(filetype))
@property
def metadata(self):
    """Collects this work's metadata into a plain dict.

    Only attributes that are currently available are included; anything
    that raises AttributeError (e.g. because the work isn't loaded) is
    silently skipped.

    Returns:
        dict: metadata keyed by attribute name
    """
    collected = {}

    def _put(key, attr, transform):
        # Store transform(getattr(self, attr)) under `key`, skipping
        # attributes that aren't available yet
        try:
            collected[key] = transform(getattr(self, attr))
        except AttributeError:
            pass

    # Date-like fields are stored as strings
    for field in ("date_edited", "date_published", "date_updated"):
        _put(field, field, str)
    # Plain fields are stored as-is
    for field in (
        "bookmarks", "categories", "nchapters", "characters", "complete",
        "comments", "expected_chapters", "fandoms", "hits", "kudos",
        "language", "rating", "relationships", "restricted", "status",
        "summary", "tags", "title", "warnings", "id", "words", "collections",
    ):
        _put(field, field, lambda value: value)
    # Object-valued fields are flattened to simple lists
    _put("authors", "authors", lambda authors: [author.username for author in authors])
    _put("series", "series", lambda series: [entry.name for entry in series])
    _put("chapter_titles", "chapters", lambda chapters: [chapter.title for chapter in chapters])
    return collected
def get_comments(self, maximum=None):
    """Returns a list of all threads of comments in the work. This operation can take a very long time.
    Because of that, it is recommended that you set a maximum number of comments.
    Duration: ~ (0.13 * n_comments) seconds or 2.9 seconds per comment page
    Args:
        maximum (int, optional): Maximum number of comments to be returned. None -> No maximum
    Raises:
        ValueError: Invalid chapter number
        IndexError: Invalid chapter number
        utils.UnloadedError: Work isn't loaded
    Returns:
        list: List of comments
    """
    if not self.loaded:
        raise utils.UnloadedError("Work isn't loaded. Have you tried calling Work.reload()?")
    # Full-work view with comments shown; %d is later filled with the page number
    url = f"https://archiveofourown.org/works/{self.id}?page=%d&show_comments=true&view_adult=true&view_full_work=true"
    soup = self.request(url%1)
    # Determine the number of comment pages from the pagination bar
    # (the bar is absent when there is only one page)
    pages = 0
    div = soup.find("div", {"id": "comments_placeholder"})
    ol = div.find("ol", {"class": "pagination actions"})
    if ol is None:
        pages = 1
    else:
        for li in ol.findAll("li"):
            if li.getText().isdigit():
                # Keeps overwriting, so the last numeric entry wins
                pages = int(li.getText())
    comments = []
    for page in range(pages):
        if page != 0:
            # Page 1 was already fetched above
            soup = self.request(url%(page+1))
        ol = soup.find("ol", {"class": "thread"})
        # Only direct children: each top-level <li role="article"> is a thread root
        for li in ol.findAll("li", {"role": "article"}, recursive=False):
            if maximum is not None and len(comments) >= maximum:
                return comments
            # Element ids look like "comment_123456"; strip the 8-char prefix
            id_ = int(li.attrs["id"][8:])
            header = li.find("h4", {"class": ("heading", "byline")})
            if header is None or header.a is None:
                # Deleted or unlinked commenters have no profile link
                author = None
            else:
                author = User(str(header.a.text), self._session, False)
            if li.blockquote is not None:
                text = li.blockquote.getText()
            else:
                text = ""
            # Build a semi-loaded Comment so no further requests are needed
            comment = Comment(id_, self, session=self._session, load=False)
            setattr(comment, "authenticity_token", self.authenticity_token)
            setattr(comment, "author", author)
            setattr(comment, "text", text)
            comment._thread = None
            comments.append(comment)
    return comments
@threadable.threadable
def subscribe(self):
    """Subscribes to this work.
    This function is threadable.
    Raises:
        utils.AuthError: Invalid session
    """
    authed = self._session is not None and self._session.is_authed
    if not authed:
        raise utils.AuthError("You can only subscribe to a work using an authenticated session")
    utils.subscribe(self, "Work", self._session)
@threadable.threadable
def unsubscribe(self):
    """Unsubscribes from this work.
    This function is threadable.
    Raises:
        utils.AuthError: Invalid session
    """
    if not self.is_subscribed:
        raise Exception("You are not subscribed to this work")
    authed = self._session is not None and self._session.is_authed
    if not authed:
        raise utils.AuthError("You can only unsubscribe from a work using an authenticated session")
    utils.subscribe(self, "Work", self._session, True, self._sub_id)
@cached_property
def text(self):
    """Full text of the work: every chapter's text, each followed by a newline."""
    parts = []
    for chapter in self.chapters:
        parts.append(chapter.text)
        parts.append("\n")
    return "".join(parts)
@cached_property
def authenticity_token(self):
    """CSRF token used to take actions that involve this work (None until loaded)."""
    if not self.loaded:
        return None
    meta = self._soup.find("meta", {"name": "csrf-token"})
    return meta["content"]
@cached_property
def is_subscribed(self):
    """True if the authenticated user is subscribed to this work.
    Raises:
        utils.AuthError: Invalid session
    """
    if self._session is None or not self._session.is_authed:
        raise utils.AuthError("You can only get a user ID using an authenticated session")
    nav = self._soup.find("ul", {"class": "work navigation actions"})
    # An "Unsubscribe" submit button is only rendered when already subscribed
    unsubscribe_input = nav.find("li", {"class": "subscribe"}).find("input", {"name": "commit", "value": "Unsubscribe"})
    return unsubscribe_input is not None
@cached_property
def _sub_id(self):
"""Returns the subscription ID. Used for unsubscribing"""
if self._session is None or not self._session.is_authed:
raise utils.AuthError("You can only get a user ID using an authenticated session")
ul = self._soup.find("ul", {"class": "work navigation actions"})
id_ = ul.find("li", {"class": "subscribe"}).form.attrs["action"].split("/")[-1]
return int(id_)
@threadable.threadable
def leave_kudos(self):
    """Leave a "kudos" in this work.
    This function is threadable.
    Raises:
        utils.UnexpectedResponseError: Unexpected response received
        utils.InvalidIdError: Invalid ID (work doesn't exist)
        utils.AuthError: Invalid session or authenticity token
    Returns:
        bool: True if successful, False if you already left kudos there
    """
    session = self._session
    if session is None:
        raise utils.AuthError("Invalid session")
    return utils.kudos(self, session)
@threadable.threadable
def comment(self, comment_text, email="", name="", pseud=None):
    """Leaves a comment on this work.
    This function is threadable.
    Args:
        comment_text (str): Comment text
        email (str, optional): Email to add comment. Needed if not logged in.
        name (str, optional): Name to add comment under. Needed if not logged in.
        pseud (str, optional): Pseud to add the comment under. Defaults to default pseud.
    Raises:
        utils.UnloadedError: Work isn't loaded
        utils.AuthError: Invalid session
    Returns:
        requests.models.Response: Response object
    """
    if not self.loaded:
        raise utils.UnloadedError("Work isn't loaded. Have you tried calling Work.reload()?")
    session = self._session
    if session is None:
        raise utils.AuthError("Invalid session")
    return utils.comment(self, comment_text, session, True, email=email, name=name, pseud=pseud)
@threadable.threadable
def bookmark(self, notes="", tags=None, collections=None, private=False, recommend=False, pseud=None):
    """Bookmarks this work.
    This function is threadable.
    Args:
        notes (str, optional): Bookmark notes. Defaults to "".
        tags (list, optional): What tags to add. Defaults to None.
        collections (list, optional): What collections to add this bookmark to. Defaults to None.
        private (bool, optional): Whether this bookmark should be private. Defaults to False.
        recommend (bool, optional): Whether to recommend this bookmark. Defaults to False.
        pseud (str, optional): What pseud to add the bookmark under. Defaults to default pseud.
    Raises:
        utils.UnloadedError: Work isn't loaded
        utils.AuthError: Invalid session
    """
    if not self.loaded:
        raise utils.UnloadedError("Work isn't loaded. Have you tried calling Work.reload()?")
    session = self._session
    if session is None:
        raise utils.AuthError("Invalid session")
    utils.bookmark(self, session, notes, tags, collections, private, recommend, pseud)
@threadable.threadable
def delete_bookmark(self):
    """Removes this session's bookmark from this work.
    This function is threadable.
    Raises:
        utils.UnloadedError: Work isn't loaded
        utils.AuthError: Invalid session
        utils.BookmarkError: No bookmark exists on this work
    """
    if not self.loaded:
        raise utils.UnloadedError("Work isn't loaded. Have you tried calling Work.reload()?")
    if self._session is None:
        raise utils.AuthError("Invalid session")
    bookmark_id = self._bookmarkid
    if bookmark_id is None:
        raise utils.BookmarkError("You don't have a bookmark here")
    utils.delete_bookmark(bookmark_id, self._session, self.authenticity_token)
@threadable.threadable
def collect(self, collections):
    """Invites/collects this work to a collection or collections.
    This function is threadable.
    Args:
        collections (list): What collections to add this work to. Defaults to None.
    Raises:
        utils.UnloadedError: Work isn't loaded
        utils.AuthError: Invalid session
    """
    if not self.loaded:
        raise utils.UnloadedError("Work isn't loaded. Have you tried calling Work.reload()?")
    session = self._session
    if session is None:
        raise utils.AuthError("Invalid session")
    utils.collect(self, session, collections)
@cached_property
def _bookmarkid(self):
form_div = self._soup.find("div", {"id": "bookmark-form"})
if form_div is None:
return None
if form_div.form is None:
return None
if "action" in form_div.form.attrs and form_div.form["action"].startswith("/bookmarks"):
text = form_div.form["action"].split("/")[-1]
if text.isdigit():
return int(text)
return None
return None
@property
def loaded(self):
    """True once this work's page has been fetched and parsed."""
    return self._soup is not None
@property
def oneshot(self):
    """True when this work consists of a single chapter."""
    return self.nchapters == 1
@cached_property
def series(self):
    """Series this work belongs to, as semi-loaded Series objects."""
    from .series import Series
    dd = self._soup.find("dd", {"class": "series"})
    if dd is None:
        return []
    found = []
    for span in dd.find_all("span", {"class": "position"}):
        # The link href ends with the series id; the link text is its name
        series_id = int(span.a.attrs["href"].split("/")[-1])
        entry = Series(series_id, self._session, False)
        entry.name = span.a.getText()
        found.append(entry)
    return found
@cached_property
def authors(self):
    """Returns the list of the work's author
    Returns:
        list: list of semi-loaded User objects
    """
    from .users import User
    # The byline heading holds a comma-separated list of author names
    bylines = self._soup.find_all("h3", {"class": "byline heading"})
    if len(bylines) == 0:
        return []
    # find_all always returns a list (never None), so the old
    # `if authors is not None` guard was dead code and has been removed
    names = bylines[0].text.replace("\n", "").split(", ")
    return [User(name, load=False) for name in names]
@cached_property
def nchapters(self):
    """Number of chapters currently posted.
    Returns:
        int: number of chapters (0 when the stat is missing)
    """
    dd = self._soup.find("dd", {"class": "chapters"})
    if dd is None:
        return 0
    # The stat reads "posted/expected", e.g. "12/?"
    return int(self.str_format(dd.string.split("/")[0]))
@cached_property
def expected_chapters(self):
    """Number of chapters the author expects, or None when unspecified ("?").
    Returns:
        int: number of chapters, or None
    """
    dd = self._soup.find("dd", {"class": "chapters"})
    if dd is None:
        return None
    declared = self.str_format(dd.string.split("/")[-1])
    return int(declared) if declared.isdigit() else None
@property
def status(self):
    """Publication status of this work.
    Returns:
        str: "Completed" or "Work in Progress"
    """
    finished = self.nchapters == self.expected_chapters
    return "Completed" if finished else "Work in Progress"
@cached_property
def hits(self):
    """Number of hits this work has received.
    Returns:
        int: number of hits (0 when the stat is missing)
    """
    node = self._soup.find("dd", {"class": "hits"})
    return int(self.str_format(node.string)) if node is not None else 0
@cached_property
def kudos(self):
    """Number of kudos this work has received.
    Returns:
        int: number of kudos (0 when the stat is missing)
    """
    node = self._soup.find("dd", {"class": "kudos"})
    return int(self.str_format(node.string)) if node is not None else 0
@cached_property
def comments(self):
    """Number of comments left on this work.
    Returns:
        int: number of comments (0 when the stat is missing)
    """
    node = self._soup.find("dd", {"class": "comments"})
    return int(self.str_format(node.string)) if node is not None else 0
@cached_property
def restricted(self):
    """Whether this is a restricted (registered-users-only) work.
    Returns:
        bool: True if the work is restricted
    """
    # Restricted works carry a lock icon with this title
    lock = self._soup.find("img", {"title": "Restricted"})
    return lock is not None
@cached_property
def words(self):
    """This work's word count.
    Returns:
        int: number of words (0 when the stat is missing)
    """
    node = self._soup.find("dd", {"class": "words"})
    return int(self.str_format(node.string)) if node is not None else 0
@cached_property
def language(self):
    """This work's language.
    Returns:
        str: language name, or "Unknown" when not listed
    """
    node = self._soup.find("dd", {"class": "language"})
    return node.string.strip() if node is not None else "Unknown"
@cached_property
def bookmarks(self):
    """Number of bookmarks this work has.
    Returns:
        int: number of bookmarks (0 when the stat is missing)
    """
    node = self._soup.find("dd", {"class": "bookmarks"})
    return int(self.str_format(node.string)) if node is not None else 0
@cached_property
def title(self):
    """Title of this work.
    Returns:
        str: work title ("" when the preface is missing)
    """
    preface = self._soup.find("div", {"class": "preface group"})
    if preface is None:
        return ""
    return str(preface.h2.text.strip())
@cached_property
def date_published(self):
    """Date this work was published.
    Returns:
        datetime.datetime: publish date (midnight, naive)
    """
    # The stat is formatted "YYYY-MM-DD"
    published = self._soup.find("dd", {"class": "published"}).string
    return datetime(*(int(part) for part in published.split("-")))
@cached_property
def date_edited(self):
    """Returns the date this work was last edited
    Returns:
        datetime.datetime: edit date (falls back to the publish date when
        no download menu is present)
    """
    download = self._soup.find("li", {"class": "download"})
    if download is not None and download.ul is not None:
        # The download links carry an "updated_at"-style timestamp as the
        # last query-string value
        timestamp = int(download.ul.a["href"].split("=")[-1])
        return datetime.fromtimestamp(timestamp)
    # Bug fix: this used to be `datetime(self.date_published)`, which raised
    # TypeError because date_published is already a datetime
    return self.date_published
@cached_property
def date_updated(self):
    """Date this work was last updated.
    Returns:
        datetime.datetime: update date (falls back to the publish date)
    """
    status_dd = self._soup.find("dd", {"class": "status"})
    if status_dd is None:
        return self.date_published
    # The stat is formatted "YYYY-MM-DD"
    return datetime(*(int(part) for part in status_dd.string.split("-")))
@cached_property
def tags(self):
    """All of this work's additional (freeform) tags.
    Returns:
        list: List of tags
    """
    dd = self._soup.find("dd", {"class": "freeform tags"})
    if dd is None:
        return []
    return [li.a.string for li in dd.find_all("li")]
@cached_property
def characters(self):
    """All of this work's character tags.
    Returns:
        list: List of characters
    """
    dd = self._soup.find("dd", {"class": "character tags"})
    if dd is None:
        return []
    return [li.a.string for li in dd.find_all("li")]
@cached_property
def relationships(self):
    """All of this work's relationship tags.
    Returns:
        list: List of relationships
    """
    dd = self._soup.find("dd", {"class": "relationship tags"})
    if dd is None:
        return []
    return [li.a.string for li in dd.find_all("li")]
@cached_property
def fandoms(self):
    """All of this work's fandom tags.
    Returns:
        list: List of fandoms
    """
    dd = self._soup.find("dd", {"class": "fandom tags"})
    if dd is None:
        return []
    return [li.a.string for li in dd.find_all("li")]
@cached_property
def categories(self):
    """All of this work's category tags.
    Returns:
        list: List of categories
    """
    dd = self._soup.find("dd", {"class": "category tags"})
    if dd is None:
        return []
    return [li.a.string for li in dd.find_all("li")]
@cached_property
def warnings(self):
    """All of this work's archive warning tags.
    Returns:
        list: List of warnings
    """
    dd = self._soup.find("dd", {"class": "warning tags"})
    if dd is None:
        return []
    return [li.a.string for li in dd.find_all("li")]
@cached_property
def rating(self):
    """This work's rating.
    Returns:
        str: rating, or None when not present
    """
    dd = self._soup.find("dd", {"class": "rating tags"})
    return dd.a.string if dd is not None else None
@cached_property
def summary(self):
    """Returns this work's summary
    Returns:
        str: Summary ("" when no summary is present)
    """
    div = self._soup.find("div", {"class": "preface group"})
    if div is None:
        return ""
    html = div.find("blockquote", {"class": "userstuff"})
    if html is None:
        return ""
    # Call getText on the tag itself instead of the previous unbound-style
    # `BeautifulSoup.getText(html)`, which worked only by accident
    return str(html.getText())
@cached_property
def start_notes(self):
    """Text from this work's start notes, one stripped paragraph per line."""
    notes = self._soup.find("div", {"class": "notes module"})
    if notes is None:
        return ""
    return "".join(p.getText().strip() + "\n" for p in notes.findAll("p"))
@cached_property
def end_notes(self):
    """Text from this work's end notes, one paragraph per line (not stripped)."""
    notes = self._soup.find("div", {"id": "work_endnotes"})
    if notes is None:
        return ""
    return "".join(p.getText() + "\n" for p in notes.findAll("p"))
@cached_property
def url(self):
    """URL of this work on the Archive.
    Returns:
        str: work URL
    """
    return "https://archiveofourown.org/works/%s" % self.id
@cached_property
def complete(self):
    """Whether this work is complete.
    Returns:
        bool: True when posted chapters equal expected chapters
    """
    # The stat reads "posted/expected", e.g. "3/3" or "3/?"
    parts = self._soup.find("dd", {"class": "chapters"}).string.split("/")
    return parts[0] == parts[1]
@cached_property
def collections(self):
    """All the collections this work belongs to.
    Returns:
        list: List of collection names
    """
    dd = self._soup.find("dd", {"class": "collections"})
    if dd is None:
        return []
    return [link.get_text() for link in dd.find_all("a")]
def get(self, *args, **kwargs):
    """Request a web page and return a Response object.
    Uses the attached session's requests.Session when available.
    Raises:
        utils.HTTPError: We are being rate-limited
    """
    if self._session is None:
        response = requester.request("get", *args, **kwargs)
    else:
        response = requester.request("get", *args, **kwargs, session=self._session.session)
    if response.status_code == 429:
        raise utils.HTTPError("We are being rate-limited. Try again in a while or reduce the number of requests")
    return response
def request(self, url):
    """Request a web page and parse it into a BeautifulSoup object.
    Args:
        url (str): Url to request
    Returns:
        bs4.BeautifulSoup: BeautifulSoup object representing the requested page's html
    """
    response = self.get(url)
    # Very large works are slow to parse; warn the caller up front
    if len(response.content) > 650000:
        warnings.warn("This work is very big and might take a very long time to load")
    return BeautifulSoup(response.content, "lxml")
@staticmethod
def str_format(string):
    """Strips thousands separators from a formatted number string.
    Args:
        string (str): String to format
    Returns:
        str: Formatted string (commas removed)
    """
    return string.replace(",", "")
================================================
FILE: LICENSE
================================================
MIT License
Copyright (c) 2019 Francisco Patrício Rodrigues
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
================================================
FILE: README.md
================================================
[](https://ao3-api.readthedocs.io/en/latest/?badge=latest)
# AO3 API
This is an unofficial API that lets you access some of AO3's (archiveofourown.org) data through Python.
## Installation
Use the package manager [pip](https://pip.pypa.io/en/stable/) to install AO3 API.
```bash
pip install ao3_api
```
# Github
https://github.com/wendytg/ao3_api
# Usage
This package is divided in 9 core modules: works, chapters, users, series, search, session, comments, extra, and utils.
## Works
One of the most basic things you might want to do with this package is loading a work and checking its statistics and information. To do that, you'll need the `AO3.Work` class.
We start by finding the _workid_ of the work we want to load. We do that either by using `AO3.utils.workid_from_url(url)` or by just looking at the url ourselves. Let's take a look:
```py3
import AO3
url = "https://archiveofourown.org/works/14392692/chapters/33236241"
workid = AO3.utils.workid_from_url(url)
print(f"Work ID: {workid}")
work = AO3.Work(workid)
print(f"Chapters: {work.nchapters}")
```
After running this snippet, we get the output:
```
Work ID: 14392692
Chapters: 46
```
It's important to note that some works may not be accessible to guest users, and in this case you will get 0 chapters as an output, and the error `AO3.utils.AuthError: This work is only available to registered users of the Archive` if you try to load it. Nonetheless, we can still do a lot more with this Work object: Let's try to get the first 20 words of the second chapter.
```py3
import AO3
work = AO3.Work(14392692)
print(work.chapters[1].title) # Second chapter name
text = work.chapters[1].text # Second chapter text
print(' '.join(text.split(" ")[:20]))
```
```
What Branches Grow Meaning
December 27, 2018
Christmas sucked this year, and Shouto’s got the black eye to prove it.
Things had started out well enough,
```
The objects in work.chapters are of type `AO3.Chapter`. They have a lot of the same properties as a `Work` object would.
Another thing you can do with the work object is download the entire work as a pdf or e-book. At the moment you can download works as AZW3, EPUB, HTML, MOBI, and PDF files.
```py3
import AO3
work = AO3.Work(14392692)
with open(f"{work.title}.pdf", "wb") as file:
file.write(work.download("PDF"))
```
__Advanced functionality__
Usually, when you call the constructor for the `Work` class, all info about it is loaded in the `__init__()` function. However, this process takes quite some time (~1-1.5 seconds) and if you want to load a list of works from a series, for example, you might be waiting for upwards of 30 seconds. To avoid this problem, the `Work.reload()` function, called on initialization, is a "threadable" function, which means that if you call it with the argument `threaded=True`, it will return a `Thread` object and work in parallel, meaning you can load multiple works at the same time. Let's take a look at an implementation:
```py3
import AO3
import time
series = AO3.Series(1295090)
works = []
threads = []
start = time.time()
for work in series.work_list:
works.append(work)
threads.append(work.reload(threaded=True))
for thread in threads:
thread.join()
print(f"Loaded {len(works)} works in {round(time.time()-start, 1)} seconds.")
```
`Loaded 29 works in 2.2 seconds.`
The `load=False` inside the `Work` constructor makes sure we don't load the work as soon as we create an instance of the class. In the end, we iterate over every thread and wait for the last one to finish using `.join()`. Let's compare this method with the standard way of loading AO3 works:
```py3
import AO3
import time
series = AO3.Series(1295090)
works = []
start = time.time()
for work in series.work_list:
work.reload()
works.append(work)
print(f"Loaded {len(works)} works in {round(time.time()-start, 1)} seconds.")
```
`Loaded 29 works in 21.6 seconds.`
As we can see, there is a significant performance increase. There are other functions in this package which have this functionality. To see if a function is "threadable", either use `hasattr(function, "_threadable")` or check its `__doc__` string.
To save even more time, if you're only interested in metadata, you can load a work with the `load_chapters` option set to False. Also, be aware that some functions (like `Series.work_list` or `Search.results`) might return semi-loaded `Work` objects. This means that no requests have been made to load this work (so you don't have access to chapter text, notes, etc...) but almost all of its metadata will already have been cached, and you might not need to call `Work.reload()` at all.
The last important information about the `Work` class is that most of its properties (like the number of bookmarks, kudos, the authors' names, etc...) are cached properties. That means that once you check them once, the value is stored and it won't ever change, even if those values change. To update these values, you will need to call `Work.reload()`. See the example below:
```py3
import AO3
sess = AO3.GuestSession()
work = AO3.Work(16721367, sess)
print(work.kudos)
work.leave_kudos()
work.reload()
print(work.kudos)
```
```
392
393
```
## Users
Another useful thing you might want to do is get information on who wrote which works / comments. For that, we use the `AO3.User` class.
```py3
import AO3
user = AO3.User("bothersomepotato")
print(user.url)
print(user.bio)
print(user.works) # Number of works published
```
```
https://archiveofourown.org/users/bothersomepotato
University student, opening documents to write essays but writing this stuff instead. No regrets though. My Tumblr, come chat with -or yell at- me if you feel like it! :)
2
```
## Search
To search for works, you can either use the `AO3.search()` function and parse the BeautifulSoup object returned yourself, or use the `AO3.Search` class to automatically do that for you.
```py3
import AO3
search = AO3.Search(any_field="Clarke Lexa", word_count=AO3.utils.Constraint(5000, 15000))
search.update()
print(search.total_results)
for result in search.results:
print(result)
```
```
3074
<Work [five times lexa falls for clarke]>
<Work [an incomplete list of reasons (why Clarke loves Lexa)]>
<Work [five times clarke and lexa aren’t sure if they're a couple or not]>
<Work [Chemistry]>
<Work [The New Commander (Lexa Joining Camp Jaha)]>
<Work [Ode to Clarke]>
<Work [it's always been (right in front of me)]>
<Work [The Girlfriend Tag]>
<Work [The After-Heda Chronicles]>
<Work [The Counter]>
<Work [May We Meet Again]>
<Work [No Filter]>
<Work [The Games We Play]>
<Work [A l'épreuve des balles]>
<Work [Celebration]>
<Work [Another level of fucked up]>
<Work [(Don't Ever Want to Tame) This Wild Heart]>
<Work [Self Control]>
<Work [Winter]>
<Work [My only wish]>
```
You can then use the workid to load one of the works you searched for. To get more than the first 20 works, change the page number using
```py3
search.page = 2
```
## Session
A lot of actions you might want to take might require an AO3 account. If you already have one, you can access those actions using an AO3.Session object. You start by logging in using your username and password, and then you can use that object to access restricted content.
```py3
import AO3
session = AO3.Session("username", "password")
print(f"Bookmarks: {session.bookmarks}")
session.refresh_auth_token()
print(session.kudos(AO3.Work(18001499, load=False)))
```
```
Bookmarks: 67
True
```
We successfully left kudos in a work and checked our bookmarks. The `session.refresh_auth_token()` is needed for some activities such as leaving kudos and comments. If it is expired or you forget to call this function, the error `AO3.utils.AuthError: Invalid authentication token. Try calling session.refresh_auth_token()` will be raised.
You can also comment / leave kudos in a work by calling `Work.leave_kudos()`/`Work.comment()` and provided you have instantiated that object with a session already (`AO3.Work(xxxxxx, session=sess)` or using `Work.set_session()`). This is probably the best way to do so because you will run into less authentication issues (as the work's authenticity token will be used instead).
If you would prefer to leave a comment or kudos anonymously, you can use an `AO3.GuestSession` in the same way you'd use a normal session, except you won't be able to check your bookmarks, subscriptions, etc. because you're not actually logged in.
## Comments
To retrieve and process comment threads, you might want to look at the `Work.get_comments()` method. It returns all the comments in a specific chapter and their respective threads. You can then process them however you want. Let's take a look:
```py3
from time import time
import AO3
work = AO3.Work(24560008)
work.load_chapters()
start = time()
comments = work.get_comments(5)
print(f"Loaded {len(comments)} comment threads in {round(time()-start, 1)} seconds\n")
for comment in comments:
print(f"Comment ID: {comment.id}\nReplies: {len(comment.get_thread())}")
```
```
Loaded 5 comment threads in 1.8 seconds
Comment ID: 312237184
Replies: 1
Comment ID: 312245032
Replies: 1
Comment ID: 312257098
Replies: 1
Comment ID: 312257860
Replies: 1
Comment ID: 312285673
Replies: 2
```
Loading comments takes a very long time so you should try and use it as little as possible. It also causes lots of requests to be sent to the AO3 servers, which might result in getting the error `utils.HTTPError: We are being rate-limited. Try again in a while or reduce the number of requests`. If that happens, you should try to space out your requests or reduce their number. There is also the option to enable request limiting using `AO3.utils.limit_requests()`, which make it so you can't make more than x requests in a certain time window.
You can also reply to comments using the `Comment.reply()` function, or delete one (if it's yours) using `Comment.delete()`.
## Extra
AO3.extra contains the code to download some extra resources that are not core to the functionality of this package and don't change very often. One example would be the list of fandoms recognized by AO3.
To download a resource, simply use `AO3.extra.download(resource_name)`. To download every resource, you can use `AO3.extra.download_all()`. To see the list of available resources, use `AO3.extra.get_resources()`.
# Contact info
For information or bug reports, please create an issue or start a discussion.
# License
[MIT](https://choosealicense.com/licenses/mit/)
================================================
FILE: docs/index.md
================================================
# AO3 API
This is an unofficial python library that lets you access some of AO3's (archiveofourown.org) data using webscraping and some other tools.
__Documentation__
https://ao3-api.readthedocs.io
__Source code repository and issue tracker__
https://github.com/wendytg/ao3_api
__License__
[MIT](https://choosealicense.com/licenses/mit/)
================================================
FILE: docs/install.md
================================================
# Installation
You can install this package using pip
```pip install ao3_api```
or by cloning the repository and building it from source.
__Requirements__
- BeautifulSoup4
- Requests
- LXML
================================================
FILE: docs/use.md
================================================
# Usage
This package is divided in 9 core modules: works, chapters, users, series, search, session, comments, extra, and utils.
## Works
One of the most basic things you might want to do with this package is loading a work and checking its statistics and information. To do that, you'll need the `AO3.Work` class.
We start by finding the _workid_ of the work we want to load. We do that either by using `AO3.utils.workid_from_url(url)` or by just looking at the url ourselves. Let's take a look:
```python
import AO3
url = "https://archiveofourown.org/works/14392692/chapters/33236241"
workid = AO3.utils.workid_from_url(url)
print(f"Work ID: {workid}")
work = AO3.Work(workid)
print(f"Chapters: {work.nchapters}")
```
After running this snippet, we get the output:
```
Work ID: 14392692
Chapters: 46
```
It's important to note that some works may not be accessible to guest users, and in this case you will get 0 chapters as an output, and the error `AO3.utils.AuthError: This work is only available to registered users of the Archive` if you try to load it. Nonetheless, we can still do a lot more with this Work object: Let's try to get the first 20 words of the second chapter.
```python
import AO3
work = AO3.Work(14392692)
print(work.chapters[1].title) # Second chapter name
text = work.chapters[1].text # Second chapter text
print(' '.join(text.split(" ")[:20]))
```
```
What Branches Grow Meaning
December 27, 2018
Christmas sucked this year, and Shouto’s got the black eye to prove it.
Things had started out well enough,
```
The objects in work.chapters are of type `AO3.Chapter`. They have a lot of the same properties as a `Work` object would.
Another thing you can do with the work object is download the entire work as a pdf or e-book. At the moment you can download works as AZW3, EPUB, HTML, MOBI, and PDF files.
```python
import AO3
work = AO3.Work(14392692)
with open(f"{work.title}.pdf", "wb") as file:
file.write(work.download("PDF"))
```
__Advanced functionality__
Usually, when you call the constructor for the `Work` class, all info about it is loaded in the `__init__()` function. However, this process takes quite some time (~1-1.5 seconds) and if you want to load a list of works from a series, for example, you might be waiting for upwards of 30 seconds. To avoid this problem, the `Work.reload()` function, called on initialization, is a "threadable" function, which means that if you call it with the argument `threaded=True`, it will return a `Thread` object and work in parallel, meaning you can load multiple works at the same time. Let's take a look at an implementation:
```python
import AO3
import time
series = AO3.Series(1295090)
works = []
threads = []
start = time.time()
for work in series.work_list:
works.append(work)
threads.append(work.reload(threaded=True))
for thread in threads:
thread.join()
print(f"Loaded {len(works)} works in {round(time.time()-start, 1)} seconds.")
```
`Loaded 29 works in 2.2 seconds.`
The works returned by `Series.work_list` are semi-loaded, so no requests are made until we call `work.reload(threaded=True)`, which starts loading each work in a parallel thread. In the end, we iterate over every thread and wait for all of them to finish using `.join()`. Let's compare this method with the standard way of loading AO3 works:
```python
import AO3
import time
series = AO3.Series(1295090)
works = []
start = time.time()
for work in series.work_list:
work.reload()
works.append(work)
print(f"Loaded {len(works)} works in {round(time.time()-start, 1)} seconds.")
```
`Loaded 29 works in 21.6 seconds.`
As we can see, there is a significant performance increase. There are other functions in this package which have this functionality. To see if a function is "threadable", either use `hasattr(function, "_threadable")` or check its `__doc__` string.
To save even more time, if you're only interested in metadata, you can load a work with the `load_chapters` option set to False. Also, be aware that some functions (like `Series.work_list` or `Search.results`) might return semi-loaded `Work` objects. This means that no requests have been made to load this work (so you don't have access to chapter text, notes, etc...) but almost all of its metadata will already have been cached, and you might not need to call `Work.reload()` at all.
The last important information about the `Work` class is that most of its properties (like the number of bookmarks, kudos, the authors' names, etc...) are cached properties. That means that once you check them once, the value is stored and it won't ever change, even if those values change. To update these values, you will need to call `Work.reload()`. See the example below:
```python
import AO3
sess = AO3.GuestSession()
work = AO3.Work(16721367, sess)
print(work.kudos)
work.leave_kudos()
work.reload()
print(work.kudos)
```
```
392
393
```
## Users
Another useful thing you might want to do is get information on who wrote which works / comments. For that, we use the `AO3.User` class.
```python
import AO3
user = AO3.User("bothersomepotato")
print(user.url)
print(user.bio)
print(user.works) # Number of works published
```
```
https://archiveofourown.org/users/bothersomepotato
University student, opening documents to write essays but writing this stuff instead. No regrets though. My Tumblr, come chat with -or yell at- me if you feel like it! :)
2
```
## Search
To search for works, you can either use the `AO3.search()` function and parse the BeautifulSoup object returned yourself, or use the `AO3.Search` class to automatically do that for you
```python
import AO3
search = AO3.Search(any_field="Clarke Lexa", word_count=AO3.utils.Constraint(5000, 15000))
search.update()
print(search.total_results)
for result in search.results:
print(result)
```
```
3074
<Work [five times lexa falls for clarke]>
<Work [an incomplete list of reasons (why Clarke loves Lexa)]>
<Work [five times clarke and lexa aren’t sure if they're a couple or not]>
<Work [Chemistry]>
<Work [The New Commander (Lexa Joining Camp Jaha)]>
<Work [Ode to Clarke]>
<Work [it's always been (right in front of me)]>
<Work [The Girlfriend Tag]>
<Work [The After-Heda Chronicles]>
<Work [The Counter]>
<Work [May We Meet Again]>
<Work [No Filter]>
<Work [The Games We Play]>
<Work [A l'épreuve des balles]>
<Work [Celebration]>
<Work [Another level of fucked up]>
<Work [(Don't Ever Want to Tame) This Wild Heart]>
<Work [Self Control]>
<Work [Winter]>
<Work [My only wish]>
```
You can then use the workid to load one of the works you searched for. To get more than the first 20 works, change the page number using
```python
search.page = 2
```
## Session
Many of the actions you might want to take require an AO3 account, and if you have one, you can get access to those actions using an `AO3.Session` object. You start by logging in using your username and password, and then you can use that object to access restricted content.
```python
import AO3
session = AO3.Session("username", "password")
print(f"Bookmarks: {session.bookmarks}")
session.refresh_auth_token()
print(session.kudos(AO3.Work(18001499, load=False)))
```
```
Bookmarks: 67
True
```
We successfully left kudos in a work and checked our bookmarks. The `session.refresh_auth_token()` is needed for some activities such as leaving kudos and comments. If it is expired or you forget to call this function, the error `AO3.utils.AuthError: Invalid authentication token. Try calling session.refresh_auth_token()` will be raised.
You can also comment / leave kudos in a work by calling `Work.leave_kudos()`/`Work.comment()` and provided you have instantiated that object with a session already (`AO3.Work(xxxxxx, session=sess)` or using `Work.set_session()`). This is probably the best way to do so because you will run into less authentication issues (as the work's authenticity token will be used instead).
If you would prefer to leave a comment or kudos anonymously, you can use an `AO3.GuestSession` in the same way you'd use a normal session, except you won't be able to check your bookmarks, subscriptions, etc... because you're not actually logged in.
## Comments
To retrieve and process comment threads, you might want to look at the `Work.get_comments()` method. It returns all the comments in a specific chapter and their respective threads. You can then process them however you want. Let's take a look:
```python
from time import time
import AO3
work = AO3.Work(24560008)
work.load_chapters()
start = time()
comments = work.get_comments(5)
print(f"Loaded {len(comments)} comment threads in {round(time()-start, 1)} seconds\n")
for comment in comments:
print(f"Comment ID: {comment.id}\nReplies: {len(comment.get_thread())}")
```
```
Loaded 5 comment threads in 1.8 seconds
Comment ID: 312237184
Replies: 1
Comment ID: 312245032
Replies: 1
Comment ID: 312257098
Replies: 1
Comment ID: 312257860
Replies: 1
Comment ID: 312285673
Replies: 2
```
Loading comments takes a very long time so you should try and use it as little as possible. It also causes lots of requests to be sent to the AO3 servers, which might result in getting the error `utils.HTTPError: We are being rate-limited. Try again in a while or reduce the number of requests`. If it happens, you should try to space out your requests or reduce their number. There is also the option to enable request limiting using `AO3.utils.limit_requests()`, which makes it so you can't make more than x requests in a certain time window.
You can also reply to comments using the `Comment.reply()` function, or delete one (if it's yours) using `Comment.delete()`.
## Extra
AO3.extra contains the code to download some extra resources that are not core to the functionality of this package and don't change very often. One example would be the list of fandoms recognized by AO3.
To download a resource, simply use `AO3.extra.download(resource_name)`. To download every resource, you can use `AO3.extra.download_all()`. To see the list of available resources, `AO3.extra.get_resources()` will help you.
================================================
FILE: mkdocs.yml
================================================
site_name: AO3 API
docs_dir: docs
theme: readthedocs
nav:
- Home: index.md
- Installation: install.md
- Usage: use.md
================================================
FILE: pyproject.toml
================================================
[build-system]
requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"
[project]
name = "ao3-api"
version = "2.3.1"
authors = [
{ name="Wendy" },
]
description = "An unofficial AO3 (archiveofourown.org) API"
readme = "README.md"
requires-python = ">=3.8"
classifiers = [
"Programming Language :: Python :: 3",
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
]
keywords = ["ao3", "fanfiction", "Archive of Our Own"]
dependencies = [
"BeautifulSoup4",
"lxml",
"requests"
]
[project.urls]
Homepage = "https://github.com/wendytg/ao3_api"
Issues = "https://github.com/wendytg/ao3_api/issues"
Documentation = "https://ao3-api.readthedocs.io/"
gitextract_gr15a5tc/ ├── AO3/ │ ├── __init__.py │ ├── chapters.py │ ├── comments.py │ ├── common.py │ ├── extra.py │ ├── requester.py │ ├── search.py │ ├── series.py │ ├── session.py │ ├── threadable.py │ ├── users.py │ ├── utils.py │ └── works.py ├── LICENSE ├── README.md ├── dist/ │ ├── ao3_api-2.0.0-py3-none-any.whl │ ├── ao3_api-2.0.1-py3-none-any.whl │ ├── ao3_api-2.0.2-py3-none-any.whl │ ├── ao3_api-2.0.3-py3-none-any.whl │ ├── ao3_api-2.0.4-py3-none-any.whl │ ├── ao3_api-2.0.5-py3-none-any.whl │ ├── ao3_api-2.0.6-py3-none-any.whl │ ├── ao3_api-2.0.7-py3-none-any.whl │ ├── ao3_api-2.0.8-py3-none-any.whl │ ├── ao3_api-2.1.0-py3-none-any.whl │ ├── ao3_api-2.1.1-py3-none-any.whl │ ├── ao3_api-2.1.2-py3-none-any.whl │ ├── ao3_api-2.2.0-py3-none-any.whl │ ├── ao3_api-2.2.1-py3-none-any.whl │ ├── ao3_api-2.3.0-py3-none-any.whl │ └── ao3_api-2.3.1-py3-none-any.whl ├── docs/ │ ├── index.md │ ├── install.md │ └── use.md ├── mkdocs.yml └── pyproject.toml
SYMBOL INDEX (268 symbols across 12 files)
FILE: AO3/chapters.py
class Chapter (line 12) | class Chapter:
method __init__ (line 17) | def __init__(self, chapterid, work, session=None, load=True):
method __repr__ (line 25) | def __repr__(self):
method __eq__ (line 33) | def __eq__(self, other):
method __getstate__ (line 36) | def __getstate__(self):
method __setstate__ (line 45) | def __setstate__(self, d):
method set_session (line 53) | def set_session(self, session):
method reload (line 63) | def reload(self):
method comment (line 89) | def comment(self, comment_text, email="", name="", pseud=None):
method get_comments (line 116) | def get_comments(self, maximum=None):
method get_images (line 181) | def get_images(self):
method loaded (line 202) | def loaded(self):
method authenticity_token (line 207) | def authenticity_token(self):
method work (line 212) | def work(self):
method text (line 217) | def text(self):
method title (line 231) | def title(self):
method number (line 244) | def number(self):
method words (line 251) | def words(self):
method summary (line 256) | def summary(self):
method start_notes (line 267) | def start_notes(self):
method end_notes (line 278) | def end_notes(self):
method url (line 289) | def url(self):
method request (line 298) | def request(self, url):
method get (line 312) | def get(self, *args, **kwargs):
FILE: AO3/comments.py
class Comment (line 10) | class Comment:
method __init__ (line 15) | def __init__(self, comment_id, parent=None, parent_comment=None, sessi...
method __repr__ (line 36) | def __repr__(self):
method _soup (line 40) | def _soup(self):
method first_parent_comment (line 48) | def first_parent_comment(self):
method fullwork (line 55) | def fullwork(self):
method author (line 62) | def author(self):
method text (line 73) | def text(self):
method get_thread (line 82) | def get_thread(self):
method _get_thread (line 125) | def _get_thread(self, parent, soup):
method get_thread_iterator (line 164) | def get_thread_iterator(self):
method reply (line 174) | def reply(self, comment_text, email="", name=""):
method reload (line 200) | def reload(self):
method delete (line 233) | def delete(self):
method get (line 245) | def get(self, *args, **kwargs):
function threadIterator (line 256) | def threadIterator(comment):
FILE: AO3/common.py
function __setifnotnone (line 6) | def __setifnotnone(obj, attr, value):
function get_work_from_banner (line 10) | def get_work_from_banner(work):
function url_join (line 159) | def url_join(base, *args):
FILE: AO3/extra.py
function _download_languages (line 12) | def _download_languages():
function _download_fandom (line 38) | def _download_fandom(fandom_key, name):
function download (line 116) | def download(resource):
function get_resources (line 133) | def get_resources():
function has_resource (line 141) | def has_resource(resource):
function download_all (line 147) | def download_all(redownload=False):
function download_all_threaded (line 158) | def download_all_threaded(redownload=False):
FILE: AO3/requester.py
class Requester (line 7) | class Requester:
method __init__ (line 10) | def __init__(self, rqtw=-1, timew=60):
method setRQTW (line 25) | def setRQTW(self, value):
method setTimeW (line 28) | def setTimeW(self, value):
method request (line 31) | def request(self, *args, **kwargs):
FILE: AO3/search.py
class Search (line 29) | class Search:
method __init__ (line 30) | def __init__(
method update (line 86) | def update(self):
function search (line 119) | def search(
FILE: AO3/series.py
class Series (line 13) | class Series:
method __init__ (line 14) | def __init__(self, seriesid, session=None, load=True):
method __eq__ (line 32) | def __eq__(self, other):
method __repr__ (line 35) | def __repr__(self):
method __getstate__ (line 41) | def __getstate__(self):
method __setstate__ (line 50) | def __setstate__(self, d):
method set_session (line 58) | def set_session(self, session):
method reload (line 68) | def reload(self):
method subscribe (line 84) | def subscribe(self):
method unsubscribe (line 98) | def unsubscribe(self):
method bookmark (line 114) | def bookmark(self, notes="", tags=None, collections=None, private=Fals...
method delete_bookmark (line 140) | def delete_bookmark(self):
method _bookmarkid (line 161) | def _bookmarkid(self):
method url (line 175) | def url(self):
method loaded (line 185) | def loaded(self):
method authenticity_token (line 190) | def authenticity_token(self):
method is_subscribed (line 200) | def is_subscribed(self):
method _sub_id (line 211) | def _sub_id(self):
method name (line 222) | def name(self):
method creators (line 227) | def creators(self):
method series_begun (line 232) | def series_begun(self):
method series_updated (line 245) | def series_updated(self):
method words (line 258) | def words(self):
method nworks (line 271) | def nworks(self):
method complete (line 284) | def complete(self):
method description (line 297) | def description(self):
method notes (line 311) | def notes(self):
method nbookmarks (line 325) | def nbookmarks(self):
method work_list (line 339) | def work_list(self):
method get (line 362) | def get(self, *args, **kwargs):
method request (line 373) | def request(self, url):
FILE: AO3/session.py
class GuestSession (line 16) | class GuestSession:
method __init__ (line 21) | def __init__(self):
method user (line 28) | def user(self):
method comment (line 32) | def comment(self, commentable, comment_text, oneshot=False, commentid=...
method kudos (line 58) | def kudos(self, work):
method refresh_auth_token (line 76) | def refresh_auth_token(self):
method get (line 102) | def get(self, *args, **kwargs):
method request (line 113) | def request(self, url):
method post (line 127) | def post(self, *args, **kwargs):
method __del__ (line 139) | def __del__(self):
class Session (line 142) | class Session(GuestSession):
method __init__ (line 147) | def __init__(self, username, password):
method __getstate__ (line 182) | def __getstate__(self):
method __setstate__ (line 191) | def __setstate__(self, d):
method clear_cache (line 199) | def clear_cache(self):
method _subscription_pages (line 208) | def _subscription_pages(self):
method get_work_subscriptions (line 221) | def get_work_subscriptions(self, use_threading=False):
method get_series_subscriptions (line 232) | def get_series_subscriptions(self, use_threading=False):
method get_user_subscriptions (line 243) | def get_user_subscriptions(self, use_threading=False):
method get_subscriptions (line 254) | def get_subscriptions(self, use_threading=False):
method load_subscriptions_threaded (line 272) | def load_subscriptions_threaded(self):
method _load_subscriptions (line 286) | def _load_subscriptions(self, page=1):
method _history_pages (line 325) | def _history_pages(self):
method get_history (line 338) | def get_history(self, hist_sleep=3, start_page=0, max_pages=None, time...
method _load_history (line 385) | def _load_history(self, page=1):
method _bookmark_pages (line 423) | def _bookmark_pages(self):
method get_bookmarks (line 436) | def get_bookmarks(self, use_threading=False):
method load_bookmarks_threaded (line 454) | def load_bookmarks_threaded(self):
method _load_bookmarks (line 468) | def _load_bookmarks(self, page=1):
method bookmarks (line 501) | def bookmarks(self):
method get_statistics (line 515) | def get_statistics(self, year=None):
method str_format (line 532) | def str_format(string):
method get_marked_for_later (line 544) | def get_marked_for_later(self, sleep=1, timeout_sleep=60):
FILE: AO3/threadable.py
function threadable (line 4) | def threadable(func):
class ThreadPool (line 20) | class ThreadPool:
method __init__ (line 21) | def __init__(self, maximum=None):
method add_task (line 26) | def add_task(self, task):
method start (line 30) | def start(self):
FILE: AO3/users.py
class User (line 12) | class User:
method __init__ (line 17) | def __init__(self, username, session=None, load=True):
method __repr__ (line 36) | def __repr__(self):
method __eq__ (line 39) | def __eq__(self, other):
method __getstate__ (line 42) | def __getstate__(self):
method __setstate__ (line 51) | def __setstate__(self, d):
method set_session (line 59) | def set_session(self, session):
method reload (line 69) | def reload(self):
method get_avatar (line 107) | def get_avatar(self):
method subscribe (line 121) | def subscribe(self):
method unsubscribe (line 135) | def unsubscribe(self):
method id (line 151) | def id(self):
method is_subscribed (line 156) | def is_subscribed(self):
method loaded (line 167) | def loaded(self):
method user_id (line 182) | def user_id(self):
method _sub_id (line 193) | def _sub_id(self):
method works (line 204) | def works(self):
method _works_pages (line 216) | def _works_pages(self):
method get_works (line 227) | def get_works(self, use_threading=False):
method load_works_threaded (line 245) | def load_works_threaded(self):
method _load_works (line 259) | def _load_works(self, page=1):
method bookmarks (line 271) | def bookmarks(self):
method _bookmarks_pages (line 283) | def _bookmarks_pages(self):
method get_bookmarks (line 294) | def get_bookmarks(self, use_threading=False):
method load_bookmarks_threaded (line 312) | def load_bookmarks_threaded(self):
method _load_bookmarks (line 326) | def _load_bookmarks(self, page=1):
method bio (line 339) | def bio(self):
method url (line 353) | def url(self):
method get (line 362) | def get(self, *args, **kwargs):
method request (line 373) | def request(self, url):
method str_format (line 388) | def str_format(string):
method work_pages (line 401) | def work_pages(self):
FILE: AO3/utils.py
class LoginError (line 16) | class LoginError(Exception):
method __init__ (line 17) | def __init__(self, message, errors=[]):
class UnloadedError (line 21) | class UnloadedError(Exception):
method __init__ (line 22) | def __init__(self, message, errors=[]):
class UnexpectedResponseError (line 26) | class UnexpectedResponseError(Exception):
method __init__ (line 27) | def __init__(self, message, errors=[]):
class InvalidIdError (line 31) | class InvalidIdError(Exception):
method __init__ (line 32) | def __init__(self, message, errors=[]):
class DownloadError (line 36) | class DownloadError(Exception):
method __init__ (line 37) | def __init__(self, message, errors=[]):
class AuthError (line 41) | class AuthError(Exception):
method __init__ (line 42) | def __init__(self, message, errors=[]):
class DuplicateCommentError (line 46) | class DuplicateCommentError(Exception):
method __init__ (line 47) | def __init__(self, message, errors=[]):
class PseudError (line 51) | class PseudError(Exception):
method __init__ (line 52) | def __init__(self, message, errors=[]):
class HTTPError (line 56) | class HTTPError(Exception):
method __init__ (line 57) | def __init__(self, message, errors=[]):
class BookmarkError (line 61) | class BookmarkError(Exception):
method __init__ (line 62) | def __init__(self, message, errors=[]):
class CollectError (line 66) | class CollectError(Exception):
method __init__ (line 67) | def __init__(self, message, errors=[]):
class Query (line 71) | class Query:
method __init__ (line 72) | def __init__(self):
method add_field (line 75) | def add_field(self, text):
method string (line 79) | def string(self):
class Constraint (line 83) | class Constraint:
method __init__ (line 87) | def __init__(self, lowerbound=0, upperbound=None):
method string (line 99) | def string(self):
method __str__ (line 115) | def __str__(self):
function word_count (line 118) | def word_count(text):
function set_rqtw (line 121) | def set_rqtw(value):
function set_timew (line 125) | def set_timew(value):
function limit_requests (line 129) | def limit_requests(limit=True):
function load_fandoms (line 136) | def load_fandoms():
function load_languages (line 154) | def load_languages():
function get_languages (line 172) | def get_languages():
function search_fandom (line 176) | def search_fandom(fandom_string):
function workid_from_url (line 200) | def workid_from_url(url):
function comment (line 220) | def comment(commentable, comment_text, session, fullwork=False, commenti...
function delete_comment (line 309) | def delete_comment(comment, session):
function kudos (line 347) | def kudos(work, session):
function subscribe (line 395) | def subscribe(subscribable, worktype, session, unsubscribe=False, subid=...
function bookmark (line 442) | def bookmark(bookmarkable, session=None, notes="", tags=None, collection...
function delete_bookmark (line 488) | def delete_bookmark(bookmarkid, session, auth_token=None):
function handle_bookmark_errors (line 508) | def handle_bookmark_errors(request):
function get_pseud_id (line 526) | def get_pseud_id(ao3object, session=None, specified_pseud=None):
function collect (line 553) | def collect(collectable, session, collections):
FILE: AO3/works.py
class Work (line 14) | class Work:
method __init__ (line 19) | def __init__(self, workid, session=None, load=True, load_chapters=True):
method __repr__ (line 39) | def __repr__(self):
method __eq__ (line 45) | def __eq__(self, other):
method __getstate__ (line 48) | def __getstate__(self):
method __setstate__ (line 57) | def __setstate__(self, d):
method reload (line 66) | def reload(self, load_chapters=True):
method set_session (line 86) | def set_session(self, session):
method load_chapters (line 95) | def load_chapters(self):
method get_images (line 125) | def get_images(self):
method download (line 145) | def download(self, filetype="PDF"):
method download_to_file (line 175) | def download_to_file(self, filename, filetype="PDF"):
method metadata (line 192) | def metadata(self):
method get_comments (line 251) | def get_comments(self, maximum=None):
method subscribe (line 314) | def subscribe(self):
method unsubscribe (line 328) | def unsubscribe(self):
method text (line 344) | def text(self):
method authenticity_token (line 354) | def authenticity_token(self):
method is_subscribed (line 364) | def is_subscribed(self):
method _sub_id (line 375) | def _sub_id(self):
method leave_kudos (line 386) | def leave_kudos(self):
method comment (line 404) | def comment(self, comment_text, email="", name="", pseud=None):
method bookmark (line 431) | def bookmark(self, notes="", tags=None, collections=None, private=Fals...
method delete_bookmark (line 457) | def delete_bookmark(self):
method collect (line 478) | def collect(self, collections):
method _bookmarkid (line 499) | def _bookmarkid(self):
method loaded (line 513) | def loaded(self):
method oneshot (line 518) | def oneshot(self):
method series (line 523) | def series(self):
method authors (line 541) | def authors(self):
method nchapters (line 562) | def nchapters(self):
method expected_chapters (line 575) | def expected_chapters(self):
method status (line 590) | def status(self):
method hits (line 600) | def hits(self):
method kudos (line 613) | def kudos(self):
method comments (line 626) | def comments(self):
method restricted (line 639) | def restricted(self):
method words (line 648) | def words(self):
method language (line 661) | def language(self):
method bookmarks (line 675) | def bookmarks(self):
method title (line 688) | def title(self):
method date_published (line 701) | def date_published(self):
method date_edited (line 712) | def date_edited(self):
method date_updated (line 726) | def date_updated(self):
method tags (line 739) | def tags(self):
method characters (line 754) | def characters(self):
method relationships (line 769) | def relationships(self):
method fandoms (line 784) | def fandoms(self):
method categories (line 799) | def categories(self):
method warnings (line 814) | def warnings(self):
method rating (line 829) | def rating(self):
method summary (line 843) | def summary(self):
method start_notes (line 859) | def start_notes(self):
method end_notes (line 870) | def end_notes(self):
method url (line 881) | def url(self):
method complete (line 891) | def complete(self):
method collections (line 903) | def collections(self):
method get (line 917) | def get(self, *args, **kwargs):
method request (line 928) | def request(self, url):
method str_format (line 945) | def str_format(string):
Condensed preview — 36 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (183K chars).
[
{
"path": "AO3/__init__.py",
"chars": 261,
"preview": "from . import extra, utils\r\nfrom .chapters import Chapter\r\nfrom .comments import Comment\r\nfrom .search import Search\r\nfr"
},
{
"path": "AO3/chapters.py",
"chars": 10635,
"preview": "from functools import cached_property\n\nimport bs4\nfrom bs4 import BeautifulSoup\n\nfrom . import threadable, utils\nfrom .c"
},
{
"path": "AO3/comments.py",
"chars": 9518,
"preview": "from functools import cached_property\n\nfrom bs4 import BeautifulSoup\n\nfrom . import threadable, utils\nfrom .requester im"
},
{
"path": "AO3/common.py",
"chars": 6159,
"preview": "import datetime\n\nfrom . import utils\n\n\ndef __setifnotnone(obj, attr, value):\n if value is not None:\n setattr(o"
},
{
"path": "AO3/extra.py",
"chars": 5795,
"preview": "import functools\nimport os\nimport pathlib\nimport pickle\n\nfrom bs4 import BeautifulSoup\n\nfrom . import threadable, utils\n"
},
{
"path": "AO3/requester.py",
"chars": 2551,
"preview": "import threading\nimport time\n\nimport requests\n\n\nclass Requester:\n \"\"\"Requester object\"\"\"\n \n def __init__(self, "
},
{
"path": "AO3/search.py",
"chars": 8696,
"preview": "from math import ceil\r\n\r\nfrom bs4 import BeautifulSoup\r\n\r\nfrom . import threadable, utils\r\nfrom .common import get_work_"
},
{
"path": "AO3/series.py",
"chars": 13481,
"preview": "from datetime import date\nfrom functools import cached_property\n\nfrom bs4 import BeautifulSoup\n\nfrom . import threadable"
},
{
"path": "AO3/session.py",
"chars": 21777,
"preview": "import datetime\r\nimport re\r\nimport time\r\nfrom functools import cached_property\r\n\r\nimport requests\r\nfrom bs4 import Beaut"
},
{
"path": "AO3/threadable.py",
"chars": 1068,
"preview": "import threading\n\n\ndef threadable(func):\n \"\"\"Allows the function to be ran as a thread using the 'threaded' argument\""
},
{
"path": "AO3/users.py",
"chars": 13456,
"preview": "import datetime\r\nfrom functools import cached_property\r\n\r\nimport requests\r\nfrom bs4 import BeautifulSoup\r\n\r\nfrom . impor"
},
{
"path": "AO3/utils.py",
"chars": 21780,
"preview": "import os\r\nimport pickle\r\nimport re\r\n\r\nfrom bs4 import BeautifulSoup\r\n\r\nfrom .requester import requester\r\nfrom .common i"
},
{
"path": "AO3/works.py",
"chars": 31546,
"preview": "import warnings\r\nfrom datetime import datetime\r\nfrom functools import cached_property\r\n\r\nfrom bs4 import BeautifulSoup\r\n"
},
{
"path": "LICENSE",
"chars": 1103,
"preview": "MIT License\r\n\r\nCopyright (c) 2019 Francisco Patrcio Rodrigues\r\n\r\nPermission is hereby granted, free of charge, to any pe"
},
{
"path": "README.md",
"chars": 10904,
"preview": "[](https://ao3-api.readthedocs.io"
},
{
"path": "docs/index.md",
"chars": 350,
"preview": "# AO3 API\n\nThis is an unofficial python library that lets you access some of AO3's (archiveofourown.org) data using webs"
},
{
"path": "docs/install.md",
"chars": 199,
"preview": "# Installation\n\nYou can install this package using pip\n\n```pip install ao3_api```\n\nor by cloning the repository and buil"
},
{
"path": "docs/use.md",
"chars": 10065,
"preview": "# Usage\n\nThis package is divided in 9 core modules: works, chapters, users, series, search, session, comments, extra, an"
},
{
"path": "mkdocs.yml",
"chars": 130,
"preview": "site_name: AO3 API\ndocs_dir: docs\ntheme: readthedocs\n\nnav:\n - Home: index.md\n - Installation: install.md\n - Usa"
},
{
"path": "pyproject.toml",
"chars": 714,
"preview": "[build-system]\nrequires = [\"setuptools>=61.0\"]\nbuild-backend = \"setuptools.build_meta\"\n\n[project]\nname = \"ao3-api\"\nversi"
}
]
// ... and 16 more files (download for full content)
About this extraction
This page contains the full source code of the ArmindoFlores/ao3_api GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 36 files (166.2 KB), approximately 38.1k tokens, and a symbol index with 268 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.