Repository: goenning/google-indexing-script
Branch: main
Commit: c52c2f8c67ad
Files: 22
Total size: 37.8 KB
Directory structure:
gitextract_8h6dyk7u/
├── .changeset/
│ └── config.json
├── .github/
│ ├── pull_request_template.md
│ └── workflows/
│ ├── ci.yml
│ └── release.yml
├── .gitignore
├── .nvmrc
├── CHANGELOG.md
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── package.json
├── src/
│ ├── bin.ts
│ ├── cli.ts
│ ├── index.ts
│ └── shared/
│ ├── auth.ts
│ ├── gsc.ts
│ ├── index.ts
│ ├── sitemap.ts
│ ├── types.ts
│ └── utils.ts
├── tsconfig.json
└── tsup.config.ts
================================================
FILE CONTENTS
================================================
================================================
FILE: .changeset/config.json
================================================
{
"$schema": "https://unpkg.com/@changesets/config@3.0.0/schema.json",
"commit": false,
"fixed": [["google-indexing-script"]],
"changelog": [
"@changesets/changelog-github",
{ "repo": "goenning/google-indexing-script" }
],
"linked": [],
"access": "public",
"baseBranch": "main",
"updateInternalDependencies": "patch"
}
================================================
FILE: .github/pull_request_template.md
================================================
**What did I change?**
**Why did I change it?**
================================================
FILE: .github/workflows/ci.yml
================================================
name: CI
on: [push]
jobs:
build:
name: Build, lint, and test on Node ${{ matrix.node }} and ${{ matrix.os }}
runs-on: ${{ matrix.os }}
strategy:
matrix:
node: ["18.x", "20.x"]
os: [ubuntu-latest]
steps:
- name: Checkout repo
uses: actions/checkout@v4
- name: Use Node ${{ matrix.node }}
uses: actions/setup-node@v4
with:
node-version: ${{ matrix.node }}
cache: "npm"
- name: Install Dependencies
run: npm install
- name: Build
run: npm run build
================================================
FILE: .github/workflows/release.yml
================================================
name: Release
on:
push:
branches:
- main
jobs:
release:
if: github.repository == 'goenning/google-indexing-script'
runs-on: ubuntu-latest
steps:
- name: Checkout repo
uses: actions/checkout@v4
- name: Use Node
uses: actions/setup-node@v4
with:
cache: "npm"
- name: Install Dependencies
run: npm install
- name: Build
run: npm run build
- name: Create Release Pull Request or Publish to npm
uses: changesets/action@v1
with:
publish: npm run release
version: npm run version
commit: "release version"
title: "release version"
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
NPM_TOKEN: ${{ secrets.NPM_TOKEN }}
================================================
FILE: .gitignore
================================================
service_account.json
.cache
node_modules
.vscode
dist
================================================
FILE: .nvmrc
================================================
20
================================================
FILE: CHANGELOG.md
================================================
# google-indexing-script
## 0.4.0
### Minor Changes
- [#68](https://github.com/goenning/google-indexing-script/pull/68) [`caa73f7`](https://github.com/goenning/google-indexing-script/commit/caa73f765b5d494d65a894a83bb8faf351e6d8ae) Thanks [@AntoineKM](https://github.com/AntoineKM)! - Improve CLI with commander
## 0.3.0
### Minor Changes
- [#65](https://github.com/goenning/google-indexing-script/pull/65) [`e0c31f8`](https://github.com/goenning/google-indexing-script/commit/e0c31f837acfe2083843436050b40f21f3806838) Thanks [@AntoineKM](https://github.com/AntoineKM)! - Add custom URLs option
### Patch Changes
- [#65](https://github.com/goenning/google-indexing-script/pull/65) [`e0c31f8`](https://github.com/goenning/google-indexing-script/commit/e0c31f837acfe2083843436050b40f21f3806838) Thanks [@AntoineKM](https://github.com/AntoineKM)! - Fix siteUrls conversions
## 0.2.0
### Minor Changes
- [#62](https://github.com/goenning/google-indexing-script/pull/62) [`93dd956`](https://github.com/goenning/google-indexing-script/commit/93dd956dca4065b97d6076db772560fba57aec50) Thanks [@hasanafzal8485](https://github.com/hasanafzal8485)! - Don't want the same URL use my API limit again until his previous cache limit is completed
## 0.1.0
### Minor Changes
- [#55](https://github.com/goenning/google-indexing-script/pull/55) [`908938a`](https://github.com/goenning/google-indexing-script/commit/908938a701d964b75331e322fbea8d77e6db976e) Thanks [@AntoineKM](https://github.com/AntoineKM)! - feat(get-publish-metadata): optional retries if rate limited
## 0.0.5
### Patch Changes
- [#44](https://github.com/goenning/google-indexing-script/pull/44) [`77b94ed`](https://github.com/goenning/google-indexing-script/commit/77b94edeef863721c07bd3e12d6d38052723f422) Thanks [@AntoineKM](https://github.com/AntoineKM)! - Add site url checker
## 0.0.4
### Patch Changes
- [#40](https://github.com/goenning/google-indexing-script/pull/40) [`074f2c7`](https://github.com/goenning/google-indexing-script/commit/074f2c7ebbafff3a03ebf07baf7b21922a98698d) Thanks [@AntoineKM](https://github.com/AntoineKM)! - Add documentation comments
## 0.0.3
### Patch Changes
- [#39](https://github.com/goenning/google-indexing-script/pull/39) [`9467e82`](https://github.com/goenning/google-indexing-script/commit/9467e82496170aeaa42ecd8ab6b8de4ba8f8315f) Thanks [@AntoineKM](https://github.com/AntoineKM)! - Fix index function to handle options passed
================================================
FILE: CONTRIBUTING.md
================================================
# Contributing to Google Indexing Script
Before jumping into a PR be sure to search [existing PRs](/goenning/google-indexing-script/pulls) or [issues](/goenning/google-indexing-script/issues) for an open or closed item that relates to your submission.
# Developing
All pull requests should be opened against `main`.
1. Clone the repository
```bash
git clone https://github.com/goenning/google-indexing-script.git
```
2. Install dependencies
```bash
npm install
```
3. Install the cli globally
```bash
npm install -g .
```
4. Run the development bundle
```bash
npm run dev
```
5. See how to [use it](/README.md#installation) and make your changes!
# Building
After making your changes, you can build the project with the following command:
```bash
npm run build
```
# Pull Request
1. Make sure your code is formatted with `prettier`
2. Make sure your code passes the tests
3. Make sure you added the changes with `npm run changeset`
================================================
FILE: LICENSE
================================================
MIT License
Copyright (c) 2024 Guilherme Oenning
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
================================================
FILE: README.md
================================================
# Google Indexing Script
Use this script to get your entire site indexed on Google in less than 48 hours. No tricks, no hacks, just a simple script and a Google API.
> [!IMPORTANT]
>
> 1. This script uses [Google Indexing API](https://developers.google.com/search/apis/indexing-api/v3/quickstart) and it only works on pages with either `JobPosting` or `BroadcastEvent` structured data.
> 2. Indexing != Ranking. This will not help your page rank on Google, it'll just let Google know about the existence of your pages.
## Requirements
- Install [Node.js](https://nodejs.org/en/download)
- An account on [Google Search Console](https://search.google.com/search-console/about) with the verified sites you want to index
- An account on [Google Cloud](https://console.cloud.google.com/)
## Preparation
1. Follow this [guide](https://developers.google.com/search/apis/indexing-api/v3/prereqs) from Google. By the end of it, you should have a project on Google Cloud with the Indexing API enabled and a service account with the `Owner` permission on your sites.
2. Make sure you enable both [`Google Search Console API`](https://console.cloud.google.com/apis/api/searchconsole.googleapis.com) and [`Web Search Indexing API`](https://console.cloud.google.com/apis/api/indexing.googleapis.com) on your [Google Project ➤ API Services ➤ Enabled API & Services](https://console.cloud.google.com/apis/dashboard).
3. [Download the JSON](https://github.com/goenning/google-indexing-script/issues/2) file with the credentials of your service account and save it in the same folder as the script. The file should be named `service_account.json`
## Installation
### Using CLI
Install the cli globally on your machine.
```bash
npm i -g google-indexing-script
```
### Using the repository
Clone the repository to your machine.
```bash
git clone https://github.com/goenning/google-indexing-script.git
cd google-indexing-script
```
Install and build the project.
```bash
npm install
npm run build
npm i -g .
```
> [!NOTE]
> Ensure you are using an up-to-date Node.js version, with a preference for v20 or later. Check your current version with `node -v`.
## Usage
With service_account.json (recommended)
Create a `.gis` directory in your home folder and move the `service_account.json` file there.
```bash
mkdir ~/.gis
mv service_account.json ~/.gis
```
Run the script with the domain or url you want to index.
```bash
gis
# example
gis seogets.com
```
Here are some other ways to run the script:
```bash
# custom path to service_account.json
gis seogets.com --path /path/to/service_account.json
# long version command
google-indexing-script seogets.com
# cloned repository
npm run index seogets.com
```
With environment variables
Open `service_account.json` and copy the `client_email` and `private_key` values.
Run the script with the domain or url you want to index.
```bash
GIS_CLIENT_EMAIL=your-client-email GIS_PRIVATE_KEY=your-private-key gis seogets.com
```
With arguments (not recommended)
Open `service_account.json` and copy the `client_email` and `private_key` values.
Once you have the values, run the script with the domain or url you want to index, the client email and the private key.
```bash
gis seogets.com --client-email your-client-email --private-key your-private-key
```
As an npm module
You can also use the script as a [npm module](https://www.npmjs.com/package/google-indexing-script) in your own project.
```bash
npm i google-indexing-script
```
```javascript
import { index } from "google-indexing-script";
import serviceAccount from "./service_account.json";
index("seogets.com", {
client_email: serviceAccount.client_email,
private_key: serviceAccount.private_key,
})
.then(console.log)
.catch(console.error);
```
Read the [API documentation](https://jsdocs.io/package/google-indexing-script) for more details.
Here's an example of what you should expect:

> [!IMPORTANT]
>
> - Your site must have 1 or more sitemaps submitted to Google Search Console. Otherwise, the script will not be able to find the pages to index.
> - You can run the script as many times as you want. It will only index the pages that are not already indexed.
> - Sites with a large number of pages might take a while to index, be patient.
## Quota
Depending on your account, several quotas are configured for the API (see [docs](https://developers.google.com/search/apis/indexing-api/v3/quota-pricing#quota)). By default, the script exits as soon as the rate limit is exceeded. You can configure a retry mechanism for the read requests, whose quota applies on a per-minute time frame.
With environment variables
```bash
export GIS_QUOTA_RPM_RETRY=true
```
As an npm module
```javascript
import { index } from 'google-indexing-script'
import serviceAccount from './service_account.json'
index('seogets.com', {
client_email: serviceAccount.client_email,
private_key: serviceAccount.private_key,
quota: {
rpmRetry: true
}
})
.then(console.log)
.catch(console.error)
```
## 📄 License
MIT License
## 💖 Sponsor
This project is sponsored by [SEO Gets](https://seogets.com)

================================================
FILE: package.json
================================================
{
"name": "google-indexing-script",
"description": "Script to get your site indexed on Google in less than 48 hours",
"version": "0.4.0",
"main": "./dist/index.js",
"types": "./dist/index.d.ts",
"bin": {
"google-indexing-script": "./dist/bin.js",
"gis": "./dist/bin.js"
},
"keywords": [
"google",
"indexing",
"search-console",
"sitemap",
"seo",
"google-search",
"cli",
"typescript"
],
"license": "MIT",
"scripts": {
"index": "ts-node ./src/cli.ts",
"build": "tsup",
"dev": "tsup --watch",
"changeset": "changeset",
"version": "changeset version",
"release": "changeset publish"
},
"dependencies": {
"commander": "^12.1.0",
"googleapis": "131.0.0",
"picocolors": "^1.0.1",
"sitemapper": "3.2.8"
},
"prettier": {
"printWidth": 120
},
"devDependencies": {
"@changesets/changelog-github": "^0.5.0",
"@changesets/cli": "^2.27.1",
"ts-node": "^10.9.2",
"tsup": "^8.0.2",
"typescript": "^5.3.3"
}
}
================================================
FILE: src/bin.ts
================================================
#!/usr/bin/env node
// Executable entry point: loading ./cli builds the commander program and parses process.argv.
require("./cli");
================================================
FILE: src/cli.ts
================================================
import { index } from ".";
import { Command } from "commander";
import packageJson from "../package.json";
import { green } from "picocolors";
// Command-line definition. The optional positional `[input]` is the domain or site URL,
// and every option is forwarded to `index`.
const program = new Command(packageJson.name);

program
  .alias("gis")
  .version(packageJson.version, "-v, --version", "Output the current version.")
  .description(packageJson.description)
  .argument("[input]")
  .usage(`${green("[input]")} [options]`)
  .helpOption("-h, --help", "Output usage information.")
  // Options that take a value must declare a `<placeholder>`; without it commander
  // treats them as boolean switches and the action would receive `true`.
  .option("-c, --client-email <email>", "The client email for the Google service account.")
  .option("-k, --private-key <key>", "The private key for the Google service account.")
  .option("-p, --path <path>", "The path to the Google service account credentials file.")
  .option("-u, --urls <urls>", "A comma-separated list of URLs to index.")
  .option("--rpm-retry", "Retry when the rate limit is exceeded.")
  .action((input, options) => {
    index(input, {
      client_email: options.clientEmail,
      private_key: options.privateKey,
      path: options.path,
      // `--urls` arrives as one comma-separated string.
      urls: options.urls ? options.urls.split(",") : undefined,
      quota: {
        rpmRetry: options.rpmRetry,
      },
    });
  })
  .parse(process.argv);
================================================
FILE: src/index.ts
================================================
import { getAccessToken } from "./shared/auth";
import {
convertToSiteUrl,
getPublishMetadata,
requestIndexing,
getEmojiForStatus,
getPageIndexingStatus,
convertToFilePath,
checkSiteUrl,
checkCustomUrls,
} from "./shared/gsc";
import { getSitemapPages } from "./shared/sitemap";
import { Status } from "./shared/types";
import { batch } from "./shared/utils";
import { readFileSync, existsSync, mkdirSync, writeFileSync } from "fs";
import path from "path";
// Age after which a cached status for a still-indexable page is considered stale and re-inspected.
const CACHE_TIMEOUT = 1000 * 60 * 60 * 24 * 14; // 14 days
/**
 * Retry policy applied when publish-metadata read requests are rate limited (HTTP 429).
 */
export const QUOTA = {
rpm: {
// Number of retries granted when `quota.rpmRetry` is enabled.
retries: 3,
// Base delay between retries; the retry logic multiplies it for each further attempt.
waitingTime: 60000, // 1 minute
},
};
/**
 * Options accepted by `index`. Fields left unset are filled from the `GIS_*` environment variables.
 */
export type IndexOptions = {
// Service account client email (falls back to GIS_CLIENT_EMAIL).
client_email?: string;
// Service account private key (falls back to GIS_PRIVATE_KEY).
private_key?: string;
// Custom path to the service account JSON file (falls back to GIS_PATH).
path?: string;
// Explicit list of URLs to process instead of the pages found in the sitemaps (falls back to GIS_URLS).
urls?: string[];
quota?: {
rpmRetry?: boolean; // read requests per minute: retry after waiting time
};
};
/**
 * Indexes the specified domain or site URL.
 *
 * Steps: resolve credentials and settings (missing `options` fields are filled from the
 * `GIS_*` environment variables, updating `options` in place), obtain an access token,
 * resolve the Search Console property, collect the pages (custom URLs or sitemap pages),
 * inspect each page's indexing status (cached in `.cache/<site>.json`), then request
 * indexing for every page whose status is considered indexable.
 *
 * The process exits with code 1 when the input is missing, the access token cannot be
 * obtained, or no sitemaps are found.
 * @param input - The domain or site URL to index.
 * @param options - (Optional) Additional options for indexing.
 */
export const index = async (input: string = process.argv[2], options: IndexOptions = {}) => {
  if (!input) {
    console.error("❌ Please provide a domain or site URL as the first argument.");
    console.error("");
    process.exit(1);
  }

  if (!options.client_email) {
    options.client_email = process.env.GIS_CLIENT_EMAIL;
  }
  if (!options.private_key) {
    options.private_key = process.env.GIS_PRIVATE_KEY;
  }
  if (!options.path) {
    options.path = process.env.GIS_PATH;
  }
  if (!options.urls) {
    options.urls = process.env.GIS_URLS ? process.env.GIS_URLS.split(",") : undefined;
  }
  // Consult the environment whenever the flag itself is unset, not only when the whole
  // `quota` object is missing: callers such as the CLI always pass a `quota` object, which
  // previously made GIS_QUOTA_RPM_RETRY a no-op.
  if (options.quota?.rpmRetry == null) {
    options.quota = {
      ...options.quota,
      rpmRetry: process.env.GIS_QUOTA_RPM_RETRY === "true",
    };
  }

  const accessToken = await getAccessToken(options.client_email, options.private_key, options.path);
  let siteUrl = convertToSiteUrl(input);
  console.log(`🔎 Processing site: ${siteUrl}`);
  // The cache file name is derived from the site URL as typed, before it is corrected below.
  const cachePath = path.join(".cache", `${convertToFilePath(siteUrl)}.json`);

  if (!accessToken) {
    console.error("❌ Failed to get access token, check your service account credentials.");
    console.error("");
    process.exit(1);
  }

  siteUrl = await checkSiteUrl(accessToken, siteUrl);

  let pages = options.urls || [];
  if (pages.length === 0) {
    console.log(`🔎 Fetching sitemaps and pages...`);
    const [sitemaps, pagesFromSitemaps] = await getSitemapPages(accessToken, siteUrl);

    if (sitemaps.length === 0) {
      console.error("❌ No sitemaps found, add them to Google Search Console and try again.");
      console.error("");
      process.exit(1);
    }

    pages = pagesFromSitemaps;

    console.log(`👉 Found ${pages.length} URLs in ${sitemaps.length} sitemap`);
  } else {
    pages = checkCustomUrls(siteUrl, pages);

    console.log(`👉 Found ${pages.length} URLs in the provided list`);
  }

  // Previously recorded status per URL, loaded from the cache file when it exists.
  const statusPerUrl: Record<string, { status: Status; lastCheckedAt: string }> = existsSync(cachePath)
    ? JSON.parse(readFileSync(cachePath, "utf8"))
    : {};
  const pagesPerStatus: Record<Status, string[]> = {
    [Status.SubmittedAndIndexed]: [],
    [Status.DuplicateWithoutUserSelectedCanonical]: [],
    [Status.CrawledCurrentlyNotIndexed]: [],
    [Status.DiscoveredCurrentlyNotIndexed]: [],
    [Status.PageWithRedirect]: [],
    [Status.URLIsUnknownToGoogle]: [],
    [Status.RateLimited]: [],
    [Status.Forbidden]: [],
    [Status.Error]: [],
  };

  // Statuses for which an indexing request is attempted.
  const indexableStatuses = [
    Status.DiscoveredCurrentlyNotIndexed,
    Status.CrawledCurrentlyNotIndexed,
    Status.URLIsUnknownToGoogle,
    Status.Forbidden,
    Status.Error,
    Status.RateLimited,
  ];

  // A cached entry is refreshed only if it is still indexable AND older than CACHE_TIMEOUT.
  const shouldRecheck = (status: Status, lastCheckedAt: string) => {
    const shouldIndexIt = indexableStatuses.includes(status);
    const isOld = new Date(lastCheckedAt) < new Date(Date.now() - CACHE_TIMEOUT);
    return shouldIndexIt && isOld;
  };

  await batch(
    async (url) => {
      let result = statusPerUrl[url];
      if (!result || shouldRecheck(result.status, result.lastCheckedAt)) {
        const status = await getPageIndexingStatus(accessToken, siteUrl, url);
        result = { status, lastCheckedAt: new Date().toISOString() };
        statusPerUrl[url] = result;
      }

      // The inspected status may be one that was not pre-seeded above, hence the fallback.
      pagesPerStatus[result.status] = pagesPerStatus[result.status] ? [...pagesPerStatus[result.status], url] : [url];
    },
    pages,
    50,
    (batchIndex, batchCount) => {
      console.log(`📦 Batch ${batchIndex + 1} of ${batchCount} complete`);
    }
  );

  console.log(``);
  console.log(`👍 Done, here's the status of all ${pages.length} pages:`);
  mkdirSync(".cache", { recursive: true });
  writeFileSync(cachePath, JSON.stringify(statusPerUrl, null, 2));

  for (const status of Object.keys(pagesPerStatus)) {
    const pages = pagesPerStatus[status as Status];
    if (pages.length === 0) continue;
    console.log(`• ${getEmojiForStatus(status as Status)} ${status}: ${pages.length} pages`);
  }
  console.log("");

  const indexablePages = Object.entries(pagesPerStatus).flatMap(([status, pages]) =>
    indexableStatuses.includes(status as Status) ? pages : []
  );

  if (indexablePages.length === 0) {
    console.log(`✨ There are no pages that can be indexed. Everything is already indexed!`);
  } else {
    console.log(`✨ Found ${indexablePages.length} pages that can be indexed.`);
    indexablePages.forEach((url) => console.log(`• ${url}`));
  }
  console.log(``);

  for (const url of indexablePages) {
    console.log(`📄 Processing url: ${url}`);
    const status = await getPublishMetadata(accessToken, url, {
      retriesOnRateLimit: options.quota?.rpmRetry ? QUOTA.rpm.retries : 0,
    });
    if (status === 404) {
      // No notification metadata yet for this URL: request indexing.
      await requestIndexing(accessToken, url);
      console.log("🚀 Indexing requested successfully. It may take a few days for Google to process it.");
    } else if (status < 400) {
      console.log(`🕛 Indexing already requested previously. It may take a few days for Google to process it.`);
    }
    console.log(``);
  }

  console.log(`👍 All done!`);
  console.log(`💖 Brought to you by https://seogets.com - SEO Analytics.`);
  console.log(``);
};
export * from "./shared";
================================================
FILE: src/shared/auth.ts
================================================
import { google } from "googleapis";
import fs from "fs";
import path from "path";
import os from "os";
/**
 * Obtains an access token for Google APIs from service account credentials.
 *
 * If neither `client_email` nor `private_key` is supplied, both are read from a service
 * account JSON file: `customPath` when it exists, otherwise `service_account.json` in the
 * working directory, otherwise `~/.gis/service_account.json`. The process exits with
 * code 1 when no such file exists or when only one of the two credentials is supplied.
 * @param client_email - The client email of the service account.
 * @param private_key - The private key of the service account.
 * @param customPath - (Optional) Custom path to the service account JSON file.
 * @returns The access token.
 */
export async function getAccessToken(client_email?: string, private_key?: string, customPath?: string) {
  const hasExplicitCredentials = Boolean(client_email) || Boolean(private_key);

  if (hasExplicitCredentials) {
    // One credential without the other is unusable.
    if (!client_email) {
      console.error("❌ Missing client_email in service account credentials.");
      console.error("");
      process.exit(1);
    }
    if (!private_key) {
      console.error("❌ Missing private_key in service account credentials.");
      console.error("");
      process.exit(1);
    }
  } else {
    const localFile = "service_account.json";
    const homeFile = path.join(os.homedir(), ".gis", "service_account.json");
    const localExists = fs.existsSync(localFile);
    const homeExists = fs.existsSync(homeFile);
    const customExists = !!customPath && fs.existsSync(customPath);

    if (!(localExists || homeExists || customExists)) {
      console.error(`❌ ${localFile} not found, please follow the instructions in README.md`);
      console.error("");
      process.exit(1);
    }

    // Precedence: custom path, then working directory, then home directory.
    let credentialsFile = homeFile;
    if (customPath && customExists) {
      credentialsFile = customPath;
    } else if (localExists) {
      credentialsFile = localFile;
    }

    const key = JSON.parse(fs.readFileSync(credentialsFile, "utf8"));
    client_email = key.client_email;
    private_key = key.private_key;
  }

  const scopes = ["https://www.googleapis.com/auth/webmasters.readonly", "https://www.googleapis.com/auth/indexing"];
  const jwtClient = new google.auth.JWT(client_email, undefined, private_key, scopes, undefined);
  const { access_token } = await jwtClient.authorize();
  return access_token;
}
================================================
FILE: src/shared/gsc.ts
================================================
import { webmasters_v3 } from "googleapis";
import { QUOTA } from "..";
import { Status } from "./types";
import { fetchRetry } from "./utils";
/**
 * Normalises user input into a Google Search Console site URL.
 * Inputs that start with a protocol become URL-prefix properties with a trailing slash;
 * anything else is treated as a bare domain and becomes an `sc-domain:` property.
 * @param input - The input string to be converted.
 * @returns The converted site URL (sc-domain:domain.com or https://domain.com/)
 */
export function convertToSiteUrl(input: string) {
  const hasProtocol = ["http://", "https://"].some((prefix) => input.startsWith(prefix));
  if (!hasProtocol) {
    return `sc-domain:${input}`;
  }
  if (input.endsWith("/")) {
    return input;
  }
  return `${input}/`;
}
/**
 * Turns a site URL into a string usable as a file name: the protocol separator is
 * replaced by an underscore and every remaining slash becomes an underscore.
 * @param path - The url to be converted as a file name
 * @returns The converted file path
 */
export function convertToFilePath(path: string) {
  const protocolFlattened = path.replace("http://", "http_").replace("https://", "https_");
  return protocolFlattened.split("/").join("_");
}
/**
 * Converts an HTTP(S) URL to a sc-domain URL format.
 * Only the host part is kept: the protocol is removed and everything from the first
 * slash onwards (trailing slash or path) is dropped, so `https://example.com/blog/`
 * maps to `sc-domain:example.com`.
 * @param httpUrl The HTTP URL to be converted.
 * @returns The sc-domain formatted URL.
 */
export function convertToSCDomain(httpUrl: string) {
  const withoutProtocol = httpUrl.replace("http://", "").replace("https://", "");
  // Removing only the first "/" would glue the host and path together.
  const firstSlash = withoutProtocol.indexOf("/");
  const host = firstSlash === -1 ? withoutProtocol : withoutProtocol.slice(0, firstSlash);
  return `sc-domain:${host}`;
}
/**
 * Builds the `http://` URL-prefix form of a domain, with a trailing slash.
 * @param domain The domain to be converted.
 * @returns The HTTP URL.
 */
export function convertToHTTP(domain: string) {
  const protocol = "http://";
  return protocol + domain + "/";
}
/**
 * Builds the `https://` URL-prefix form of a domain, with a trailing slash.
 * @param domain The domain to be converted.
 * @returns The HTTPS URL.
 */
export function convertToHTTPS(domain: string) {
  const protocol = "https://";
  return protocol + domain + "/";
}
/**
 * Lists the sites the service account can see through the Google Webmasters API.
 * A 403 response or a response without `siteEntry` is reported on stderr and yields
 * an empty list.
 * @param accessToken - The access token for authentication.
 * @returns An array containing the site URLs associated with the service account.
 */
export async function getSites(accessToken: string) {
  const headers = {
    "Content-Type": "application/json",
    Authorization: `Bearer ${accessToken}`,
  };
  const response = await fetchRetry("https://www.googleapis.com/webmasters/v3/sites", { headers });

  if (response.status === 403) {
    console.error("🔐 This service account doesn't have access to any sites.");
    return [];
  }

  const payload: webmasters_v3.Schema$SitesListResponse = await response.json();
  const entries = payload.siteEntry;
  if (!entries) {
    console.error("❌ No sites found, add them to Google Search Console and try again.");
    return [];
  }

  return entries.map((entry) => entry.siteUrl);
}
/**
 * Checks if the site URL is valid and accessible by the service account.
 *
 * The site URL is expanded into its equivalent forms (https, http and sc-domain) and the
 * first one present in the account's site list is returned. The process exits with
 * code 1 when the format is unknown or none of the forms is accessible.
 * @param accessToken - The access token for authentication.
 * @param siteUrl - The URL of the site to check.
 * @returns The first candidate form of the site URL that the service account can access.
 */
export async function checkSiteUrl(accessToken: string, siteUrl: string) {
  const sites = await getSites(accessToken);
  let formattedUrls: string[] = [];

  // URL-prefix site URLs end with "/", and convertToHTTP/convertToHTTPS append one as well;
  // strip it first so the alternate form is "http://domain/" rather than "http://domain//".
  const withoutTrailingSlash = (value: string) => value.replace(/\/+$/, "");

  // Convert the site URL into all possible formats
  if (siteUrl.startsWith("https://")) {
    formattedUrls.push(siteUrl);
    formattedUrls.push(convertToHTTP(withoutTrailingSlash(siteUrl.replace("https://", ""))));
    formattedUrls.push(convertToSCDomain(siteUrl));
  } else if (siteUrl.startsWith("http://")) {
    formattedUrls.push(siteUrl);
    formattedUrls.push(convertToHTTPS(withoutTrailingSlash(siteUrl.replace("http://", ""))));
    formattedUrls.push(convertToSCDomain(siteUrl));
  } else if (siteUrl.startsWith("sc-domain:")) {
    const domain = withoutTrailingSlash(siteUrl.replace("sc-domain:", ""));
    formattedUrls.push(siteUrl);
    formattedUrls.push(convertToHTTP(domain));
    formattedUrls.push(convertToHTTPS(domain));
  } else {
    console.error("❌ Unknown site URL format.");
    console.error("");
    process.exit(1);
  }

  // Check if any of the formatted URLs are accessible
  for (const formattedUrl of formattedUrls) {
    if (sites.includes(formattedUrl)) {
      return formattedUrl;
    }
  }

  // If none of the formatted URLs are accessible
  console.error("❌ This service account doesn't have access to this site.");
  console.error("");
  process.exit(1);
}
/**
 * Normalises user-provided URLs into absolute URLs for the given site.
 *
 * Each entry is trimmed and then expanded: relative paths (with or without a leading
 * slash) are prefixed with the site's protocol and domain, protocol-less URLs starting
 * with the domain get the protocol, and full URLs are returned unchanged. `sc-domain:`
 * sites default to `https://`.
 * @param siteUrl - The URL of the site.
 * @param urls - The URLs to check.
 * @returns An array with one absolute URL per entry of `urls`, in the same order.
 */
export function checkCustomUrls(siteUrl: string, urls: string[]) {
  const protocol = siteUrl.startsWith("http://") ? "http://" : "https://";
  // URL-prefix site URLs end with "/"; drop it so joining with a path does not yield "//".
  const domain = siteUrl
    .replace("https://", "")
    .replace("http://", "")
    .replace("sc-domain:", "")
    .replace(/\/+$/, "");

  const formattedUrls: string[] = urls.map((url) => {
    url = url.trim();
    if (url.startsWith("/")) {
      // the url is a relative path (e.g. /about)
      return `${protocol}${domain}${url}`;
    } else if (url.startsWith("http://") || url.startsWith("https://")) {
      // the url is already a full url (e.g. https://domain.com/about)
      return url;
    } else if (url.startsWith(domain)) {
      // the url is a full url without the protocol (e.g. domain.com/about)
      return `${protocol}${url}`;
    } else {
      // the url is a relative path without the leading slash (e.g. about)
      return `${protocol}${domain}/${url}`;
    }
  });

  return formattedUrls;
}
/**
 * Retrieves the indexing status of a page through the URL Inspection API.
 *
 * HTTP failures are mapped to statuses instead of throwing: 403 gives `Status.Forbidden`,
 * 429 gives `Status.RateLimited` and any other status >= 300 gives `Status.Error`.
 * Exceptions (network failure, unexpected response body) are logged and rethrown.
 * @param accessToken - The access token for authentication.
 * @param siteUrl - The URL of the site.
 * @param inspectionUrl - The URL of the page to inspect.
 * @returns A promise resolving to the status of indexing.
 */
export async function getPageIndexingStatus(
  accessToken: string,
  siteUrl: string,
  inspectionUrl: string
): Promise<Status> {
  try {
    const response = await fetchRetry(`https://searchconsole.googleapis.com/v1/urlInspection/index:inspect`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${accessToken}`,
      },
      body: JSON.stringify({
        inspectionUrl,
        siteUrl,
      }),
    });

    if (response.status === 403) {
      console.error(`🔐 This service account doesn't have access to this site.`);
      console.error(await response.text());
      return Status.Forbidden;
    }

    if (response.status >= 300) {
      if (response.status === 429) {
        return Status.RateLimited;
      } else {
        console.error(`❌ Failed to get indexing status.`);
        console.error(`Response was: ${response.status}`);
        console.error(await response.text());
        return Status.Error;
      }
    }

    const body = await response.json();
    // The coverage state reported by the API is used directly as the status.
    return body.inspectionResult.indexStatusResult.coverageState;
  } catch (error) {
    console.error(`❌ Failed to get indexing status.`);
    console.error(`Error was: ${error}`);
    throw error;
  }
}
/**
 * Maps an indexing status to the emoji shown next to it in the summary output.
 * Statuses without a dedicated emoji fall back to "❌".
 * @param status - The status for which to retrieve the emoji.
 * @returns The emoji representing the status.
 */
export function getEmojiForStatus(status: Status) {
  if (status === Status.SubmittedAndIndexed) {
    return "✅";
  }
  if (status === Status.DuplicateWithoutUserSelectedCanonical) {
    return "😵";
  }
  if (status === Status.CrawledCurrentlyNotIndexed || status === Status.DiscoveredCurrentlyNotIndexed) {
    return "👀";
  }
  if (status === Status.PageWithRedirect) {
    return "🔀";
  }
  if (status === Status.URLIsUnknownToGoogle) {
    return "❓";
  }
  if (status === Status.RateLimited) {
    return "🚦";
  }
  return "❌";
}
/**
 * Retrieves the Indexing API notification metadata for the given URL.
 *
 * On a 429 response the request is retried while `options.retriesOnRateLimit` is
 * positive, waiting longer before each further attempt; the status of the retried
 * request is what gets returned. With no retries left the process exits with code 1.
 * 403 and 5xx responses are logged and their status is returned.
 * @param accessToken - The access token for authentication.
 * @param url - The URL for which to retrieve metadata.
 * @param options - The options for the request.
 * @returns The HTTP status of the (last) metadata request.
 */
export async function getPublishMetadata(
  accessToken: string,
  url: string,
  options?: { retriesOnRateLimit: number }
): Promise<number> {
  const response = await fetchRetry(
    `https://indexing.googleapis.com/v3/urlNotifications/metadata?url=${encodeURIComponent(url)}`,
    {
      method: "GET",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${accessToken}`,
      },
    }
  );

  if (response.status === 403) {
    console.error(`🔐 This service account doesn't have access to this site.`);
    console.error(`Response was: ${response.status}`);
    console.error(await response.text());
  }

  if (response.status === 429) {
    if (options?.retriesOnRateLimit && options.retriesOnRateLimit > 0) {
      // increase waiting time for each retry
      const rpmWaitingTime = (QUOTA.rpm.retries - options.retriesOnRateLimit + 1) * QUOTA.rpm.waitingTime;
      console.log(
        `🚦 Rate limit exceeded for read requests. Retries left: ${options.retriesOnRateLimit}. Waiting for ${
          rpmWaitingTime / 1000
        }sec.`
      );
      await new Promise((resolve) => setTimeout(resolve, rpmWaitingTime));
      // Return the retry's outcome; falling through would report this attempt's stale 429.
      return getPublishMetadata(accessToken, url, { retriesOnRateLimit: options.retriesOnRateLimit - 1 });
    } else {
      console.error("🚦 Rate limit exceeded, try again later.");
      console.error("");
      console.error("   Quota: https://developers.google.com/search/apis/indexing-api/v3/quota-pricing#quota");
      console.error("   Usage: https://console.cloud.google.com/apis/enabled");
      console.error("");
      process.exit(1);
    }
  }

  if (response.status >= 500) {
    console.error(`❌ Failed to get publish metadata.`);
    console.error(`Response was: ${response.status}`);
    console.error(await response.text());
  }

  return response.status;
}
/**
 * Sends a `URL_UPDATED` notification to the Indexing API for the given URL.
 * Failures are reported on stderr; a 429 response additionally terminates the process
 * with exit code 1.
 * @param accessToken - The access token for authentication.
 * @param url - The URL to be indexed.
 */
export async function requestIndexing(accessToken: string, url: string) {
  const payload = JSON.stringify({
    url: url,
    type: "URL_UPDATED",
  });
  const response = await fetchRetry("https://indexing.googleapis.com/v3/urlNotifications:publish", {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${accessToken}`,
    },
    body: payload,
  });

  const { status } = response;

  if (status === 403) {
    console.error(`🔐 This service account doesn't have access to this site.`);
    console.error(`Response was: ${status}`);
  }

  const failed = status >= 300;
  if (!failed) {
    return;
  }

  if (status !== 429) {
    console.error(`❌ Failed to request indexing.`);
    console.error(`Response was: ${status}`);
    console.error(await response.text());
    return;
  }

  console.error("🚦 Rate limit exceeded, try again later.");
  console.error("");
  console.error("   Quota: https://developers.google.com/search/apis/indexing-api/v3/quota-pricing#quota");
  console.error("   Usage: https://console.cloud.google.com/apis/enabled");
  console.error("");
  process.exit(1);
}
================================================
FILE: src/shared/index.ts
================================================
export * from "./auth";
export * from "./gsc";
export * from "./sitemap";
export * from "./types";
export * from "./utils";
================================================
FILE: src/shared/sitemap.ts
================================================
import Sitemapper from "sitemapper";
import { fetchRetry } from "./utils";
import { webmasters_v3 } from "googleapis";
/**
 * Asks the Google Webmasters API which sitemaps are registered for a site.
 *
 * Every failure mode (403, any other status of 300 or above, or a response without a
 * `sitemap` field) is logged to stderr and yields an empty array instead of throwing.
 * @param accessToken The access token for authentication.
 * @param siteUrl The URL of the site for which to retrieve the list of sitemaps.
 * @returns The `path` of every listed sitemap that has one; entries without a path are dropped.
 */
async function getSitemapsList(accessToken: string, siteUrl: string) {
  const endpoint = `https://www.googleapis.com/webmasters/v3/sites/${encodeURIComponent(siteUrl)}/sitemaps`;
  const headers = {
    "Content-Type": "application/json",
    Authorization: `Bearer ${accessToken}`,
  };
  const response = await fetchRetry(endpoint, { headers });

  if (response.status === 403) {
    console.error(`🔐 This service account doesn't have access to this site.`);
    return [];
  }

  if (response.status >= 300) {
    console.error(`❌ Failed to get list of sitemaps.`);
    console.error(`Response was: ${response.status}`);
    console.error(await response.text());
    return [];
  }

  const body: webmasters_v3.Schema$SitemapsListResponse = await response.json();
  const entries = body.sitemap;
  if (!entries) {
    console.error("❌ No sitemaps found, add them to Google Search Console and try again.");
    return [];
  }

  // Keep only entries that actually carry a path.
  return entries.flatMap((entry) => (entry.path === undefined || entry.path === null ? [] : [entry.path]));
}
/**
 * Collects the page URLs contained in every sitemap registered for a site.
 *
 * The sitemaps are fetched one after another, and duplicates are removed while
 * keeping the order in which each page was first seen.
 * @param accessToken The access token for authentication.
 * @param siteUrl The URL of the site for which to retrieve the sitemap pages.
 * @returns A two-element array: the sitemap URLs, followed by the unique page URLs found in them.
 */
export async function getSitemapPages(accessToken: string, siteUrl: string) {
  const sitemaps = await getSitemapsList(accessToken, siteUrl);

  const uniquePages = new Set<string>();
  for (const sitemapUrl of sitemaps) {
    const sitemapper = new Sitemapper({ url: sitemapUrl });
    const result = await sitemapper.fetch();
    for (const page of result.sites) {
      uniquePages.add(page);
    }
  }

  return [sitemaps, [...uniquePages]];
}
================================================
FILE: src/shared/types.ts
================================================
/**
 * Enum representing indexing status of a URL.
 *
 * This is a string enum: each member's runtime value is the human-readable label on its right-hand side.
 * NOTE(review): the first six labels look like page-indexing states reported by Google, while
 * `RateLimited`, `Forbidden` and `Error` look like statuses this tool assigns itself when a
 * request fails — confirm against the code that produces these values.
 */
export enum Status {
SubmittedAndIndexed = "Submitted and indexed",
DuplicateWithoutUserSelectedCanonical = "Duplicate without user-selected canonical",
CrawledCurrentlyNotIndexed = "Crawled - currently not indexed",
DiscoveredCurrentlyNotIndexed = "Discovered - currently not indexed",
PageWithRedirect = "Page with redirect",
URLIsUnknownToGoogle = "URL is unknown to Google",
// The remaining members have no spaces or punctuation in their labels, unlike the ones above.
RateLimited = "RateLimited",
Forbidden = "Forbidden",
Error = "Error",
}
================================================
FILE: src/shared/utils.ts
================================================
/**
 * Creates an array of chunks from the given array with a specified size.
 *
 * The input array is not modified; every chunk is a fresh slice. The last chunk may be
 * shorter than `size` when the array length is not a multiple of it.
 * @param arr The array to be chunked.
 * @param size The maximum number of items per chunk; must be a positive, finite number.
 * @returns An array of chunks, empty when `arr` is empty.
 * @throws RangeError when `size` is zero, negative, `NaN` or infinite.
 */
const createChunks = <T>(arr: T[], size: number): T[][] => {
  // Without this guard a negative, NaN or infinite size yields zero chunks and silently
  // drops every item, while a size of 0 fails with an opaque "Invalid array length".
  if (!Number.isFinite(size) || size <= 0) {
    throw new RangeError(`Chunk size must be a positive finite number, received ${size}`);
  }

  const chunkCount = Math.ceil(arr.length / size);
  return Array.from({ length: chunkCount }, (_, i) => arr.slice(i * size, i * size + size));
};
/**
 * Runs a task over a list of items, one batch at a time.
 *
 * All tasks of a batch are started together and awaited as a group before the next
 * batch begins; the callback fires once after each batch has settled successfully.
 * @param task The task function to be executed on each item.
 * @param items The array of items on which the task is to be executed.
 * @param batchSize The size of each batch.
 * @param onBatchComplete Called with the zero-based index of the finished batch and the total number of batches.
 */
export async function batch(
  task: (url: string) => void,
  items: string[],
  batchSize: number,
  onBatchComplete: (batchIndex: number, batchCount: number) => void
) {
  const groups = createChunks(items, batchSize);

  let finished = 0;
  for (const group of groups) {
    await Promise.all(group.map(task));
    onBatchComplete(finished, groups.length);
    finished += 1;
  }
}
/**
 * Fetches a resource, trying again when the request fails.
 *
 * A failure is either a rejected `fetch` call or a response with status 500 or above.
 * Retries happen immediately, without any delay, so at most `retries + 1` requests are sent.
 * @param url The URL of the resource to fetch.
 * @param options The options for the fetch request.
 * @param retries The number of retry attempts (default is 5).
 * @returns A Promise resolving to the first response whose status is below 500.
 * @throws The error of the last attempt once all retries are used up.
 */
export async function fetchRetry(url: string, options: RequestInit, retries: number = 5) {
  let remaining = retries;

  for (;;) {
    try {
      const response = await fetch(url, options);
      if (response.status >= 500) {
        const body = await response.text();
        throw new Error(`Server error code ${response.status}\n${body}`);
      }
      return response;
    } catch (err) {
      // Out of retries: surface the failure of this last attempt.
      if (remaining <= 0) {
        throw err;
      }
      remaining -= 1;
    }
  }
}
================================================
FILE: tsconfig.json
================================================
{
"compilerOptions": {
"target": "esnext",
"module": "commonjs",
"lib": ["dom", "es6", "es2021", "esnext.asynciterable"],
"skipLibCheck": true,
"sourceMap": true,
"outDir": "./dist",
"moduleResolution": "node",
"removeComments": false,
"noImplicitAny": false,
"strictNullChecks": true,
"strictFunctionTypes": true,
"noImplicitThis": true,
"noUnusedLocals": true,
"noUnusedParameters": true,
"noImplicitReturns": true,
"noFallthroughCasesInSwitch": true,
"allowSyntheticDefaultImports": true,
"esModuleInterop": true,
"emitDecoratorMetadata": true,
"experimentalDecorators": true,
"resolveJsonModule": true
},
"include": ["**/*.ts"],
"exclude": ["node_modules/", "dist/"]
}
================================================
FILE: tsup.config.ts
================================================
import { defineConfig, Options } from "tsup";
// Build options handed to tsup through `defineConfig` below.
// Kept as a named, `Options`-typed constant so the object literal is type-checked.
const config: Options = {
// Every TypeScript file under src/ is listed as an entry.
entry: ["src/**/*.ts"],
splitting: true,
sourcemap: true,
clean: true,
platform: "node",
dts: true,
minify: true,
};
export default defineConfig(config);